/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <net/flow_dissector.h>
#include <net/flow_offload.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
#include <net/tc_act/tc_mpls.h>
#include <net/psample.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include <net/bareudp.h>
#include <net/bonding.h>
#include "en.h"
#include "en_rep.h"
#include "en/rep/tc.h"
#include "en/rep/neigh.h"
#include "en_tc.h"
#include "eswitch.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
#include "en/mapping.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/tc_priv.h"
#include "en/tc_tun_encap.h"
#include "esw/sample.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "lib/fs_chains.h"
#include "diag/en_tc_tracepoint.h"
#include <asm/div64.h>

#define nic_chains(priv) ((priv)->fs.tc.chains)
#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)

#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)

struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
        [CHAIN_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
                .moffset = 0,
                .mlen = 16,
        },
        [VPORT_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
                .moffset = 16,
                .mlen = 16,
        },
        [TUNNEL_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
                .moffset = 8,
                .mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
                .soffset = MLX5_BYTE_OFF(fte_match_param,
                                         misc_parameters_2.metadata_reg_c_1),
        },
        [ZONE_TO_REG] = zone_to_reg_ct,
        [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
        [CTSTATE_TO_REG] = ctstate_to_reg_ct,
        [MARK_TO_REG] = mark_to_reg_ct,
        [LABELS_TO_REG] = labels_to_reg_ct,
        [FTEID_TO_REG] = fteid_to_reg_ct,
        /* For NIC rules we store the restore metadata directly
         * into reg_b, which is passed to SW, since we don't
         * jump between steering domains.
         */
        [NIC_CHAIN_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
                .moffset = 0,
                .mlen = 16,
        },
        [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
};

/* To avoid a false lockdep warning, give tc_ht a lock class distinct from
 * that of the hash tables used internally by flow groups. Deleting the
 * last flow from a group also deletes the group, and del_sw_flow_group()
 * then calls rhashtable_destroy() on fg->ftes_hash, taking that table's
 * ht->mutex - a different mutex than the tc_ht one here.
 */
static struct lock_class_key tc_ht_lock_key;

static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);

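/* Read-modify-write the 32-bit match register mapped for @type: clear the
 * mlen-wide window at moffset in both the match criteria and match value,
 * then merge in the new @val/@mask pair. soffset locates the register
 * inside the fte_match_param layout.
 */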
void
mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
                            enum mlx5e_tc_attr_to_reg type,
                            u32 val,
                            u32 mask)
{
        void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
        int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
        int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
        int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        u32 max_mask = GENMASK(match_len - 1, 0);
        __be32 curr_mask_be, curr_val_be;
        u32 curr_mask, curr_val;

        fmask = headers_c + soffset;
        fval = headers_v + soffset;

        memcpy(&curr_mask_be, fmask, 4);
        memcpy(&curr_val_be, fval, 4);

        curr_mask = be32_to_cpu(curr_mask_be);
        curr_val = be32_to_cpu(curr_val_be);

        /* shift the new value and mask to the mapped offset */
        WARN_ON(mask > max_mask);
        mask <<= moffset;
        val <<= moffset;
        max_mask <<= moffset;

        /* clear the old value and mask in that window */
        curr_mask &= ~max_mask;
        curr_val &= ~max_mask;

        /* merge in the new value and mask */
        curr_mask |= mask;
        curr_val |= val;

        /* convert back to be32 and write out */
        curr_mask_be = cpu_to_be32(curr_mask);
        curr_val_be = cpu_to_be32(curr_val);

        memcpy(fmask, &curr_mask_be, 4);
        memcpy(fval, &curr_val_be, 4);

        spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
}

void
mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
                                enum mlx5e_tc_attr_to_reg type,
                                u32 *val,
                                u32 *mask)
{
        void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
        int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
        int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
        int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        u32 max_mask = GENMASK(match_len - 1, 0);
        __be32 curr_mask_be, curr_val_be;
        u32 curr_mask, curr_val;

        fmask = headers_c + soffset;
        fval = headers_v + soffset;

        memcpy(&curr_mask_be, fmask, 4);
        memcpy(&curr_val_be, fval, 4);

        curr_mask = be32_to_cpu(curr_mask_be);
        curr_val = be32_to_cpu(curr_val_be);

        *mask = (curr_mask >> moffset) & max_mask;
        *val = (curr_val >> moffset) & max_mask;
}

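/* Append a SET modify-header action that writes @data into the register
 * mapped for @type, reallocating the action array as needed. Returns the
 * index of the new action (so callers can patch it later via
 * mlx5e_tc_match_to_reg_mod_hdr_change()) or a negative errno.
 */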
int
mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
                                     struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                                     enum mlx5_flow_namespace_type ns,
                                     enum mlx5e_tc_attr_to_reg type,
                                     u32 data)
{
        int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
        int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
        int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        char *modact;
        int err;

        err = alloc_mod_hdr_actions(mdev, ns, mod_hdr_acts);
        if (err)
                return err;

        modact = mod_hdr_acts->actions +
                 (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);

        /* The firmware length field is 5 bits wide; the value 0 encodes
         * a full 32-bit write.
         */
        if (mlen == 32)
                mlen = 0;

        MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, modact, field, mfield);
        MLX5_SET(set_action_in, modact, offset, moffset);
        MLX5_SET(set_action_in, modact, length, mlen);
        MLX5_SET(set_action_in, modact, data, data);
        err = mod_hdr_acts->num_actions;
        mod_hdr_acts->num_actions++;

        return err;
}

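/* In switchdev mode connection tracking state is shared across all
 * representors and lives on the uplink representor; otherwise each NIC
 * instance keeps its own CT private data in priv->fs.tc.ct.
 */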
static struct mlx5_tc_ct_priv *
get_ct_priv(struct mlx5e_priv *priv)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;

        if (is_mdev_switchdev_mode(priv->mdev)) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                uplink_priv = &uplink_rpriv->uplink_priv;

                return uplink_priv->ct_priv;
        }

        return priv->fs.tc.ct;
}

#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
static struct mlx5_esw_psample *
get_sample_priv(struct mlx5e_priv *priv)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;

        if (is_mdev_switchdev_mode(priv->mdev)) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                uplink_priv = &uplink_rpriv->uplink_priv;

                return uplink_priv->esw_psample;
        }

        return NULL;
}
#endif

struct mlx5_flow_handle *
mlx5_tc_rule_insert(struct mlx5e_priv *priv,
                    struct mlx5_flow_spec *spec,
                    struct mlx5_flow_attr *attr)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (is_mdev_switchdev_mode(priv->mdev))
                return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

        return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
}

void
mlx5_tc_rule_delete(struct mlx5e_priv *priv,
                    struct mlx5_flow_handle *rule,
                    struct mlx5_flow_attr *attr)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (is_mdev_switchdev_mode(priv->mdev)) {
                mlx5_eswitch_del_offloaded_rule(esw, rule, attr);

                return;
        }

        mlx5e_del_offloaded_nic_rule(priv, rule, attr);
}

int
mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
                          struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                          enum mlx5_flow_namespace_type ns,
                          enum mlx5e_tc_attr_to_reg type,
                          u32 data)
{
        int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);

        return ret < 0 ? ret : 0;
}

void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
                                          struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                                          enum mlx5e_tc_attr_to_reg type,
                                          int act_id, u32 data)
{
        int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
        int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
        int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        char *modact;

        modact = mod_hdr_acts->actions + (act_id * MLX5_MH_ACT_SZ);

        /* Firmware has a 5-bit length field and 0 means 32 bits */
        if (mlen == 32)
                mlen = 0;

        MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, modact, field, mfield);
        MLX5_SET(set_action_in, modact, offset, moffset);
        MLX5_SET(set_action_in, modact, length, mlen);
        MLX5_SET(set_action_in, modact, data, data);
}

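/* A hairpin forwards packets between two functions of the same NIC
 * entirely in hardware: the pair of hairpin RQs/SQs loops traffic from
 * the receiving function to the peer's send side without software
 * involvement. When num_channels > 1, RSS objects (indirection RQT,
 * per-traffic-type TIRs and a TTC table) spread the hairpinned traffic.
 */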
struct mlx5e_hairpin {
        struct mlx5_hairpin *pair;

        struct mlx5_core_dev *func_mdev;
        struct mlx5e_priv *func_priv;
        u32 tdn;
        u32 tirn;

        int num_channels;
        struct mlx5e_rqt indir_rqt;
        u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
        struct mlx5e_ttc_table ttc;
};

struct mlx5e_hairpin_entry {
        /* a node of a hash table which keeps all the hairpin entries */
        struct hlist_node hairpin_hlist;

        /* protects flows list */
        spinlock_t flows_lock;
        /* flows sharing the same hairpin */
        struct list_head flows;
        /* hpe's that were not fully initialized when dead peer update event
         * function traversed them.
         */
        struct list_head dead_peer_wait_list;

        u16 peer_vhca_id;
        u8 prio;
        struct mlx5e_hairpin *hp;
        refcount_t refcnt;
        struct completion res_ready;
};

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow);

struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
        if (!flow || !refcount_inc_not_zero(&flow->refcnt))
                return ERR_PTR(-EINVAL);
        return flow;
}

void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
        if (refcount_dec_and_test(&flow->refcnt)) {
                mlx5e_tc_del_flow(priv, flow);
                kfree_rcu(flow, rcu_head);
        }
}

bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, ESWITCH);
}

static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, FT);
}

bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, OFFLOADED);
}

static int get_flow_name_space(struct mlx5e_tc_flow *flow)
{
        return mlx5e_is_eswitch_flow(flow) ?
                MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
}

static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        return get_flow_name_space(flow) == MLX5_FLOW_NAMESPACE_FDB ?
                &esw->offloads.mod_hdr :
                &priv->fs.tc.mod_hdr;
}

static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
                                struct mlx5e_tc_flow *flow,
                                struct mlx5e_tc_flow_parse_attr *parse_attr)
{
        struct mlx5_modify_hdr *modify_hdr;
        struct mlx5e_mod_hdr_handle *mh;

        mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
                                  get_flow_name_space(flow),
                                  &parse_attr->mod_hdr_acts);
        if (IS_ERR(mh))
                return PTR_ERR(mh);

        modify_hdr = mlx5e_mod_hdr_get(mh);
        flow->attr->modify_hdr = modify_hdr;
        flow->mh = mh;

        return 0;
}

static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
                                 struct mlx5e_tc_flow *flow)
{
        /* flow wasn't fully initialized */
        if (!flow->mh)
                return;

        mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
                             flow->mh);
        flow->mh = NULL;
}

static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
        struct net_device *netdev;
        struct mlx5e_priv *priv;

        netdev = __dev_get_by_index(net, ifindex);
        if (!netdev)
                return ERR_PTR(-ENODEV);

        priv = netdev_priv(netdev);
        return priv->mdev;
}

static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
        u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {};
        void *tirc;
        int err;

        err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
        if (err)
                goto alloc_tdn_err;

        tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

        MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
        MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
        MLX5_SET(tirc, tirc, transport_domain, hp->tdn);

        err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn);
        if (err)
                goto create_tir_err;

        return 0;

create_tir_err:
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
alloc_tdn_err:
        return err;
}

static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
        mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}

static int mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
{
        struct mlx5e_priv *priv = hp->func_priv;
        int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
        u32 *indirection_rqt, rqn;

        indirection_rqt = kcalloc(sz, sizeof(*indirection_rqt), GFP_KERNEL);
        if (!indirection_rqt)
                return -ENOMEM;

        mlx5e_build_default_indir_rqt(indirection_rqt, sz,
                                      hp->num_channels);

        for (i = 0; i < sz; i++) {
                ix = i;
                if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
                        ix = mlx5e_bits_invert(i, ilog2(sz));
                ix = indirection_rqt[ix];
                rqn = hp->pair->rqn[ix];
                MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
        }

        kfree(indirection_rqt);
        return 0;
}

static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
        int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
        struct mlx5e_priv *priv = hp->func_priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        void *rqtc;
        u32 *in;

        inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

        MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
        MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

        err = mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
        if (err)
                goto out;

        err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
        if (!err)
                hp->indir_rqt.enabled = true;

out:
        kvfree(in);
        return err;
}

static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        u32 in[MLX5_ST_SZ_DW(create_tir_in)];
        int tt, i, err;
        void *tirc;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
                struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);

                memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
                tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

                MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
                MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
                MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
                mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);

                err = mlx5_core_create_tir(hp->func_mdev, in,
                                           &hp->indir_tirn[tt]);
                if (err) {
                        mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
                        goto err_destroy_tirs;
                }
        }
        return 0;

err_destroy_tirs:
        for (i = 0; i < tt; i++)
                mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
        return err;
}

static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
{
        int tt;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
                mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
}

static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
                                         struct ttc_params *ttc_params)
{
        struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
        int tt;

        memset(ttc_params, 0, sizeof(*ttc_params));

        ttc_params->any_tt_tirn = hp->tirn;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
                ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];

        ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
        ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_TC_PRIO;
}

static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        struct ttc_params ttc_params;
        int err;

        err = mlx5e_hairpin_create_indirect_rqt(hp);
        if (err)
                return err;

        err = mlx5e_hairpin_create_indirect_tirs(hp);
        if (err)
                goto err_create_indirect_tirs;

        mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
        err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
        if (err)
                goto err_create_ttc_table;

        netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
                   hp->num_channels, hp->ttc.ft.t->id);

        return 0;

err_create_ttc_table:
        mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
        mlx5e_destroy_rqt(priv, &hp->indir_rqt);

        return err;
}

static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;

        mlx5e_destroy_ttc_table(priv, &hp->ttc);
        mlx5e_hairpin_destroy_indirect_tirs(hp);
        mlx5e_destroy_rqt(priv, &hp->indir_rqt);
}

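/* Build a full hairpin instance: the core RQ/SQ pair against the peer
 * device, the transport objects (transport domain + direct TIR), and,
 * when more than one channel was requested, the RSS objects on top.
 */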
static struct mlx5e_hairpin *
mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
                     int peer_ifindex)
{
        struct mlx5_core_dev *func_mdev, *peer_mdev;
        struct mlx5e_hairpin *hp;
        struct mlx5_hairpin *pair;
        int err;

        hp = kzalloc(sizeof(*hp), GFP_KERNEL);
        if (!hp)
                return ERR_PTR(-ENOMEM);

        func_mdev = priv->mdev;
        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
        if (IS_ERR(peer_mdev)) {
                err = PTR_ERR(peer_mdev);
                goto create_pair_err;
        }

        pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
        if (IS_ERR(pair)) {
                err = PTR_ERR(pair);
                goto create_pair_err;
        }
        hp->pair = pair;
        hp->func_mdev = func_mdev;
        hp->func_priv = priv;
        hp->num_channels = params->num_channels;

        err = mlx5e_hairpin_create_transport(hp);
        if (err)
                goto create_transport_err;

        if (hp->num_channels > 1) {
                err = mlx5e_hairpin_rss_init(hp);
                if (err)
                        goto rss_init_err;
        }

        return hp;

rss_init_err:
        mlx5e_hairpin_destroy_transport(hp);
create_transport_err:
        mlx5_core_hairpin_destroy(hp->pair);
create_pair_err:
        kfree(hp);
        return ERR_PTR(err);
}

static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
{
        if (hp->num_channels > 1)
                mlx5e_hairpin_rss_cleanup(hp);
        mlx5e_hairpin_destroy_transport(hp);
        mlx5_core_hairpin_destroy(hp->pair);
        kvfree(hp);
}

static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
        return (peer_vhca_id << 16 | prio);
}

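/* Look up a hairpin entry by (peer vhca_id, prio) and take a reference.
 * Caller must hold priv->fs.tc.hairpin_tbl_lock.
 */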
static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
                                                     u16 peer_vhca_id, u8 prio)
{
        struct mlx5e_hairpin_entry *hpe;
        u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);

        hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
                               hairpin_hlist, hash_key) {
                if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
                        refcount_inc(&hpe->refcnt);
                        return hpe;
                }
        }

        return NULL;
}

static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
                              struct mlx5e_hairpin_entry *hpe)
{
        /* no more hairpin flows for us, release the hairpin pair */
        if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
                return;
        hash_del(&hpe->hairpin_hlist);
        mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

        if (!IS_ERR_OR_NULL(hpe->hp)) {
                netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
                           dev_name(hpe->hp->pair->peer_mdev->device));

                mlx5e_hairpin_destroy(hpe->hp);
        }

        WARN_ON(!list_empty(&hpe->flows));
        kfree(hpe);
}

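/* Hairpin entries are keyed per PCP priority (0..7); flows that don't
 * match on a specific VLAN priority all share this extra bucket.
 */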
#define UNKNOWN_MATCH_PRIO 8

static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
                                  struct mlx5_flow_spec *spec, u8 *match_prio,
                                  struct netlink_ext_ack *extack)
{
        void *headers_c, *headers_v;
        u8 prio_val, prio_mask = 0;
        bool vlan_present;

#ifdef CONFIG_MLX5_CORE_EN_DCB
        if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "only PCP trust state supported for hairpin");
                return -EOPNOTSUPP;
        }
#endif
        headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
        headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);

        vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
        if (vlan_present) {
                prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
                prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
        }

        if (!vlan_present || !prio_mask) {
                prio_val = UNKNOWN_MATCH_PRIO;
        } else if (prio_mask != 0x7) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "masked priority match not supported for hairpin");
                return -EOPNOTSUPP;
        }

        *match_prio = prio_val;
        return 0;
}

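/* Attach @flow to a hairpin entry keyed by (peer vhca_id, match prio),
 * creating the entry and the underlying hairpin pair on first use. The
 * entry is published in the hash table before the hardware resources are
 * ready; concurrent callers wait on hpe->res_ready instead of racing to
 * create a second pair.
 */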
static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow,
                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
                                  struct netlink_ext_ack *extack)
{
        int peer_ifindex = parse_attr->mirred_ifindex[0];
        struct mlx5_hairpin_params params;
        struct mlx5_core_dev *peer_mdev;
        struct mlx5e_hairpin_entry *hpe;
        struct mlx5e_hairpin *hp;
        u64 link_speed64;
        u32 link_speed;
        u8 match_prio;
        u16 peer_id;
        int err;

        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
        if (IS_ERR(peer_mdev)) {
                NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
                return PTR_ERR(peer_mdev);
        }

        if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
                NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
                return -EOPNOTSUPP;
        }

        peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
        err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
                                     extack);
        if (err)
                return err;

        mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
        hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
        if (hpe) {
                mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
                wait_for_completion(&hpe->res_ready);

                if (IS_ERR(hpe->hp)) {
                        err = -EREMOTEIO;
                        goto out_err;
                }
                goto attach_flow;
        }

        hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
        if (!hpe) {
                mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
                return -ENOMEM;
        }

        spin_lock_init(&hpe->flows_lock);
        INIT_LIST_HEAD(&hpe->flows);
        INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
        hpe->peer_vhca_id = peer_id;
        hpe->prio = match_prio;
        refcount_set(&hpe->refcnt, 1);
        init_completion(&hpe->res_ready);

        hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
                 hash_hairpin_info(peer_id, match_prio));
        mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

        params.log_data_size = 16;
        params.log_data_size = min_t(u8, params.log_data_size,
                                     MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
        params.log_data_size = max_t(u8, params.log_data_size,
                                     MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));

        params.log_num_packets = params.log_data_size -
                                 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
        params.log_num_packets = min_t(u8, params.log_num_packets,
                                       MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));

        params.q_counter = priv->q_counter;
        /* set up one hairpin channel per 50Gbps share of the link speed */
        mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
        link_speed = max_t(u32, link_speed, 50000);
        link_speed64 = link_speed;
        do_div(link_speed64, 50000);
        params.num_channels = link_speed64;

        hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
        hpe->hp = hp;
        complete_all(&hpe->res_ready);
        if (IS_ERR(hp)) {
                err = PTR_ERR(hp);
                goto out_err;
        }

        netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
                   hp->tirn, hp->pair->rqn[0],
                   dev_name(hp->pair->peer_mdev->device),
                   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

attach_flow:
        if (hpe->hp->num_channels > 1) {
                flow_flag_set(flow, HAIRPIN_RSS);
                flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
        } else {
                flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn;
        }

        flow->hpe = hpe;
        spin_lock(&hpe->flows_lock);
        list_add(&flow->hairpin, &hpe->flows);
        spin_unlock(&hpe->flows_lock);

        return 0;

out_err:
        mlx5e_hairpin_put(priv, hpe);
        return err;
}

static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
                                   struct mlx5e_tc_flow *flow)
{
        /* flow wasn't fully initialized */
        if (!flow->hpe)
                return;

        spin_lock(&flow->hpe->flows_lock);
        list_del(&flow->hairpin);
        spin_unlock(&flow->hpe->flows_lock);

        mlx5e_hairpin_put(priv, flow->hpe);
        flow->hpe = NULL;
}

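/* Translate @attr into a NIC flow rule. Forward destinations are picked
 * in priority order - an explicit destination table, a hairpin TTC table
 * or TIR, then the destination chain or the VLAN table - and a flow
 * counter is appended last when counting is requested.
 */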
struct mlx5_flow_handle *
mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
                             struct mlx5_flow_spec *spec,
                             struct mlx5_flow_attr *attr)
{
        struct mlx5_flow_context *flow_context = &spec->flow_context;
        struct mlx5_fs_chains *nic_chains = nic_chains(priv);
        struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
        struct mlx5e_tc_table *tc = &priv->fs.tc;
        struct mlx5_flow_destination dest[2] = {};
        struct mlx5_flow_act flow_act = {
                .action = attr->action,
                .flags    = FLOW_ACT_NO_APPEND,
        };
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_table *ft;
        int dest_ix = 0;

        flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
        flow_context->flow_tag = nic_attr->flow_tag;

        if (attr->dest_ft) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                dest[dest_ix].ft = attr->dest_ft;
                dest_ix++;
        } else if (nic_attr->hairpin_ft) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                dest[dest_ix].ft = nic_attr->hairpin_ft;
                dest_ix++;
        } else if (nic_attr->hairpin_tirn) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
                dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
                dest_ix++;
        } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                if (attr->dest_chain) {
                        dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
                                                                 attr->dest_chain, 1,
                                                                 MLX5E_TC_FT_LEVEL);
                        if (IS_ERR(dest[dest_ix].ft))
                                return ERR_CAST(dest[dest_ix].ft);
                } else {
                        dest[dest_ix].ft = mlx5e_vlan_get_flowtable(priv->fs.vlan);
                }
                dest_ix++;
        }

        if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
            MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
                flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;

        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
                dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
                dest_ix++;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                flow_act.modify_hdr = attr->modify_hdr;

        mutex_lock(&tc->t_lock);
        if (IS_ERR_OR_NULL(tc->t)) {
                /* Create the root table here if it doesn't exist yet */
                tc->t =
                        mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);

                if (IS_ERR(tc->t)) {
                        mutex_unlock(&tc->t_lock);
                        netdev_err(priv->netdev,
                                   "Failed to create tc offload table\n");
                        rule = ERR_CAST(priv->fs.tc.t);
                        goto err_ft_get;
                }
        }
        mutex_unlock(&tc->t_lock);

        if (attr->chain || attr->prio)
                ft = mlx5_chains_get_table(nic_chains,
                                           attr->chain, attr->prio,
                                           MLX5E_TC_FT_LEVEL);
        else
                ft = attr->ft;

        if (IS_ERR(ft)) {
                rule = ERR_CAST(ft);
                goto err_ft_get;
        }

        if (attr->outer_match_level != MLX5_MATCH_NONE)
                spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

        rule = mlx5_add_flow_rules(ft, spec,
                                   &flow_act, dest, dest_ix);
        if (IS_ERR(rule))
                goto err_rule;

        return rule;

err_rule:
        if (attr->chain || attr->prio)
                mlx5_chains_put_table(nic_chains,
                                      attr->chain, attr->prio,
                                      MLX5E_TC_FT_LEVEL);
err_ft_get:
        if (attr->dest_chain)
                mlx5_chains_put_table(nic_chains,
                                      attr->dest_chain, 1,
                                      MLX5E_TC_FT_LEVEL);

        return ERR_CAST(rule);
}

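/* Instantiate a NIC (non-eswitch) flow: set up the hairpin pair, flow
 * counter and modify-header context as requested by the actions, then
 * insert the rule either through the CT module or directly.
 */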
static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow_parse_attr *parse_attr,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
{
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_core_dev *dev = priv->mdev;
        struct mlx5_fc *counter = NULL;
        int err;

        if (flow_flag_test(flow, HAIRPIN)) {
                err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
                if (err)
                        return err;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(dev, true);
                if (IS_ERR(counter))
                        return PTR_ERR(counter);

                attr->counter = counter;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
                dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
                if (err)
                        return err;
        }

        if (flow_flag_test(flow, CT))
                flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec,
                                                        attr, &parse_attr->mod_hdr_acts);
        else
                flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
                                                             attr);

        return PTR_ERR_OR_ZERO(flow->rule[0]);
}

void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
                                  struct mlx5_flow_handle *rule,
                                  struct mlx5_flow_attr *attr)
{
        struct mlx5_fs_chains *nic_chains = nic_chains(priv);

        mlx5_del_flow_rules(rule);

        if (attr->chain || attr->prio)
                mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
                                      MLX5E_TC_FT_LEVEL);

        if (attr->dest_chain)
                mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
                                      MLX5E_TC_FT_LEVEL);
}

static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5e_tc_table *tc = &priv->fs.tc;

        flow_flag_clear(flow, OFFLOADED);

        if (flow_flag_test(flow, CT))
                mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
        else if (!IS_ERR_OR_NULL(flow->rule[0]))
                mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);

        /* Remove root table if no rules are left to avoid
         * extra steering hops.
         */
        mutex_lock(&priv->fs.tc.t_lock);
        if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
            !IS_ERR_OR_NULL(tc->t)) {
                mlx5_chains_put_table(nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL);
                priv->fs.tc.t = NULL;
        }
        mutex_unlock(&priv->fs.tc.t_lock);

        kvfree(attr->parse_attr);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);

        mlx5_fc_destroy(priv->mdev, attr->counter);

        if (flow_flag_test(flow, HAIRPIN))
                mlx5e_hairpin_flow_del(priv, flow);

        kfree(flow->attr);
}

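/* Insert an FDB rule through the appropriate backend: rules flagged for
 * the slow path go straight to the eswitch, CT flows through the CT
 * module, sampled flows through the sampler, and everything else as a
 * plain offloaded rule. When the actions were split (split_count), an
 * extra forwarding rule is added and both must succeed.
 */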
struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
                           struct mlx5e_tc_flow *flow,
                           struct mlx5_flow_spec *spec,
                           struct mlx5_flow_attr *attr)
{
        struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
        struct mlx5_flow_handle *rule;

        if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
                return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

        if (flow_flag_test(flow, CT)) {
                mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;

                rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
                                               flow, spec, attr,
                                               mod_hdr_acts);
#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
        } else if (flow_flag_test(flow, SAMPLE)) {
                rule = mlx5_esw_sample_offload(get_sample_priv(flow->priv), spec, attr);
#endif
        } else {
                rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
        }

        if (IS_ERR(rule))
                return rule;

        if (attr->esw_attr->split_count) {
                flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
                if (IS_ERR(flow->rule[1])) {
                        if (flow_flag_test(flow, CT))
                                mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
                        else
                                mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
                        return flow->rule[1];
                }
        }

        return rule;
}

void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
                                  struct mlx5e_tc_flow *flow,
                                  struct mlx5_flow_attr *attr)
{
        flow_flag_clear(flow, OFFLOADED);

        if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
                goto offload_rule_0;

        if (flow_flag_test(flow, CT)) {
                mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
                return;
        }

#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
        if (flow_flag_test(flow, SAMPLE)) {
                mlx5_esw_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
                return;
        }
#endif

        if (attr->esw_attr->split_count)
                mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);

offload_rule_0:
        mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
}

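/* Offload @flow with a temporary attr that only forwards to the slow
 * path, e.g. while the neighbour of an encap destination is not yet
 * resolved, so packets keep flowing through software in the meantime.
 */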
struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
                              struct mlx5e_tc_flow *flow,
                              struct mlx5_flow_spec *spec)
{
        struct mlx5_flow_attr *slow_attr;
        struct mlx5_flow_handle *rule;

        slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
        if (!slow_attr)
                return ERR_PTR(-ENOMEM);

        memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr->esw_attr->split_count = 0;
        slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;

        rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
        if (!IS_ERR(rule))
                flow_flag_set(flow, SLOW);

        kfree(slow_attr);

        return rule;
}

void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
                                       struct mlx5e_tc_flow *flow)
{
        struct mlx5_flow_attr *slow_attr;

        slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
        if (!slow_attr) {
                mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
                return;
        }

        memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr->esw_attr->split_count = 0;
        slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
        mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
        flow_flag_clear(flow, SLOW);
        kfree(slow_attr);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_add(struct mlx5e_tc_flow *flow,
                             struct list_head *unready_flows)
{
        flow_flag_set(flow, NOT_READY);
        list_add_tail(&flow->unready, unready_flows);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_del(struct mlx5e_tc_flow *flow)
{
        list_del(&flow->unready);
        flow_flag_clear(flow, NOT_READY);
}

static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5_eswitch *esw;

        esw = flow->priv->mdev->priv.eswitch;
        rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &rpriv->uplink_priv;

        mutex_lock(&uplink_priv->unready_flows_lock);
        unready_flow_add(flow, &uplink_priv->unready_flows);
        mutex_unlock(&uplink_priv->unready_flows_lock);
}

static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5_eswitch *esw;

        esw = flow->priv->mdev->priv.eswitch;
        rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &rpriv->uplink_priv;

        mutex_lock(&uplink_priv->unready_flows_lock);
        unready_flow_del(flow);
        mutex_unlock(&uplink_priv->unready_flows_lock);
}

static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);

bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
{
        struct mlx5_core_dev *out_mdev, *route_mdev;
        struct mlx5e_priv *out_priv, *route_priv;

        out_priv = netdev_priv(out_dev);
        out_mdev = out_priv->mdev;
        route_priv = netdev_priv(route_dev);
        route_mdev = route_priv->mdev;

        if (out_mdev->coredev_type != MLX5_COREDEV_PF ||
            route_mdev->coredev_type != MLX5_COREDEV_VF)
                return false;

        return same_hw_devs(out_priv, route_priv);
}

int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
{
        struct mlx5e_priv *out_priv, *route_priv;
        struct mlx5_core_dev *route_mdev;
        struct mlx5_eswitch *esw;
        u16 vhca_id;
        int err;

        out_priv = netdev_priv(out_dev);
        esw = out_priv->mdev->priv.eswitch;
        route_priv = netdev_priv(route_dev);
        route_mdev = route_priv->mdev;

        vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
        err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
        return err;
}

int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow_parse_attr *parse_attr,
                              struct mlx5e_tc_flow *flow)
{
        struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts;
        struct mlx5_modify_hdr *mod_hdr;

        mod_hdr = mlx5_modify_header_alloc(priv->mdev,
                                           get_flow_name_space(flow),
                                           mod_hdr_acts->num_actions,
                                           mod_hdr_acts->actions);
        if (IS_ERR(mod_hdr))
                return PTR_ERR(mod_hdr);

        WARN_ON(flow->attr->modify_hdr);
        flow->attr->modify_hdr = mod_hdr;

        return 0;
}

static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        bool vf_tun = false, encap_valid = true;
        struct net_device *encap_dev = NULL;
        struct mlx5_esw_flow_attr *esw_attr;
        struct mlx5_fc *counter = NULL;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_priv *out_priv;
        u32 max_prio, max_chain;
        int err = 0;
        int out_index;

        /* We check chain range only for tc flows.
         * For ft flows, we checked attr->chain was originally 0 and set it to
         * FDB_FT_CHAIN which is outside tc range.
         * See mlx5e_rep_setup_ft_cb().
         */
        max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
        if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Requested chain is out of supported range");
                err = -EOPNOTSUPP;
                goto err_out;
        }

        max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
        if (attr->prio > max_prio) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Requested priority is out of supported range");
                err = -EOPNOTSUPP;
                goto err_out;
        }

        if (flow_flag_test(flow, TUN_RX)) {
                err = mlx5e_attach_decap_route(priv, flow);
                if (err)
                        goto err_out;
        }

        if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
                err = mlx5e_attach_decap(priv, flow, extack);
                if (err)
                        goto err_out;
        }

        parse_attr = attr->parse_attr;
        esw_attr = attr->esw_attr;

        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
                struct net_device *out_dev;
                int mirred_ifindex;

                if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
                        continue;

                mirred_ifindex = parse_attr->mirred_ifindex[out_index];
                out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
                if (!out_dev) {
                        NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
                        err = -ENODEV;
                        goto err_out;
                }
                err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
                                         extack, &encap_dev, &encap_valid);
                dev_put(out_dev);
                if (err)
                        goto err_out;

                if (esw_attr->dests[out_index].flags &
                    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
                        vf_tun = true;
                out_priv = netdev_priv(encap_dev);
                rpriv = out_priv->ppriv;
                esw_attr->dests[out_index].rep = rpriv->rep;
                esw_attr->dests[out_index].mdev = out_priv->mdev;
        }

        if (vf_tun && esw_attr->out_count > 1) {
                NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
                err = -EOPNOTSUPP;
                goto err_out;
        }

        err = mlx5_eswitch_add_vlan_action(esw, attr);
        if (err)
                goto err_out;

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
            !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
                if (vf_tun) {
                        err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
                        if (err)
                                goto err_out;
                } else {
                        err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
                        if (err)
                                goto err_out;
                }
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(esw_attr->counter_dev, true);
                if (IS_ERR(counter)) {
                        err = PTR_ERR(counter);
                        goto err_out;
                }

                attr->counter = counter;
        }

        /* We get here either when there was no error, or when there is an
         * encap action whose neighbour is not yet valid; in the latter
         * case the flow is offloaded to the slow path instead.
         */
        if (!encap_valid)
                flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
        else
                flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);

        if (IS_ERR(flow->rule[0])) {
                err = PTR_ERR(flow->rule[0]);
                goto err_out;
        }
        flow_flag_set(flow, OFFLOADED);

        return 0;

err_out:
        flow_flag_set(flow, FAILED);
        return err;
}

1485 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
1486 {
1487         struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
1488         void *headers_v = MLX5_ADDR_OF(fte_match_param,
1489                                        spec->match_value,
1490                                        misc_parameters_3);
1491         u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
1492                                              headers_v,
1493                                              geneve_tlv_option_0_data);
1494
1495         return !!geneve_tlv_opt_0_data;
1496 }
1497
1498 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
1499                                   struct mlx5e_tc_flow *flow)
1500 {
1501         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1502         struct mlx5_flow_attr *attr = flow->attr;
1503         struct mlx5_esw_flow_attr *esw_attr;
1504         bool vf_tun = false;
1505         int out_index;
1506
1507         esw_attr = attr->esw_attr;
1508         mlx5e_put_flow_tunnel_id(flow);
1509
1510         if (flow_flag_test(flow, NOT_READY))
1511                 remove_unready_flow(flow);
1512
1513         if (mlx5e_is_offloaded_flow(flow)) {
1514                 if (flow_flag_test(flow, SLOW))
1515                         mlx5e_tc_unoffload_from_slow_path(esw, flow);
1516                 else
1517                         mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1518         }
1519
1520         if (mlx5_flow_has_geneve_opt(flow))
1521                 mlx5_geneve_tlv_option_del(priv->mdev->geneve);
1522
1523         mlx5_eswitch_del_vlan_action(esw, attr);
1524
1525         if (flow->decap_route)
1526                 mlx5e_detach_decap_route(priv, flow);
1527
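        /* Detach the encap entries, noting whether any destination is a VF
         * tunnel, which changes how the modify header is released below.
         */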
1528         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1529                 if (esw_attr->dests[out_index].flags &
1530                     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
1531                         vf_tun = true;
1532                 if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
1533                         mlx5e_detach_encap(priv, flow, out_index);
1534                         kfree(attr->parse_attr->tun_info[out_index]);
1535                 }
1536         }
1537
1538         mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
1539
1540         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1541                 dealloc_mod_hdr_actions(&attr->parse_attr->mod_hdr_acts);
1542                 if (vf_tun && attr->modify_hdr)
1543                         mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
1544                 else
1545                         mlx5e_detach_mod_hdr(priv, flow);
1546         }
1547         kvfree(attr->parse_attr);
1548         kvfree(attr->esw_attr->rx_tun_attr);
1549
1550         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1551                 mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
1552
1553         if (flow_flag_test(flow, L3_TO_L2_DECAP))
1554                 mlx5e_detach_decap(priv, flow);
1555
1556         kfree(flow->attr->esw_attr->sample);
1557         kfree(flow->attr);
1558 }
1559
1560 struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1561 {
1562         return flow->attr->counter;
1563 }
1564
1565 /* Iterate over tmp_list of flows attached to flow_list head. */
1566 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1567 {
1568         struct mlx5e_tc_flow *flow, *tmp;
1569
1570         list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1571                 mlx5e_flow_put(priv, flow);
1572 }
1573
1574 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1575 {
1576         struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1577
1578         if (!flow_flag_test(flow, ESWITCH) ||
1579             !flow_flag_test(flow, DUP))
1580                 return;
1581
1582         mutex_lock(&esw->offloads.peer_mutex);
1583         list_del(&flow->peer);
1584         mutex_unlock(&esw->offloads.peer_mutex);
1585
1586         flow_flag_clear(flow, DUP);
1587
1588         if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
1589                 mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1590                 kfree(flow->peer_flow);
1591         }
1592
1593         flow->peer_flow = NULL;
1594 }
1595
1596 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1597 {
1598         struct mlx5_core_dev *dev = flow->priv->mdev;
1599         struct mlx5_devcom *devcom = dev->priv.devcom;
1600         struct mlx5_eswitch *peer_esw;
1601
1602         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1603         if (!peer_esw)
1604                 return;
1605
1606         __mlx5e_tc_del_fdb_peer_flow(flow);
1607         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1608 }
1609
1610 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
1611                               struct mlx5e_tc_flow *flow)
1612 {
1613         if (mlx5e_is_eswitch_flow(flow)) {
1614                 mlx5e_tc_del_fdb_peer_flow(flow);
1615                 mlx5e_tc_del_fdb_flow(priv, flow);
1616         } else {
1617                 mlx5e_tc_del_nic_flow(priv, flow);
1618         }
1619 }
1620
1621 static bool flow_has_tc_fwd_action(struct flow_cls_offload *f)
1622 {
1623         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1624         struct flow_action *flow_action = &rule->action;
1625         const struct flow_action_entry *act;
1626         int i;
1627
1628         flow_action_for_each(i, act, flow_action) {
1629                 switch (act->id) {
1630                 case FLOW_ACTION_GOTO:
1631                         return true;
1632                 default:
1633                         continue;
1634                 }
1635         }
1636
1637         return false;
1638 }
1639
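/* Walk the geneve options in the enc_opts mask. An all-zero mask is reported
 * as "don't care"; otherwise the option class and type must be fully masked,
 * since partial matches of tunnel options can't be restored on chain > 0.
 */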
1640 static int
1641 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
1642                                     struct flow_dissector_key_enc_opts *opts,
1643                                     struct netlink_ext_ack *extack,
1644                                     bool *dont_care)
1645 {
1646         struct geneve_opt *opt;
1647         int off = 0;
1648
1649         *dont_care = true;
1650
1651         while (opts->len > off) {
1652                 opt = (struct geneve_opt *)&opts->data[off];
1653
1654                 if (!(*dont_care) || opt->opt_class || opt->type ||
1655                     memchr_inv(opt->opt_data, 0, opt->length * 4)) {
1656                         *dont_care = false;
1657
1658                         if (opt->opt_class != htons(U16_MAX) ||
1659                             opt->type != U8_MAX) {
1660                                 NL_SET_ERR_MSG(extack,
1661                                                "Partial match of tunnel options in chain > 0 isn't supported");
1662                                 netdev_warn(priv->netdev,
1663                                             "Partial match of tunnel options in chain > 0 isn't supported");
1664                                 return -EOPNOTSUPP;
1665                         }
1666                 }
1667
1668                 off += sizeof(struct geneve_opt) + opt->length * 4;
1669         }
1670
1671         return 0;
1672 }
1673
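/* Copy the dissector key identified by diss_key from the rule's match key
 * into dst, using the size of the destination type.
 */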
1674 #define COPY_DISSECTOR(rule, diss_key, dst)\
1675 ({ \
1676         struct flow_rule *__rule = (rule);\
1677         typeof(dst) __dst = dst;\
1678 \
1679         memcpy(__dst,\
1680                skb_flow_dissector_target(__rule->match.dissector,\
1681                                          diss_key,\
1682                                          __rule->match.key),\
1683                sizeof(*__dst));\
1684 })
1685
1686 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
1687                                     struct mlx5e_tc_flow *flow,
1688                                     struct flow_cls_offload *f,
1689                                     struct net_device *filter_dev)
1690 {
1691         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1692         struct netlink_ext_ack *extack = f->common.extack;
1693         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1694         struct flow_match_enc_opts enc_opts_match;
1695         struct tunnel_match_enc_opts tun_enc_opts;
1696         struct mlx5_rep_uplink_priv *uplink_priv;
1697         struct mlx5_flow_attr *attr = flow->attr;
1698         struct mlx5e_rep_priv *uplink_rpriv;
1699         struct tunnel_match_key tunnel_key;
1700         bool enc_opts_is_dont_care = true;
1701         u32 tun_id, enc_opts_id = 0;
1702         struct mlx5_eswitch *esw;
1703         u32 value, mask;
1704         int err;
1705
1706         esw = priv->mdev->priv.eswitch;
1707         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1708         uplink_priv = &uplink_rpriv->uplink_priv;
1709
1710         memset(&tunnel_key, 0, sizeof(tunnel_key));
1711         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
1712                        &tunnel_key.enc_control);
1713         if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
1714                 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
1715                                &tunnel_key.enc_ipv4);
1716         else
1717                 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
1718                                &tunnel_key.enc_ipv6);
1719         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
1720         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
1721                        &tunnel_key.enc_tp);
1722         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
1723                        &tunnel_key.enc_key_id);
1724         tunnel_key.filter_ifindex = filter_dev->ifindex;
1725
1726         err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
1727         if (err)
1728                 return err;
1729
1730         flow_rule_match_enc_opts(rule, &enc_opts_match);
1731         err = enc_opts_is_dont_care_or_full_match(priv,
1732                                                   enc_opts_match.mask,
1733                                                   extack,
1734                                                   &enc_opts_is_dont_care);
1735         if (err)
1736                 goto err_enc_opts;
1737
1738         if (!enc_opts_is_dont_care) {
1739                 memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
1740                 memcpy(&tun_enc_opts.key, enc_opts_match.key,
1741                        sizeof(*enc_opts_match.key));
1742                 memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
1743                        sizeof(*enc_opts_match.mask));
1744
1745                 err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
1746                                   &tun_enc_opts, &enc_opts_id);
1747                 if (err)
1748                         goto err_enc_opts;
1749         }
1750
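        /* Encode the tunnel mapping id in the high bits and the enc opts
         * mapping id in the low ENC_OPTS_BITS bits of the register value.
         */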
1751         value = tun_id << ENC_OPTS_BITS | enc_opts_id;
1752         mask = enc_opts_id ? TUNNEL_ID_MASK :
1753                              (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
1754
1755         if (attr->chain) {
1756                 mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
1757                                             TUNNEL_TO_REG, value, mask);
1758         } else {
1759                 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
1760                 err = mlx5e_tc_match_to_reg_set(priv->mdev,
1761                                                 mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
1762                                                 TUNNEL_TO_REG, value);
1763                 if (err)
1764                         goto err_set;
1765
1766                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1767         }
1768
1769         flow->tunnel_id = value;
1770         return 0;
1771
1772 err_set:
1773         if (enc_opts_id)
1774                 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
1775                                enc_opts_id);
1776 err_enc_opts:
1777         mapping_remove(uplink_priv->tunnel_mapping, tun_id);
1778         return err;
1779 }
1780
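/* Release the tunnel and enc opts mapping ids taken by
 * mlx5e_get_flow_tunnel_id().
 */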
1781 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
1782 {
1783         u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
1784         u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
1785         struct mlx5_rep_uplink_priv *uplink_priv;
1786         struct mlx5e_rep_priv *uplink_rpriv;
1787         struct mlx5_eswitch *esw;
1788
1789         esw = flow->priv->mdev->priv.eswitch;
1790         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1791         uplink_priv = &uplink_rpriv->uplink_priv;
1792
1793         if (tun_id)
1794                 mapping_remove(uplink_priv->tunnel_mapping, tun_id);
1795         if (enc_opts_id)
1796                 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
1797                                enc_opts_id);
1798 }
1799
1800 u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
1801 {
1802         return flow->tunnel_id;
1803 }
1804
1805 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
1806                             struct flow_match_basic *match, bool outer,
1807                             void *headers_c, void *headers_v)
1808 {
1809         bool ip_version_cap;
1810
1811         ip_version_cap = outer ?
1812                 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
1813                                           ft_field_support.outer_ip_version) :
1814                 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
1815                                           ft_field_support.inner_ip_version);
1816
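        /* Prefer matching on ip_version when the device supports it and the
         * filter fully matches an IPv4/IPv6 ethertype; otherwise fall back
         * to matching the raw ethertype.
         */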
1817         if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
1818             (match->key->n_proto == htons(ETH_P_IP) ||
1819              match->key->n_proto == htons(ETH_P_IPV6))) {
1820                 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
1821                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
1822                          match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
1823         } else {
1824                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
1825                          ntohs(match->mask->n_proto));
1826                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
1827                          ntohs(match->key->n_proto));
1828         }
1829 }
1830
1831 u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
1832 {
1833         void *headers_v;
1834         u16 ethertype;
1835         u8 ip_version;
1836
1837         if (outer)
1838                 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1839         else
1840                 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
1841
1842         ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
1843         /* If ip_version wasn't matched on, derive it from the ethertype match */
1844         if (!ip_version) {
1845                 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
1846                 if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
1847                         ip_version = 4;
1848                 else if (ethertype == ETH_P_IPV6)
1849                         ip_version = 6;
1850         }
1851         return ip_version;
1852 }
1853
1854 static int parse_tunnel_attr(struct mlx5e_priv *priv,
1855                              struct mlx5e_tc_flow *flow,
1856                              struct mlx5_flow_spec *spec,
1857                              struct flow_cls_offload *f,
1858                              struct net_device *filter_dev,
1859                              u8 *match_level,
1860                              bool *match_inner)
1861 {
1862         struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
1863         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1864         struct netlink_ext_ack *extack = f->common.extack;
1865         bool needs_mapping, sets_mapping;
1866         int err;
1867
1868         if (!mlx5e_is_eswitch_flow(flow))
1869                 return -EOPNOTSUPP;
1870
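        /* Flows on chain > 0 match the tunnel via the mapped register value;
         * flows on chain 0 that forward to another chain must set up that
         * mapping for later restoration.
         */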
1871         needs_mapping = !!flow->attr->chain;
1872         sets_mapping = !flow->attr->chain && flow_has_tc_fwd_action(f);
1873         *match_inner = !needs_mapping;
1874
1875         if ((needs_mapping || sets_mapping) &&
1876             !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
1877                 NL_SET_ERR_MSG(extack,
1878                                "Chains on tunnel devices aren't supported without register loopback support");
1879                 netdev_warn(priv->netdev,
1880                             "Chains on tunnel devices aren't supported without register loopback support");
1881                 return -EOPNOTSUPP;
1882         }
1883
1884         if (!flow->attr->chain) {
1885                 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
1886                                          match_level);
1887                 if (err) {
1888                         NL_SET_ERR_MSG_MOD(extack,
1889                                            "Failed to parse tunnel attributes");
1890                         netdev_warn(priv->netdev,
1891                                     "Failed to parse tunnel attributes");
1892                         return err;
1893                 }
1894
1895                 /* With mpls over udp we decapsulate using packet reformat
1896                  * object
1897                  */
1898                 if (!netif_is_bareudp(filter_dev))
1899                         flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
1900                 err = mlx5e_tc_set_attr_rx_tun(flow, spec);
1901                 if (err)
1902                         return err;
1903         } else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
1904                 struct mlx5_flow_spec *tmp_spec;
1905
1906                 tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
1907                 if (!tmp_spec) {
1908                         NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec");
1909                         netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec");
1910                         return -ENOMEM;
1911                 }
1912                 memcpy(tmp_spec, spec, sizeof(*tmp_spec));
1913
1914                 err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
1915                 if (err) {
1916                         kvfree(tmp_spec);
1917                         NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
1918                         netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
1919                         return err;
1920                 }
1921                 err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
1922                 kvfree(tmp_spec);
1923                 if (err)
1924                         return err;
1925         }
1926
1927         if (!needs_mapping && !sets_mapping)
1928                 return 0;
1929
1930         return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
1931 }
1932
1933 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
1934 {
1935         return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1936                             inner_headers);
1937 }
1938
1939 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
1940 {
1941         return MLX5_ADDR_OF(fte_match_param, spec->match_value,
1942                             inner_headers);
1943 }
1944
1945 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
1946 {
1947         return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1948                             outer_headers);
1949 }
1950
1951 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
1952 {
1953         return MLX5_ADDR_OF(fte_match_param, spec->match_value,
1954                             outer_headers);
1955 }
1956
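/* Decap flows match on the inner headers, all others on the outer headers. */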
1957 static void *get_match_headers_value(u32 flags,
1958                                      struct mlx5_flow_spec *spec)
1959 {
1960         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
1961                 get_match_inner_headers_value(spec) :
1962                 get_match_outer_headers_value(spec);
1963 }
1964
1965 static void *get_match_headers_criteria(u32 flags,
1966                                         struct mlx5_flow_spec *spec)
1967 {
1968         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
1969                 get_match_inner_headers_criteria(spec) :
1970                 get_match_outer_headers_criteria(spec);
1971 }
1972
1973 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
1974                                    struct flow_cls_offload *f)
1975 {
1976         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1977         struct netlink_ext_ack *extack = f->common.extack;
1978         struct net_device *ingress_dev;
1979         struct flow_match_meta match;
1980
1981         if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
1982                 return 0;
1983
1984         flow_rule_match_meta(rule, &match);
1985         if (!match.mask->ingress_ifindex)
1986                 return 0;
1987
1988         if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
1989                 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
1990                 return -EOPNOTSUPP;
1991         }
1992
1993         ingress_dev = __dev_get_by_index(dev_net(filter_dev),
1994                                          match.key->ingress_ifindex);
1995         if (!ingress_dev) {
1996                 NL_SET_ERR_MSG_MOD(extack,
1997                                    "Can't find the ingress port to match on");
1998                 return -ENOENT;
1999         }
2000
2001         if (ingress_dev != filter_dev) {
2002                 NL_SET_ERR_MSG_MOD(extack,
2003                                    "Can't match on the ingress filter port");
2004                 return -EOPNOTSUPP;
2005         }
2006
2007         return 0;
2008 }
2009
2010 static bool skip_key_basic(struct net_device *filter_dev,
2011                            struct flow_cls_offload *f)
2012 {
2013         /* When doing mpls over udp decap, the user needs to provide
2014          * MPLS_UC as the protocol in order to be able to match on mpls
2015          * label fields.  However, the actual ethertype is IP so we want to
2016          * avoid matching on this, otherwise we'll fail the match.
2017          */
2018         if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2019                 return true;
2020
2021         return false;
2022 }
2023
2024 static int __parse_cls_flower(struct mlx5e_priv *priv,
2025                               struct mlx5e_tc_flow *flow,
2026                               struct mlx5_flow_spec *spec,
2027                               struct flow_cls_offload *f,
2028                               struct net_device *filter_dev,
2029                               u8 *inner_match_level, u8 *outer_match_level)
2030 {
2031         struct netlink_ext_ack *extack = f->common.extack;
2032         void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2033                                        outer_headers);
2034         void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2035                                        outer_headers);
2036         void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2037                                     misc_parameters);
2038         void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2039                                     misc_parameters);
2040         void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2041                                     misc_parameters_3);
2042         void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2043                                     misc_parameters_3);
2044         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2045         struct flow_dissector *dissector = rule->match.dissector;
2046         enum fs_flow_table_type fs_type;
2047         u16 addr_type = 0;
2048         u8 ip_proto = 0;
2049         u8 *match_level;
2050         int err;
2051
2052         fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
2053         match_level = outer_match_level;
2054
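        /* Reject filters that use dissector keys we don't know how to offload. */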
2055         if (dissector->used_keys &
2056             ~(BIT(FLOW_DISSECTOR_KEY_META) |
2057               BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2058               BIT(FLOW_DISSECTOR_KEY_BASIC) |
2059               BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2060               BIT(FLOW_DISSECTOR_KEY_VLAN) |
2061               BIT(FLOW_DISSECTOR_KEY_CVLAN) |
2062               BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2063               BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2064               BIT(FLOW_DISSECTOR_KEY_PORTS) |
2065               BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2066               BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2067               BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2068               BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
2069               BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2070               BIT(FLOW_DISSECTOR_KEY_TCP) |
2071               BIT(FLOW_DISSECTOR_KEY_IP)  |
2072               BIT(FLOW_DISSECTOR_KEY_CT) |
2073               BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2074               BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2075               BIT(FLOW_DISSECTOR_KEY_ICMP) |
2076               BIT(FLOW_DISSECTOR_KEY_MPLS))) {
2077                 NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2078                 netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
2079                            dissector->used_keys);
2080                 return -EOPNOTSUPP;
2081         }
2082
2083         if (mlx5e_get_tc_tun(filter_dev)) {
2084                 bool match_inner = false;
2085
2086                 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2087                                         outer_match_level, &match_inner);
2088                 if (err)
2089                         return err;
2090
2091                 if (match_inner) {
2092                         /* header pointers should point to the inner headers
2093                          * if the packet was decapsulated already.
2094                          * outer headers are set by parse_tunnel_attr.
2095                          */
2096                         match_level = inner_match_level;
2097                         headers_c = get_match_inner_headers_criteria(spec);
2098                         headers_v = get_match_inner_headers_value(spec);
2099                 }
2100         }
2101
2102         err = mlx5e_flower_parse_meta(filter_dev, f);
2103         if (err)
2104                 return err;
2105
2106         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2107             !skip_key_basic(filter_dev, f)) {
2108                 struct flow_match_basic match;
2109
2110                 flow_rule_match_basic(rule, &match);
2111                 mlx5e_tc_set_ethertype(priv->mdev, &match,
2112                                        match_level == outer_match_level,
2113                                        headers_c, headers_v);
2114
2115                 if (match.mask->n_proto)
2116                         *match_level = MLX5_MATCH_L2;
2117         }
2118         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2119             is_vlan_dev(filter_dev)) {
2120                 struct flow_dissector_key_vlan filter_dev_mask;
2121                 struct flow_dissector_key_vlan filter_dev_key;
2122                 struct flow_match_vlan match;
2123
2124                 if (is_vlan_dev(filter_dev)) {
2125                         match.key = &filter_dev_key;
2126                         match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2127                         match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2128                         match.key->vlan_priority = 0;
2129                         match.mask = &filter_dev_mask;
2130                         memset(match.mask, 0xff, sizeof(*match.mask));
2131                         match.mask->vlan_priority = 0;
2132                 } else {
2133                         flow_rule_match_vlan(rule, &match);
2134                 }
2135                 if (match.mask->vlan_id ||
2136                     match.mask->vlan_priority ||
2137                     match.mask->vlan_tpid) {
2138                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2139                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2140                                          svlan_tag, 1);
2141                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2142                                          svlan_tag, 1);
2143                         } else {
2144                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2145                                          cvlan_tag, 1);
2146                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2147                                          cvlan_tag, 1);
2148                         }
2149
2150                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2151                                  match.mask->vlan_id);
2152                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2153                                  match.key->vlan_id);
2154
2155                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2156                                  match.mask->vlan_priority);
2157                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2158                                  match.key->vlan_priority);
2159
2160                         *match_level = MLX5_MATCH_L2;
2161                 }
2162         } else if (*match_level != MLX5_MATCH_NONE) {
2163                 /* cvlan_tag enabled in match criteria and
2164                  * disabled in match value means both S & C tags
2165                  * don't exist (untagged on both)
2166                  */
2167                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2168                 *match_level = MLX5_MATCH_L2;
2169         }
2170
2171         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2172                 struct flow_match_vlan match;
2173
2174                 flow_rule_match_cvlan(rule, &match);
2175                 if (match.mask->vlan_id ||
2176                     match.mask->vlan_priority ||
2177                     match.mask->vlan_tpid) {
2178                         if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
2179                                                      fs_type)) {
2180                                 NL_SET_ERR_MSG_MOD(extack,
2181                                                    "Matching on CVLAN is not supported");
2182                                 return -EOPNOTSUPP;
2183                         }
2184
2185                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2186                                 MLX5_SET(fte_match_set_misc, misc_c,
2187                                          outer_second_svlan_tag, 1);
2188                                 MLX5_SET(fte_match_set_misc, misc_v,
2189                                          outer_second_svlan_tag, 1);
2190                         } else {
2191                                 MLX5_SET(fte_match_set_misc, misc_c,
2192                                          outer_second_cvlan_tag, 1);
2193                                 MLX5_SET(fte_match_set_misc, misc_v,
2194                                          outer_second_cvlan_tag, 1);
2195                         }
2196
2197                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2198                                  match.mask->vlan_id);
2199                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2200                                  match.key->vlan_id);
2201                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2202                                  match.mask->vlan_priority);
2203                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2204                                  match.key->vlan_priority);
2205
2206                         *match_level = MLX5_MATCH_L2;
2207                         spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
2208                 }
2209         }
2210
2211         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2212                 struct flow_match_eth_addrs match;
2213
2214                 flow_rule_match_eth_addrs(rule, &match);
2215                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2216                                              dmac_47_16),
2217                                 match.mask->dst);
2218                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2219                                              dmac_47_16),
2220                                 match.key->dst);
2221
2222                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2223                                              smac_47_16),
2224                                 match.mask->src);
2225                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2226                                              smac_47_16),
2227                                 match.key->src);
2228
2229                 if (!is_zero_ether_addr(match.mask->src) ||
2230                     !is_zero_ether_addr(match.mask->dst))
2231                         *match_level = MLX5_MATCH_L2;
2232         }
2233
2234         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2235                 struct flow_match_control match;
2236
2237                 flow_rule_match_control(rule, &match);
2238                 addr_type = match.key->addr_type;
2239
2240                 /* the HW doesn't support frag first/later */
2241                 if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
2242                         return -EOPNOTSUPP;
2243
2244                 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2245                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2246                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2247                                  match.key->flags & FLOW_DIS_IS_FRAGMENT);
2248
2249                         /* the HW doesn't need L3 inline to match on frag=no */
2250                         if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2251                                 *match_level = MLX5_MATCH_L2;
2252                         else
2253                                 *match_level = MLX5_MATCH_L3;
2254                 }
2255         }
2256         /* ***  L2 attributes parsing up to here *** */
2257
2258         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2259                 struct flow_match_basic match;
2260
2261                 flow_rule_match_basic(rule, &match);
2262                 ip_proto = match.key->ip_proto;
2263
2264                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2265                          match.mask->ip_proto);
2266                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2267                          match.key->ip_proto);
2268
2269                 if (match.mask->ip_proto)
2270                         *match_level = MLX5_MATCH_L3;
2271         }
2272
2273         if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2274                 struct flow_match_ipv4_addrs match;
2275
2276                 flow_rule_match_ipv4_addrs(rule, &match);
2277                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2278                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2279                        &match.mask->src, sizeof(match.mask->src));
2280                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2281                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2282                        &match.key->src, sizeof(match.key->src));
2283                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2284                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2285                        &match.mask->dst, sizeof(match.mask->dst));
2286                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2287                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2288                        &match.key->dst, sizeof(match.key->dst));
2289
2290                 if (match.mask->src || match.mask->dst)
2291                         *match_level = MLX5_MATCH_L3;
2292         }
2293
2294         if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2295                 struct flow_match_ipv6_addrs match;
2296
2297                 flow_rule_match_ipv6_addrs(rule, &match);
2298                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2299                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2300                        &match.mask->src, sizeof(match.mask->src));
2301                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2302                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2303                        &match.key->src, sizeof(match.key->src));
2304
2305                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2306                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2307                        &match.mask->dst, sizeof(match.mask->dst));
2308                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2309                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2310                        &match.key->dst, sizeof(match.key->dst));
2311
2312                 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2313                     ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2314                         *match_level = MLX5_MATCH_L3;
2315         }
2316
2317         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2318                 struct flow_match_ip match;
2319
2320                 flow_rule_match_ip(rule, &match);
2321                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2322                          match.mask->tos & 0x3);
2323                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2324                          match.key->tos & 0x3);
2325
2326                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2327                          match.mask->tos >> 2);
2328                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2329                          match.key->tos  >> 2);
2330
2331                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2332                          match.mask->ttl);
2333                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2334                          match.key->ttl);
2335
2336                 if (match.mask->ttl &&
2337                     !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2338                                                 ft_field_support.outer_ipv4_ttl)) {
2339                         NL_SET_ERR_MSG_MOD(extack,
2340                                            "Matching on TTL is not supported");
2341                         return -EOPNOTSUPP;
2342                 }
2343
2344                 if (match.mask->tos || match.mask->ttl)
2345                         *match_level = MLX5_MATCH_L3;
2346         }
2347
2348         /* ***  L3 attributes parsing up to here *** */
2349
2350         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2351                 struct flow_match_ports match;
2352
2353                 flow_rule_match_ports(rule, &match);
2354                 switch (ip_proto) {
2355                 case IPPROTO_TCP:
2356                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2357                                  tcp_sport, ntohs(match.mask->src));
2358                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2359                                  tcp_sport, ntohs(match.key->src));
2360
2361                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2362                                  tcp_dport, ntohs(match.mask->dst));
2363                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2364                                  tcp_dport, ntohs(match.key->dst));
2365                         break;
2366
2367                 case IPPROTO_UDP:
2368                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2369                                  udp_sport, ntohs(match.mask->src));
2370                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2371                                  udp_sport, ntohs(match.key->src));
2372
2373                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2374                                  udp_dport, ntohs(match.mask->dst));
2375                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2376                                  udp_dport, ntohs(match.key->dst));
2377                         break;
2378                 default:
2379                         NL_SET_ERR_MSG_MOD(extack,
2380                                            "Only UDP and TCP transports are supported for L4 matching");
2381                         netdev_err(priv->netdev,
2382                                    "Only UDP and TCP transports are supported\n");
2383                         return -EINVAL;
2384                 }
2385
2386                 if (match.mask->src || match.mask->dst)
2387                         *match_level = MLX5_MATCH_L4;
2388         }
2389
2390         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2391                 struct flow_match_tcp match;
2392
2393                 flow_rule_match_tcp(rule, &match);
2394                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2395                          ntohs(match.mask->flags));
2396                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2397                          ntohs(match.key->flags));
2398
2399                 if (match.mask->flags)
2400                         *match_level = MLX5_MATCH_L4;
2401         }
2402         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
2403                 struct flow_match_icmp match;
2404
2405                 flow_rule_match_icmp(rule, &match);
2406                 switch (ip_proto) {
2407                 case IPPROTO_ICMP:
2408                         if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
2409                               MLX5_FLEX_PROTO_ICMP))
2410                                 return -EOPNOTSUPP;
2411                         MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
2412                                  match.mask->type);
2413                         MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
2414                                  match.key->type);
2415                         MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
2416                                  match.mask->code);
2417                         MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
2418                                  match.key->code);
2419                         break;
2420                 case IPPROTO_ICMPV6:
2421                         if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
2422                               MLX5_FLEX_PROTO_ICMPV6))
2423                                 return -EOPNOTSUPP;
2424                         MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
2425                                  match.mask->type);
2426                         MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
2427                                  match.key->type);
2428                         MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
2429                                  match.mask->code);
2430                         MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
2431                                  match.key->code);
2432                         break;
2433                 default:
2434                         NL_SET_ERR_MSG_MOD(extack,
2435                                            "Code and type matching is supported only for ICMP and ICMPv6");
2436                         netdev_err(priv->netdev,
2437                                    "Code and type matching is supported only for ICMP and ICMPv6\n");
2438                         return -EINVAL;
2439                 }
2440                 if (match.mask->code || match.mask->type) {
2441                         *match_level = MLX5_MATCH_L4;
2442                         spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
2443                 }
2444         }
2445         /* Currently supported only for MPLS over UDP */
2446         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
2447             !netif_is_bareudp(filter_dev)) {
2448                 NL_SET_ERR_MSG_MOD(extack,
2449                                    "Matching on MPLS is supported only for MPLS over UDP");
2450                 netdev_err(priv->netdev,
2451                            "Matching on MPLS is supported only for MPLS over UDP\n");
2452                 return -EOPNOTSUPP;
2453         }
2454
2455         return 0;
2456 }
2457
2458 static int parse_cls_flower(struct mlx5e_priv *priv,
2459                             struct mlx5e_tc_flow *flow,
2460                             struct mlx5_flow_spec *spec,
2461                             struct flow_cls_offload *f,
2462                             struct net_device *filter_dev)
2463 {
2464         u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2465         struct netlink_ext_ack *extack = f->common.extack;
2466         struct mlx5_core_dev *dev = priv->mdev;
2467         struct mlx5_eswitch *esw = dev->priv.eswitch;
2468         struct mlx5e_rep_priv *rpriv = priv->ppriv;
2469         struct mlx5_eswitch_rep *rep;
2470         bool is_eswitch_flow;
2471         int err;
2472
2473         inner_match_level = MLX5_MATCH_NONE;
2474         outer_match_level = MLX5_MATCH_NONE;
2475
2476         err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
2477                                  &inner_match_level, &outer_match_level);
2478         non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2479                                  outer_match_level : inner_match_level;
2480
2481         is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
2482         if (!err && is_eswitch_flow) {
2483                 rep = rpriv->rep;
2484                 if (rep->vport != MLX5_VPORT_UPLINK &&
2485                     (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
2486                     esw->offloads.inline_mode < non_tunnel_match_level)) {
2487                         NL_SET_ERR_MSG_MOD(extack,
2488                                            "Flow is not offloaded due to min inline setting");
2489                         netdev_warn(priv->netdev,
2490                                     "Flow is not offloaded due to min inline setting, required %d actual %d\n",
2491                                     non_tunnel_match_level, esw->offloads.inline_mode);
2492                         return -EOPNOTSUPP;
2493                 }
2494         }
2495
2496         flow->attr->inner_match_level = inner_match_level;
2497         flow->attr->outer_match_level = outer_match_level;
2498
2500         return err;
2501 }
2502
2503 struct pedit_headers {
2504         struct ethhdr  eth;
2505         struct vlan_hdr vlan;
2506         struct iphdr   ip4;
2507         struct ipv6hdr ip6;
2508         struct tcphdr  tcp;
2509         struct udphdr  udp;
2510 };
2511
2512 struct pedit_headers_action {
2513         struct pedit_headers    vals;
2514         struct pedit_headers    masks;
2515         u32                     pedits;
2516 };
2517
2518 static int pedit_header_offsets[] = {
2519         [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
2520         [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
2521         [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
2522         [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
2523         [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
2524 };
2525
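/* Resolve a pointer to the per-header-type region inside struct pedit_headers. */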
2526 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
2527
2528 static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
2529                          struct pedit_headers_action *hdrs)
2530 {
2531         u32 *curr_pmask, *curr_pval;
2532
2533         curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
2534         curr_pval  = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
2535
2536         if (*curr_pmask & mask)  /* disallow acting twice on the same location */
2537                 goto out_err;
2538
2539         *curr_pmask |= mask;
2540         *curr_pval  |= (val & mask);
2541
2542         return 0;
2543
2544 out_err:
2545         return -EOPNOTSUPP;
2546 }
2547
2548 struct mlx5_fields {
2549         u8  field;
2550         u8  field_bsize;
2551         u32 field_mask;
2552         u32 offset;
2553         u32 match_offset;
2554 };
2555
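/* Map a rewritable header field to its HW modify-header field id, its offset
 * within struct pedit_headers and its byte offset within the match parameters.
 */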
2556 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2557                 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2558                  offsetof(struct pedit_headers, field) + (off), \
2559                  MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
2560
2561 /* Returns true when the masked rewrite value equals the masked match value
2562  * and the rewrite mask sets no bits outside the match mask.
2563  */
2564 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2565         type matchmaskx = *(type *)(matchmaskp); \
2566         type matchvalx = *(type *)(matchvalp); \
2567         type maskx = *(type *)(maskp); \
2568         type valx = *(type *)(valp); \
2569         \
2570         (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2571                                                                  matchmaskx)); \
2572 })
2573
2574 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
2575                          void *matchmaskp, u8 bsize)
2576 {
2577         bool same = false;
2578
2579         switch (bsize) {
2580         case 8:
2581                 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
2582                 break;
2583         case 16:
2584                 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
2585                 break;
2586         case 32:
2587                 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
2588                 break;
2589         }
2590
2591         return same;
2592 }
2593
2594 static struct mlx5_fields fields[] = {
2595         OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
2596         OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
2597         OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
2598         OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
2599         OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
2600         OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
2601
2602         OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
2603         OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
2604         OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
2605         OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2606
2607         OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
2608                 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
2609         OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
2610                 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
2611         OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
2612                 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
2613         OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
2614                 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
2615         OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
2616                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
2617         OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
2618                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
2619         OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
2620                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
2621         OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
2622                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
2623         OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
2624         OFFLOAD(IP_DSCP, 16,  0xc00f, ip6, 0, ip_dscp),
2625
2626         OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
2627         OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
2628         /* in linux tcphdr the flags field is 8 bits long */
2629         OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
2630
2631         OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
2632         OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
2633 };
2634
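/* Convert a big-endian field mask to little-endian so that the bit offsets
 * computed with the bitops in offload_pedit_fields() match the HW layout.
 */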
2635 static unsigned long mask_to_le(unsigned long mask, int size)
2636 {
2637         __be32 mask_be32;
2638         __be16 mask_be16;
2639
2640         if (size == 32) {
2641                 mask_be32 = (__force __be32)(mask);
2642                 mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
2643         } else if (size == 16) {
2644                 mask_be32 = (__force __be32)(mask);
2645                 mask_be16 = *(__be16 *)&mask_be32;
2646                 mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
2647         }
2648
2649         return mask;
2650 }

2651 static int offload_pedit_fields(struct mlx5e_priv *priv,
2652                                 int namespace,
2653                                 struct pedit_headers_action *hdrs,
2654                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
2655                                 u32 *action_flags,
2656                                 struct netlink_ext_ack *extack)
2657 {
2658         struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
2659         int i, action_size, first, last, next_z;
2660         void *headers_c, *headers_v, *action, *vals_p;
2661         u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
2662         struct mlx5e_tc_mod_hdr_acts *mod_acts;
2663         struct mlx5_fields *f;
2664         unsigned long mask, field_mask;
2665         int err;
2666         u8 cmd;
2667
2668         mod_acts = &parse_attr->mod_hdr_acts;
2669         headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
2670         headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
2671
2672         set_masks = &hdrs[0].masks;
2673         add_masks = &hdrs[1].masks;
2674         set_vals = &hdrs[0].vals;
2675         add_vals = &hdrs[1].vals;
2676
2677         action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
2678
2679         for (i = 0; i < ARRAY_SIZE(fields); i++) {
2680                 bool skip;
2681
2682                 f = &fields[i];
2683                 /* avoid seeing bits set from previous iterations */
2684                 s_mask = 0;
2685                 a_mask = 0;
2686
2687                 s_masks_p = (void *)set_masks + f->offset;
2688                 a_masks_p = (void *)add_masks + f->offset;
2689
2690                 s_mask = *s_masks_p & f->field_mask;
2691                 a_mask = *a_masks_p & f->field_mask;
2692
2693                 if (!s_mask && !a_mask) /* nothing to offload here */
2694                         continue;
2695
2696                 if (s_mask && a_mask) {
2697                         NL_SET_ERR_MSG_MOD(extack,
2698                                            "can't set and add to the same HW field");
2699                         netdev_warn(priv->netdev, "can't set and add to the same HW field (%x)\n", f->field);
2700                         return -EOPNOTSUPP;
2701                 }
2702
2703                 skip = false;
2704                 if (s_mask) {
2705                         void *match_mask = headers_c + f->match_offset;
2706                         void *match_val = headers_v + f->match_offset;
2707
2708                         cmd  = MLX5_ACTION_TYPE_SET;
2709                         mask = s_mask;
2710                         vals_p = (void *)set_vals + f->offset;
2711                         /* don't rewrite if we have a match on the same value */
2712                         if (cmp_val_mask(vals_p, s_masks_p, match_val,
2713                                          match_mask, f->field_bsize))
2714                                 skip = true;
2715                         /* clear to denote we consumed this field */
2716                         *s_masks_p &= ~f->field_mask;
2717                 } else {
2718                         cmd  = MLX5_ACTION_TYPE_ADD;
2719                         mask = a_mask;
2720                         vals_p = (void *)add_vals + f->offset;
2721                         /* adding 0 is a no-op */
2722                         if ((*(u32 *)vals_p & f->field_mask) == 0)
2723                                 skip = true;
2724                         /* clear to denote we consumed this field */
2725                         *a_masks_p &= ~f->field_mask;
2726                 }
2727                 if (skip)
2728                         continue;
2729
2730                 mask = mask_to_le(mask, f->field_bsize);
2731
2732                 first = find_first_bit(&mask, f->field_bsize);
2733                 next_z = find_next_zero_bit(&mask, f->field_bsize, first);
2734                 last  = find_last_bit(&mask, f->field_bsize);
2735                 if (first < next_z && next_z < last) {
2736                         NL_SET_ERR_MSG_MOD(extack,
2737                                            "rewrite of non-contiguous sub-fields isn't supported");
2738                         netdev_warn(priv->netdev,
2739                                     "rewrite of non-contiguous sub-fields (mask %lx) isn't offloaded\n", mask);
2740                         return -EOPNOTSUPP;
2741                 }
2742
2743                 err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts);
2744                 if (err) {
2745                         NL_SET_ERR_MSG_MOD(extack,
2746                                            "too many pedit actions, can't offload");
2747                         mlx5_core_warn(priv->mdev,
2748                                        "parsed %d pedit actions, can't do more\n",
2749                                        mod_acts->num_actions);
2750                         return err;
2751                 }
2752
2753                 action = mod_acts->actions +
2754                          (mod_acts->num_actions * action_size);
2755                 MLX5_SET(set_action_in, action, action_type, cmd);
2756                 MLX5_SET(set_action_in, action, field, f->field);
2757
2758                 if (cmd == MLX5_ACTION_TYPE_SET) {
2759                         int start;
2760
2761                         field_mask = mask_to_le(f->field_mask, f->field_bsize);
2762
2763                         /* a narrow field may not start at bit 0 of the HW field */
2764                         start = find_first_bit(&field_mask, f->field_bsize);
2765
2766                         MLX5_SET(set_action_in, action, offset, first - start);
2767                         /* length is the number of bits to be written; zero means a length of 32 */
2768                         MLX5_SET(set_action_in, action, length, (last - first + 1));
2769                 }
2770
2771                 if (f->field_bsize == 32)
2772                         MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
2773                 else if (f->field_bsize == 16)
2774                         MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
2775                 else if (f->field_bsize == 8)
2776                         MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
2777
2778                 ++mod_acts->num_actions;
2779         }
2780
2781         return 0;
2782 }
2783
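/* A max_modify_header_actions cap of zero means the device cannot
 * rewrite headers in this namespace at all.
 */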
2784 static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
2785                                                   int namespace)
2786 {
2787         if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
2788                 return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
2789         else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
2790                 return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
2791 }
2792
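/* Ensure there is room for one more modify-header action.  The actions
 * array grows by doubling, starting from a single action and capped at
 * the HW limit for the namespace; hitting the cap returns -ENOSPC.
 * Newly allocated space is zeroed.
 */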
2793 int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
2794                           int namespace,
2795                           struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
2796 {
2797         int action_size, new_num_actions, max_hw_actions;
2798         size_t new_sz, old_sz;
2799         void *ret;
2800
2801         if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
2802                 return 0;
2803
2804         action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
2805
2806         max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev,
2807                                                                 namespace);
2808         new_num_actions = min(max_hw_actions,
2809                               mod_hdr_acts->actions ?
2810                               mod_hdr_acts->max_actions * 2 : 1);
2811         if (mod_hdr_acts->max_actions == new_num_actions)
2812                 return -ENOSPC;
2813
2814         new_sz = action_size * new_num_actions;
2815         old_sz = mod_hdr_acts->max_actions * action_size;
2816         ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
2817         if (!ret)
2818                 return -ENOMEM;
2819
2820         memset(ret + old_sz, 0, new_sz - old_sz);
2821         mod_hdr_acts->actions = ret;
2822         mod_hdr_acts->max_actions = new_num_actions;
2823
2824         return 0;
2825 }
2826
2827 void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
2828 {
2829         kfree(mod_hdr_acts->actions);
2830         mod_hdr_acts->actions = NULL;
2831         mod_hdr_acts->num_actions = 0;
2832         mod_hdr_acts->max_actions = 0;
2833 }
2834
2835 static const struct pedit_headers zero_masks = {};
2836
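/* Accumulate one 32-bit mangle/add key into the per-command masks and
 * values in hdrs[]; the actual modify-header actions are built later by
 * offload_pedit_fields().  Legacy pedit (no extended header type) and
 * devices without modify-header support are rejected.
 */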
2837 static int
2838 parse_pedit_to_modify_hdr(struct mlx5e_priv *priv,
2839                           const struct flow_action_entry *act, int namespace,
2840                           struct mlx5e_tc_flow_parse_attr *parse_attr,
2841                           struct pedit_headers_action *hdrs,
2842                           struct netlink_ext_ack *extack)
2843 {
2844         u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? TCA_PEDIT_KEY_EX_CMD_SET : TCA_PEDIT_KEY_EX_CMD_ADD;
2845         int err = -EOPNOTSUPP; /* can't be all optimistic */
2846         u32 mask, val, offset;
2847         u8 htype;
2848
2849         htype = act->mangle.htype;
2851
2852         if (htype == FLOW_ACT_MANGLE_UNSPEC) {
2853                 NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
2854                 goto out_err;
2855         }
2856
2857         if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
2858                 NL_SET_ERR_MSG_MOD(extack,
2859                                    "The pedit offload action is not supported");
2860                 goto out_err;
2861         }
2862
2863         mask = act->mangle.mask;
2864         val = act->mangle.val;
2865         offset = act->mangle.offset;
2866
2867         err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
2868         if (err)
2869                 goto out_err;
2870
2871         hdrs[cmd].pedits++;
2872
2873         return 0;
2874 out_err:
2875         return err;
2876 }
2877
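/* For L3-to-L2 decap flows, Ethernet pedit values are not turned into
 * modify-header actions; they are folded into parse_attr->eth, which is
 * later used to build the packet-reformat header.
 */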
2878 static int
2879 parse_pedit_to_reformat(struct mlx5e_priv *priv,
2880                         const struct flow_action_entry *act,
2881                         struct mlx5e_tc_flow_parse_attr *parse_attr,
2882                         struct netlink_ext_ack *extack)
2883 {
2884         u32 mask, val, offset;
2885         u32 *p;
2886
2887         if (act->id != FLOW_ACTION_MANGLE)
2888                 return -EOPNOTSUPP;
2889
2890         if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) {
2891                 NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported");
2892                 return -EOPNOTSUPP;
2893         }
2894
2895         mask = ~act->mangle.mask;
2896         val = act->mangle.val;
2897         offset = act->mangle.offset;
2898         p = (u32 *)&parse_attr->eth;
2899         *(p + (offset >> 2)) |= (val & mask);
2900
2901         return 0;
2902 }
2903
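/* Dispatch a pedit action either to the packet-reformat path (for
 * L3-to-L2 decap flows) or to the regular modify-header path.
 */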
2904 static int parse_tc_pedit_action(struct mlx5e_priv *priv,
2905                                  const struct flow_action_entry *act, int namespace,
2906                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
2907                                  struct pedit_headers_action *hdrs,
2908                                  struct mlx5e_tc_flow *flow,
2909                                  struct netlink_ext_ack *extack)
2910 {
2911         if (flow && flow_flag_test(flow, L3_TO_L2_DECAP))
2912                 return parse_pedit_to_reformat(priv, act, parse_attr, extack);
2913
2914         return parse_pedit_to_modify_hdr(priv, act, namespace,
2915                                          parse_attr, hdrs, extack);
2916 }
2917
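/* Build modify-header actions for all accumulated pedits, then verify
 * that every requested mask bit was consumed; leftover bits mean the
 * rule asks to rewrite a field the HW cannot modify, so the offload is
 * rejected.
 */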
2918 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
2919                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
2920                                  struct pedit_headers_action *hdrs,
2921                                  u32 *action_flags,
2922                                  struct netlink_ext_ack *extack)
2923 {
2924         struct pedit_headers *cmd_masks;
2925         int err;
2926         u8 cmd;
2927
2928         err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
2929                                    action_flags, extack);
2930         if (err < 0)
2931                 goto out_dealloc_parsed_actions;
2932
2933         for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
2934                 cmd_masks = &hdrs[cmd].masks;
2935                 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
2936                         NL_SET_ERR_MSG_MOD(extack,
2937                                            "attempt to offload an unsupported field");
2938                         netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
2939                         print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
2940                                        16, 1, cmd_masks, sizeof(zero_masks), true);
2941                         err = -EOPNOTSUPP;
2942                         goto out_dealloc_parsed_actions;
2943                 }
2944         }
2945
2946         return 0;
2947
2948 out_dealloc_parsed_actions:
2949         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
2950         return err;
2951 }
2952
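/* Checksum recalculation is a side effect of the HW header rewrite, so
 * a standalone csum action (without pedit) cannot be offloaded, and
 * only the IPv4/TCP/UDP update flags are supported.
 */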
2953 static bool csum_offload_supported(struct mlx5e_priv *priv,
2954                                    u32 action,
2955                                    u32 update_flags,
2956                                    struct netlink_ext_ack *extack)
2957 {
2958         u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
2959                          TCA_CSUM_UPDATE_FLAG_UDP;
2960
2961         /* The HW recalculates checksums only when headers are rewritten */
2962         if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
2963                 NL_SET_ERR_MSG_MOD(extack,
2964                                    "TC csum action is only offloaded with pedit");
2965                 netdev_warn(priv->netdev,
2966                             "TC csum action is only offloaded with pedit\n");
2967                 return false;
2968         }
2969
2970         if (update_flags & ~prot_flags) {
2971                 NL_SET_ERR_MSG_MOD(extack,
2972                                    "can't offload TC csum action for some header/s");
2973                 netdev_warn(priv->netdev,
2974                             "can't offload TC csum action for some header/s - flags %#x\n",
2975                             update_flags);
2976                 return false;
2977         }
2978
2979         return true;
2980 }
2981
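/* Overlays for the 4-byte words of the IPv4/IPv6 headers that contain
 * ttl/hop_limit, used to check which parts of the word a 32-bit pedit
 * mask touches.
 */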
2982 struct ip_ttl_word {
2983         __u8    ttl;
2984         __u8    protocol;
2985         __sum16 check;
2986 };
2987
2988 struct ipv6_hoplimit_word {
2989         __be16  payload_len;
2990         __u8    nexthdr;
2991         __u8    hop_limit;
2992 };
2993
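/* Classify a single mangle action: set *modify_ip_header if anything
 * other than ttl/hop_limit in the IP header is rewritten, and
 * *modify_tuple if addresses or transport ports are rewritten.  Tuple
 * rewrites can't be combined with a non-clear ct action, since the
 * rewritten tuple can no longer be used to restore the ct state.
 */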
2994 static int is_action_keys_supported(const struct flow_action_entry *act,
2995                                     bool ct_flow, bool *modify_ip_header,
2996                                     bool *modify_tuple,
2997                                     struct netlink_ext_ack *extack)
2998 {
2999         u32 mask, offset;
3000         u8 htype;
3001
3002         htype = act->mangle.htype;
3003         offset = act->mangle.offset;
3004         mask = ~act->mangle.mask;
3005         /* For the IPv4 and IPv6 headers, inspect the 4-byte word that
3006          * contains ttl/hop_limit to determine whether fields other than
3007          * ttl and hop_limit are being modified.
3008          */
3009         if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3010                 struct ip_ttl_word *ttl_word =
3011                         (struct ip_ttl_word *)&mask;
3012
3013                 if (offset != offsetof(struct iphdr, ttl) ||
3014                     ttl_word->protocol ||
3015                     ttl_word->check) {
3016                         *modify_ip_header = true;
3017                 }
3018
3019                 if (offset >= offsetof(struct iphdr, saddr))
3020                         *modify_tuple = true;
3021
3022                 if (ct_flow && *modify_tuple) {
3023                         NL_SET_ERR_MSG_MOD(extack,
3024                                            "can't offload re-write of ipv4 address with action ct");
3025                         return -EOPNOTSUPP;
3026                 }
3027         } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3028                 struct ipv6_hoplimit_word *hoplimit_word =
3029                         (struct ipv6_hoplimit_word *)&mask;
3030
3031                 if (offset != offsetof(struct ipv6hdr, payload_len) ||
3032                     hoplimit_word->payload_len ||
3033                     hoplimit_word->nexthdr) {
3034                         *modify_ip_header = true;
3035                 }
3036
3037                 if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
3038                         *modify_tuple = true;
3039
3040                 if (ct_flow && *modify_tuple) {
3041                         NL_SET_ERR_MSG_MOD(extack,
3042                                            "can't offload re-write of ipv6 address with action ct");
3043                         return -EOPNOTSUPP;
3044                 }
3045         } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
3046                    htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
3047                 *modify_tuple = true;
3048                 if (ct_flow) {
3049                         NL_SET_ERR_MSG_MOD(extack,
3050                                            "can't offload re-write of transport header ports with action ct");
3051                         return -EOPNOTSUPP;
3052                 }
3053         }
3054
3055         return 0;
3056 }
3057
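/* A tuple rewrite is offloadable only when ct is not tracking the flow:
 * either no tuple field is rewritten, the flow carries a ct(clear)
 * action, or the flow neither uses ct nor matches on ct state.
 */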
3058 static bool modify_tuple_supported(bool modify_tuple, bool ct_clear,
3059                                    bool ct_flow, struct netlink_ext_ack *extack,
3060                                    struct mlx5e_priv *priv,
3061                                    struct mlx5_flow_spec *spec)
3062 {
3063         if (!modify_tuple || ct_clear)
3064                 return true;
3065
3066         if (ct_flow) {
3067                 NL_SET_ERR_MSG_MOD(extack,
3068                                    "can't offload tuple modification with non-clear ct()");
3069                 netdev_info(priv->netdev,
3070                             "can't offload tuple modification with non-clear ct()\n");
3071                 return false;
3072         }
3073
3074         /* Add a ct_state=-trk match so the rule is offloaded for non-ct
3075          * flows (or after a clear action); otherwise, once the tuple is
3076          * changed, we can't restore the ct state.
3077          */
3078         if (mlx5_tc_ct_add_no_trk_match(spec)) {
3079                 NL_SET_ERR_MSG_MOD(extack,
3080                                    "can't offload tuple modification with ct matches and no ct(clear) action");
3081                 netdev_info(priv->netdev,
3082                             "can't offload tuple modification with ct matches and no ct(clear) action\n");
3083                 return false;
3084         }
3085
3086         return true;
3087 }
3088
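/* Validate all header-rewrite actions of a flow against its match: for
 * non-IP traffic only MACs are rewritten, which is always fine; for IP
 * traffic, IP header rewrites are limited to TCP/UDP/ICMP, and tuple
 * rewrites must satisfy modify_tuple_supported().
 */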
3089 static bool modify_header_match_supported(struct mlx5e_priv *priv,
3090                                           struct mlx5_flow_spec *spec,
3091                                           struct flow_action *flow_action,
3092                                           u32 actions, bool ct_flow,
3093                                           bool ct_clear,
3094                                           struct netlink_ext_ack *extack)
3095 {
3096         const struct flow_action_entry *act;
3097         bool modify_ip_header, modify_tuple;
3098         void *headers_c;
3099         void *headers_v;
3100         u16 ethertype;
3101         u8 ip_proto;
3102         int i, err;
3103
3104         headers_c = get_match_headers_criteria(actions, spec);
3105         headers_v = get_match_headers_value(actions, spec);
3106         ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3107
3108         /* for non-IP we only re-write MACs, so we're okay */
3109         if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3110             ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3111                 goto out_ok;
3112
3113         modify_ip_header = false;
3114         modify_tuple = false;
3115         flow_action_for_each(i, act, flow_action) {
3116                 if (act->id != FLOW_ACTION_MANGLE &&
3117                     act->id != FLOW_ACTION_ADD)
3118                         continue;
3119
3120                 err = is_action_keys_supported(act, ct_flow,
3121                                                &modify_ip_header,
3122                                                &modify_tuple, extack);
3123                 if (err)
3124                         return false;
3125         }
3126
3127         if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
3128                                     priv, spec))
3129                 return false;
3130
3131         ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3132         if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3133             ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3134                 NL_SET_ERR_MSG_MOD(extack,
3135                                    "can't offload re-write of non TCP/UDP/ICMP");
3136                 netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3137                             ip_proto);
3138                 return false;
3139         }
3140
3141 out_ok:
3142         return true;
3143 }
3144
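/* Final action/match compatibility check: ct can't be combined with
 * mirroring on split rules unless the device preserves reg_c (the ct
 * registers are cleared between the rule parts otherwise), and header
 * rewrites must pass modify_header_match_supported().
 */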
3145 static bool actions_match_supported(struct mlx5e_priv *priv,
3146                                     struct flow_action *flow_action,
3147                                     struct mlx5e_tc_flow_parse_attr *parse_attr,
3148                                     struct mlx5e_tc_flow *flow,
3149                                     struct netlink_ext_ack *extack)
3150 {
3151         bool ct_flow, ct_clear;
3152         u32 actions;
3153
3154         ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3156         ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3157         actions = flow->attr->action;
3158
3159         if (mlx5e_is_eswitch_flow(flow)) {
3160                 if (flow->attr->esw_attr->split_count && ct_flow &&
3161                     !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) {
3162                         /* All registers used by ct are cleared when using
3163                          * split rules.
3164                          */
3165                         NL_SET_ERR_MSG_MOD(extack,
3166                                            "Can't offload mirroring with action ct");
3167                         return false;
3168                 }
3169         }
3170
3171         if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
3172                 return modify_header_match_supported(priv, &parse_attr->spec,
3173                                                      flow_action, actions,
3174                                                      ct_flow, ct_clear,
3175                                                      extack);
3176
3177         return true;
3178 }
3179
3180 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3181 {
3182         return priv->mdev == peer_priv->mdev;
3183 }
3184
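/* Two net devices belong to the same HW when their NIC system image
 * GUIDs match (typically two ports of the same adapter).
 */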
3185 static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3186 {
3187         struct mlx5_core_dev *fmdev, *pmdev;
3188         u64 fsystem_guid, psystem_guid;
3189
3190         fmdev = priv->mdev;
3191         pmdev = peer_priv->mdev;
3192
3193         fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3194         psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
3195
3196         return (fsystem_guid == psystem_guid);
3197 }
3198
3199 static bool same_vf_reps(struct mlx5e_priv *priv,
3200                          struct net_device *out_dev)
3201 {
3202         return mlx5e_eswitch_vf_rep(priv->netdev) &&
3203                priv->netdev == out_dev;
3204 }
3205
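/* Implement a vlan modify action as a 16-bit pedit of the VID bits of
 * the VLAN TCI.  Only the VID can be rewritten, so the rule must match
 * on VLAN and the requested priority must equal the matched priority
 * (i.e. the priority part must be a no-op).
 */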
3206 static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
3207                                    const struct flow_action_entry *act,
3208                                    struct mlx5e_tc_flow_parse_attr *parse_attr,
3209                                    struct pedit_headers_action *hdrs,
3210                                    u32 *action, struct netlink_ext_ack *extack)
3211 {
3212         u16 mask16 = VLAN_VID_MASK;
3213         u16 val16 = act->vlan.vid & VLAN_VID_MASK;
3214         const struct flow_action_entry pedit_act = {
3215                 .id = FLOW_ACTION_MANGLE,
3216                 .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
3217                 .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
3218                 .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
3219                 .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
3220         };
3221         u8 match_prio_mask, match_prio_val;
3222         void *headers_c, *headers_v;
3223         int err;
3224
3225         headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
3226         headers_v = get_match_headers_value(*action, &parse_attr->spec);
3227
3228         if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
3229               MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
3230                 NL_SET_ERR_MSG_MOD(extack,
3231                                    "VLAN rewrite action must have VLAN protocol match");
3232                 return -EOPNOTSUPP;
3233         }
3234
3235         match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
3236         match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
3237         if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
3238                 NL_SET_ERR_MSG_MOD(extack,
3239                                    "Changing VLAN prio is not supported");
3240                 return -EOPNOTSUPP;
3241         }
3242
3243         err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack);
3244         *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3245
3246         return err;
3247 }
3248
3249 static int
3250 add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
3251                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
3252                                  struct pedit_headers_action *hdrs,
3253                                  u32 *action, struct netlink_ext_ack *extack)
3254 {
3255         const struct flow_action_entry prio_tag_act = {
3256                 .vlan.vid = 0,
3257                 .vlan.prio =
3258                         MLX5_GET(fte_match_set_lyr_2_4,
3259                                  get_match_headers_value(*action,
3260                                                          &parse_attr->spec),
3261                                  first_prio) &
3262                         MLX5_GET(fte_match_set_lyr_2_4,
3263                                  get_match_headers_criteria(*action,
3264                                                             &parse_attr->spec),
3265                                  first_prio),
3266         };
3267
3268         return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
3269                                        &prio_tag_act, parse_attr, hdrs, action,
3270                                        extack);
3271 }
3272
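/* A goto action is valid only when the destination chain is within the
 * supported range, the jump doesn't target the same or a lower chain
 * (unless the device supports backward jumps), the flow isn't an FT
 * flow, and any reformat or decap in the same rule is allowed by the
 * reformat_and_fwd_to_table cap.
 */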
3273 static int validate_goto_chain(struct mlx5e_priv *priv,
3274                                struct mlx5e_tc_flow *flow,
3275                                const struct flow_action_entry *act,
3276                                u32 actions,
3277                                struct netlink_ext_ack *extack)
3278 {
3279         bool is_esw = mlx5e_is_eswitch_flow(flow);
3280         struct mlx5_flow_attr *attr = flow->attr;
3281         bool ft_flow = mlx5e_is_ft_flow(flow);
3282         u32 dest_chain = act->chain_index;
3283         struct mlx5_fs_chains *chains;
3284         struct mlx5_eswitch *esw;
3285         u32 reformat_and_fwd;
3286         u32 max_chain;
3287
3288         esw = priv->mdev->priv.eswitch;
3289         chains = is_esw ? esw_chains(esw) : nic_chains(priv);
3290         max_chain = mlx5_chains_get_chain_range(chains);
3291         reformat_and_fwd = is_esw ?
3292                            MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
3293                            MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table);
3294
3295         if (ft_flow) {
3296                 NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
3297                 return -EOPNOTSUPP;
3298         }
3299
3300         if (!mlx5_chains_backwards_supported(chains) &&
3301             dest_chain <= attr->chain) {
3302                 NL_SET_ERR_MSG_MOD(extack,
3303                                    "Goto lower numbered chain isn't supported");
3304                 return -EOPNOTSUPP;
3305         }
3306
3307         if (dest_chain > max_chain) {
3308                 NL_SET_ERR_MSG_MOD(extack,
3309                                    "Requested destination chain is out of supported range");
3310                 return -EOPNOTSUPP;
3311         }
3312
3313         if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
3314                        MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
3315             !reformat_and_fwd) {
3316                 NL_SET_ERR_MSG_MOD(extack,
3317                                    "Goto chain is not allowed if action has reformat or decap");
3318                 return -EOPNOTSUPP;
3319         }
3320
3321         return 0;
3322 }
3323
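/* Parse the TC actions of a NIC (non-eswitch) flow into attr->action
 * flags and the NIC attributes.  Redirects are offloaded only as
 * hairpin between devices of the same HW, and pedits accumulated in
 * hdrs[] are materialized at the end.
 */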
3324 static int parse_tc_nic_actions(struct mlx5e_priv *priv,
3325                                 struct flow_action *flow_action,
3326                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
3327                                 struct mlx5e_tc_flow *flow,
3328                                 struct netlink_ext_ack *extack)
3329 {
3330         struct mlx5_flow_attr *attr = flow->attr;
3331         struct pedit_headers_action hdrs[2] = {};
3332         const struct flow_action_entry *act;
3333         struct mlx5_nic_flow_attr *nic_attr;
3334         u32 action = 0;
3335         int err, i;
3336
3337         if (!flow_action_has_entries(flow_action))
3338                 return -EINVAL;
3339
3340         if (!flow_action_hw_stats_check(flow_action, extack,
3341                                         FLOW_ACTION_HW_STATS_DELAYED_BIT))
3342                 return -EOPNOTSUPP;
3343
3344         nic_attr = attr->nic_attr;
3345
3346         nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3347
3348         flow_action_for_each(i, act, flow_action) {
3349                 switch (act->id) {
3350                 case FLOW_ACTION_ACCEPT:
3351                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3352                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
3353                         break;
3354                 case FLOW_ACTION_DROP:
3355                         action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
3356                         if (MLX5_CAP_FLOWTABLE(priv->mdev,
3357                                                flow_table_properties_nic_receive.flow_counter))
3358                                 action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3359                         break;
3360                 case FLOW_ACTION_MANGLE:
3361                 case FLOW_ACTION_ADD:
3362                         err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
3363                                                     parse_attr, hdrs, NULL, extack);
3364                         if (err)
3365                                 return err;
3366
3367                         action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3368                         break;
3369                 case FLOW_ACTION_VLAN_MANGLE:
3370                         err = add_vlan_rewrite_action(priv,
3371                                                       MLX5_FLOW_NAMESPACE_KERNEL,
3372                                                       act, parse_attr, hdrs,
3373                                                       &action, extack);
3374                         if (err)
3375                                 return err;
3376
3377                         break;
3378                 case FLOW_ACTION_CSUM:
3379                         if (csum_offload_supported(priv, action,
3380                                                    act->csum_flags,
3381                                                    extack))
3382                                 break;
3383
3384                         return -EOPNOTSUPP;
3385                 case FLOW_ACTION_REDIRECT: {
3386                         struct net_device *peer_dev = act->dev;
3387
3388                         if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
3389                             same_hw_devs(priv, netdev_priv(peer_dev))) {
3390                                 parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
3391                                 flow_flag_set(flow, HAIRPIN);
3392                                 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3393                                           MLX5_FLOW_CONTEXT_ACTION_COUNT;
3394                         } else {
3395                                 NL_SET_ERR_MSG_MOD(extack,
3396                                                    "device is not on same HW, can't offload");
3397                                 netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
3398                                             peer_dev->name);
3399                                 return -EINVAL;
3400                         }
3401                         }
3402                         break;
3403                 case FLOW_ACTION_MARK: {
3404                         u32 mark = act->mark;
3405
3406                         if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
3407                                 NL_SET_ERR_MSG_MOD(extack,
3408                                                    "Bad flow mark - only 16 bit is supported");
3409                                 return -EINVAL;
3410                         }
3411
3412                         nic_attr->flow_tag = mark;
3413                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3414                         }
3415                         break;
3416                 case FLOW_ACTION_GOTO:
3417                         err = validate_goto_chain(priv, flow, act, action,
3418                                                   extack);
3419                         if (err)
3420                                 return err;
3421
3422                         action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3423                         attr->dest_chain = act->chain_index;
3424                         break;
3425                 case FLOW_ACTION_CT:
3426                         err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
3427                         if (err)
3428                                 return err;
3429
3430                         flow_flag_set(flow, CT);
3431                         break;
3432                 default:
3433                         NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
3434                         return -EOPNOTSUPP;
3435                 }
3436         }
3437
3438         if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
3439             hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
3440                 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
3441                                             parse_attr, hdrs, &action, extack);
3442                 if (err)
3443                         return err;
3444                 /* in case all pedit actions are skipped, remove the MOD_HDR
3445                  * flag.
3446                  */
3447                 if (parse_attr->mod_hdr_acts.num_actions == 0) {
3448                         action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3449                         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
3450                 }
3451         }
3452
3453         attr->action = action;
3454
3455         if (attr->dest_chain) {
3456                 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
3457                         NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
3458                         return -EOPNOTSUPP;
3459                 }
3460                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3461         }
3462
3463         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
3464                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3465
3466         if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
3467                 return -EOPNOTSUPP;
3468
3469         return 0;
3470 }
3471
3472 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
3473                                   struct net_device *peer_netdev)
3474 {
3475         struct mlx5e_priv *peer_priv;
3476
3477         peer_priv = netdev_priv(peer_netdev);
3478
3479         return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
3480                 mlx5e_eswitch_vf_rep(priv->netdev) &&
3481                 mlx5e_eswitch_vf_rep(peer_netdev) &&
3482                 same_hw_devs(priv, peer_priv));
3483 }
3484
3485 static int parse_tc_vlan_action(struct mlx5e_priv *priv,
3486                                 const struct flow_action_entry *act,
3487                                 struct mlx5_esw_flow_attr *attr,
3488                                 u32 *action)
3489 {
3490         u8 vlan_idx = attr->total_vlan;
3491
3492         if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
3493                 return -EOPNOTSUPP;
3494
3495         switch (act->id) {
3496         case FLOW_ACTION_VLAN_POP:
3497                 if (vlan_idx) {
3498                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3499                                                                  MLX5_FS_VLAN_DEPTH))
3500                                 return -EOPNOTSUPP;
3501
3502                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
3503                 } else {
3504                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3505                 }
3506                 break;
3507         case FLOW_ACTION_VLAN_PUSH:
3508                 attr->vlan_vid[vlan_idx] = act->vlan.vid;
3509                 attr->vlan_prio[vlan_idx] = act->vlan.prio;
3510                 attr->vlan_proto[vlan_idx] = act->vlan.proto;
3511                 if (!attr->vlan_proto[vlan_idx])
3512                         attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
3513
3514                 if (vlan_idx) {
3515                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3516                                                                  MLX5_FS_VLAN_DEPTH))
3517                                 return -EOPNOTSUPP;
3518
3519                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
3520                 } else {
3521                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
3522                             (act->vlan.proto != htons(ETH_P_8021Q) ||
3523                              act->vlan.prio))
3524                                 return -EOPNOTSUPP;
3525
3526                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
3527                 }
3528                 break;
3529         default:
3530                 return -EINVAL;
3531         }
3532
3533         attr->total_vlan = vlan_idx + 1;
3534
3535         return 0;
3536 }
3537
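/* Resolve the actual FDB destination when the output device is a LAG
 * master: forwarding to the bond of the uplink uses the uplink itself,
 * while forwarding to another bond uses its active slave, provided that
 * slave is a rep on the same eswitch.
 */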
3538 static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev,
3539                                           struct net_device *out_dev)
3540 {
3541         struct net_device *fdb_out_dev = out_dev;
3542         struct net_device *uplink_upper;
3543
3544         rcu_read_lock();
3545         uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
3546         if (uplink_upper && netif_is_lag_master(uplink_upper) &&
3547             uplink_upper == out_dev) {
3548                 fdb_out_dev = uplink_dev;
3549         } else if (netif_is_lag_master(out_dev)) {
3550                 fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
3551                 if (fdb_out_dev &&
3552                     (!mlx5e_eswitch_rep(fdb_out_dev) ||
3553                      !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
3554                         fdb_out_dev = NULL;
3555         }
3556         rcu_read_unlock();
3557         return fdb_out_dev;
3558 }
3559
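/* Translate forwarding to a vlan device into a vlan push (one per
 * nesting level, recursively) plus forwarding to the underlying real
 * device.
 */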
3560 static int add_vlan_push_action(struct mlx5e_priv *priv,
3561                                 struct mlx5_flow_attr *attr,
3562                                 struct net_device **out_dev,
3563                                 u32 *action)
3564 {
3565         struct net_device *vlan_dev = *out_dev;
3566         struct flow_action_entry vlan_act = {
3567                 .id = FLOW_ACTION_VLAN_PUSH,
3568                 .vlan.vid = vlan_dev_vlan_id(vlan_dev),
3569                 .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
3570                 .vlan.prio = 0,
3571         };
3572         int err;
3573
3574         err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
3575         if (err)
3576                 return err;
3577
3578         rcu_read_lock();
3579         *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev));
3580         rcu_read_unlock();
3581         if (!*out_dev)
3582                 return -ENODEV;
3583
3584         if (is_vlan_dev(*out_dev))
3585                 err = add_vlan_push_action(priv, attr, out_dev, action);
3586
3587         return err;
3588 }
3589
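/* When the filter device is a vlan device, pop one vlan per nesting
 * level between it and the rep netdev.
 */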
3590 static int add_vlan_pop_action(struct mlx5e_priv *priv,
3591                                struct mlx5_flow_attr *attr,
3592                                u32 *action)
3593 {
3594         struct flow_action_entry vlan_act = {
3595                 .id = FLOW_ACTION_VLAN_POP,
3596         };
3597         int nest_level, err = 0;
3598
3599         nest_level = attr->parse_attr->filter_dev->lower_level -
3600                      priv->netdev->lower_level;
3601         while (nest_level--) {
3602                 err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
3603                 if (err)
3604                         return err;
3605         }
3606
3607         return err;
3608 }
3609
3610 static bool same_hw_reps(struct mlx5e_priv *priv,
3611                          struct net_device *peer_netdev)
3612 {
3613         struct mlx5e_priv *peer_priv;
3614
3615         peer_priv = netdev_priv(peer_netdev);
3616
3617         return mlx5e_eswitch_rep(priv->netdev) &&
3618                mlx5e_eswitch_rep(peer_netdev) &&
3619                same_hw_devs(priv, peer_priv);
3620 }
3621
3622 static bool is_lag_dev(struct mlx5e_priv *priv,
3623                        struct net_device *peer_netdev)
3624 {
3625         return ((mlx5_lag_is_sriov(priv->mdev) ||
3626                  mlx5_lag_is_multipath(priv->mdev)) &&
3627                  same_hw_reps(priv, peer_netdev));
3628 }
3629
3630 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
3631                                     struct net_device *out_dev)
3632 {
3633         if (is_merged_eswitch_vfs(priv, out_dev))
3634                 return true;
3635
3636         if (is_lag_dev(priv, out_dev))
3637                 return true;
3638
3639         return mlx5e_eswitch_rep(out_dev) &&
3640                same_port_devs(priv, netdev_priv(out_dev));
3641 }
3642
3643 static bool is_duplicated_output_device(struct net_device *dev,
3644                                         struct net_device *out_dev,
3645                                         int *ifindexes, int if_count,
3646                                         struct netlink_ext_ack *extack)
3647 {
3648         int i;
3649
3650         for (i = 0; i < if_count; i++) {
3651                 if (ifindexes[i] == out_dev->ifindex) {
3652                         NL_SET_ERR_MSG_MOD(extack,
3653                                            "can't duplicate output to same device");
3654                         netdev_err(dev, "can't duplicate output to same device: %s\n",
3655                                    out_dev->name);
3656                         return true;
3657                 }
3658         }
3659
3660         return false;
3661 }
3662
3663 static int verify_uplink_forwarding(struct mlx5e_priv *priv,
3664                                     struct mlx5e_tc_flow *flow,
3665                                     struct net_device *out_dev,
3666                                     struct netlink_ext_ack *extack)
3667 {
3668         struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
3669         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3670         struct mlx5e_rep_priv *rep_priv;
3671
3672         /* Forwarding non-encapsulated traffic between
3673          * uplink ports is allowed only if the
3674          * termination_table_raw_traffic cap is set.
3675          *
3676          * The input vport was stored in attr->in_rep.
3677          * In the LAG case, *priv* is the private data of
3678          * the uplink, which may not be the input vport.
3679          */
3680         rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
3681
3682         if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
3683               mlx5e_eswitch_uplink_rep(out_dev)))
3684                 return 0;
3685
3686         if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
3687                                         termination_table_raw_traffic)) {
3688                 NL_SET_ERR_MSG_MOD(extack,
3689                                    "devices are both uplink, can't offload forwarding");
3690                 pr_err("devices %s %s are both uplink, can't offload forwarding\n",
3691                        priv->netdev->name, out_dev->name);
3692                 return -EOPNOTSUPP;
3693         } else if (out_dev != rep_priv->netdev) {
3694                 NL_SET_ERR_MSG_MOD(extack,
3695                                    "devices are not the same uplink, can't offload forwarding");
3696                 pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
3697                        priv->netdev->name, out_dev->name);
3698                 return -EOPNOTSUPP;
3699         }
3700         return 0;
3701 }
3702
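/* Parse the TC actions of an eswitch (FDB) flow.  Outputs are resolved
 * to eswitch destinations (handling encap, LAG and stacked vlan
 * devices), vlan pop+push pairs are collapsed into a vlan rewrite, and
 * pedit/encap/goto actions are recorded in the flow attributes.
 */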
3703 static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
3704                                 struct flow_action *flow_action,
3705                                 struct mlx5e_tc_flow *flow,
3706                                 struct netlink_ext_ack *extack,
3707                                 struct net_device *filter_dev)
3708 {
3709         struct pedit_headers_action hdrs[2] = {};
3710         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3711         struct mlx5e_tc_flow_parse_attr *parse_attr;
3712         struct mlx5e_rep_priv *rpriv = priv->ppriv;
3713         const struct ip_tunnel_info *info = NULL;
3714         struct mlx5_flow_attr *attr = flow->attr;
3715         int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
3716         bool ft_flow = mlx5e_is_ft_flow(flow);
3717         const struct flow_action_entry *act;
3718         struct mlx5_esw_flow_attr *esw_attr;
3719         struct mlx5_sample_attr sample = {};
3720         bool encap = false, decap = false;
3721         u32 action = attr->action;
3722         int err, i, if_count = 0;
3723         bool mpls_push = false;
3724
3725         if (!flow_action_has_entries(flow_action))
3726                 return -EINVAL;
3727
3728         if (!flow_action_hw_stats_check(flow_action, extack,
3729                                         FLOW_ACTION_HW_STATS_DELAYED_BIT))
3730                 return -EOPNOTSUPP;
3731
3732         esw_attr = attr->esw_attr;
3733         parse_attr = attr->parse_attr;
3734
3735         flow_action_for_each(i, act, flow_action) {
3736                 switch (act->id) {
3737                 case FLOW_ACTION_DROP:
3738                         action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
3739                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
3740                         break;
3741                 case FLOW_ACTION_TRAP:
3742                         if (!flow_offload_has_one_action(flow_action)) {
3743                                 NL_SET_ERR_MSG_MOD(extack,
3744                                                    "action trap is supported as a sole action only");
3745                                 return -EOPNOTSUPP;
3746                         }
3747                         action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3748                                    MLX5_FLOW_CONTEXT_ACTION_COUNT);
3749                         attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
3750                         break;
3751                 case FLOW_ACTION_MPLS_PUSH:
3752                         if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
3753                                                         reformat_l2_to_l3_tunnel) ||
3754                             act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
3755                                 NL_SET_ERR_MSG_MOD(extack,
3756                                                    "mpls push is supported only for mpls_uc protocol");
3757                                 return -EOPNOTSUPP;
3758                         }
3759                         mpls_push = true;
3760                         break;
3761                 case FLOW_ACTION_MPLS_POP:
3762                         /* we only support mpls pop if it is the first action
3763                          * and the filter net device is bareudp. Subsequent
3764                          * actions can be pedit and the last can be mirred
3765                          * egress redirect.
3766                          */
3767                         if (i) {
3768                                 NL_SET_ERR_MSG_MOD(extack,
3769                                                    "mpls pop supported only as first action");
3770                                 return -EOPNOTSUPP;
3771                         }
3772                         if (!netif_is_bareudp(filter_dev)) {
3773                                 NL_SET_ERR_MSG_MOD(extack,
3774                                                    "mpls pop supported only on bareudp devices");
3775                                 return -EOPNOTSUPP;
3776                         }
3777
3778                         parse_attr->eth.h_proto = act->mpls_pop.proto;
3779                         action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
3780                         flow_flag_set(flow, L3_TO_L2_DECAP);
3781                         break;
3782                 case FLOW_ACTION_MANGLE:
3783                 case FLOW_ACTION_ADD:
3784                         err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
3785                                                     parse_attr, hdrs, flow, extack);
3786                         if (err)
3787                                 return err;
3788
3789                         if (!flow_flag_test(flow, L3_TO_L2_DECAP)) {
3790                                 action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3791                                 esw_attr->split_count = esw_attr->out_count;
3792                         }
3793                         break;
3794                 case FLOW_ACTION_CSUM:
3795                         if (csum_offload_supported(priv, action,
3796                                                    act->csum_flags, extack))
3797                                 break;
3798
3799                         return -EOPNOTSUPP;
3800                 case FLOW_ACTION_REDIRECT:
3801                 case FLOW_ACTION_MIRRED: {
3802                         struct mlx5e_priv *out_priv;
3803                         struct net_device *out_dev;
3804
3805                         out_dev = act->dev;
3806                         if (!out_dev) {
3807                                 /* out_dev is NULL when filters with
3808                                  * non-existing mirred device are replayed to
3809                                  * the driver.
3810                                  */
3811                                 return -EINVAL;
3812                         }
3813
3814                         if (mpls_push && !netif_is_bareudp(out_dev)) {
3815                                 NL_SET_ERR_MSG_MOD(extack,
3816                                                    "mpls is supported only through a bareudp device");
3817                                 return -EOPNOTSUPP;
3818                         }
3819
3820                         if (ft_flow && out_dev == priv->netdev) {
3821                                 /* Ignore forward to self rules generated
3822                                  * by adding both mlx5 devs to the flow table
3823                                  * block on a normal nft offload setup.
3824                                  */
3825                                 return -EOPNOTSUPP;
3826                         }
3827
3828                         if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
3829                                 NL_SET_ERR_MSG_MOD(extack,
3830                                                    "can't support more output ports, can't offload forwarding");
3831                                 netdev_warn(priv->netdev,
3832                                             "can't support more than %d output ports, can't offload forwarding\n",
3833                                             esw_attr->out_count);
3834                                 return -EOPNOTSUPP;
3835                         }
3836
3837                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3838                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
3839                         if (encap) {
3840                                 parse_attr->mirred_ifindex[esw_attr->out_count] =
3841                                         out_dev->ifindex;
3842                                 parse_attr->tun_info[esw_attr->out_count] =
3843                                         mlx5e_dup_tun_info(info);
3844                                 if (!parse_attr->tun_info[esw_attr->out_count])
3845                                         return -ENOMEM;
3846                                 encap = false;
3847                                 esw_attr->dests[esw_attr->out_count].flags |=
3848                                         MLX5_ESW_DEST_ENCAP;
3849                                 esw_attr->out_count++;
3850                                 /* attr->dests[].rep is resolved when we
3851                                  * handle encap
3852                                  */
3853                         } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
3854                                 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3855                                 struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
3856
3857                                 if (is_duplicated_output_device(priv->netdev,
3858                                                                 out_dev,
3859                                                                 ifindexes,
3860                                                                 if_count,
3861                                                                 extack))
3862                                         return -EOPNOTSUPP;
3863
3864                                 ifindexes[if_count] = out_dev->ifindex;
3865                                 if_count++;
3866
3867                                 out_dev = get_fdb_out_dev(uplink_dev, out_dev);
3868                                 if (!out_dev)
3869                                         return -ENODEV;
3870
3871                                 if (is_vlan_dev(out_dev)) {
3872                                         err = add_vlan_push_action(priv, attr,
3873                                                                    &out_dev,
3874                                                                    &action);
3875                                         if (err)
3876                                                 return err;
3877                                 }
3878
3879                                 if (is_vlan_dev(parse_attr->filter_dev)) {
3880                                         err = add_vlan_pop_action(priv, attr,
3881                                                                   &action);
3882                                         if (err)
3883                                                 return err;
3884                                 }
3885
3886                                 err = verify_uplink_forwarding(priv, flow, out_dev, extack);
3887                                 if (err)
3888                                         return err;
3889
3890                                 if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
3891                                         NL_SET_ERR_MSG_MOD(extack,
3892                                                            "devices are not on same switch HW, can't offload forwarding");
3893                                         return -EOPNOTSUPP;
3894                                 }
3895
3896                                 if (same_vf_reps(priv, out_dev)) {
3897                                         NL_SET_ERR_MSG_MOD(extack,
3898                                                            "can't forward from a VF to itself");
3899                                         return -EOPNOTSUPP;
3900                                 }
3901
3902                                 out_priv = netdev_priv(out_dev);
3903                                 rpriv = out_priv->ppriv;
3904                                 esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
3905                                 esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
3906                                 esw_attr->out_count++;
3907                         } else if (parse_attr->filter_dev != priv->netdev) {
3908                                 /* All mlx5 devices are called to configure
3909                                  * high-level device filters. Therefore, the
3910                                  * *attempt* to install a filter on an invalid
3911                                  * eswitch should not trigger an explicit error.
3912                                  */
3913                                 return -EINVAL;
3914                         } else {
3915                                 NL_SET_ERR_MSG_MOD(extack,
3916                                                    "devices are not on same switch HW, can't offload forwarding");
3917                                 netdev_warn(priv->netdev,
3918                                             "devices %s %s not on same switch HW, can't offload forwarding\n",
3919                                             priv->netdev->name,
3920                                             out_dev->name);
3921                                 return -EINVAL;
3922                         }
3923                         }
3924                         break;
3925                 case FLOW_ACTION_TUNNEL_ENCAP:
3926                         info = act->tunnel;
3927                         if (info)
3928                                 encap = true;
3929                         else
3930                                 return -EOPNOTSUPP;
3931
3932                         break;
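                        /* An encap rule reaches this path through the
                         * tunnel_key action; a sketch of one way to hit it
                         * from userspace (device names and addresses are
                         * hypothetical):
                         *
                         *   tc filter add dev enp8s0f0_0 ingress protocol ip \
                         *      flower action tunnel_key set \
                         *          src_ip 10.0.0.1 dst_ip 10.0.0.2 id 100 \
                         *      action mirred egress redirect dev vxlan0
                         *
                         * Only the tunnel info is recorded here; the actual
                         * encap headers are resolved when the redirect to the
                         * tunnel device is parsed above.
                         */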
3933                 case FLOW_ACTION_VLAN_PUSH:
3934                 case FLOW_ACTION_VLAN_POP:
3935                         if (act->id == FLOW_ACTION_VLAN_PUSH &&
3936                             (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
3937                                 /* Replace vlan pop+push with vlan modify */
3938                                 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3939                                 err = add_vlan_rewrite_action(priv,
3940                                                               MLX5_FLOW_NAMESPACE_FDB,
3941                                                               act, parse_attr, hdrs,
3942                                                               &action, extack);
3943                         } else {
3944                                 err = parse_tc_vlan_action(priv, act, esw_attr, &action);
3945                         }
3946                         if (err)
3947                                 return err;
3948
3949                         esw_attr->split_count = esw_attr->out_count;
3950                         break;
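                        /* For example (sketch, hypothetical devices), a filter
                         * carrying both a pop and a push:
                         *
                         *   tc filter add dev enp8s0f0_0 ingress protocol 802.1q \
                         *      flower vlan_id 10 \
                         *      action vlan pop action vlan push id 20 \
                         *      action mirred egress redirect dev enp8s0f0_1
                         *
                         * is collapsed by the branch above into a single VLAN
                         * header rewrite instead of a pop followed by a push.
                         */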
3951                 case FLOW_ACTION_VLAN_MANGLE:
3952                         err = add_vlan_rewrite_action(priv,
3953                                                       MLX5_FLOW_NAMESPACE_FDB,
3954                                                       act, parse_attr, hdrs,
3955                                                       &action, extack);
3956                         if (err)
3957                                 return err;
3958
3959                         esw_attr->split_count = esw_attr->out_count;
3960                         break;
3961                 case FLOW_ACTION_TUNNEL_DECAP:
3962                         decap = true;
3963                         break;
3964                 case FLOW_ACTION_GOTO:
3965                         err = validate_goto_chain(priv, flow, act, action,
3966                                                   extack);
3967                         if (err)
3968                                 return err;
3969
3970                         action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3971                         attr->dest_chain = act->chain_index;
3972                         break;
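                        /* A sketch of a rule that takes this path (the chain
                         * number is arbitrary):
                         *
                         *   tc filter add dev enp8s0f0_0 ingress chain 0 \
                         *      flower action goto chain 3
                         *
                         * The dest_chain recorded here is consumed further
                         * down, where the FWD_DEST action bit is set for the
                         * jump.
                         */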
3973                 case FLOW_ACTION_CT:
3974                         if (flow_flag_test(flow, SAMPLE)) {
3975                                 NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported");
3976                                 return -EOPNOTSUPP;
3977                         }
3978                         err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
3979                         if (err)
3980                                 return err;
3981
3982                         flow_flag_set(flow, CT);
3983                         esw_attr->split_count = esw_attr->out_count;
3984                         break;
3985                 case FLOW_ACTION_SAMPLE:
3986                         if (flow_flag_test(flow, CT)) {
3987                                 NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported");
3988                                 return -EOPNOTSUPP;
3989                         }
3990                         sample.rate = act->sample.rate;
3991                         sample.group_num = act->sample.psample_group->group_num;
3992                         if (act->sample.truncate)
3993                                 sample.trunc_size = act->sample.trunc_size;
3994                         flow_flag_set(flow, SAMPLE);
3995                         break;
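                        /* A sketch of a sample rule (rate, group and trunc
                         * values are arbitrary):
                         *
                         *   tc filter add dev enp8s0f0_0 ingress flower \
                         *      action sample rate 100 group 5 trunc 128 \
                         *      action mirred egress redirect dev enp8s0f0_1
                         *
                         * The psample group, rate and optional truncation size
                         * captured here are copied into esw_attr->sample once
                         * parsing completes without errors.
                         */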
3996                 default:
3997                         NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
3998                         return -EOPNOTSUPP;
3999                 }
4000         }
4001
4002         /* always set IP version for indirect table handling */
4003         attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
4004
4005         if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
4006             action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
4007                 /* For prio tag mode, replace vlan pop with a vlan prio
4008                  * tag rewrite.
4009                  */
4010                 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4011                 err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
4012                                                        &action, extack);
4013                 if (err)
4014                         return err;
4015         }
4016
4017         if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
4018             hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
4019                 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
4020                                             parse_attr, hdrs, &action, extack);
4021                 if (err)
4022                         return err;
4023                 /* In case all pedit actions are skipped, remove the MOD_HDR
4024                  * flag. We might have set split_count either by pedit or
4025                  * pop/push. If there is no pop/push either, reset it too.
4026                  */
4027                 if (parse_attr->mod_hdr_acts.num_actions == 0) {
4028                         action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4029                         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4030                         if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
4031                               (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
4032                                 esw_attr->split_count = 0;
4033                 }
4034         }
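        /* The header rewrites handled in the block above come from pedit,
         * e.g. (sketch, hypothetical devices):
         *
         *   tc filter add dev enp8s0f0_0 ingress flower ip_proto tcp \
         *      action pedit ex munge ip ttl set 63 pipe \
         *      action csum ip pipe \
         *      action mirred egress redirect dev enp8s0f0_1
         *
         * Each munge is translated into zero or more mod_hdr actions; when
         * every one of them is skipped, the MOD_HDR flag is cleared again.
         */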
4035
4036         attr->action = action;
4037         if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
4038                 return -EOPNOTSUPP;
4039
4040         if (attr->dest_chain) {
4041                 if (decap) {
4042                         /* This could be supported by creating a mapping for
4043                          * the tunnel device only (without the tunnel), and
4044                          * setting this tunnel id on this decap flow.
4045                          *
4046                          * On restore (miss), we'll just set this saved tunnel
4047                          * device.
4048                          */
4049
4050                         NL_SET_ERR_MSG(extack,
4051                                        "Decap with goto isn't supported");
4052                         netdev_warn(priv->netdev,
4053                                     "Decap with goto isn't supported");
4054                         return -EOPNOTSUPP;
4055                 }
4056
4057                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
4058         }
4059
4060         if (!(attr->action &
4061               (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
4062                 NL_SET_ERR_MSG_MOD(extack,
4063                                    "Rule must have at least one forward/drop action");
4064                 return -EOPNOTSUPP;
4065         }
4066
4067         if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
4068                 NL_SET_ERR_MSG_MOD(extack,
4069                                    "current firmware doesn't support split rule for port mirroring");
4070                 netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
4071                 return -EOPNOTSUPP;
4072         }
4073
4074         /* Allocate sample attribute only when there is a sample action and
4075          * no errors after parsing.
4076          */
4077         if (flow_flag_test(flow, SAMPLE)) {
4078                 esw_attr->sample = kzalloc(sizeof(*esw_attr->sample), GFP_KERNEL);
4079                 if (!esw_attr->sample)
4080                         return -ENOMEM;
4081                 *esw_attr->sample = sample;
4082         }
4083
4084         return 0;
4085 }
4086
4087 static void get_flags(int flags, unsigned long *flow_flags)
4088 {
4089         unsigned long __flow_flags = 0;
4090
4091         if (flags & MLX5_TC_FLAG(INGRESS))
4092                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
4093         if (flags & MLX5_TC_FLAG(EGRESS))
4094                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
4095
4096         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
4097                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4098         if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
4099                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4100         if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
4101                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
4102
4103         *flow_flags = __flow_flags;
4104 }
4105
4106 static const struct rhashtable_params tc_ht_params = {
4107         .head_offset = offsetof(struct mlx5e_tc_flow, node),
4108         .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
4109         .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
4110         .automatic_shrinking = true,
4111 };
4112
4113 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
4114                                     unsigned long flags)
4115 {
4116         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4117         struct mlx5e_rep_priv *uplink_rpriv;
4118
4119         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
4120                 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
4121                 return &uplink_rpriv->uplink_priv.tc_ht;
4122         } else /* NIC offload */
4123                 return &priv->fs.tc.ht;
4124 }
4125
4126 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
4127 {
4128         struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
4129         struct mlx5_flow_attr *attr = flow->attr;
4130         bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4131                 flow_flag_test(flow, INGRESS);
4132         bool act_is_encap = !!(attr->action &
4133                                MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4134         bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
4135                                                 MLX5_DEVCOM_ESW_OFFLOADS);
4136
4137         if (!esw_paired)
4138                 return false;
4139
4140         if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
4141              mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
4142             (is_rep_ingress || act_is_encap))
4143                 return true;
4144
4145         return false;
4146 }
4147
4148 struct mlx5_flow_attr *
4149 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
4150 {
4151         u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB)  ?
4152                                 sizeof(struct mlx5_esw_flow_attr) :
4153                                 sizeof(struct mlx5_nic_flow_attr);
4154         struct mlx5_flow_attr *attr;
4155
4156         return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
4157 }
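/* The single kzalloc above relies on the namespace-specific attribute living
 * directly after the base struct, roughly:
 *
 *   +----------------------------+ <- attr
 *   | struct mlx5_flow_attr      |
 *   +----------------------------+ <- attr + 1
 *   | struct mlx5_esw_flow_attr  |   (or struct mlx5_nic_flow_attr for the
 *   +----------------------------+    kernel namespace)
 *
 * so the esw_attr/nic_attr accessors resolve to the trailing bytes of one
 * allocation rather than to a separately allocated object.
 */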
4158
4159 static int
4160 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4161                  struct flow_cls_offload *f, unsigned long flow_flags,
4162                  struct mlx5e_tc_flow_parse_attr **__parse_attr,
4163                  struct mlx5e_tc_flow **__flow)
4164 {
4165         struct mlx5e_tc_flow_parse_attr *parse_attr;
4166         struct mlx5_flow_attr *attr;
4167         struct mlx5e_tc_flow *flow;
4168         int err = -ENOMEM;
4169         int out_index;
4170
4171         flow = kzalloc(sizeof(*flow), GFP_KERNEL);
4172         parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4173         if (!parse_attr || !flow)
4174                 goto err_free;
4175
4176         flow->flags = flow_flags;
4177         flow->cookie = f->cookie;
4178         flow->priv = priv;
4179
4180         attr = mlx5_alloc_flow_attr(get_flow_name_space(flow));
4181         if (!attr)
4182                 goto err_free;
4183
4184         flow->attr = attr;
4185
4186         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4187                 INIT_LIST_HEAD(&flow->encaps[out_index].list);
4188         INIT_LIST_HEAD(&flow->hairpin);
4189         INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4190         refcount_set(&flow->refcnt, 1);
4191         init_completion(&flow->init_done);
4192
4193         *__flow = flow;
4194         *__parse_attr = parse_attr;
4195
4196         return 0;
4197
4198 err_free:
4199         kfree(flow);
4200         kvfree(parse_attr);
4201         return err;
4202 }
4203
4204 static void
4205 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4206                      struct mlx5e_tc_flow_parse_attr *parse_attr,
4207                      struct flow_cls_offload *f)
4208 {
4209         attr->parse_attr = parse_attr;
4210         attr->chain = f->common.chain_index;
4211         attr->prio = f->common.prio;
4212 }
4213
4214 static void
4215 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4216                          struct mlx5e_priv *priv,
4217                          struct mlx5e_tc_flow_parse_attr *parse_attr,
4218                          struct flow_cls_offload *f,
4219                          struct mlx5_eswitch_rep *in_rep,
4220                          struct mlx5_core_dev *in_mdev)
4221 {
4222         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4223         struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4224
4225         mlx5e_flow_attr_init(attr, parse_attr, f);
4226
4227         esw_attr->in_rep = in_rep;
4228         esw_attr->in_mdev = in_mdev;
4229
4230         if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4231             MLX5_COUNTER_SOURCE_ESWITCH)
4232                 esw_attr->counter_dev = in_mdev;
4233         else
4234                 esw_attr->counter_dev = priv->mdev;
4235 }
4236
4237 static struct mlx5e_tc_flow *
4238 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4239                      struct flow_cls_offload *f,
4240                      unsigned long flow_flags,
4241                      struct net_device *filter_dev,
4242                      struct mlx5_eswitch_rep *in_rep,
4243                      struct mlx5_core_dev *in_mdev)
4244 {
4245         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4246         struct netlink_ext_ack *extack = f->common.extack;
4247         struct mlx5e_tc_flow_parse_attr *parse_attr;
4248         struct mlx5e_tc_flow *flow;
4249         int attr_size, err;
4250
4251         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4252         attr_size  = sizeof(struct mlx5_esw_flow_attr);
4253         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4254                                &parse_attr, &flow);
4255         if (err)
4256                 goto out;
4257
4258         parse_attr->filter_dev = filter_dev;
4259         mlx5e_flow_esw_attr_init(flow->attr,
4260                                  priv, parse_attr,
4261                                  f, in_rep, in_mdev);
4262
4263         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4264                                f, filter_dev);
4265         if (err)
4266                 goto err_free;
4267
4268         /* Action validation depends on parsing the ct matches first */
4269         err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4270                                    &flow->attr->ct_attr, extack);
4271         if (err)
4272                 goto err_free;
4273
4274         err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
4275         if (err)
4276                 goto err_free;
4277
4278         err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4279         complete_all(&flow->init_done);
4280         if (err) {
4281                 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4282                         goto err_free;
4283
4284                 add_unready_flow(flow);
4285         }
4286
4287         return flow;
4288
4289 err_free:
4290         mlx5e_flow_put(priv, flow);
4291 out:
4292         return ERR_PTR(err);
4293 }
4294
4295 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4296                                       struct mlx5e_tc_flow *flow,
4297                                       unsigned long flow_flags)
4298 {
4299         struct mlx5e_priv *priv = flow->priv, *peer_priv;
4300         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4301         struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4302         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4303         struct mlx5e_tc_flow_parse_attr *parse_attr;
4304         struct mlx5e_rep_priv *peer_urpriv;
4305         struct mlx5e_tc_flow *peer_flow;
4306         struct mlx5_core_dev *in_mdev;
4307         int err = 0;
4308
4309         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4310         if (!peer_esw)
4311                 return -ENODEV;
4312
4313         peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4314         peer_priv = netdev_priv(peer_urpriv->netdev);
4315
4316         /* in_mdev is set to the mdev from which the packet originated.
4317          * So packets redirected to the uplink use the same mdev as the
4318          * original flow, and packets redirected from the uplink use the
4319          * peer mdev.
4320          */
4321         if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
4322                 in_mdev = peer_priv->mdev;
4323         else
4324                 in_mdev = priv->mdev;
4325
4326         parse_attr = flow->attr->parse_attr;
4327         peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4328                                          parse_attr->filter_dev,
4329                                          attr->in_rep, in_mdev);
4330         if (IS_ERR(peer_flow)) {
4331                 err = PTR_ERR(peer_flow);
4332                 goto out;
4333         }
4334
4335         flow->peer_flow = peer_flow;
4336         flow_flag_set(flow, DUP);
4337         mutex_lock(&esw->offloads.peer_mutex);
4338         list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4339         mutex_unlock(&esw->offloads.peer_mutex);
4340
4341 out:
4342         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4343         return err;
4344 }
4345
4346 static int
4347 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4348                    struct flow_cls_offload *f,
4349                    unsigned long flow_flags,
4350                    struct net_device *filter_dev,
4351                    struct mlx5e_tc_flow **__flow)
4352 {
4353         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4354         struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4355         struct mlx5_core_dev *in_mdev = priv->mdev;
4356         struct mlx5e_tc_flow *flow;
4357         int err;
4358
4359         flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4360                                     in_mdev);
4361         if (IS_ERR(flow))
4362                 return PTR_ERR(flow);
4363
4364         if (is_peer_flow_needed(flow)) {
4365                 err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4366                 if (err) {
4367                         mlx5e_tc_del_fdb_flow(priv, flow);
4368                         goto out;
4369                 }
4370         }
4371
4372         *__flow = flow;
4373
4374         return 0;
4375
4376 out:
4377         return err;
4378 }
4379
4380 static int
4381 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4382                    struct flow_cls_offload *f,
4383                    unsigned long flow_flags,
4384                    struct net_device *filter_dev,
4385                    struct mlx5e_tc_flow **__flow)
4386 {
4387         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4388         struct netlink_ext_ack *extack = f->common.extack;
4389         struct mlx5e_tc_flow_parse_attr *parse_attr;
4390         struct mlx5e_tc_flow *flow;
4391         int attr_size, err;
4392
4393         if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4394                 if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4395                         return -EOPNOTSUPP;
4396         } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4397                 return -EOPNOTSUPP;
4398         }
4399
4400         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4401         attr_size  = sizeof(struct mlx5_nic_flow_attr);
4402         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4403                                &parse_attr, &flow);
4404         if (err)
4405                 goto out;
4406
4407         parse_attr->filter_dev = filter_dev;
4408         mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4409
4410         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4411                                f, filter_dev);
4412         if (err)
4413                 goto err_free;
4414
4415         err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4416                                    &flow->attr->ct_attr, extack);
4417         if (err)
4418                 goto err_free;
4419
4420         err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
4421         if (err)
4422                 goto err_free;
4423
4424         err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
4425         if (err)
4426                 goto err_free;
4427
4428         flow_flag_set(flow, OFFLOADED);
4429         *__flow = flow;
4430
4431         return 0;
4432
4433 err_free:
4434         flow_flag_set(flow, FAILED);
4435         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4436         mlx5e_flow_put(priv, flow);
4437 out:
4438         return err;
4439 }
4440
4441 static int
4442 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4443                   struct flow_cls_offload *f,
4444                   unsigned long flags,
4445                   struct net_device *filter_dev,
4446                   struct mlx5e_tc_flow **flow)
4447 {
4448         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4449         unsigned long flow_flags;
4450         int err;
4451
4452         get_flags(flags, &flow_flags);
4453
4454         if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4455                 return -EOPNOTSUPP;
4456
4457         if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4458                 err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4459                                          filter_dev, flow);
4460         else
4461                 err = mlx5e_add_nic_flow(priv, f, flow_flags,
4462                                          filter_dev, flow);
4463
4464         return err;
4465 }
4466
4467 static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4468                                            struct mlx5e_rep_priv *rpriv)
4469 {
4470         /* An offloaded flow rule is allowed to be duplicated on a non-uplink
4471          * representor sharing a tc block with other slaves of a lag device.
4472          * Rpriv can be NULL if this function is called from NIC mode.
4473          */
4474         return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4475 }
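/* A sketch of a setup that takes the duplicate-allowed path (device names
 * are hypothetical): two non-uplink representors enslaved to a bond share
 * the bond's tc block, so the same filter is replayed once per slave:
 *
 *   ip link add bond0 type bond mode active-backup
 *   ip link set enp8s0f0_0 master bond0
 *   ip link set enp8s0f1_0 master bond0
 *   tc qdisc add dev bond0 ingress
 *   tc filter add dev bond0 ingress flower action drop
 *
 * Both replays carry the same cookie; mlx5e_configure_flower() below
 * accepts the second one silently instead of returning -EEXIST.
 */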
4476
4477 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4478                            struct flow_cls_offload *f, unsigned long flags)
4479 {
4480         struct netlink_ext_ack *extack = f->common.extack;
4481         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4482         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4483         struct mlx5e_tc_flow *flow;
4484         int err = 0;
4485
4486         if (!mlx5_esw_hold(priv->mdev))
4487                 return -EAGAIN;
4488
4489         mlx5_esw_get(priv->mdev);
4490
4491         rcu_read_lock();
4492         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4493         if (flow) {
4494                 /* The same flow rule was offloaded to a non-uplink representor
4495                  * sharing the tc block; just return 0.
4496                  */
4497                 if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4498                         goto rcu_unlock;
4499
4500                 NL_SET_ERR_MSG_MOD(extack,
4501                                    "flow cookie already exists, ignoring");
4502                 netdev_warn_once(priv->netdev,
4503                                  "flow cookie %lx already exists, ignoring\n",
4504                                  f->cookie);
4505                 err = -EEXIST;
4506                 goto rcu_unlock;
4507         }
4508 rcu_unlock:
4509         rcu_read_unlock();
4510         if (flow)
4511                 goto out;
4512
4513         trace_mlx5e_configure_flower(f);
4514         err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4515         if (err)
4516                 goto out;
4517
4518         /* If the flow rule is offloaded to a non-uplink representor sharing
4519          * the tc block, record the flow's owner dev.
4520          */
4521         if (is_flow_rule_duplicate_allowed(dev, rpriv))
4522                 flow->orig_dev = dev;
4523
4524         err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4525         if (err)
4526                 goto err_free;
4527
4528         mlx5_esw_release(priv->mdev);
4529         return 0;
4530
4531 err_free:
4532         mlx5e_flow_put(priv, flow);
4533 out:
4534         mlx5_esw_put(priv->mdev);
4535         mlx5_esw_release(priv->mdev);
4536         return err;
4537 }
4538
4539 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
4540 {
4541         bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
4542         bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
4543
4544         return flow_flag_test(flow, INGRESS) == dir_ingress &&
4545                 flow_flag_test(flow, EGRESS) == dir_egress;
4546 }
4547
4548 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
4549                         struct flow_cls_offload *f, unsigned long flags)
4550 {
4551         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4552         struct mlx5e_tc_flow *flow;
4553         int err;
4554
4555         rcu_read_lock();
4556         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4557         if (!flow || !same_flow_direction(flow, flags)) {
4558                 err = -EINVAL;
4559                 goto errout;
4560         }
4561
4562         /* Only delete the flow if it doesn't have the MLX5E_TC_FLOW_DELETED
4563          * flag set.
4564          */
4565         if (flow_flag_test_and_set(flow, DELETED)) {
4566                 err = -EINVAL;
4567                 goto errout;
4568         }
4569         rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4570         rcu_read_unlock();
4571
4572         trace_mlx5e_delete_flower(f);
4573         mlx5e_flow_put(priv, flow);
4574
4575         mlx5_esw_put(priv->mdev);
4576         return 0;
4577
4578 errout:
4579         rcu_read_unlock();
4580         return err;
4581 }
4582
4583 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4584                        struct flow_cls_offload *f, unsigned long flags)
4585 {
4586         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4587         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4588         struct mlx5_eswitch *peer_esw;
4589         struct mlx5e_tc_flow *flow;
4590         struct mlx5_fc *counter;
4591         u64 lastuse = 0;
4592         u64 packets = 0;
4593         u64 bytes = 0;
4594         int err = 0;
4595
4596         rcu_read_lock();
4597         flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4598                                                 tc_ht_params));
4599         rcu_read_unlock();
4600         if (IS_ERR(flow))
4601                 return PTR_ERR(flow);
4602
4603         if (!same_flow_direction(flow, flags)) {
4604                 err = -EINVAL;
4605                 goto errout;
4606         }
4607
4608         if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
4609                 counter = mlx5e_tc_get_counter(flow);
4610                 if (!counter)
4611                         goto errout;
4612
4613                 mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4614         }
4615
4616         /* Under multipath it's possible for one rule to be currently
4617          * un-offloaded while the other rule is offloaded.
4618          */
4619         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4620         if (!peer_esw)
4621                 goto out;
4622
4623         if (flow_flag_test(flow, DUP) &&
4624             flow_flag_test(flow->peer_flow, OFFLOADED)) {
4625                 u64 bytes2;
4626                 u64 packets2;
4627                 u64 lastuse2;
4628
4629                 counter = mlx5e_tc_get_counter(flow->peer_flow);
4630                 if (!counter)
4631                         goto no_peer_counter;
4632                 mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
4633
4634                 bytes += bytes2;
4635                 packets += packets2;
4636                 lastuse = max_t(u64, lastuse, lastuse2);
4637         }
4638
4639 no_peer_counter:
4640         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4641 out:
4642         flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
4643                           FLOW_ACTION_HW_STATS_DELAYED);
4644         trace_mlx5e_stats_flower(f);
4645 errout:
4646         mlx5e_flow_put(priv, flow);
4647         return err;
4648 }
4649
4650 static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
4651                                struct netlink_ext_ack *extack)
4652 {
4653         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4654         struct mlx5_eswitch *esw;
4655         u32 rate_mbps = 0;
4656         u16 vport_num;
4657         int err;
4658
4659         vport_num = rpriv->rep->vport;
4660         if (vport_num >= MLX5_VPORT_ECPF) {
4661                 NL_SET_ERR_MSG_MOD(extack,
4662                                    "Ingress rate limit is supported only for Eswitch ports connected to VFs");
4663                 return -EOPNOTSUPP;
4664         }
4665
4666         esw = priv->mdev->priv.eswitch;
4667         /* rate is given in bytes/sec.
4668          * First convert to bits/sec and then round to the nearest mbit/sec.
4669          * mbit means million bits.
4670          * Moreover, if rate is non-zero we choose to configure a minimum of
4671          * 1 mbit/sec.
4672          */
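        /* Worked example: rate = 1500000 bytes/sec gives
         * 1500000 * 8 + 500000 = 12500000, and do_div by 10^6 yields
         * rate_mbps = 12. A small non-zero rate that would round down to 0
         * (e.g. 50000 bytes/sec) is clamped to 1 mbit/sec by the max_t below.
         */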
4673         if (rate) {
4674                 rate = (rate * BITS_PER_BYTE) + 500000;
4675                 do_div(rate, 1000000);
4676                 rate_mbps = max_t(u32, rate, 1);
4677         }
4678
4679         err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
4680         if (err)
4681                 NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
4682
4683         return err;
4684 }
4685
4686 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
4687                                         struct flow_action *flow_action,
4688                                         struct netlink_ext_ack *extack)
4689 {
4690         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4691         const struct flow_action_entry *act;
4692         int err;
4693         int i;
4694
4695         if (!flow_action_has_entries(flow_action)) {
4696                 NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
4697                 return -EINVAL;
4698         }
4699
4700         if (!flow_offload_has_one_action(flow_action)) {
4701                 NL_SET_ERR_MSG_MOD(extack, "matchall policing supports only a single action");
4702                 return -EOPNOTSUPP;
4703         }
4704
4705         if (!flow_action_basic_hw_stats_check(flow_action, extack))
4706                 return -EOPNOTSUPP;
4707
4708         flow_action_for_each(i, act, flow_action) {
4709                 switch (act->id) {
4710                 case FLOW_ACTION_POLICE:
4711                         if (act->police.rate_pkt_ps) {
4712                                 NL_SET_ERR_MSG_MOD(extack, "QoS offload does not support packets per second");
4713                                 return -EOPNOTSUPP;
4714                         }
4715                         err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
4716                         if (err)
4717                                 return err;
4718
4719                         rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
4720                         break;
4721                 default:
4722                         NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
4723                         return -EOPNOTSUPP;
4724                 }
4725         }
4726
4727         return 0;
4728 }
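/* The only matchall shape accepted above is a single byte-rate police
 * action, e.g. (sketch, hypothetical device):
 *
 *   tc filter add dev enp8s0f0_0 ingress prio 1 protocol all matchall \
 *      action police rate 100mbit burst 16k conform-exceed drop
 *
 * A packets-per-second police (rate_pkt_ps) or any additional action is
 * rejected with -EOPNOTSUPP.
 */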
4729
4730 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
4731                                 struct tc_cls_matchall_offload *ma)
4732 {
4733         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4734         struct netlink_ext_ack *extack = ma->common.extack;
4735
4736         if (!mlx5_esw_qos_enabled(esw)) {
4737                 NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device");
4738                 return -EOPNOTSUPP;
4739         }
4740
4741         if (ma->common.prio != 1) {
4742                 NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
4743                 return -EINVAL;
4744         }
4745
4746         return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
4747 }
4748
4749 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
4750                              struct tc_cls_matchall_offload *ma)
4751 {
4752         struct netlink_ext_ack *extack = ma->common.extack;
4753
4754         return apply_police_params(priv, 0, extack);
4755 }
4756
4757 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
4758                              struct tc_cls_matchall_offload *ma)
4759 {
4760         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4761         struct rtnl_link_stats64 cur_stats;
4762         u64 dbytes;
4763         u64 dpkts;
4764
4765         cur_stats = priv->stats.vf_vport;
4766         dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
4767         dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
4768         rpriv->prev_vf_vport_stats = cur_stats;
4769         flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
4770                           FLOW_ACTION_HW_STATS_DELAYED);
4771 }
4772
4773 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
4774                                               struct mlx5e_priv *peer_priv)
4775 {
4776         struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
4777         struct mlx5e_hairpin_entry *hpe, *tmp;
4778         LIST_HEAD(init_wait_list);
4779         u16 peer_vhca_id;
4780         int bkt;
4781
4782         if (!same_hw_devs(priv, peer_priv))
4783                 return;
4784
4785         peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
4786
4787         mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
4788         hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
4789                 if (refcount_inc_not_zero(&hpe->refcnt))
4790                         list_add(&hpe->dead_peer_wait_list, &init_wait_list);
4791         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
4792
4793         list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
4794                 wait_for_completion(&hpe->res_ready);
4795                 if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
4796                         mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
4797
4798                 mlx5e_hairpin_put(priv, hpe);
4799         }
4800 }
4801
4802 static int mlx5e_tc_netdev_event(struct notifier_block *this,
4803                                  unsigned long event, void *ptr)
4804 {
4805         struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
4806         struct mlx5e_flow_steering *fs;
4807         struct mlx5e_priv *peer_priv;
4808         struct mlx5e_tc_table *tc;
4809         struct mlx5e_priv *priv;
4810
4811         if (ndev->netdev_ops != &mlx5e_netdev_ops ||
4812             event != NETDEV_UNREGISTER ||
4813             ndev->reg_state == NETREG_REGISTERED)
4814                 return NOTIFY_DONE;
4815
4816         tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
4817         fs = container_of(tc, struct mlx5e_flow_steering, tc);
4818         priv = container_of(fs, struct mlx5e_priv, fs);
4819         peer_priv = netdev_priv(ndev);
4820         if (priv == peer_priv ||
4821             !(priv->netdev->features & NETIF_F_HW_TC))
4822                 return NOTIFY_DONE;
4823
4824         mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
4825
4826         return NOTIFY_DONE;
4827 }
4828
4829 static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
4830 {
4831         int tc_grp_size, tc_tbl_size;
4832         u32 max_flow_counter;
4833
4834         max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
4835                             MLX5_CAP_GEN(dev, max_flow_counter_15_0);
4836
4837         tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
4838
4839         tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
4840                             BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
4841
4842         return tc_tbl_size;
4843 }
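/* Worked example (capability values are hypothetical): with
 * max_flow_counter = 32768 and log_max_ft_size = 16, the group size is
 * min(32768, 2^18) = 32768 and the table size is
 * min(32768 * 4, 2^16) = 65536 entries.
 */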
4844
4845 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
4846 {
4847         struct mlx5e_tc_table *tc = &priv->fs.tc;
4848         struct mlx5_core_dev *dev = priv->mdev;
4849         struct mapping_ctx *chains_mapping;
4850         struct mlx5_chains_attr attr = {};
4851         int err;
4852
4853         mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
4854         mutex_init(&tc->t_lock);
4855         mutex_init(&tc->hairpin_tbl_lock);
4856         hash_init(tc->hairpin_tbl);
4857
4858         err = rhashtable_init(&tc->ht, &tc_ht_params);
4859         if (err)
4860                 return err;
4861
4862         lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
4863
4864         chains_mapping = mapping_create(sizeof(struct mlx5_mapped_obj),
4865                                         MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
4866         if (IS_ERR(chains_mapping)) {
4867                 err = PTR_ERR(chains_mapping);
4868                 goto err_mapping;
4869         }
4870         tc->mapping = chains_mapping;
4871
4872         if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
4873                 attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
4874                         MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
4875         attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
4876         attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
4877         attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
4878         attr.default_ft = mlx5e_vlan_get_flowtable(priv->fs.vlan);
4879         attr.mapping = chains_mapping;
4880
4881         tc->chains = mlx5_chains_create(dev, &attr);
4882         if (IS_ERR(tc->chains)) {
4883                 err = PTR_ERR(tc->chains);
4884                 goto err_chains;
4885         }
4886
4887         tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
4888                                  MLX5_FLOW_NAMESPACE_KERNEL);
4889
4890         tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
4891         err = register_netdevice_notifier_dev_net(priv->netdev,
4892                                                   &tc->netdevice_nb,
4893                                                   &tc->netdevice_nn);
4894         if (err) {
4895                 tc->netdevice_nb.notifier_call = NULL;
4896                 mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
4897                 goto err_reg;
4898         }
4899
4900         return 0;
4901
4902 err_reg:
4903         mlx5_tc_ct_clean(tc->ct);
4904         mlx5_chains_destroy(tc->chains);
4905 err_chains:
4906         mapping_destroy(chains_mapping);
4907 err_mapping:
4908         rhashtable_destroy(&tc->ht);
4909         return err;
4910 }
4911
4912 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
4913 {
4914         struct mlx5e_tc_flow *flow = ptr;
4915         struct mlx5e_priv *priv = flow->priv;
4916
4917         mlx5e_tc_del_flow(priv, flow);
4918         kfree(flow);
4919 }
4920
4921 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
4922 {
4923         struct mlx5e_tc_table *tc = &priv->fs.tc;
4924
4925         if (tc->netdevice_nb.notifier_call)
4926                 unregister_netdevice_notifier_dev_net(priv->netdev,
4927                                                       &tc->netdevice_nb,
4928                                                       &tc->netdevice_nn);
4929
4930         mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
4931         mutex_destroy(&tc->hairpin_tbl_lock);
4932
4933         rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
4934
4935         if (!IS_ERR_OR_NULL(tc->t)) {
4936                 mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
4937                 tc->t = NULL;
4938         }
4939         mutex_destroy(&tc->t_lock);
4940
4941         mlx5_tc_ct_clean(tc->ct);
4942         mapping_destroy(tc->mapping);
4943         mlx5_chains_destroy(tc->chains);
4944 }
4945
4946 int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
4947 {
4948         const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
4949         struct mlx5_rep_uplink_priv *uplink_priv;
4950         struct mlx5e_rep_priv *rpriv;
4951         struct mapping_ctx *mapping;
4952         struct mlx5_eswitch *esw;
4953         struct mlx5e_priv *priv;
4954         int err = 0;
4955
4956         uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
4957         rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
4958         priv = netdev_priv(rpriv->netdev);
4959         esw = priv->mdev->priv.eswitch;
4960
4961         uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
4962                                                esw_chains(esw),
4963                                                &esw->offloads.mod_hdr,
4964                                                MLX5_FLOW_NAMESPACE_FDB);
4965
4966 #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
4967         uplink_priv->esw_psample = mlx5_esw_sample_init(netdev_priv(priv->netdev));
4968 #endif
4969
4970         mapping = mapping_create(sizeof(struct tunnel_match_key),
4971                                  TUNNEL_INFO_BITS_MASK, true);
4972         if (IS_ERR(mapping)) {
4973                 err = PTR_ERR(mapping);
4974                 goto err_tun_mapping;
4975         }
4976         uplink_priv->tunnel_mapping = mapping;
4977
4978         /* 0xFFF is reserved for the stack devices' slow path table mark */
4979         mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
4980         if (IS_ERR(mapping)) {
4981                 err = PTR_ERR(mapping);
4982                 goto err_enc_opts_mapping;
4983         }
4984         uplink_priv->tunnel_enc_opts_mapping = mapping;
4985
4986         err = rhashtable_init(tc_ht, &tc_ht_params);
4987         if (err)
4988                 goto err_ht_init;
4989
4990         lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
4991
4992         uplink_priv->encap = mlx5e_tc_tun_init(priv);
4993         if (IS_ERR(uplink_priv->encap)) {
4994                 err = PTR_ERR(uplink_priv->encap);
4995                 goto err_register_fib_notifier;
4996         }
4997
4998         return 0;
4999
5000 err_register_fib_notifier:
5001         rhashtable_destroy(tc_ht);
5002 err_ht_init:
5003         mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5004 err_enc_opts_mapping:
5005         mapping_destroy(uplink_priv->tunnel_mapping);
5006 err_tun_mapping:
5007 #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
5008         mlx5_esw_sample_cleanup(uplink_priv->esw_psample);
5009 #endif
5010         mlx5_tc_ct_clean(uplink_priv->ct_priv);
5011         netdev_warn(priv->netdev,
5012                     "Failed to initialize tc (eswitch), err: %d", err);
5013         return err;
5014 }
5015
5016 void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
5017 {
5018         struct mlx5_rep_uplink_priv *uplink_priv;
5019
5020         uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
5021
5022         rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
5023         mlx5e_tc_tun_cleanup(uplink_priv->encap);
5024
5025         mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5026         mapping_destroy(uplink_priv->tunnel_mapping);
5027
5028 #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
5029         mlx5_esw_sample_cleanup(uplink_priv->esw_psample);
5030 #endif
5031         mlx5_tc_ct_clean(uplink_priv->ct_priv);
5032 }
5033
5034 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
5035 {
5036         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5037
5038         return atomic_read(&tc_ht->nelems);
5039 }
5040
5041 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
5042 {
5043         struct mlx5e_tc_flow *flow, *tmp;
5044
5045         list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
5046                 __mlx5e_tc_del_fdb_peer_flow(flow);
5047 }
5048
5049 void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
5050 {
5051         struct mlx5_rep_uplink_priv *rpriv =
5052                 container_of(work, struct mlx5_rep_uplink_priv,
5053                              reoffload_flows_work);
5054         struct mlx5e_tc_flow *flow, *tmp;
5055
5056         mutex_lock(&rpriv->unready_flows_lock);
5057         list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
5058                 if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
5059                         unready_flow_del(flow);
5060         }
5061         mutex_unlock(&rpriv->unready_flows_lock);
5062 }
5063
5064 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
5065                                      struct flow_cls_offload *cls_flower,
5066                                      unsigned long flags)
5067 {
5068         switch (cls_flower->command) {
5069         case FLOW_CLS_REPLACE:
5070                 return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
5071                                               flags);
5072         case FLOW_CLS_DESTROY:
5073                 return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
5074                                            flags);
5075         case FLOW_CLS_STATS:
5076                 return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
5077                                           flags);
5078         default:
5079                 return -EOPNOTSUPP;
5080         }
5081 }
5082
5083 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
5084                             void *cb_priv)
5085 {
5086         unsigned long flags = MLX5_TC_FLAG(INGRESS);
5087         struct mlx5e_priv *priv = cb_priv;
5088
5089         if (!priv->netdev || !netif_device_present(priv->netdev))
5090                 return -EOPNOTSUPP;
5091
5092         if (mlx5e_is_uplink_rep(priv))
5093                 flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
5094         else
5095                 flags |= MLX5_TC_FLAG(NIC_OFFLOAD);
5096
5097         switch (type) {
5098         case TC_SETUP_CLSFLOWER:
5099                 return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
5100         default:
5101                 return -EOPNOTSUPP;
5102         }
5103 }
5104
5105 bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
5106                          struct sk_buff *skb)
5107 {
5108 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
5109         u32 chain = 0, chain_tag, reg_b, zone_restore_id;
5110         struct mlx5e_priv *priv = netdev_priv(skb->dev);
5111         struct mlx5e_tc_table *tc = &priv->fs.tc;
5112         struct mlx5_mapped_obj mapped_obj;
5113         struct tc_skb_ext *tc_skb_ext;
5114         int err;
5115
5116         reg_b = be32_to_cpu(cqe->ft_metadata);
5117
5118         chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5119
5120         err = mapping_find(tc->mapping, chain_tag, &mapped_obj);
5121         if (err) {
5122                 netdev_dbg(priv->netdev,
5123                            "Couldn't find chain for chain tag: %d, err: %d\n",
5124                            chain_tag, err);
5125                 return false;
5126         }
5127
5128         if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
5129                 chain = mapped_obj.chain;
5130                 tc_skb_ext = tc_skb_ext_alloc(skb);
5131                 if (WARN_ON(!tc_skb_ext))
5132                         return false;
5133
5134                 tc_skb_ext->chain = chain;
5135
5136                 zone_restore_id = (reg_b >> REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
5137                         ESW_ZONE_ID_MASK;
5138
5139                 if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
5140                                               zone_restore_id))
5141                         return false;
5142         } else {
5143                 netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
5144                 return false;
5145         }
5146 #endif /* CONFIG_NET_TC_SKB_EXT */
5147
5148         return true;
5149 }
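/* Layout of reg_b (the CQE ft_metadata) as consumed above, assuming the
 * chain tag occupies the low bits and the CT zone restore id sits at
 * REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG):
 *
 *   [ unused | zone_restore_id (& ESW_ZONE_ID_MASK) |
 *     chain_tag (& MLX5E_TC_TABLE_CHAIN_TAG_MASK) ]
 *
 * The chain tag is mapped back to a chain number through tc->mapping and
 * handed to software tc via the skb extension; the zone restore id lets
 * conntrack state be restored on the miss path.
 */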