Merge tag 'core_urgent_for_v6.3_rc4' of git://git.kernel.org/pub/scm/linux/kernel...
[platform/kernel/linux-rpi.git] / drivers / net / ethernet / mellanox / mlx5 / core / en_tc.c
1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <net/flow_dissector.h>
34 #include <net/flow_offload.h>
35 #include <net/sch_generic.h>
36 #include <net/pkt_cls.h>
37 #include <linux/mlx5/fs.h>
38 #include <linux/mlx5/device.h>
39 #include <linux/rhashtable.h>
40 #include <linux/refcount.h>
41 #include <linux/completion.h>
42 #include <net/arp.h>
43 #include <net/ipv6_stubs.h>
44 #include <net/bareudp.h>
45 #include <net/bonding.h>
46 #include <net/dst_metadata.h>
47 #include "en.h"
48 #include "en/tc/post_act.h"
49 #include "en/tc/act_stats.h"
50 #include "en_rep.h"
51 #include "en/rep/tc.h"
52 #include "en/rep/neigh.h"
53 #include "en_tc.h"
54 #include "eswitch.h"
55 #include "fs_core.h"
56 #include "en/port.h"
57 #include "en/tc_tun.h"
58 #include "en/mapping.h"
59 #include "en/tc_ct.h"
60 #include "en/mod_hdr.h"
61 #include "en/tc_tun_encap.h"
62 #include "en/tc/sample.h"
63 #include "en/tc/act/act.h"
64 #include "en/tc/post_meter.h"
65 #include "lib/devcom.h"
66 #include "lib/geneve.h"
67 #include "lib/fs_chains.h"
68 #include "diag/en_tc_tracepoint.h"
69 #include <asm/div64.h>
70 #include "lag/lag.h"
71 #include "lag/mp.h"
72
/* Sizing constants for the TC flow table; their users are outside this
 * part of the file.
 */
#define MLX5E_TC_TABLE_NUM_GROUPS	4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE	BIT(18)
75
76 struct mlx5e_hairpin_params {
77         struct mlx5_core_dev *mdev;
78         u32 num_queues;
79         u32 queue_size;
80 };
81
82 struct mlx5e_tc_table {
83         /* Protects the dynamic assignment of the t parameter
84          * which is the nic tc root table.
85          */
86         struct mutex                    t_lock;
87         struct mlx5e_priv               *priv;
88         struct mlx5_flow_table          *t;
89         struct mlx5_flow_table          *miss_t;
90         struct mlx5_fs_chains           *chains;
91         struct mlx5e_post_act           *post_act;
92
93         struct rhashtable               ht;
94
95         struct mod_hdr_tbl mod_hdr;
96         struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
97         DECLARE_HASHTABLE(hairpin_tbl, 8);
98
99         struct notifier_block     netdevice_nb;
100         struct netdev_net_notifier      netdevice_nn;
101
102         struct mlx5_tc_ct_priv         *ct;
103         struct mapping_ctx             *mapping;
104         struct mlx5e_hairpin_params    hairpin_params;
105         struct dentry                  *dfs_root;
106
107         /* tc action stats */
108         struct mlx5e_tc_act_stats_handle *action_stats_handle;
109 };
110
111 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
112         [MAPPED_OBJ_TO_REG] = {
113                 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
114                 .moffset = 0,
115                 .mlen = 16,
116         },
117         [VPORT_TO_REG] = {
118                 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
119                 .moffset = 16,
120                 .mlen = 16,
121         },
122         [TUNNEL_TO_REG] = {
123                 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
124                 .moffset = 8,
125                 .mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
126                 .soffset = MLX5_BYTE_OFF(fte_match_param,
127                                          misc_parameters_2.metadata_reg_c_1),
128         },
129         [ZONE_TO_REG] = zone_to_reg_ct,
130         [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
131         [CTSTATE_TO_REG] = ctstate_to_reg_ct,
132         [MARK_TO_REG] = mark_to_reg_ct,
133         [LABELS_TO_REG] = labels_to_reg_ct,
134         [FTEID_TO_REG] = fteid_to_reg_ct,
135         /* For NIC rules we store the restore metadata directly
136          * into reg_b that is passed to SW since we don't
137          * jump between steering domains.
138          */
139         [NIC_MAPPED_OBJ_TO_REG] = {
140                 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
141                 .moffset = 0,
142                 .mlen = 16,
143         },
144         [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
145         [PACKET_COLOR_TO_REG] = packet_color_to_reg,
146 };
147
148 struct mlx5e_tc_jump_state {
149         u32 jump_count;
150         bool jump_target;
151         struct mlx5_flow_attr *jumping_attr;
152
153         enum flow_action_id last_id;
154         u32 last_index;
155 };
156
157 struct mlx5e_tc_table *mlx5e_tc_table_alloc(void)
158 {
159         struct mlx5e_tc_table *tc;
160
161         tc = kvzalloc(sizeof(*tc), GFP_KERNEL);
162         return tc ? tc : ERR_PTR(-ENOMEM);
163 }
164
/* Release a table obtained from mlx5e_tc_table_alloc(). */
void mlx5e_tc_table_free(struct mlx5e_tc_table *tc)
{
	kvfree(tc);
}
169
170 struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc)
171 {
172         return tc->chains;
173 }
174
175 /* To avoid false lock dependency warning set the tc_ht lock
176  * class different than the lock class of the ht being used when deleting
177  * last flow from a group and then deleting a group, we get into del_sw_flow_group()
178  * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but
179  * it's different than the ht->mutex here.
180  */
181 static struct lock_class_key tc_ht_lock_key;
182 static struct lock_class_key tc_ht_wq_key;
183
184 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
185 static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
186 static void mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr);
187
188 void
189 mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
190                             enum mlx5e_tc_attr_to_reg type,
191                             u32 val,
192                             u32 mask)
193 {
194         void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
195         int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
196         int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
197         int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
198         u32 max_mask = GENMASK(match_len - 1, 0);
199         __be32 curr_mask_be, curr_val_be;
200         u32 curr_mask, curr_val;
201
202         fmask = headers_c + soffset;
203         fval = headers_v + soffset;
204
205         memcpy(&curr_mask_be, fmask, 4);
206         memcpy(&curr_val_be, fval, 4);
207
208         curr_mask = be32_to_cpu(curr_mask_be);
209         curr_val = be32_to_cpu(curr_val_be);
210
211         //move to correct offset
212         WARN_ON(mask > max_mask);
213         mask <<= moffset;
214         val <<= moffset;
215         max_mask <<= moffset;
216
217         //zero val and mask
218         curr_mask &= ~max_mask;
219         curr_val &= ~max_mask;
220
221         //add current to mask
222         curr_mask |= mask;
223         curr_val |= val;
224
225         //back to be32 and write
226         curr_mask_be = cpu_to_be32(curr_mask);
227         curr_val_be = cpu_to_be32(curr_val);
228
229         memcpy(fmask, &curr_mask_be, 4);
230         memcpy(fval, &curr_val_be, 4);
231
232         spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
233 }
234
235 void
236 mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
237                                 enum mlx5e_tc_attr_to_reg type,
238                                 u32 *val,
239                                 u32 *mask)
240 {
241         void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
242         int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
243         int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
244         int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
245         u32 max_mask = GENMASK(match_len - 1, 0);
246         __be32 curr_mask_be, curr_val_be;
247         u32 curr_mask, curr_val;
248
249         fmask = headers_c + soffset;
250         fval = headers_v + soffset;
251
252         memcpy(&curr_mask_be, fmask, 4);
253         memcpy(&curr_val_be, fval, 4);
254
255         curr_mask = be32_to_cpu(curr_mask_be);
256         curr_val = be32_to_cpu(curr_val_be);
257
258         *mask = (curr_mask >> moffset) & max_mask;
259         *val = (curr_val >> moffset) & max_mask;
260 }
261
262 int
263 mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
264                                      struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
265                                      enum mlx5_flow_namespace_type ns,
266                                      enum mlx5e_tc_attr_to_reg type,
267                                      u32 data)
268 {
269         int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
270         int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
271         int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
272         char *modact;
273         int err;
274
275         modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
276         if (IS_ERR(modact))
277                 return PTR_ERR(modact);
278
279         /* Firmware has 5bit length field and 0 means 32bits */
280         if (mlen == 32)
281                 mlen = 0;
282
283         MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
284         MLX5_SET(set_action_in, modact, field, mfield);
285         MLX5_SET(set_action_in, modact, offset, moffset);
286         MLX5_SET(set_action_in, modact, length, mlen);
287         MLX5_SET(set_action_in, modact, data, data);
288         err = mod_hdr_acts->num_actions;
289         mod_hdr_acts->num_actions++;
290
291         return err;
292 }
293
294 static struct mlx5e_tc_act_stats_handle  *
295 get_act_stats_handle(struct mlx5e_priv *priv)
296 {
297         struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
298         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
299         struct mlx5_rep_uplink_priv *uplink_priv;
300         struct mlx5e_rep_priv *uplink_rpriv;
301
302         if (is_mdev_switchdev_mode(priv->mdev)) {
303                 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
304                 uplink_priv = &uplink_rpriv->uplink_priv;
305
306                 return uplink_priv->action_stats_handle;
307         }
308
309         return tc->action_stats_handle;
310 }
311
312 struct mlx5e_tc_int_port_priv *
313 mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
314 {
315         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
316         struct mlx5_rep_uplink_priv *uplink_priv;
317         struct mlx5e_rep_priv *uplink_rpriv;
318
319         if (is_mdev_switchdev_mode(priv->mdev)) {
320                 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
321                 uplink_priv = &uplink_rpriv->uplink_priv;
322
323                 return uplink_priv->int_port_priv;
324         }
325
326         return NULL;
327 }
328
329 struct mlx5e_flow_meters *
330 mlx5e_get_flow_meters(struct mlx5_core_dev *dev)
331 {
332         struct mlx5_eswitch *esw = dev->priv.eswitch;
333         struct mlx5_rep_uplink_priv *uplink_priv;
334         struct mlx5e_rep_priv *uplink_rpriv;
335         struct mlx5e_priv *priv;
336
337         if (is_mdev_switchdev_mode(dev)) {
338                 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
339                 uplink_priv = &uplink_rpriv->uplink_priv;
340                 priv = netdev_priv(uplink_rpriv->netdev);
341                 if (!uplink_priv->flow_meters)
342                         uplink_priv->flow_meters =
343                                 mlx5e_flow_meters_init(priv,
344                                                        MLX5_FLOW_NAMESPACE_FDB,
345                                                        uplink_priv->post_act);
346                 if (!IS_ERR(uplink_priv->flow_meters))
347                         return uplink_priv->flow_meters;
348         }
349
350         return NULL;
351 }
352
353 static struct mlx5_tc_ct_priv *
354 get_ct_priv(struct mlx5e_priv *priv)
355 {
356         struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
357         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
358         struct mlx5_rep_uplink_priv *uplink_priv;
359         struct mlx5e_rep_priv *uplink_rpriv;
360
361         if (is_mdev_switchdev_mode(priv->mdev)) {
362                 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
363                 uplink_priv = &uplink_rpriv->uplink_priv;
364
365                 return uplink_priv->ct_priv;
366         }
367
368         return tc->ct;
369 }
370
371 static struct mlx5e_tc_psample *
372 get_sample_priv(struct mlx5e_priv *priv)
373 {
374         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
375         struct mlx5_rep_uplink_priv *uplink_priv;
376         struct mlx5e_rep_priv *uplink_rpriv;
377
378         if (is_mdev_switchdev_mode(priv->mdev)) {
379                 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
380                 uplink_priv = &uplink_rpriv->uplink_priv;
381
382                 return uplink_priv->tc_psample;
383         }
384
385         return NULL;
386 }
387
388 static struct mlx5e_post_act *
389 get_post_action(struct mlx5e_priv *priv)
390 {
391         struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
392         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
393         struct mlx5_rep_uplink_priv *uplink_priv;
394         struct mlx5e_rep_priv *uplink_rpriv;
395
396         if (is_mdev_switchdev_mode(priv->mdev)) {
397                 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
398                 uplink_priv = &uplink_rpriv->uplink_priv;
399
400                 return uplink_priv->post_act;
401         }
402
403         return tc->post_act;
404 }
405
406 struct mlx5_flow_handle *
407 mlx5_tc_rule_insert(struct mlx5e_priv *priv,
408                     struct mlx5_flow_spec *spec,
409                     struct mlx5_flow_attr *attr)
410 {
411         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
412
413         if (is_mdev_switchdev_mode(priv->mdev))
414                 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
415
416         return  mlx5e_add_offloaded_nic_rule(priv, spec, attr);
417 }
418
419 void
420 mlx5_tc_rule_delete(struct mlx5e_priv *priv,
421                     struct mlx5_flow_handle *rule,
422                     struct mlx5_flow_attr *attr)
423 {
424         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
425
426         if (is_mdev_switchdev_mode(priv->mdev)) {
427                 mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
428                 return;
429         }
430
431         mlx5e_del_offloaded_nic_rule(priv, rule, attr);
432 }
433
434 static bool
435 is_flow_meter_action(struct mlx5_flow_attr *attr)
436 {
437         return (((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
438                  (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)) ||
439                 attr->flags & MLX5_ATTR_FLAG_MTU);
440 }
441
442 static int
443 mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
444                         struct mlx5_flow_attr *attr)
445 {
446         struct mlx5e_post_act *post_act = get_post_action(priv);
447         struct mlx5e_post_meter_priv *post_meter;
448         enum mlx5_flow_namespace_type ns_type;
449         struct mlx5e_flow_meter_handle *meter;
450         enum mlx5e_post_meter_type type;
451
452         meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params);
453         if (IS_ERR(meter)) {
454                 mlx5_core_err(priv->mdev, "Failed to get flow meter\n");
455                 return PTR_ERR(meter);
456         }
457
458         ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters);
459         type = meter->params.mtu ? MLX5E_POST_METER_MTU : MLX5E_POST_METER_RATE;
460         post_meter = mlx5e_post_meter_init(priv, ns_type, post_act,
461                                            type,
462                                            meter->act_counter, meter->drop_counter,
463                                            attr->branch_true, attr->branch_false);
464         if (IS_ERR(post_meter)) {
465                 mlx5_core_err(priv->mdev, "Failed to init post meter\n");
466                 goto err_meter_init;
467         }
468
469         attr->meter_attr.meter = meter;
470         attr->meter_attr.post_meter = post_meter;
471         attr->dest_ft = mlx5e_post_meter_get_ft(post_meter);
472         attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
473
474         return 0;
475
476 err_meter_init:
477         mlx5e_tc_meter_put(meter);
478         return PTR_ERR(post_meter);
479 }
480
481 static void
482 mlx5e_tc_del_flow_meter(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
483 {
484         mlx5e_post_meter_cleanup(esw, attr->meter_attr.post_meter);
485         mlx5e_tc_meter_put(attr->meter_attr.meter);
486 }
487
488 struct mlx5_flow_handle *
489 mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
490                       struct mlx5_flow_spec *spec,
491                       struct mlx5_flow_attr *attr)
492 {
493         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
494         int err;
495
496         if (attr->flags & MLX5_ATTR_FLAG_CT) {
497                 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts =
498                         &attr->parse_attr->mod_hdr_acts;
499
500                 return mlx5_tc_ct_flow_offload(get_ct_priv(priv),
501                                                spec, attr,
502                                                mod_hdr_acts);
503         }
504
505         if (!is_mdev_switchdev_mode(priv->mdev))
506                 return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
507
508         if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
509                 return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);
510
511         if (is_flow_meter_action(attr)) {
512                 err = mlx5e_tc_add_flow_meter(priv, attr);
513                 if (err)
514                         return ERR_PTR(err);
515         }
516
517         return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
518 }
519
520 void
521 mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
522                         struct mlx5_flow_handle *rule,
523                         struct mlx5_flow_attr *attr)
524 {
525         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
526
527         if (attr->flags & MLX5_ATTR_FLAG_CT) {
528                 mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr);
529                 return;
530         }
531
532         if (!is_mdev_switchdev_mode(priv->mdev)) {
533                 mlx5e_del_offloaded_nic_rule(priv, rule, attr);
534                 return;
535         }
536
537         if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
538                 mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
539                 return;
540         }
541
542         mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
543
544         if (attr->meter_attr.meter)
545                 mlx5e_tc_del_flow_meter(esw, attr);
546 }
547
548 int
549 mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
550                           struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
551                           enum mlx5_flow_namespace_type ns,
552                           enum mlx5e_tc_attr_to_reg type,
553                           u32 data)
554 {
555         int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);
556
557         return ret < 0 ? ret : 0;
558 }
559
560 void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
561                                           struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
562                                           enum mlx5e_tc_attr_to_reg type,
563                                           int act_id, u32 data)
564 {
565         int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
566         int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
567         int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
568         char *modact;
569
570         modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);
571
572         /* Firmware has 5bit length field and 0 means 32bits */
573         if (mlen == 32)
574                 mlen = 0;
575
576         MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
577         MLX5_SET(set_action_in, modact, field, mfield);
578         MLX5_SET(set_action_in, modact, offset, moffset);
579         MLX5_SET(set_action_in, modact, length, mlen);
580         MLX5_SET(set_action_in, modact, data, data);
581 }
582
583 struct mlx5e_hairpin {
584         struct mlx5_hairpin *pair;
585
586         struct mlx5_core_dev *func_mdev;
587         struct mlx5e_priv *func_priv;
588         u32 tdn;
589         struct mlx5e_tir direct_tir;
590
591         int num_channels;
592         struct mlx5e_rqt indir_rqt;
593         struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
594         struct mlx5_ttc_table *ttc;
595 };
596
597 struct mlx5e_hairpin_entry {
598         /* a node of a hash table which keeps all the  hairpin entries */
599         struct hlist_node hairpin_hlist;
600
601         /* protects flows list */
602         spinlock_t flows_lock;
603         /* flows sharing the same hairpin */
604         struct list_head flows;
605         /* hpe's that were not fully initialized when dead peer update event
606          * function traversed them.
607          */
608         struct list_head dead_peer_wait_list;
609
610         u16 peer_vhca_id;
611         u8 prio;
612         struct mlx5e_hairpin *hp;
613         refcount_t refcnt;
614         struct completion res_ready;
615 };
616
/* Defined later in this file; called from mlx5e_flow_put(). */
static void
mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow);
619
620 struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
621 {
622         if (!flow || !refcount_inc_not_zero(&flow->refcnt))
623                 return ERR_PTR(-EINVAL);
624         return flow;
625 }
626
627 void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
628 {
629         if (refcount_dec_and_test(&flow->refcnt)) {
630                 mlx5e_tc_del_flow(priv, flow);
631                 kfree_rcu(flow, rcu_head);
632         }
633 }
634
635 bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
636 {
637         return flow_flag_test(flow, ESWITCH);
638 }
639
640 bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
641 {
642         return flow_flag_test(flow, FT);
643 }
644
645 bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
646 {
647         return flow_flag_test(flow, OFFLOADED);
648 }
649
650 int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
651 {
652         return mlx5e_is_eswitch_flow(flow) ?
653                 MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
654 }
655
656 static struct mlx5_core_dev *
657 get_flow_counter_dev(struct mlx5e_tc_flow *flow)
658 {
659         return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev;
660 }
661
662 static struct mod_hdr_tbl *
663 get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
664 {
665         struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
666         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
667
668         return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
669                 &esw->offloads.mod_hdr :
670                 &tc->mod_hdr;
671 }
672
673 int mlx5e_tc_attach_mod_hdr(struct mlx5e_priv *priv,
674                             struct mlx5e_tc_flow *flow,
675                             struct mlx5_flow_attr *attr)
676 {
677         struct mlx5e_mod_hdr_handle *mh;
678
679         mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
680                                   mlx5e_get_flow_namespace(flow),
681                                   &attr->parse_attr->mod_hdr_acts);
682         if (IS_ERR(mh))
683                 return PTR_ERR(mh);
684
685         WARN_ON(attr->modify_hdr);
686         attr->modify_hdr = mlx5e_mod_hdr_get(mh);
687         attr->mh = mh;
688
689         return 0;
690 }
691
692 void mlx5e_tc_detach_mod_hdr(struct mlx5e_priv *priv,
693                              struct mlx5e_tc_flow *flow,
694                              struct mlx5_flow_attr *attr)
695 {
696         /* flow wasn't fully initialized */
697         if (!attr->mh)
698                 return;
699
700         mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
701                              attr->mh);
702         attr->mh = NULL;
703 }
704
705 static
706 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
707 {
708         struct mlx5_core_dev *mdev;
709         struct net_device *netdev;
710         struct mlx5e_priv *priv;
711
712         netdev = dev_get_by_index(net, ifindex);
713         if (!netdev)
714                 return ERR_PTR(-ENODEV);
715
716         priv = netdev_priv(netdev);
717         mdev = priv->mdev;
718         dev_put(netdev);
719
720         /* Mirred tc action holds a refcount on the ifindex net_device (see
721          * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
722          * after dev_put(netdev), while we're in the context of adding a tc flow.
723          *
724          * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
725          * stored in a hairpin object, which exists until all flows, that refer to it, get
726          * removed.
727          *
728          * On the other hand, after a hairpin object has been created, the peer net_device may
729          * be removed/unbound while there are still some hairpin flows that are using it. This
730          * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
731          * NETDEV_UNREGISTER event of the peer net_device.
732          */
733         return mdev;
734 }
735
736 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
737 {
738         struct mlx5e_tir_builder *builder;
739         int err;
740
741         builder = mlx5e_tir_builder_alloc(false);
742         if (!builder)
743                 return -ENOMEM;
744
745         err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
746         if (err)
747                 goto out;
748
749         mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
750         err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
751         if (err)
752                 goto create_tir_err;
753
754 out:
755         mlx5e_tir_builder_free(builder);
756         return err;
757
758 create_tir_err:
759         mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
760
761         goto out;
762 }
763
764 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
765 {
766         mlx5e_tir_destroy(&hp->direct_tir);
767         mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
768 }
769
770 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
771 {
772         struct mlx5e_priv *priv = hp->func_priv;
773         struct mlx5_core_dev *mdev = priv->mdev;
774         struct mlx5e_rss_params_indir *indir;
775         int err;
776
777         indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
778         if (!indir)
779                 return -ENOMEM;
780
781         mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
782         err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
783                                    mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
784                                    indir);
785
786         kvfree(indir);
787         return err;
788 }
789
790 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
791 {
792         struct mlx5e_priv *priv = hp->func_priv;
793         struct mlx5e_rss_params_hash rss_hash;
794         enum mlx5_traffic_types tt, max_tt;
795         struct mlx5e_tir_builder *builder;
796         int err = 0;
797
798         builder = mlx5e_tir_builder_alloc(false);
799         if (!builder)
800                 return -ENOMEM;
801
802         rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);
803
804         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
805                 struct mlx5e_rss_params_traffic_type rss_tt;
806
807                 rss_tt = mlx5e_rss_get_default_tt_config(tt);
808
809                 mlx5e_tir_builder_build_rqt(builder, hp->tdn,
810                                             mlx5e_rqt_get_rqtn(&hp->indir_rqt),
811                                             false);
812                 mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);
813
814                 err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
815                 if (err) {
816                         mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
817                         goto err_destroy_tirs;
818                 }
819
820                 mlx5e_tir_builder_clear(builder);
821         }
822
823 out:
824         mlx5e_tir_builder_free(builder);
825         return err;
826
827 err_destroy_tirs:
828         max_tt = tt;
829         for (tt = 0; tt < max_tt; tt++)
830                 mlx5e_tir_destroy(&hp->indir_tir[tt]);
831
832         goto out;
833 }
834
835 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
836 {
837         int tt;
838
839         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
840                 mlx5e_tir_destroy(&hp->indir_tir[tt]);
841 }
842
843 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
844                                          struct ttc_params *ttc_params)
845 {
846         struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
847         int tt;
848
849         memset(ttc_params, 0, sizeof(*ttc_params));
850
851         ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
852                                                  MLX5_FLOW_NAMESPACE_KERNEL);
853         for (tt = 0; tt < MLX5_NUM_TT; tt++) {
854                 ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
855                 ttc_params->dests[tt].tir_num =
856                         tt == MLX5_TT_ANY ?
857                                 mlx5e_tir_get_tirn(&hp->direct_tir) :
858                                 mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
859         }
860
861         ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
862         ft_attr->prio = MLX5E_TC_PRIO;
863 }
864
865 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
866 {
867         struct mlx5e_priv *priv = hp->func_priv;
868         struct ttc_params ttc_params;
869         struct mlx5_ttc_table *ttc;
870         int err;
871
872         err = mlx5e_hairpin_create_indirect_rqt(hp);
873         if (err)
874                 return err;
875
876         err = mlx5e_hairpin_create_indirect_tirs(hp);
877         if (err)
878                 goto err_create_indirect_tirs;
879
880         mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
881         hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
882         if (IS_ERR(hp->ttc)) {
883                 err = PTR_ERR(hp->ttc);
884                 goto err_create_ttc_table;
885         }
886
887         ttc = mlx5e_fs_get_ttc(priv->fs, false);
888         netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
889                    hp->num_channels,
890                    mlx5_get_ttc_flow_table(ttc)->id);
891
892         return 0;
893
894 err_create_ttc_table:
895         mlx5e_hairpin_destroy_indirect_tirs(hp);
896 err_create_indirect_tirs:
897         mlx5e_rqt_destroy(&hp->indir_rqt);
898
899         return err;
900 }
901
/*
 * Undo mlx5e_hairpin_rss_init(): destroy the TTC table, then the indirect
 * TIRs, then the indirect RQT (reverse order of their creation).
 */
static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
        mlx5_destroy_ttc_table(hp->ttc);
        mlx5e_hairpin_destroy_indirect_tirs(hp);
        mlx5e_rqt_destroy(&hp->indir_rqt);
}
908
909 static struct mlx5e_hairpin *
910 mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
911                      int peer_ifindex)
912 {
913         struct mlx5_core_dev *func_mdev, *peer_mdev;
914         struct mlx5e_hairpin *hp;
915         struct mlx5_hairpin *pair;
916         int err;
917
918         hp = kzalloc(sizeof(*hp), GFP_KERNEL);
919         if (!hp)
920                 return ERR_PTR(-ENOMEM);
921
922         func_mdev = priv->mdev;
923         peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
924         if (IS_ERR(peer_mdev)) {
925                 err = PTR_ERR(peer_mdev);
926                 goto create_pair_err;
927         }
928
929         pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
930         if (IS_ERR(pair)) {
931                 err = PTR_ERR(pair);
932                 goto create_pair_err;
933         }
934         hp->pair = pair;
935         hp->func_mdev = func_mdev;
936         hp->func_priv = priv;
937         hp->num_channels = params->num_channels;
938
939         err = mlx5e_hairpin_create_transport(hp);
940         if (err)
941                 goto create_transport_err;
942
943         if (hp->num_channels > 1) {
944                 err = mlx5e_hairpin_rss_init(hp);
945                 if (err)
946                         goto rss_init_err;
947         }
948
949         return hp;
950
951 rss_init_err:
952         mlx5e_hairpin_destroy_transport(hp);
953 create_transport_err:
954         mlx5_core_hairpin_destroy(hp->pair);
955 create_pair_err:
956         kfree(hp);
957         return ERR_PTR(err);
958 }
959
960 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
961 {
962         if (hp->num_channels > 1)
963                 mlx5e_hairpin_rss_cleanup(hp);
964         mlx5e_hairpin_destroy_transport(hp);
965         mlx5_core_hairpin_destroy(hp->pair);
966         kvfree(hp);
967 }
968
/*
 * Build the hairpin table hash key from the peer vhca id (upper 16 bits)
 * and the matched priority (low bits).
 */
static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
        /*
         * Widen before shifting: a u16 operand is promoted to signed int, so
         * shifting an id >= 0x8000 left by 16 would overflow the int.
         */
        return ((u32)peer_vhca_id << 16) | prio;
}
973
974 static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
975                                                      u16 peer_vhca_id, u8 prio)
976 {
977         struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
978         struct mlx5e_hairpin_entry *hpe;
979         u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
980
981         hash_for_each_possible(tc->hairpin_tbl, hpe,
982                                hairpin_hlist, hash_key) {
983                 if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
984                         refcount_inc(&hpe->refcnt);
985                         return hpe;
986                 }
987         }
988
989         return NULL;
990 }
991
/*
 * Drop one reference on the hairpin entry @hpe.
 *
 * When the last reference goes away the entry is unhashed under
 * tc->hairpin_tbl_lock, its hairpin (if one was successfully created) is
 * destroyed and the entry itself is freed.
 */
static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
                              struct mlx5e_hairpin_entry *hpe)
{
        struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
        /* no more hairpin flows for us, release the hairpin pair */
        if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock))
                return;
        hash_del(&hpe->hairpin_hlist);
        mutex_unlock(&tc->hairpin_tbl_lock);

        /* hpe->hp may be NULL or an ERR_PTR; only a valid hairpin is destroyed */
        if (!IS_ERR_OR_NULL(hpe->hp)) {
                netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
                           dev_name(hpe->hp->pair->peer_mdev->device));

                mlx5e_hairpin_destroy(hpe->hp);
        }

        /* Warn if flows are still linked to the entry being freed */
        WARN_ON(!list_empty(&hpe->flows));
        kfree(hpe);
}
1012
1013 #define UNKNOWN_MATCH_PRIO 8
1014
1015 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
1016                                   struct mlx5_flow_spec *spec, u8 *match_prio,
1017                                   struct netlink_ext_ack *extack)
1018 {
1019         void *headers_c, *headers_v;
1020         u8 prio_val, prio_mask = 0;
1021         bool vlan_present;
1022
1023 #ifdef CONFIG_MLX5_CORE_EN_DCB
1024         if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
1025                 NL_SET_ERR_MSG_MOD(extack,
1026                                    "only PCP trust state supported for hairpin");
1027                 return -EOPNOTSUPP;
1028         }
1029 #endif
1030         headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1031         headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1032
1033         vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
1034         if (vlan_present) {
1035                 prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
1036                 prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
1037         }
1038
1039         if (!vlan_present || !prio_mask) {
1040                 prio_val = UNKNOWN_MATCH_PRIO;
1041         } else if (prio_mask != 0x7) {
1042                 NL_SET_ERR_MSG_MOD(extack,
1043                                    "masked priority match not supported for hairpin");
1044                 return -EOPNOTSUPP;
1045         }
1046
1047         *match_prio = prio_val;
1048         return 0;
1049 }
1050
1051 static int debugfs_hairpin_num_active_get(void *data, u64 *val)
1052 {
1053         struct mlx5e_tc_table *tc = data;
1054         struct mlx5e_hairpin_entry *hpe;
1055         u32 cnt = 0;
1056         u32 bkt;
1057
1058         mutex_lock(&tc->hairpin_tbl_lock);
1059         hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
1060                 cnt++;
1061         mutex_unlock(&tc->hairpin_tbl_lock);
1062
1063         *val = cnt;
1064
1065         return 0;
1066 }
1067 DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_num_active,
1068                          debugfs_hairpin_num_active_get, NULL, "%llu\n");
1069
1070 static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv)
1071
1072 {
1073         struct mlx5e_tc_table *tc = file->private;
1074         struct mlx5e_hairpin_entry *hpe;
1075         u32 bkt;
1076
1077         mutex_lock(&tc->hairpin_tbl_lock);
1078         hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
1079                 seq_printf(file, "Hairpin peer_vhca_id %u prio %u refcnt %u\n",
1080                            hpe->peer_vhca_id, hpe->prio,
1081                            refcount_read(&hpe->refcnt));
1082         mutex_unlock(&tc->hairpin_tbl_lock);
1083
1084         return 0;
1085 }
1086 DEFINE_SHOW_ATTRIBUTE(debugfs_hairpin_table_dump);
1087
1088 static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc,
1089                                   struct dentry *dfs_root)
1090 {
1091         if (IS_ERR_OR_NULL(dfs_root))
1092                 return;
1093
1094         tc->dfs_root = debugfs_create_dir("tc", dfs_root);
1095
1096         debugfs_create_file("hairpin_num_active", 0444, tc->dfs_root, tc,
1097                             &fops_hairpin_num_active);
1098         debugfs_create_file("hairpin_table_dump", 0444, tc->dfs_root, tc,
1099                             &debugfs_hairpin_table_dump_fops);
1100 }
1101
1102 static void
1103 mlx5e_hairpin_params_init(struct mlx5e_hairpin_params *hairpin_params,
1104                           struct mlx5_core_dev *mdev)
1105 {
1106         u32 link_speed = 0;
1107         u64 link_speed64;
1108
1109         hairpin_params->mdev = mdev;
1110         /* set hairpin pair per each 50Gbs share of the link */
1111         mlx5e_port_max_linkspeed(mdev, &link_speed);
1112         link_speed = max_t(u32, link_speed, 50000);
1113         link_speed64 = link_speed;
1114         do_div(link_speed64, 50000);
1115         hairpin_params->num_queues = link_speed64;
1116
1117         hairpin_params->queue_size =
1118                 BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev),
1119                           MLX5_CAP_GEN(mdev, log_max_hairpin_num_packets)));
1120 }
1121
/*
 * Attach @flow to a hairpin towards the device behind
 * parse_attr->mirred_ifindex[0], creating the hairpin if needed.
 *
 * Hairpins are shared per (peer vhca id, matched priority).  An existing
 * entry is reused after waiting for its creator to finish; otherwise a new
 * entry is inserted into the table and the hairpin is created.
 *
 * On success the flow's nic_attr is pointed at either the hairpin TTC flow
 * table (more than one channel) or the direct TIR, and the flow is linked
 * on the entry's flow list.
 *
 * Returns 0 on success or a negative errno (extack is set for the
 * validation failures).
 */
static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow,
                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
                                  struct netlink_ext_ack *extack)
{
        struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
        int peer_ifindex = parse_attr->mirred_ifindex[0];
        struct mlx5_hairpin_params params;
        struct mlx5_core_dev *peer_mdev;
        struct mlx5e_hairpin_entry *hpe;
        struct mlx5e_hairpin *hp;
        u8 match_prio;
        u16 peer_id;
        int err;

        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
        if (IS_ERR(peer_mdev)) {
                NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
                return PTR_ERR(peer_mdev);
        }

        /* Both ends must advertise the hairpin capability */
        if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
                NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
                return -EOPNOTSUPP;
        }

        peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
        err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
                                     extack);
        if (err)
                return err;

        mutex_lock(&tc->hairpin_tbl_lock);
        hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
        if (hpe) {
                /* Entry exists (reference taken); wait for its creator to finish */
                mutex_unlock(&tc->hairpin_tbl_lock);
                wait_for_completion(&hpe->res_ready);

                /* The creator failed; drop our reference and report an error */
                if (IS_ERR(hpe->hp)) {
                        err = -EREMOTEIO;
                        goto out_err;
                }
                goto attach_flow;
        }

        hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
        if (!hpe) {
                mutex_unlock(&tc->hairpin_tbl_lock);
                return -ENOMEM;
        }

        spin_lock_init(&hpe->flows_lock);
        INIT_LIST_HEAD(&hpe->flows);
        INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
        hpe->peer_vhca_id = peer_id;
        hpe->prio = match_prio;
        refcount_set(&hpe->refcnt, 1);
        init_completion(&hpe->res_ready);

        /*
         * Publish the entry before the hairpin itself is created, so that
         * concurrent callers find it and wait on res_ready above.
         */
        hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist,
                 hash_hairpin_info(peer_id, match_prio));
        mutex_unlock(&tc->hairpin_tbl_lock);

        params.log_num_packets = ilog2(tc->hairpin_params.queue_size);
        params.log_data_size =
                clamp_t(u32,
                        params.log_num_packets +
                                MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev),
                        MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz),
                        MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));

        params.q_counter = priv->q_counter;
        params.num_channels = tc->hairpin_params.num_queues;

        hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
        /* Store the result (valid or ERR_PTR) and wake waiters in either case */
        hpe->hp = hp;
        complete_all(&hpe->res_ready);
        if (IS_ERR(hp)) {
                err = PTR_ERR(hp);
                goto out_err;
        }

        netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
                   mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
                   dev_name(hp->pair->peer_mdev->device),
                   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

attach_flow:
        /* Multi-channel hairpins steer via the TTC table, others via the direct TIR */
        if (hpe->hp->num_channels > 1) {
                flow_flag_set(flow, HAIRPIN_RSS);
                flow->attr->nic_attr->hairpin_ft =
                        mlx5_get_ttc_flow_table(hpe->hp->ttc);
        } else {
                flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
        }

        flow->hpe = hpe;
        spin_lock(&hpe->flows_lock);
        list_add(&flow->hairpin, &hpe->flows);
        spin_unlock(&hpe->flows_lock);

        return 0;

out_err:
        /* Drop the reference held on the entry */
        mlx5e_hairpin_put(priv, hpe);
        return err;
}
1229
1230 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
1231                                    struct mlx5e_tc_flow *flow)
1232 {
1233         /* flow wasn't fully initialized */
1234         if (!flow->hpe)
1235                 return;
1236
1237         spin_lock(&flow->hpe->flows_lock);
1238         list_del(&flow->hairpin);
1239         spin_unlock(&flow->hpe->flows_lock);
1240
1241         mlx5e_hairpin_put(priv, flow->hpe);
1242         flow->hpe = NULL;
1243 }
1244
1245 struct mlx5_flow_handle *
1246 mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
1247                              struct mlx5_flow_spec *spec,
1248                              struct mlx5_flow_attr *attr)
1249 {
1250         struct mlx5_flow_context *flow_context = &spec->flow_context;
1251         struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs);
1252         struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1253         struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
1254         struct mlx5_flow_destination dest[2] = {};
1255         struct mlx5_fs_chains *nic_chains;
1256         struct mlx5_flow_act flow_act = {
1257                 .action = attr->action,
1258                 .flags    = FLOW_ACT_NO_APPEND,
1259         };
1260         struct mlx5_flow_handle *rule;
1261         struct mlx5_flow_table *ft;
1262         int dest_ix = 0;
1263
1264         nic_chains = mlx5e_nic_chains(tc);
1265         flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
1266         flow_context->flow_tag = nic_attr->flow_tag;
1267
1268         if (attr->dest_ft) {
1269                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1270                 dest[dest_ix].ft = attr->dest_ft;
1271                 dest_ix++;
1272         } else if (nic_attr->hairpin_ft) {
1273                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1274                 dest[dest_ix].ft = nic_attr->hairpin_ft;
1275                 dest_ix++;
1276         } else if (nic_attr->hairpin_tirn) {
1277                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1278                 dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
1279                 dest_ix++;
1280         } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
1281                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1282                 if (attr->dest_chain) {
1283                         dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
1284                                                                  attr->dest_chain, 1,
1285                                                                  MLX5E_TC_FT_LEVEL);
1286                         if (IS_ERR(dest[dest_ix].ft))
1287                                 return ERR_CAST(dest[dest_ix].ft);
1288                 } else {
1289                         dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan);
1290                 }
1291                 dest_ix++;
1292         }
1293
1294         if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
1295             MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
1296                 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1297
1298         if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1299                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1300                 dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
1301                 dest_ix++;
1302         }
1303
1304         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1305                 flow_act.modify_hdr = attr->modify_hdr;
1306
1307         mutex_lock(&tc->t_lock);
1308         if (IS_ERR_OR_NULL(tc->t)) {
1309                 /* Create the root table here if doesn't exist yet */
1310                 tc->t =
1311                         mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
1312
1313                 if (IS_ERR(tc->t)) {
1314                         mutex_unlock(&tc->t_lock);
1315                         netdev_err(priv->netdev,
1316                                    "Failed to create tc offload table\n");
1317                         rule = ERR_CAST(tc->t);
1318                         goto err_ft_get;
1319                 }
1320         }
1321         mutex_unlock(&tc->t_lock);
1322
1323         if (attr->chain || attr->prio)
1324                 ft = mlx5_chains_get_table(nic_chains,
1325                                            attr->chain, attr->prio,
1326                                            MLX5E_TC_FT_LEVEL);
1327         else
1328                 ft = attr->ft;
1329
1330         if (IS_ERR(ft)) {
1331                 rule = ERR_CAST(ft);
1332                 goto err_ft_get;
1333         }
1334
1335         if (attr->outer_match_level != MLX5_MATCH_NONE)
1336                 spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
1337
1338         rule = mlx5_add_flow_rules(ft, spec,
1339                                    &flow_act, dest, dest_ix);
1340         if (IS_ERR(rule))
1341                 goto err_rule;
1342
1343         return rule;
1344
1345 err_rule:
1346         if (attr->chain || attr->prio)
1347                 mlx5_chains_put_table(nic_chains,
1348                                       attr->chain, attr->prio,
1349                                       MLX5E_TC_FT_LEVEL);
1350 err_ft_get:
1351         if (attr->dest_chain)
1352                 mlx5_chains_put_table(nic_chains,
1353                                       attr->dest_chain, 1,
1354                                       MLX5E_TC_FT_LEVEL);
1355
1356         return ERR_CAST(rule);
1357 }
1358
1359 static int
1360 alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
1361                         struct mlx5_flow_attr *attr)
1362
1363 {
1364         struct mlx5_fc *counter;
1365
1366         counter = mlx5_fc_create(counter_dev, true);
1367         if (IS_ERR(counter))
1368                 return PTR_ERR(counter);
1369
1370         attr->counter = counter;
1371         return 0;
1372 }
1373
1374 static int
1375 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
1376                       struct mlx5e_tc_flow *flow,
1377                       struct netlink_ext_ack *extack)
1378 {
1379         struct mlx5e_tc_flow_parse_attr *parse_attr;
1380         struct mlx5_flow_attr *attr = flow->attr;
1381         struct mlx5_core_dev *dev = priv->mdev;
1382         int err;
1383
1384         parse_attr = attr->parse_attr;
1385
1386         if (flow_flag_test(flow, HAIRPIN)) {
1387                 err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
1388                 if (err)
1389                         return err;
1390         }
1391
1392         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1393                 err = alloc_flow_attr_counter(dev, attr);
1394                 if (err)
1395                         return err;
1396         }
1397
1398         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1399                 err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1400                 if (err)
1401                         return err;
1402         }
1403
1404         if (attr->flags & MLX5_ATTR_FLAG_CT)
1405                 flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec,
1406                                                         attr, &parse_attr->mod_hdr_acts);
1407         else
1408                 flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
1409                                                              attr);
1410
1411         return PTR_ERR_OR_ZERO(flow->rule[0]);
1412 }
1413
1414 void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
1415                                   struct mlx5_flow_handle *rule,
1416                                   struct mlx5_flow_attr *attr)
1417 {
1418         struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1419         struct mlx5_fs_chains *nic_chains;
1420
1421         nic_chains = mlx5e_nic_chains(tc);
1422         mlx5_del_flow_rules(rule);
1423
1424         if (attr->chain || attr->prio)
1425                 mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
1426                                       MLX5E_TC_FT_LEVEL);
1427
1428         if (attr->dest_chain)
1429                 mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
1430                                       MLX5E_TC_FT_LEVEL);
1431 }
1432
/*
 * Tear down a NIC tc flow: remove its rule, release the root tc table when
 * no NIC-offloaded filters remain, and release the modify-header context,
 * counter and hairpin the flow used.  Finally frees the flow's post
 * actions, parse attributes and attr.
 */
static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
        struct mlx5_flow_attr *attr = flow->attr;

        flow_flag_clear(flow, OFFLOADED);

        /* rule[0] may be NULL or an ERR_PTR if the flow was never offloaded */
        if (attr->flags & MLX5_ATTR_FLAG_CT)
                mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
        else if (!IS_ERR_OR_NULL(flow->rule[0]))
                mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);

        /* Remove root table if no rules are left to avoid
         * extra steering hops.
         */
        mutex_lock(&tc->t_lock);
        if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
            !IS_ERR_OR_NULL(tc->t)) {
                mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL);
                tc->t = NULL;
        }
        mutex_unlock(&tc->t_lock);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
                mlx5e_tc_detach_mod_hdr(priv, flow, attr);
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
                mlx5_fc_destroy(priv->mdev, attr->counter);

        if (flow_flag_test(flow, HAIRPIN))
                mlx5e_hairpin_flow_del(priv, flow);

        free_flow_post_acts(flow);

        /* attr is the same pointer as flow->attr; parse_attr is freed first */
        kvfree(attr->parse_attr);
        kfree(flow->attr);
}
1473
1474 struct mlx5_flow_handle *
1475 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
1476                            struct mlx5e_tc_flow *flow,
1477                            struct mlx5_flow_spec *spec,
1478                            struct mlx5_flow_attr *attr)
1479 {
1480         struct mlx5_flow_handle *rule;
1481
1482         if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
1483                 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1484
1485         rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);
1486
1487         if (IS_ERR(rule))
1488                 return rule;
1489
1490         if (attr->esw_attr->split_count) {
1491                 flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
1492                 if (IS_ERR(flow->rule[1]))
1493                         goto err_rule1;
1494         }
1495
1496         return rule;
1497
1498 err_rule1:
1499         mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
1500         return flow->rule[1];
1501 }
1502
1503 void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
1504                                   struct mlx5e_tc_flow *flow,
1505                                   struct mlx5_flow_attr *attr)
1506 {
1507         flow_flag_clear(flow, OFFLOADED);
1508
1509         if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
1510                 return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1511
1512         if (attr->esw_attr->split_count)
1513                 mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
1514
1515         mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
1516 }
1517
/*
 * Offload @flow as a slow-path FDB rule.
 *
 * A temporary copy of the flow's attr is reduced to a plain FWD_DEST
 * action flagged MLX5_ATTR_FLAG_SLOW_PATH.  When the device supports
 * fdb_modify_header_fwd_to_table, a modify-header context that writes the
 * flow's chain mapping into MAPPED_OBJ_TO_REG is attached as well.
 *
 * On success the modify-header handle and chain mapping are recorded in
 * the flow, the SLOW flag is set and the rule is returned.  On failure an
 * ERR_PTR() is returned and the resources taken here are released.
 */
struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
                              struct mlx5e_tc_flow *flow,
                              struct mlx5_flow_spec *spec)
{
        struct mlx5e_tc_mod_hdr_acts mod_acts = {};
        struct mlx5e_mod_hdr_handle *mh = NULL;
        struct mlx5_flow_attr *slow_attr;
        struct mlx5_flow_handle *rule;
        bool fwd_and_modify_cap;
        u32 chain_mapping = 0;
        int err;

        slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
        if (!slow_attr)
                return ERR_PTR(-ENOMEM);

        /* Start from the flow's attr, then strip it down to a bare forward */
        memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr->esw_attr->split_count = 0;
        slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;

        fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table);
        if (!fwd_and_modify_cap)
                goto skip_restore;

        err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping);
        if (err)
                goto err_get_chain;

        err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
                                        MAPPED_OBJ_TO_REG, chain_mapping);
        if (err)
                goto err_reg_set;

        mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow),
                                  MLX5_FLOW_NAMESPACE_FDB, &mod_acts);
        if (IS_ERR(mh)) {
                err = PTR_ERR(mh);
                goto err_attach;
        }

        slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
        slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh);

skip_restore:
        rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                goto err_offload;
        }

        /* mh stays NULL and chain_mapping 0 when the capability is missing */
        flow->attr->slow_mh = mh;
        flow->chain_mapping = chain_mapping;
        flow_flag_set(flow, SLOW);

        /* The temporary action list and attr copy are no longer needed */
        mlx5e_mod_hdr_dealloc(&mod_acts);
        kfree(slow_attr);

        return rule;

err_offload:
        if (fwd_and_modify_cap)
                mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh);
err_attach:
err_reg_set:
        if (fwd_and_modify_cap)
                mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping);
err_get_chain:
        mlx5e_mod_hdr_dealloc(&mod_acts);
        kfree(slow_attr);
        return ERR_PTR(err);
}
1591
/*
 * Remove the slow-path rule of @flow installed by
 * mlx5e_tc_offload_to_slow_path().
 *
 * Rebuilds the same temporary slow-path attr used at offload time, removes
 * the FDB rule, and releases the modify-header handle and chain mapping if
 * the flow holds them.  If the temporary attr cannot be allocated a
 * warning is logged and nothing is undone.
 */
void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
                                       struct mlx5e_tc_flow *flow)
{
        struct mlx5e_mod_hdr_handle *slow_mh = flow->attr->slow_mh;
        struct mlx5_flow_attr *slow_attr;

        slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
        if (!slow_attr) {
                mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
                return;
        }

        /* Mirror the attr set up in mlx5e_tc_offload_to_slow_path() */
        memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr->esw_attr->split_count = 0;
        slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
        if (slow_mh) {
                slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
                slow_attr->modify_hdr = mlx5e_mod_hdr_get(slow_mh);
        }
        mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
        /* Release the modify-header handle and chain mapping after the rule is gone */
        if (slow_mh) {
                mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), slow_mh);
                mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping);
                flow->chain_mapping = 0;
                flow->attr->slow_mh = NULL;
        }
        flow_flag_clear(flow, SLOW);
        kfree(slow_attr);
}
1622
/* Mark @flow as NOT_READY and append it to the @unready_flows list.
 *
 * The caller must hold the uplink_priv->unready_flows_lock mutex.
 */
static void unready_flow_add(struct mlx5e_tc_flow *flow,
                             struct list_head *unready_flows)
{
        flow_flag_set(flow, NOT_READY);
        list_add_tail(&flow->unready, unready_flows);
}
1632
/* Unlink @flow from the unready list and clear its NOT_READY flag.
 *
 * The caller must hold the uplink_priv->unready_flows_lock mutex.
 */
static void unready_flow_del(struct mlx5e_tc_flow *flow)
{
        list_del(&flow->unready);
        flow_flag_clear(flow, NOT_READY);
}
1641
1642 static void add_unready_flow(struct mlx5e_tc_flow *flow)
1643 {
1644         struct mlx5_rep_uplink_priv *uplink_priv;
1645         struct mlx5e_rep_priv *rpriv;
1646         struct mlx5_eswitch *esw;
1647
1648         esw = flow->priv->mdev->priv.eswitch;
1649         rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1650         uplink_priv = &rpriv->uplink_priv;
1651
1652         mutex_lock(&uplink_priv->unready_flows_lock);
1653         unready_flow_add(flow, &uplink_priv->unready_flows);
1654         mutex_unlock(&uplink_priv->unready_flows_lock);
1655 }
1656
1657 static void remove_unready_flow(struct mlx5e_tc_flow *flow)
1658 {
1659         struct mlx5_rep_uplink_priv *uplink_priv;
1660         struct mlx5e_rep_priv *rpriv;
1661         struct mlx5_eswitch *esw;
1662
1663         esw = flow->priv->mdev->priv.eswitch;
1664         rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1665         uplink_priv = &rpriv->uplink_priv;
1666
1667         mutex_lock(&uplink_priv->unready_flows_lock);
1668         unready_flow_del(flow);
1669         mutex_unlock(&uplink_priv->unready_flows_lock);
1670 }
1671
1672 bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
1673 {
1674         struct mlx5_core_dev *out_mdev, *route_mdev;
1675         struct mlx5e_priv *out_priv, *route_priv;
1676
1677         out_priv = netdev_priv(out_dev);
1678         out_mdev = out_priv->mdev;
1679         route_priv = netdev_priv(route_dev);
1680         route_mdev = route_priv->mdev;
1681
1682         if (out_mdev->coredev_type != MLX5_COREDEV_PF)
1683                 return false;
1684
1685         if (route_mdev->coredev_type != MLX5_COREDEV_VF &&
1686             route_mdev->coredev_type != MLX5_COREDEV_SF)
1687                 return false;
1688
1689         return mlx5e_same_hw_devs(out_priv, route_priv);
1690 }
1691
1692 int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
1693 {
1694         struct mlx5e_priv *out_priv, *route_priv;
1695         struct mlx5_devcom *devcom = NULL;
1696         struct mlx5_core_dev *route_mdev;
1697         struct mlx5_eswitch *esw;
1698         u16 vhca_id;
1699         int err;
1700
1701         out_priv = netdev_priv(out_dev);
1702         esw = out_priv->mdev->priv.eswitch;
1703         route_priv = netdev_priv(route_dev);
1704         route_mdev = route_priv->mdev;
1705
1706         vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
1707         if (mlx5_lag_is_active(out_priv->mdev)) {
1708                 /* In lag case we may get devices from different eswitch instances.
1709                  * If we failed to get vport num, it means, mostly, that we on the wrong
1710                  * eswitch.
1711                  */
1712                 err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1713                 if (err != -ENOENT)
1714                         return err;
1715
1716                 devcom = out_priv->mdev->priv.devcom;
1717                 esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1718                 if (!esw)
1719                         return -ENODEV;
1720         }
1721
1722         err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1723         if (devcom)
1724                 mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1725         return err;
1726 }
1727
1728 static int
1729 set_encap_dests(struct mlx5e_priv *priv,
1730                 struct mlx5e_tc_flow *flow,
1731                 struct mlx5_flow_attr *attr,
1732                 struct netlink_ext_ack *extack,
1733                 bool *vf_tun)
1734 {
1735         struct mlx5e_tc_flow_parse_attr *parse_attr;
1736         struct mlx5_esw_flow_attr *esw_attr;
1737         struct net_device *encap_dev = NULL;
1738         struct mlx5e_rep_priv *rpriv;
1739         struct mlx5e_priv *out_priv;
1740         int out_index;
1741         int err = 0;
1742
1743         if (!mlx5e_is_eswitch_flow(flow))
1744                 return 0;
1745
1746         parse_attr = attr->parse_attr;
1747         esw_attr = attr->esw_attr;
1748         *vf_tun = false;
1749
1750         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1751                 struct net_device *out_dev;
1752                 int mirred_ifindex;
1753
1754                 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1755                         continue;
1756
1757                 mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1758                 out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1759                 if (!out_dev) {
1760                         NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1761                         err = -ENODEV;
1762                         goto out;
1763                 }
1764                 err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
1765                                          extack, &encap_dev);
1766                 dev_put(out_dev);
1767                 if (err)
1768                         goto out;
1769
1770                 if (esw_attr->dests[out_index].flags &
1771                     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1772                     !esw_attr->dest_int_port)
1773                         *vf_tun = true;
1774
1775                 out_priv = netdev_priv(encap_dev);
1776                 rpriv = out_priv->ppriv;
1777                 esw_attr->dests[out_index].rep = rpriv->rep;
1778                 esw_attr->dests[out_index].mdev = out_priv->mdev;
1779         }
1780
1781         if (*vf_tun && esw_attr->out_count > 1) {
1782                 NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
1783                 err = -EOPNOTSUPP;
1784                 goto out;
1785         }
1786
1787 out:
1788         return err;
1789 }
1790
1791 static void
1792 clean_encap_dests(struct mlx5e_priv *priv,
1793                   struct mlx5e_tc_flow *flow,
1794                   struct mlx5_flow_attr *attr,
1795                   bool *vf_tun)
1796 {
1797         struct mlx5_esw_flow_attr *esw_attr;
1798         int out_index;
1799
1800         if (!mlx5e_is_eswitch_flow(flow))
1801                 return;
1802
1803         esw_attr = attr->esw_attr;
1804         *vf_tun = false;
1805
1806         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1807                 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1808                         continue;
1809
1810                 if (esw_attr->dests[out_index].flags &
1811                     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1812                     !esw_attr->dest_int_port)
1813                         *vf_tun = true;
1814
1815                 mlx5e_detach_encap(priv, flow, attr, out_index);
1816                 kfree(attr->parse_attr->tun_info[out_index]);
1817         }
1818 }
1819
1820 static int
1821 verify_attr_actions(u32 actions, struct netlink_ext_ack *extack)
1822 {
1823         if (!(actions &
1824               (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
1825                 NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
1826                 return -EOPNOTSUPP;
1827         }
1828
1829         if (!(~actions &
1830               (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
1831                 NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
1832                 return -EOPNOTSUPP;
1833         }
1834
1835         if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
1836             actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
1837                 NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
1838                 return -EOPNOTSUPP;
1839         }
1840
1841         return 0;
1842 }
1843
1844 static int
1845 post_process_attr(struct mlx5e_tc_flow *flow,
1846                   struct mlx5_flow_attr *attr,
1847                   struct netlink_ext_ack *extack)
1848 {
1849         bool vf_tun;
1850         int err = 0;
1851
1852         err = verify_attr_actions(attr->action, extack);
1853         if (err)
1854                 goto err_out;
1855
1856         err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun);
1857         if (err)
1858                 goto err_out;
1859
1860         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1861                 err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr);
1862                 if (err)
1863                         goto err_out;
1864         }
1865
1866         if (attr->branch_true &&
1867             attr->branch_true->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1868                 err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_true);
1869                 if (err)
1870                         goto err_out;
1871         }
1872
1873         if (attr->branch_false &&
1874             attr->branch_false->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1875                 err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_false);
1876                 if (err)
1877                         goto err_out;
1878         }
1879
1880         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1881                 err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
1882                 if (err)
1883                         goto err_out;
1884         }
1885
1886 err_out:
1887         return err;
1888 }
1889
1890 static int
1891 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
1892                       struct mlx5e_tc_flow *flow,
1893                       struct netlink_ext_ack *extack)
1894 {
1895         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1896         struct mlx5e_tc_flow_parse_attr *parse_attr;
1897         struct mlx5_flow_attr *attr = flow->attr;
1898         struct mlx5_esw_flow_attr *esw_attr;
1899         u32 max_prio, max_chain;
1900         int err = 0;
1901
1902         parse_attr = attr->parse_attr;
1903         esw_attr = attr->esw_attr;
1904
1905         /* We check chain range only for tc flows.
1906          * For ft flows, we checked attr->chain was originally 0 and set it to
1907          * FDB_FT_CHAIN which is outside tc range.
1908          * See mlx5e_rep_setup_ft_cb().
1909          */
1910         max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
1911         if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1912                 NL_SET_ERR_MSG_MOD(extack,
1913                                    "Requested chain is out of supported range");
1914                 err = -EOPNOTSUPP;
1915                 goto err_out;
1916         }
1917
1918         max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
1919         if (attr->prio > max_prio) {
1920                 NL_SET_ERR_MSG_MOD(extack,
1921                                    "Requested priority is out of supported range");
1922                 err = -EOPNOTSUPP;
1923                 goto err_out;
1924         }
1925
1926         if (flow_flag_test(flow, TUN_RX)) {
1927                 err = mlx5e_attach_decap_route(priv, flow);
1928                 if (err)
1929                         goto err_out;
1930
1931                 if (!attr->chain && esw_attr->int_port &&
1932                     attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
1933                         /* If decap route device is internal port, change the
1934                          * source vport value in reg_c0 back to uplink just in
1935                          * case the rule performs goto chain > 0. If we have a miss
1936                          * on chain > 0 we want the metadata regs to hold the
1937                          * chain id so SW will resume handling of this packet
1938                          * from the proper chain.
1939                          */
1940                         u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw,
1941                                                                         esw_attr->in_rep->vport);
1942
1943                         err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
1944                                                         MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
1945                                                         metadata);
1946                         if (err)
1947                                 goto err_out;
1948
1949                         attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1950                 }
1951         }
1952
1953         if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
1954                 err = mlx5e_attach_decap(priv, flow, extack);
1955                 if (err)
1956                         goto err_out;
1957         }
1958
1959         if (netif_is_ovs_master(parse_attr->filter_dev)) {
1960                 struct mlx5e_tc_int_port *int_port;
1961
1962                 if (attr->chain) {
1963                         NL_SET_ERR_MSG_MOD(extack,
1964                                            "Internal port rule is only supported on chain 0");
1965                         err = -EOPNOTSUPP;
1966                         goto err_out;
1967                 }
1968
1969                 if (attr->dest_chain) {
1970                         NL_SET_ERR_MSG_MOD(extack,
1971                                            "Internal port rule offload doesn't support goto action");
1972                         err = -EOPNOTSUPP;
1973                         goto err_out;
1974                 }
1975
1976                 int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
1977                                                  parse_attr->filter_dev->ifindex,
1978                                                  flow_flag_test(flow, EGRESS) ?
1979                                                  MLX5E_TC_INT_PORT_EGRESS :
1980                                                  MLX5E_TC_INT_PORT_INGRESS);
1981                 if (IS_ERR(int_port)) {
1982                         err = PTR_ERR(int_port);
1983                         goto err_out;
1984                 }
1985
1986                 esw_attr->int_port = int_port;
1987         }
1988
1989         err = post_process_attr(flow, attr, extack);
1990         if (err)
1991                 goto err_out;
1992
1993         err = mlx5e_tc_act_stats_add_flow(get_act_stats_handle(priv), flow);
1994         if (err)
1995                 goto err_out;
1996
1997         /* we get here if one of the following takes place:
1998          * (1) there's no error
1999          * (2) there's an encap action and we don't have valid neigh
2000          */
2001         if (flow_flag_test(flow, SLOW))
2002                 flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
2003         else
2004                 flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
2005
2006         if (IS_ERR(flow->rule[0])) {
2007                 err = PTR_ERR(flow->rule[0]);
2008                 goto err_out;
2009         }
2010         flow_flag_set(flow, OFFLOADED);
2011
2012         return 0;
2013
2014 err_out:
2015         flow_flag_set(flow, FAILED);
2016         return err;
2017 }
2018
2019 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
2020 {
2021         struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
2022         void *headers_v = MLX5_ADDR_OF(fte_match_param,
2023                                        spec->match_value,
2024                                        misc_parameters_3);
2025         u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
2026                                              headers_v,
2027                                              geneve_tlv_option_0_data);
2028
2029         return !!geneve_tlv_opt_0_data;
2030 }
2031
2032 static void free_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
2033 {
2034         if (!attr)
2035                 return;
2036
2037         mlx5_free_flow_attr(flow, attr);
2038         kvfree(attr->parse_attr);
2039         kfree(attr);
2040 }
2041
2042 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
2043                                   struct mlx5e_tc_flow *flow)
2044 {
2045         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2046         struct mlx5_flow_attr *attr = flow->attr;
2047         struct mlx5_esw_flow_attr *esw_attr;
2048         bool vf_tun;
2049
2050         esw_attr = attr->esw_attr;
2051         mlx5e_put_flow_tunnel_id(flow);
2052
2053         if (flow_flag_test(flow, NOT_READY))
2054                 remove_unready_flow(flow);
2055
2056         if (mlx5e_is_offloaded_flow(flow)) {
2057                 if (flow_flag_test(flow, SLOW))
2058                         mlx5e_tc_unoffload_from_slow_path(esw, flow);
2059                 else
2060                         mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
2061         }
2062         complete_all(&flow->del_hw_done);
2063
2064         if (mlx5_flow_has_geneve_opt(flow))
2065                 mlx5_geneve_tlv_option_del(priv->mdev->geneve);
2066
2067         if (flow->decap_route)
2068                 mlx5e_detach_decap_route(priv, flow);
2069
2070         clean_encap_dests(priv, flow, attr, &vf_tun);
2071
2072         mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
2073
2074         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
2075                 mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
2076                 mlx5e_tc_detach_mod_hdr(priv, flow, attr);
2077         }
2078
2079         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
2080                 mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
2081
2082         if (esw_attr->int_port)
2083                 mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
2084
2085         if (esw_attr->dest_int_port)
2086                 mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);
2087
2088         if (flow_flag_test(flow, L3_TO_L2_DECAP))
2089                 mlx5e_detach_decap(priv, flow);
2090
2091         mlx5e_tc_act_stats_del_flow(get_act_stats_handle(priv), flow);
2092
2093         free_flow_post_acts(flow);
2094         free_branch_attr(flow, attr->branch_true);
2095         free_branch_attr(flow, attr->branch_false);
2096
2097         kvfree(attr->esw_attr->rx_tun_attr);
2098         kvfree(attr->parse_attr);
2099         kfree(flow->attr);
2100 }
2101
2102 struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
2103 {
2104         struct mlx5_flow_attr *attr;
2105
2106         attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list);
2107         return attr->counter;
2108 }
2109
2110 /* Iterate over tmp_list of flows attached to flow_list head. */
2111 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
2112 {
2113         struct mlx5e_tc_flow *flow, *tmp;
2114
2115         list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
2116                 mlx5e_flow_put(priv, flow);
2117 }
2118
2119 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
2120 {
2121         struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
2122
2123         if (!flow_flag_test(flow, ESWITCH) ||
2124             !flow_flag_test(flow, DUP))
2125                 return;
2126
2127         mutex_lock(&esw->offloads.peer_mutex);
2128         list_del(&flow->peer);
2129         mutex_unlock(&esw->offloads.peer_mutex);
2130
2131         flow_flag_clear(flow, DUP);
2132
2133         if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
2134                 mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
2135                 kfree(flow->peer_flow);
2136         }
2137
2138         flow->peer_flow = NULL;
2139 }
2140
2141 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
2142 {
2143         struct mlx5_core_dev *dev = flow->priv->mdev;
2144         struct mlx5_devcom *devcom = dev->priv.devcom;
2145         struct mlx5_eswitch *peer_esw;
2146
2147         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
2148         if (!peer_esw)
2149                 return;
2150
2151         __mlx5e_tc_del_fdb_peer_flow(flow);
2152         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
2153 }
2154
/* Delete @flow using the teardown path that matches its kind: NIC flows go
 * through mlx5e_tc_del_nic_flow(); eswitch flows first drop their peer flow
 * and then the FDB flow itself.
 */
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	if (!mlx5e_is_eswitch_flow(flow)) {
		mlx5e_tc_del_nic_flow(priv, flow);
		return;
	}

	mlx5e_tc_del_fdb_peer_flow(flow);
	mlx5e_tc_del_fdb_flow(priv, flow);
}
2165
2166 static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
2167 {
2168         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2169         struct flow_action *flow_action = &rule->action;
2170         const struct flow_action_entry *act;
2171         int i;
2172
2173         if (chain)
2174                 return false;
2175
2176         flow_action_for_each(i, act, flow_action) {
2177                 switch (act->id) {
2178                 case FLOW_ACTION_GOTO:
2179                         return true;
2180                 case FLOW_ACTION_SAMPLE:
2181                         return true;
2182                 default:
2183                         continue;
2184                 }
2185         }
2186
2187         return false;
2188 }
2189
2190 static int
2191 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
2192                                     struct flow_dissector_key_enc_opts *opts,
2193                                     struct netlink_ext_ack *extack,
2194                                     bool *dont_care)
2195 {
2196         struct geneve_opt *opt;
2197         int off = 0;
2198
2199         *dont_care = true;
2200
2201         while (opts->len > off) {
2202                 opt = (struct geneve_opt *)&opts->data[off];
2203
2204                 if (!(*dont_care) || opt->opt_class || opt->type ||
2205                     memchr_inv(opt->opt_data, 0, opt->length * 4)) {
2206                         *dont_care = false;
2207
2208                         if (opt->opt_class != htons(U16_MAX) ||
2209                             opt->type != U8_MAX) {
2210                                 NL_SET_ERR_MSG_MOD(extack,
2211                                                    "Partial match of tunnel options in chain > 0 isn't supported");
2212                                 netdev_warn(priv->netdev,
2213                                             "Partial match of tunnel options in chain > 0 isn't supported");
2214                                 return -EOPNOTSUPP;
2215                         }
2216                 }
2217
2218                 off += sizeof(struct geneve_opt) + opt->length * 4;
2219         }
2220
2221         return 0;
2222 }
2223
/* Copy the dissector key @diss_key of flow rule @rule into the object that
 * @dst points to; the number of bytes copied is sizeof(*dst).
 *
 * Every argument is evaluated exactly once and is parenthesized in the
 * expansion, so arbitrary expressions can be passed safely.
 */
#define COPY_DISSECTOR(rule, diss_key, dst)\
({ \
	struct flow_rule *__rule = (rule);\
	typeof(dst) __dst = (dst);\
\
	memcpy(__dst,\
	       skb_flow_dissector_target(__rule->match.dissector,\
					 (diss_key),\
					 __rule->match.key),\
	       sizeof(*__dst));\
})
2235
2236 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
2237                                     struct mlx5e_tc_flow *flow,
2238                                     struct flow_cls_offload *f,
2239                                     struct net_device *filter_dev)
2240 {
2241         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2242         struct netlink_ext_ack *extack = f->common.extack;
2243         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
2244         struct flow_match_enc_opts enc_opts_match;
2245         struct tunnel_match_enc_opts tun_enc_opts;
2246         struct mlx5_rep_uplink_priv *uplink_priv;
2247         struct mlx5_flow_attr *attr = flow->attr;
2248         struct mlx5e_rep_priv *uplink_rpriv;
2249         struct tunnel_match_key tunnel_key;
2250         bool enc_opts_is_dont_care = true;
2251         u32 tun_id, enc_opts_id = 0;
2252         struct mlx5_eswitch *esw;
2253         u32 value, mask;
2254         int err;
2255
2256         esw = priv->mdev->priv.eswitch;
2257         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2258         uplink_priv = &uplink_rpriv->uplink_priv;
2259
2260         memset(&tunnel_key, 0, sizeof(tunnel_key));
2261         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
2262                        &tunnel_key.enc_control);
2263         if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
2264                 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
2265                                &tunnel_key.enc_ipv4);
2266         else
2267                 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
2268                                &tunnel_key.enc_ipv6);
2269         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
2270         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
2271                        &tunnel_key.enc_tp);
2272         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
2273                        &tunnel_key.enc_key_id);
2274         tunnel_key.filter_ifindex = filter_dev->ifindex;
2275
2276         err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
2277         if (err)
2278                 return err;
2279
2280         flow_rule_match_enc_opts(rule, &enc_opts_match);
2281         err = enc_opts_is_dont_care_or_full_match(priv,
2282                                                   enc_opts_match.mask,
2283                                                   extack,
2284                                                   &enc_opts_is_dont_care);
2285         if (err)
2286                 goto err_enc_opts;
2287
2288         if (!enc_opts_is_dont_care) {
2289                 memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
2290                 memcpy(&tun_enc_opts.key, enc_opts_match.key,
2291                        sizeof(*enc_opts_match.key));
2292                 memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
2293                        sizeof(*enc_opts_match.mask));
2294
2295                 err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
2296                                   &tun_enc_opts, &enc_opts_id);
2297                 if (err)
2298                         goto err_enc_opts;
2299         }
2300
2301         value = tun_id << ENC_OPTS_BITS | enc_opts_id;
2302         mask = enc_opts_id ? TUNNEL_ID_MASK :
2303                              (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
2304
2305         if (attr->chain) {
2306                 mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
2307                                             TUNNEL_TO_REG, value, mask);
2308         } else {
2309                 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
2310                 err = mlx5e_tc_match_to_reg_set(priv->mdev,
2311                                                 mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
2312                                                 TUNNEL_TO_REG, value);
2313                 if (err)
2314                         goto err_set;
2315
2316                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2317         }
2318
2319         flow->attr->tunnel_id = value;
2320         return 0;
2321
2322 err_set:
2323         if (enc_opts_id)
2324                 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2325                                enc_opts_id);
2326 err_enc_opts:
2327         mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2328         return err;
2329 }
2330
2331 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
2332 {
2333         u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK;
2334         u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS;
2335         struct mlx5_rep_uplink_priv *uplink_priv;
2336         struct mlx5e_rep_priv *uplink_rpriv;
2337         struct mlx5_eswitch *esw;
2338
2339         esw = flow->priv->mdev->priv.eswitch;
2340         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2341         uplink_priv = &uplink_rpriv->uplink_priv;
2342
2343         if (tun_id)
2344                 mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2345         if (enc_opts_id)
2346                 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2347                                enc_opts_id);
2348 }
2349
2350 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
2351                             struct flow_match_basic *match, bool outer,
2352                             void *headers_c, void *headers_v)
2353 {
2354         bool ip_version_cap;
2355
2356         ip_version_cap = outer ?
2357                 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2358                                           ft_field_support.outer_ip_version) :
2359                 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2360                                           ft_field_support.inner_ip_version);
2361
2362         if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
2363             (match->key->n_proto == htons(ETH_P_IP) ||
2364              match->key->n_proto == htons(ETH_P_IPV6))) {
2365                 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
2366                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
2367                          match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
2368         } else {
2369                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
2370                          ntohs(match->mask->n_proto));
2371                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
2372                          ntohs(match->key->n_proto));
2373         }
2374 }
2375
2376 u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
2377 {
2378         void *headers_v;
2379         u16 ethertype;
2380         u8 ip_version;
2381
2382         if (outer)
2383                 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
2384         else
2385                 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
2386
2387         ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
2388         /* Return ip_version converted from ethertype anyway */
2389         if (!ip_version) {
2390                 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
2391                 if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
2392                         ip_version = 4;
2393                 else if (ethertype == ETH_P_IPV6)
2394                         ip_version = 6;
2395         }
2396         return ip_version;
2397 }
2398
2399 /* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
2400  * And changes inner ip_ecn depending on inner and outer ip_ecn as follows:
2401  *      +---------+----------------------------------------+
2402  *      |Arriving |         Arriving Outer Header          |
2403  *      |   Inner +---------+---------+---------+----------+
2404  *      |  Header | Not-ECT | ECT(0)  | ECT(1)  |   CE     |
2405  *      +---------+---------+---------+---------+----------+
2406  *      | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop>   |
2407  *      |  ECT(0) |  ECT(0) | ECT(0)  | ECT(1)  |   CE*    |
2408  *      |  ECT(1) |  ECT(1) | ECT(1)  | ECT(1)* |   CE*    |
2409  *      |    CE   |   CE    |  CE     | CE      |   CE     |
2410  *      +---------+---------+---------+---------+----------+
2411  *
2412  * Tc matches on inner after decapsulation on tunnel device, but hw offload matches
2413  * the inner ip_ecn value before hardware decap action.
2414  *
 * Cells marked with '*' differ from the original inner packet ip_ecn value after
 * decap, so matching on those values against the inner ip_ecn before decap will fail.
2417  *
 * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn,
 * except for the outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE,
 * and as such we can drop the inner ip_ecn=CE match.
2421  */
2422
2423 static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
2424                                       struct flow_cls_offload *f,
2425                                       bool *match_inner_ecn)
2426 {
2427         u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
2428         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2429         struct netlink_ext_ack *extack = f->common.extack;
2430         struct flow_match_ip match;
2431
2432         *match_inner_ecn = true;
2433
2434         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
2435                 flow_rule_match_enc_ip(rule, &match);
2436                 outer_ecn_key = match.key->tos & INET_ECN_MASK;
2437                 outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
2438         }
2439
2440         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2441                 flow_rule_match_ip(rule, &match);
2442                 inner_ecn_key = match.key->tos & INET_ECN_MASK;
2443                 inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
2444         }
2445
2446         if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
2447                 NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
2448                 netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
2449                 return -EOPNOTSUPP;
2450         }
2451
2452         if (!outer_ecn_mask) {
2453                 if (!inner_ecn_mask)
2454                         return 0;
2455
2456                 NL_SET_ERR_MSG_MOD(extack,
2457                                    "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2458                 netdev_warn(priv->netdev,
2459                             "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2460                 return -EOPNOTSUPP;
2461         }
2462
2463         if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
2464                 NL_SET_ERR_MSG_MOD(extack,
2465                                    "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2466                 netdev_warn(priv->netdev,
2467                             "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2468                 return -EOPNOTSUPP;
2469         }
2470
2471         if (!inner_ecn_mask)
2472                 return 0;
2473
2474         /* Both inner and outer have full mask on ecn */
2475
2476         if (outer_ecn_key == INET_ECN_ECT_1) {
2477                 /* inner ecn might change by DECAP action */
2478
2479                 NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
2480                 netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
2481                 return -EOPNOTSUPP;
2482         }
2483
2484         if (outer_ecn_key != INET_ECN_CE)
2485                 return 0;
2486
2487         if (inner_ecn_key != INET_ECN_CE) {
2488                 /* Can't happen in software, as packet ecn will be changed to CE after decap */
2489                 NL_SET_ERR_MSG_MOD(extack,
2490                                    "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2491                 netdev_warn(priv->netdev,
2492                             "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2493                 return -EOPNOTSUPP;
2494         }
2495
2496         /* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in anycase,
2497          * drop match on inner ecn
2498          */
2499         *match_inner_ecn = false;
2500
2501         return 0;
2502 }
2503
2504 static int parse_tunnel_attr(struct mlx5e_priv *priv,
2505                              struct mlx5e_tc_flow *flow,
2506                              struct mlx5_flow_spec *spec,
2507                              struct flow_cls_offload *f,
2508                              struct net_device *filter_dev,
2509                              u8 *match_level,
2510                              bool *match_inner)
2511 {
2512         struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
2513         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2514         struct netlink_ext_ack *extack = f->common.extack;
2515         bool needs_mapping, sets_mapping;
2516         int err;
2517
2518         if (!mlx5e_is_eswitch_flow(flow)) {
2519                 NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported");
2520                 return -EOPNOTSUPP;
2521         }
2522
2523         needs_mapping = !!flow->attr->chain;
2524         sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
2525         *match_inner = !needs_mapping;
2526
2527         if ((needs_mapping || sets_mapping) &&
2528             !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2529                 NL_SET_ERR_MSG_MOD(extack,
2530                                    "Chains on tunnel devices isn't supported without register loopback support");
2531                 netdev_warn(priv->netdev,
2532                             "Chains on tunnel devices isn't supported without register loopback support");
2533                 return -EOPNOTSUPP;
2534         }
2535
2536         if (!flow->attr->chain) {
2537                 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2538                                          match_level);
2539                 if (err) {
2540                         NL_SET_ERR_MSG_MOD(extack,
2541                                            "Failed to parse tunnel attributes");
2542                         netdev_warn(priv->netdev,
2543                                     "Failed to parse tunnel attributes");
2544                         return err;
2545                 }
2546
2547                 /* With mpls over udp we decapsulate using packet reformat
2548                  * object
2549                  */
2550                 if (!netif_is_bareudp(filter_dev))
2551                         flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2552                 err = mlx5e_tc_set_attr_rx_tun(flow, spec);
2553                 if (err)
2554                         return err;
2555         } else if (tunnel) {
2556                 struct mlx5_flow_spec *tmp_spec;
2557
2558                 tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
2559                 if (!tmp_spec) {
2560                         NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for tunnel tmp spec");
2561                         netdev_warn(priv->netdev, "Failed to allocate memory for tunnel tmp spec");
2562                         return -ENOMEM;
2563                 }
2564                 memcpy(tmp_spec, spec, sizeof(*tmp_spec));
2565
2566                 err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
2567                 if (err) {
2568                         kvfree(tmp_spec);
2569                         NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
2570                         netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
2571                         return err;
2572                 }
2573                 err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
2574                 kvfree(tmp_spec);
2575                 if (err)
2576                         return err;
2577         }
2578
2579         if (!needs_mapping && !sets_mapping)
2580                 return 0;
2581
2582         return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2583 }
2584
2585 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2586 {
2587         return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2588                             inner_headers);
2589 }
2590
2591 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2592 {
2593         return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2594                             inner_headers);
2595 }
2596
2597 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2598 {
2599         return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2600                             outer_headers);
2601 }
2602
2603 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2604 {
2605         return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2606                             outer_headers);
2607 }
2608
2609 void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec)
2610 {
2611         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2612                 get_match_inner_headers_value(spec) :
2613                 get_match_outer_headers_value(spec);
2614 }
2615
2616 void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec)
2617 {
2618         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2619                 get_match_inner_headers_criteria(spec) :
2620                 get_match_outer_headers_criteria(spec);
2621 }
2622
2623 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2624                                    struct flow_cls_offload *f)
2625 {
2626         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2627         struct netlink_ext_ack *extack = f->common.extack;
2628         struct net_device *ingress_dev;
2629         struct flow_match_meta match;
2630
2631         if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2632                 return 0;
2633
2634         flow_rule_match_meta(rule, &match);
2635         if (!match.mask->ingress_ifindex)
2636                 return 0;
2637
2638         if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2639                 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2640                 return -EOPNOTSUPP;
2641         }
2642
2643         ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2644                                          match.key->ingress_ifindex);
2645         if (!ingress_dev) {
2646                 NL_SET_ERR_MSG_MOD(extack,
2647                                    "Can't find the ingress port to match on");
2648                 return -ENOENT;
2649         }
2650
2651         if (ingress_dev != filter_dev) {
2652                 NL_SET_ERR_MSG_MOD(extack,
2653                                    "Can't match on the ingress filter port");
2654                 return -EOPNOTSUPP;
2655         }
2656
2657         return 0;
2658 }
2659
2660 static bool skip_key_basic(struct net_device *filter_dev,
2661                            struct flow_cls_offload *f)
2662 {
2663         /* When doing mpls over udp decap, the user needs to provide
2664          * MPLS_UC as the protocol in order to be able to match on mpls
2665          * label fields.  However, the actual ethertype is IP so we want to
2666          * avoid matching on this, otherwise we'll fail the match.
2667          */
2668         if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2669                 return true;
2670
2671         return false;
2672 }
2673
2674 static int __parse_cls_flower(struct mlx5e_priv *priv,
2675                               struct mlx5e_tc_flow *flow,
2676                               struct mlx5_flow_spec *spec,
2677                               struct flow_cls_offload *f,
2678                               struct net_device *filter_dev,
2679                               u8 *inner_match_level, u8 *outer_match_level)
2680 {
2681         struct netlink_ext_ack *extack = f->common.extack;
2682         void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2683                                        outer_headers);
2684         void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2685                                        outer_headers);
2686         void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2687                                     misc_parameters);
2688         void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2689                                     misc_parameters);
2690         void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2691                                     misc_parameters_3);
2692         void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2693                                     misc_parameters_3);
2694         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2695         struct flow_dissector *dissector = rule->match.dissector;
2696         enum fs_flow_table_type fs_type;
2697         bool match_inner_ecn = true;
2698         u16 addr_type = 0;
2699         u8 ip_proto = 0;
2700         u8 *match_level;
2701         int err;
2702
2703         fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
2704         match_level = outer_match_level;
2705
2706         if (dissector->used_keys &
2707             ~(BIT(FLOW_DISSECTOR_KEY_META) |
2708               BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2709               BIT(FLOW_DISSECTOR_KEY_BASIC) |
2710               BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2711               BIT(FLOW_DISSECTOR_KEY_VLAN) |
2712               BIT(FLOW_DISSECTOR_KEY_CVLAN) |
2713               BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2714               BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2715               BIT(FLOW_DISSECTOR_KEY_PORTS) |
2716               BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2717               BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2718               BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2719               BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
2720               BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2721               BIT(FLOW_DISSECTOR_KEY_TCP) |
2722               BIT(FLOW_DISSECTOR_KEY_IP)  |
2723               BIT(FLOW_DISSECTOR_KEY_CT) |
2724               BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2725               BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2726               BIT(FLOW_DISSECTOR_KEY_ICMP) |
2727               BIT(FLOW_DISSECTOR_KEY_MPLS))) {
2728                 NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2729                 netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
2730                            dissector->used_keys);
2731                 return -EOPNOTSUPP;
2732         }
2733
2734         if (mlx5e_get_tc_tun(filter_dev)) {
2735                 bool match_inner = false;
2736
2737                 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2738                                         outer_match_level, &match_inner);
2739                 if (err)
2740                         return err;
2741
2742                 if (match_inner) {
2743                         /* header pointers should point to the inner headers
2744                          * if the packet was decapsulated already.
2745                          * outer headers are set by parse_tunnel_attr.
2746                          */
2747                         match_level = inner_match_level;
2748                         headers_c = get_match_inner_headers_criteria(spec);
2749                         headers_v = get_match_inner_headers_value(spec);
2750                 }
2751
2752                 err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
2753                 if (err)
2754                         return err;
2755         }
2756
2757         err = mlx5e_flower_parse_meta(filter_dev, f);
2758         if (err)
2759                 return err;
2760
2761         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2762             !skip_key_basic(filter_dev, f)) {
2763                 struct flow_match_basic match;
2764
2765                 flow_rule_match_basic(rule, &match);
2766                 mlx5e_tc_set_ethertype(priv->mdev, &match,
2767                                        match_level == outer_match_level,
2768                                        headers_c, headers_v);
2769
2770                 if (match.mask->n_proto)
2771                         *match_level = MLX5_MATCH_L2;
2772         }
2773         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2774             is_vlan_dev(filter_dev)) {
2775                 struct flow_dissector_key_vlan filter_dev_mask;
2776                 struct flow_dissector_key_vlan filter_dev_key;
2777                 struct flow_match_vlan match;
2778
2779                 if (is_vlan_dev(filter_dev)) {
2780                         match.key = &filter_dev_key;
2781                         match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2782                         match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2783                         match.key->vlan_priority = 0;
2784                         match.mask = &filter_dev_mask;
2785                         memset(match.mask, 0xff, sizeof(*match.mask));
2786                         match.mask->vlan_priority = 0;
2787                 } else {
2788                         flow_rule_match_vlan(rule, &match);
2789                 }
2790                 if (match.mask->vlan_id ||
2791                     match.mask->vlan_priority ||
2792                     match.mask->vlan_tpid) {
2793                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2794                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2795                                          svlan_tag, 1);
2796                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2797                                          svlan_tag, 1);
2798                         } else {
2799                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2800                                          cvlan_tag, 1);
2801                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2802                                          cvlan_tag, 1);
2803                         }
2804
2805                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2806                                  match.mask->vlan_id);
2807                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2808                                  match.key->vlan_id);
2809
2810                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2811                                  match.mask->vlan_priority);
2812                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2813                                  match.key->vlan_priority);
2814
2815                         *match_level = MLX5_MATCH_L2;
2816
2817                         if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
2818                             match.mask->vlan_eth_type &&
2819                             MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
2820                                                     ft_field_support.outer_second_vid,
2821                                                     fs_type)) {
2822                                 MLX5_SET(fte_match_set_misc, misc_c,
2823                                          outer_second_cvlan_tag, 1);
2824                                 spec->match_criteria_enable |=
2825                                         MLX5_MATCH_MISC_PARAMETERS;
2826                         }
2827                 }
2828         } else if (*match_level != MLX5_MATCH_NONE) {
2829                 /* cvlan_tag enabled in match criteria and
2830                  * disabled in match value means both S & C tags
2831                  * don't exist (untagged of both)
2832                  */
2833                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2834                 *match_level = MLX5_MATCH_L2;
2835         }
2836
2837         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2838                 struct flow_match_vlan match;
2839
2840                 flow_rule_match_cvlan(rule, &match);
2841                 if (match.mask->vlan_id ||
2842                     match.mask->vlan_priority ||
2843                     match.mask->vlan_tpid) {
2844                         if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
2845                                                      fs_type)) {
2846                                 NL_SET_ERR_MSG_MOD(extack,
2847                                                    "Matching on CVLAN is not supported");
2848                                 return -EOPNOTSUPP;
2849                         }
2850
2851                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2852                                 MLX5_SET(fte_match_set_misc, misc_c,
2853                                          outer_second_svlan_tag, 1);
2854                                 MLX5_SET(fte_match_set_misc, misc_v,
2855                                          outer_second_svlan_tag, 1);
2856                         } else {
2857                                 MLX5_SET(fte_match_set_misc, misc_c,
2858                                          outer_second_cvlan_tag, 1);
2859                                 MLX5_SET(fte_match_set_misc, misc_v,
2860                                          outer_second_cvlan_tag, 1);
2861                         }
2862
2863                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2864                                  match.mask->vlan_id);
2865                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2866                                  match.key->vlan_id);
2867                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2868                                  match.mask->vlan_priority);
2869                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2870                                  match.key->vlan_priority);
2871
2872                         *match_level = MLX5_MATCH_L2;
2873                         spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
2874                 }
2875         }
2876
2877         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2878                 struct flow_match_eth_addrs match;
2879
2880                 flow_rule_match_eth_addrs(rule, &match);
2881                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2882                                              dmac_47_16),
2883                                 match.mask->dst);
2884                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2885                                              dmac_47_16),
2886                                 match.key->dst);
2887
2888                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2889                                              smac_47_16),
2890                                 match.mask->src);
2891                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2892                                              smac_47_16),
2893                                 match.key->src);
2894
2895                 if (!is_zero_ether_addr(match.mask->src) ||
2896                     !is_zero_ether_addr(match.mask->dst))
2897                         *match_level = MLX5_MATCH_L2;
2898         }
2899
2900         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2901                 struct flow_match_control match;
2902
2903                 flow_rule_match_control(rule, &match);
2904                 addr_type = match.key->addr_type;
2905
2906                 /* the HW doesn't support frag first/later */
2907                 if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
2908                         NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported");
2909                         return -EOPNOTSUPP;
2910                 }
2911
2912                 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2913                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2914                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2915                                  match.key->flags & FLOW_DIS_IS_FRAGMENT);
2916
2917                         /* the HW doesn't need L3 inline to match on frag=no */
2918                         if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2919                                 *match_level = MLX5_MATCH_L2;
2920         /* ***  L2 attributes parsing up to here *** */
2921                         else
2922                                 *match_level = MLX5_MATCH_L3;
2923                 }
2924         }
2925
2926         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2927                 struct flow_match_basic match;
2928
2929                 flow_rule_match_basic(rule, &match);
2930                 ip_proto = match.key->ip_proto;
2931
2932                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2933                          match.mask->ip_proto);
2934                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2935                          match.key->ip_proto);
2936
2937                 if (match.mask->ip_proto)
2938                         *match_level = MLX5_MATCH_L3;
2939         }
2940
2941         if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2942                 struct flow_match_ipv4_addrs match;
2943
2944                 flow_rule_match_ipv4_addrs(rule, &match);
2945                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2946                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2947                        &match.mask->src, sizeof(match.mask->src));
2948                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2949                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2950                        &match.key->src, sizeof(match.key->src));
2951                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2952                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2953                        &match.mask->dst, sizeof(match.mask->dst));
2954                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2955                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2956                        &match.key->dst, sizeof(match.key->dst));
2957
2958                 if (match.mask->src || match.mask->dst)
2959                         *match_level = MLX5_MATCH_L3;
2960         }
2961
2962         if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2963                 struct flow_match_ipv6_addrs match;
2964
2965                 flow_rule_match_ipv6_addrs(rule, &match);
2966                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2967                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2968                        &match.mask->src, sizeof(match.mask->src));
2969                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2970                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2971                        &match.key->src, sizeof(match.key->src));
2972
2973                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2974                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2975                        &match.mask->dst, sizeof(match.mask->dst));
2976                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2977                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2978                        &match.key->dst, sizeof(match.key->dst));
2979
2980                 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2981                     ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2982                         *match_level = MLX5_MATCH_L3;
2983         }
2984
2985         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2986                 struct flow_match_ip match;
2987
2988                 flow_rule_match_ip(rule, &match);
2989                 if (match_inner_ecn) {
2990                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2991                                  match.mask->tos & 0x3);
2992                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2993                                  match.key->tos & 0x3);
2994                 }
2995
2996                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2997                          match.mask->tos >> 2);
2998                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2999                          match.key->tos  >> 2);
3000
3001                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
3002                          match.mask->ttl);
3003                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
3004                          match.key->ttl);
3005
3006                 if (match.mask->ttl &&
3007                     !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
3008                                                 ft_field_support.outer_ipv4_ttl)) {
3009                         NL_SET_ERR_MSG_MOD(extack,
3010                                            "Matching on TTL is not supported");
3011                         return -EOPNOTSUPP;
3012                 }
3013
3014                 if (match.mask->tos || match.mask->ttl)
3015                         *match_level = MLX5_MATCH_L3;
3016         }
3017
3018         /* ***  L3 attributes parsing up to here *** */
3019
3020         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
3021                 struct flow_match_ports match;
3022
3023                 flow_rule_match_ports(rule, &match);
3024                 switch (ip_proto) {
3025                 case IPPROTO_TCP:
3026                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
3027                                  tcp_sport, ntohs(match.mask->src));
3028                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
3029                                  tcp_sport, ntohs(match.key->src));
3030
3031                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
3032                                  tcp_dport, ntohs(match.mask->dst));
3033                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
3034                                  tcp_dport, ntohs(match.key->dst));
3035                         break;
3036
3037                 case IPPROTO_UDP:
3038                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
3039                                  udp_sport, ntohs(match.mask->src));
3040                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
3041                                  udp_sport, ntohs(match.key->src));
3042
3043                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
3044                                  udp_dport, ntohs(match.mask->dst));
3045                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
3046                                  udp_dport, ntohs(match.key->dst));
3047                         break;
3048                 default:
3049                         NL_SET_ERR_MSG_MOD(extack,
3050                                            "Only UDP and TCP transports are supported for L4 matching");
3051                         netdev_err(priv->netdev,
3052                                    "Only UDP and TCP transport are supported\n");
3053                         return -EINVAL;
3054                 }
3055
3056                 if (match.mask->src || match.mask->dst)
3057                         *match_level = MLX5_MATCH_L4;
3058         }
3059
3060         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
3061                 struct flow_match_tcp match;
3062
3063                 flow_rule_match_tcp(rule, &match);
3064                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
3065                          ntohs(match.mask->flags));
3066                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
3067                          ntohs(match.key->flags));
3068
3069                 if (match.mask->flags)
3070                         *match_level = MLX5_MATCH_L4;
3071         }
3072         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
3073                 struct flow_match_icmp match;
3074
3075                 flow_rule_match_icmp(rule, &match);
3076                 switch (ip_proto) {
3077                 case IPPROTO_ICMP:
3078                         if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
3079                               MLX5_FLEX_PROTO_ICMP)) {
3080                                 NL_SET_ERR_MSG_MOD(extack,
3081                                                    "Match on Flex protocols for ICMP is not supported");
3082                                 return -EOPNOTSUPP;
3083                         }
3084                         MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
3085                                  match.mask->type);
3086                         MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
3087                                  match.key->type);
3088                         MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
3089                                  match.mask->code);
3090                         MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
3091                                  match.key->code);
3092                         break;
3093                 case IPPROTO_ICMPV6:
3094                         if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
3095                               MLX5_FLEX_PROTO_ICMPV6)) {
3096                                 NL_SET_ERR_MSG_MOD(extack,
3097                                                    "Match on Flex protocols for ICMPV6 is not supported");
3098                                 return -EOPNOTSUPP;
3099                         }
3100                         MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
3101                                  match.mask->type);
3102                         MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
3103                                  match.key->type);
3104                         MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
3105                                  match.mask->code);
3106                         MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
3107                                  match.key->code);
3108                         break;
3109                 default:
3110                         NL_SET_ERR_MSG_MOD(extack,
3111                                            "Code and type matching only with ICMP and ICMPv6");
3112                         netdev_err(priv->netdev,
3113                                    "Code and type matching only with ICMP and ICMPv6\n");
3114                         return -EINVAL;
3115                 }
3116                 if (match.mask->code || match.mask->type) {
3117                         *match_level = MLX5_MATCH_L4;
3118                         spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
3119                 }
3120         }
3121         /* Currently supported only for MPLS over UDP */
3122         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
3123             !netif_is_bareudp(filter_dev)) {
3124                 NL_SET_ERR_MSG_MOD(extack,
3125                                    "Matching on MPLS is supported only for MPLS over UDP");
3126                 netdev_err(priv->netdev,
3127                            "Matching on MPLS is supported only for MPLS over UDP\n");
3128                 return -EOPNOTSUPP;
3129         }
3130
3131         return 0;
3132 }
3133
3134 static int parse_cls_flower(struct mlx5e_priv *priv,
3135                             struct mlx5e_tc_flow *flow,
3136                             struct mlx5_flow_spec *spec,
3137                             struct flow_cls_offload *f,
3138                             struct net_device *filter_dev)
3139 {
3140         u8 inner_match_level, outer_match_level, non_tunnel_match_level;
3141         struct netlink_ext_ack *extack = f->common.extack;
3142         struct mlx5_core_dev *dev = priv->mdev;
3143         struct mlx5_eswitch *esw = dev->priv.eswitch;
3144         struct mlx5e_rep_priv *rpriv = priv->ppriv;
3145         struct mlx5_eswitch_rep *rep;
3146         bool is_eswitch_flow;
3147         int err;
3148
3149         inner_match_level = MLX5_MATCH_NONE;
3150         outer_match_level = MLX5_MATCH_NONE;
3151
3152         err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
3153                                  &inner_match_level, &outer_match_level);
3154         non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
3155                                  outer_match_level : inner_match_level;
3156
3157         is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
3158         if (!err && is_eswitch_flow) {
3159                 rep = rpriv->rep;
3160                 if (rep->vport != MLX5_VPORT_UPLINK &&
3161                     (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
3162                     esw->offloads.inline_mode < non_tunnel_match_level)) {
3163                         NL_SET_ERR_MSG_MOD(extack,
3164                                            "Flow is not offloaded due to min inline setting");
3165                         netdev_warn(priv->netdev,
3166                                     "Flow is not offloaded due to min inline setting, required %d actual %d\n",
3167                                     non_tunnel_match_level, esw->offloads.inline_mode);
3168                         return -EOPNOTSUPP;
3169                 }
3170         }
3171
3172         flow->attr->inner_match_level = inner_match_level;
3173         flow->attr->outer_match_level = outer_match_level;
3174
3175
3176         return err;
3177 }
3178
/* Describes one rewritable header field: how to find it in the parsed pedit
 * headers, how to find the corresponding match field, and which modify-header
 * field id to program. Instances are built with the OFFLOAD() macro.
 */
struct mlx5_fields {
        u8  field;        /* MLX5_ACTION_IN_FIELD_OUT_* id written to the action */
        u8  field_bsize;  /* field width in bits (8, 16 or 32) */
        u32 field_mask;   /* bits of the field that may be rewritten */
        u32 offset;       /* byte offset of the field inside struct pedit_headers */
        u32 match_offset; /* byte offset of the field inside fte_match_set_lyr_2_4 */
};
3186
/* Build one struct mlx5_fields initializer.
 *
 * fw_field:    suffix of the MLX5_ACTION_IN_FIELD_OUT_* constant
 * field_bsize: field width in bits
 * field_mask:  bits of the field that may be rewritten
 * field, off:  member of struct pedit_headers plus an extra byte offset
 * match_field: member of fte_match_set_lyr_2_4 holding the matching value
 */
#define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
                {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
                 offsetof(struct pedit_headers, field) + (off), \
                 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
3191
/* Evaluates to true when the rewrite is redundant with the match: the masked
 * rewrite value equals the masked match value, and every bit set in the
 * rewrite mask is also set in the match mask (i.e. there are no rewritten
 * bits that are not matched on).
 *
 * All four pointers are dereferenced as 'type'.
 */
#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
        type matchmaskx = *(type *)(matchmaskp); \
        type matchvalx = *(type *)(matchvalp); \
        type maskx = *(type *)(maskp); \
        type valx = *(type *)(valp); \
        \
        (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
                                                                 matchmaskx)); \
})
3204
3205 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
3206                          void *matchmaskp, u8 bsize)
3207 {
3208         bool same = false;
3209
3210         switch (bsize) {
3211         case 8:
3212                 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
3213                 break;
3214         case 16:
3215                 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
3216                 break;
3217         case 32:
3218                 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
3219                 break;
3220         }
3221
3222         return same;
3223 }
3224
/* Table of header fields whose pedit rewrite can be offloaded. Each entry maps
 * a member of struct pedit_headers to a modify-header field id and to the
 * corresponding match field; offload_pedit_fields() walks it in order.
 */
static struct mlx5_fields fields[] = {
        /* Ethernet / VLAN */
        OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
        OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
        OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
        OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
        OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
        OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),

        /* IPv4 */
        OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
        OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
        OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
        OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),

        /* IPv6: each address is handled as four 32-bit words */
        OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
                src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
        OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
                src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
        OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
                src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
        OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
                src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
        OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
        OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
        OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
        OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
        OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
        /* DSCP is addressed through the first 16 bits of the IPv6 header */
        OFFLOAD(IP_DSCP, 16,  0xc00f, ip6, 0, ip_dscp),

        /* TCP */
        OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
        OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
        /* the TCP flags are handled as an 8-bit field, addressed as the byte
         * located 5 bytes past tcp.ack_seq
         */
        OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),

        /* UDP */
        OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
        OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
};
3265
3266 static unsigned long mask_to_le(unsigned long mask, int size)
3267 {
3268         __be32 mask_be32;
3269         __be16 mask_be16;
3270
3271         if (size == 32) {
3272                 mask_be32 = (__force __be32)(mask);
3273                 mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
3274         } else if (size == 16) {
3275                 mask_be32 = (__force __be32)(mask);
3276                 mask_be16 = *(__be16 *)&mask_be32;
3277                 mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
3278         }
3279
3280         return mask;
3281 }
3282
/* Convert the parsed pedit requests in parse_attr->hdrs into modify-header
 * actions appended to parse_attr->mod_hdr_acts.
 *
 * hdrs[0] holds the "set" masks/values and hdrs[1] the "add" masks/values.
 * The function walks fields[] and, for every entry that has bits requested,
 * emits one action. Mask bits that were handled are cleared in place so that
 * verify_offload_pedit_fields() can later detect requests for fields that are
 * not in the table.
 *
 * A field is skipped (no action emitted) when a "set" would write the value
 * the rule already matches on, or when an "add" adds zero.
 *
 * Returns 0 on success, -EOPNOTSUPP when the same field is both set and
 * added to or when the requested mask is not one contiguous run of bits, or
 * the error returned by mlx5e_mod_hdr_alloc().
 */
static int offload_pedit_fields(struct mlx5e_priv *priv,
                                int namespace,
                                struct mlx5e_tc_flow_parse_attr *parse_attr,
                                u32 *action_flags,
                                struct netlink_ext_ack *extack)
{
        struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
        struct pedit_headers_action *hdrs = parse_attr->hdrs;
        void *headers_c, *headers_v, *action, *vals_p;
        u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
        struct mlx5e_tc_mod_hdr_acts *mod_acts;
        unsigned long mask, field_mask;
        int i, first, last, next_z;
        struct mlx5_fields *f;
        u8 cmd;

        mod_acts = &parse_attr->mod_hdr_acts;
        /* match criteria/value headers, used to detect redundant rewrites */
        headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
        headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);

        set_masks = &hdrs[0].masks;
        add_masks = &hdrs[1].masks;
        set_vals = &hdrs[0].vals;
        add_vals = &hdrs[1].vals;

        for (i = 0; i < ARRAY_SIZE(fields); i++) {
                bool skip;

                f = &fields[i];
                /* avoid seeing bits set from previous iterations */
                s_mask = 0;
                a_mask = 0;

                /* locate this field inside the set/add mask structures */
                s_masks_p = (void *)set_masks + f->offset;
                a_masks_p = (void *)add_masks + f->offset;

                /* keep only the bits that belong to this field */
                s_mask = *s_masks_p & f->field_mask;
                a_mask = *a_masks_p & f->field_mask;

                if (!s_mask && !a_mask) /* nothing to offload here */
                        continue;

                if (s_mask && a_mask) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "can't set and add to the same HW field");
                        netdev_warn(priv->netdev,
                                    "mlx5: can't set and add to the same HW field (%x)\n",
                                    f->field);
                        return -EOPNOTSUPP;
                }

                skip = false;
                if (s_mask) {
                        void *match_mask = headers_c + f->match_offset;
                        void *match_val = headers_v + f->match_offset;

                        cmd  = MLX5_ACTION_TYPE_SET;
                        mask = s_mask;
                        vals_p = (void *)set_vals + f->offset;
                        /* don't rewrite if we have a match on the same value */
                        if (cmp_val_mask(vals_p, s_masks_p, match_val,
                                         match_mask, f->field_bsize))
                                skip = true;
                        /* clear to denote we consumed this field */
                        *s_masks_p &= ~f->field_mask;
                } else {
                        cmd  = MLX5_ACTION_TYPE_ADD;
                        mask = a_mask;
                        vals_p = (void *)add_vals + f->offset;
                        /* add 0 is no change */
                        if ((*(u32 *)vals_p & f->field_mask) == 0)
                                skip = true;
                        /* clear to denote we consumed this field */
                        *a_masks_p &= ~f->field_mask;
                }
                /* the mask was consumed above even when the action is skipped */
                if (skip)
                        continue;

                mask = mask_to_le(mask, f->field_bsize);

                /* the requested bits must form a single contiguous run:
                 * a zero bit between the first and last set bits means the
                 * request covers several separate sub-fields
                 */
                first = find_first_bit(&mask, f->field_bsize);
                next_z = find_next_zero_bit(&mask, f->field_bsize, first);
                last  = find_last_bit(&mask, f->field_bsize);
                if (first < next_z && next_z < last) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "rewrite of few sub-fields isn't supported");
                        netdev_warn(priv->netdev,
                                    "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
                                    mask);
                        return -EOPNOTSUPP;
                }

                /* get the next free action slot in mod_acts */
                action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts);
                if (IS_ERR(action)) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "too many pedit actions, can't offload");
                        mlx5_core_warn(priv->mdev,
                                       "mlx5: parsed %d pedit actions, can't do more\n",
                                       mod_acts->num_actions);
                        return PTR_ERR(action);
                }

                MLX5_SET(set_action_in, action, action_type, cmd);
                MLX5_SET(set_action_in, action, field, f->field);

                /* offset/length are only programmed for "set" actions */
                if (cmd == MLX5_ACTION_TYPE_SET) {
                        int start;

                        field_mask = mask_to_le(f->field_mask, f->field_bsize);

                        /* if field is bit sized it can start not from first bit */
                        start = find_first_bit(&field_mask, f->field_bsize);

                        MLX5_SET(set_action_in, action, offset, first - start);
                        /* length is num of bits to be written, zero means length of 32 */
                        MLX5_SET(set_action_in, action, length, (last - first + 1));
                }

                /* data is the value in host order, shifted down to the first
                 * requested bit
                 */
                if (f->field_bsize == 32)
                        MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
                else if (f->field_bsize == 16)
                        MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
                else if (f->field_bsize == 8)
                        MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);

                ++mod_acts->num_actions;
        }

        return 0;
}
3413
/* All-zero reference used to check whether a pedit mask has any bits left */
static const struct pedit_headers zero_masks = {};
3415
3416 static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
3417                                        struct mlx5e_tc_flow_parse_attr *parse_attr,
3418                                        struct netlink_ext_ack *extack)
3419 {
3420         struct pedit_headers *cmd_masks;
3421         u8 cmd;
3422
3423         for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
3424                 cmd_masks = &parse_attr->hdrs[cmd].masks;
3425                 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3426                         NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
3427                         netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
3428                         print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
3429                                        16, 1, cmd_masks, sizeof(zero_masks), true);
3430                         return -EOPNOTSUPP;
3431                 }
3432         }
3433
3434         return 0;
3435 }
3436
3437 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3438                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
3439                                  u32 *action_flags,
3440                                  struct netlink_ext_ack *extack)
3441 {
3442         int err;
3443
3444         err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
3445         if (err)
3446                 goto out_dealloc_parsed_actions;
3447
3448         err = verify_offload_pedit_fields(priv, parse_attr, extack);
3449         if (err)
3450                 goto out_dealloc_parsed_actions;
3451
3452         return 0;
3453
3454 out_dealloc_parsed_actions:
3455         mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3456         return err;
3457 }
3458
/* Overlay for a 4-byte pedit mask word whose first byte is the IPv4 ttl;
 * used to tell whether anything besides ttl is being modified.
 */
struct ip_ttl_word {
        __u8    ttl;
        __u8    protocol;
        __sum16 check;
};
3464
/* Overlay for a 4-byte pedit mask word that starts at the IPv6 payload_len
 * and ends with hop_limit; used to tell whether anything besides hop_limit is
 * being modified.
 */
struct ipv6_hoplimit_word {
        __be16  payload_len;
        __u8    nexthdr;
        __u8    hop_limit;
};
3470
3471 static bool
3472 is_action_keys_supported(const struct flow_action_entry *act, bool ct_flow,
3473                          bool *modify_ip_header, bool *modify_tuple,
3474                          struct netlink_ext_ack *extack)
3475 {
3476         u32 mask, offset;
3477         u8 htype;
3478
3479         htype = act->mangle.htype;
3480         offset = act->mangle.offset;
3481         mask = ~act->mangle.mask;
3482         /* For IPv4 & IPv6 header check 4 byte word,
3483          * to determine that modified fields
3484          * are NOT ttl & hop_limit only.
3485          */
3486         if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3487                 struct ip_ttl_word *ttl_word =
3488                         (struct ip_ttl_word *)&mask;
3489
3490                 if (offset != offsetof(struct iphdr, ttl) ||
3491                     ttl_word->protocol ||
3492                     ttl_word->check) {
3493                         *modify_ip_header = true;
3494                 }
3495
3496                 if (offset >= offsetof(struct iphdr, saddr))
3497                         *modify_tuple = true;
3498
3499                 if (ct_flow && *modify_tuple) {
3500                         NL_SET_ERR_MSG_MOD(extack,
3501                                            "can't offload re-write of ipv4 address with action ct");
3502                         return false;
3503                 }
3504         } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3505                 struct ipv6_hoplimit_word *hoplimit_word =
3506                         (struct ipv6_hoplimit_word *)&mask;
3507
3508                 if (offset != offsetof(struct ipv6hdr, payload_len) ||
3509                     hoplimit_word->payload_len ||
3510                     hoplimit_word->nexthdr) {
3511                         *modify_ip_header = true;
3512                 }
3513
3514                 if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
3515                         *modify_tuple = true;
3516
3517                 if (ct_flow && *modify_tuple) {
3518                         NL_SET_ERR_MSG_MOD(extack,
3519                                            "can't offload re-write of ipv6 address with action ct");
3520                         return false;
3521                 }
3522         } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
3523                    htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
3524                 *modify_tuple = true;
3525                 if (ct_flow) {
3526                         NL_SET_ERR_MSG_MOD(extack,
3527                                            "can't offload re-write of transport header ports with action ct");
3528                         return false;
3529                 }
3530         }
3531
3532         return true;
3533 }
3534
3535 static bool modify_tuple_supported(bool modify_tuple, bool ct_clear,
3536                                    bool ct_flow, struct netlink_ext_ack *extack,
3537                                    struct mlx5e_priv *priv,
3538                                    struct mlx5_flow_spec *spec)
3539 {
3540         if (!modify_tuple || ct_clear)
3541                 return true;
3542
3543         if (ct_flow) {
3544                 NL_SET_ERR_MSG_MOD(extack,
3545                                    "can't offload tuple modification with non-clear ct()");
3546                 netdev_info(priv->netdev,
3547                             "can't offload tuple modification with non-clear ct()");
3548                 return false;
3549         }
3550
3551         /* Add ct_state=-trk match so it will be offloaded for non ct flows
3552          * (or after clear action), as otherwise, since the tuple is changed,
3553          * we can't restore ct state
3554          */
3555         if (mlx5_tc_ct_add_no_trk_match(spec)) {
3556                 NL_SET_ERR_MSG_MOD(extack,
3557                                    "can't offload tuple modification with ct matches and no ct(clear) action");
3558                 netdev_info(priv->netdev,
3559                             "can't offload tuple modification with ct matches and no ct(clear) action");
3560                 return false;
3561         }
3562
3563         return true;
3564 }
3565
3566 static bool modify_header_match_supported(struct mlx5e_priv *priv,
3567                                           struct mlx5_flow_spec *spec,
3568                                           struct flow_action *flow_action,
3569                                           u32 actions, bool ct_flow,
3570                                           bool ct_clear,
3571                                           struct netlink_ext_ack *extack)
3572 {
3573         const struct flow_action_entry *act;
3574         bool modify_ip_header, modify_tuple;
3575         void *headers_c;
3576         void *headers_v;
3577         u16 ethertype;
3578         u8 ip_proto;
3579         int i;
3580
3581         headers_c = mlx5e_get_match_headers_criteria(actions, spec);
3582         headers_v = mlx5e_get_match_headers_value(actions, spec);
3583         ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3584
3585         /* for non-IP we only re-write MACs, so we're okay */
3586         if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3587             ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3588                 goto out_ok;
3589
3590         modify_ip_header = false;
3591         modify_tuple = false;
3592         flow_action_for_each(i, act, flow_action) {
3593                 if (act->id != FLOW_ACTION_MANGLE &&
3594                     act->id != FLOW_ACTION_ADD)
3595                         continue;
3596
3597                 if (!is_action_keys_supported(act, ct_flow,
3598                                               &modify_ip_header,
3599                                               &modify_tuple, extack))
3600                         return false;
3601         }
3602
3603         if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
3604                                     priv, spec))
3605                 return false;
3606
3607         ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3608         if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3609             ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3610                 NL_SET_ERR_MSG_MOD(extack,
3611                                    "can't offload re-write of non TCP/UDP");
3612                 netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3613                             ip_proto);
3614                 return false;
3615         }
3616
3617 out_ok:
3618         return true;
3619 }
3620
3621 static bool
3622 actions_match_supported_fdb(struct mlx5e_priv *priv,
3623                             struct mlx5e_tc_flow *flow,
3624                             struct netlink_ext_ack *extack)
3625 {
3626         struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
3627         bool ct_flow, ct_clear;
3628
3629         ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3630         ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3631
3632         if (esw_attr->split_count && ct_flow &&
3633             !MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve)) {
3634                 /* All registers used by ct are cleared when using
3635                  * split rules.
3636                  */
3637                 NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct");
3638                 return false;
3639         }
3640
3641         if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
3642                 NL_SET_ERR_MSG_MOD(extack,
3643                                    "current firmware doesn't support split rule for port mirroring");
3644                 netdev_warn_once(priv->netdev,
3645                                  "current firmware doesn't support split rule for port mirroring\n");
3646                 return false;
3647         }
3648
3649         return true;
3650 }
3651
3652 static bool
3653 actions_match_supported(struct mlx5e_priv *priv,
3654                         struct flow_action *flow_action,
3655                         u32 actions,
3656                         struct mlx5e_tc_flow_parse_attr *parse_attr,
3657                         struct mlx5e_tc_flow *flow,
3658                         struct netlink_ext_ack *extack)
3659 {
3660         bool ct_flow, ct_clear;
3661
3662         ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3663         ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3664
3665         if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3666             !modify_header_match_supported(priv, &parse_attr->spec, flow_action,
3667                                            actions, ct_flow, ct_clear, extack))
3668                 return false;
3669
3670         if (mlx5e_is_eswitch_flow(flow) &&
3671             !actions_match_supported_fdb(priv, flow, extack))
3672                 return false;
3673
3674         return true;
3675 }
3676
/* Return true when both privs refer to the very same mlx5 core device
 * (pointer comparison of their mdev).
 */
static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
{
        return priv->mdev == peer_priv->mdev;
}
3681
3682 bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3683 {
3684         struct mlx5_core_dev *fmdev, *pmdev;
3685         u64 fsystem_guid, psystem_guid;
3686
3687         fmdev = priv->mdev;
3688         pmdev = peer_priv->mdev;
3689
3690         fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3691         psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
3692
3693         return (fsystem_guid == psystem_guid);
3694 }
3695
3696 static int
3697 actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
3698                                 struct mlx5e_tc_flow *flow,
3699                                 struct mlx5_flow_attr *attr,
3700                                 struct netlink_ext_ack *extack)
3701 {
3702         struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
3703         struct pedit_headers_action *hdrs = parse_attr->hdrs;
3704         enum mlx5_flow_namespace_type ns_type;
3705         int err;
3706
3707         if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits &&
3708             !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits)
3709                 return 0;
3710
3711         ns_type = mlx5e_get_flow_namespace(flow);
3712
3713         err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack);
3714         if (err)
3715                 return err;
3716
3717         if (parse_attr->mod_hdr_acts.num_actions > 0)
3718                 return 0;
3719
3720         /* In case all pedit actions are skipped, remove the MOD_HDR flag. */
3721         attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3722         mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3723
3724         if (ns_type != MLX5_FLOW_NAMESPACE_FDB)
3725                 return 0;
3726
3727         if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
3728               (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
3729                 attr->esw_attr->split_count = 0;
3730
3731         return 0;
3732 }
3733
3734 static struct mlx5_flow_attr*
3735 mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
3736                                    enum mlx5_flow_namespace_type ns_type)
3737 {
3738         struct mlx5e_tc_flow_parse_attr *parse_attr;
3739         u32 attr_sz = ns_to_attr_sz(ns_type);
3740         struct mlx5_flow_attr *attr2;
3741
3742         attr2 = mlx5_alloc_flow_attr(ns_type);
3743         parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
3744         if (!attr2 || !parse_attr) {
3745                 kvfree(parse_attr);
3746                 kfree(attr2);
3747                 return NULL;
3748         }
3749
3750         memcpy(attr2, attr, attr_sz);
3751         INIT_LIST_HEAD(&attr2->list);
3752         parse_attr->filter_dev = attr->parse_attr->filter_dev;
3753         attr2->action = 0;
3754         attr2->counter = NULL;
3755         attr2->tc_act_cookies_count = 0;
3756         attr2->flags = 0;
3757         attr2->parse_attr = parse_attr;
3758         attr2->dest_chain = 0;
3759         attr2->dest_ft = NULL;
3760         attr2->act_id_restore_rule = NULL;
3761
3762         if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
3763                 attr2->esw_attr->out_count = 0;
3764                 attr2->esw_attr->split_count = 0;
3765         }
3766
3767         attr2->branch_true = NULL;
3768         attr2->branch_false = NULL;
3769         attr2->jumping_attr = NULL;
3770         return attr2;
3771 }
3772
3773 struct mlx5_flow_attr *
3774 mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow)
3775 {
3776         struct mlx5_esw_flow_attr *esw_attr;
3777         struct mlx5_flow_attr *attr;
3778         int i;
3779
3780         list_for_each_entry(attr, &flow->attrs, list) {
3781                 esw_attr = attr->esw_attr;
3782                 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
3783                         if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)
3784                                 return attr;
3785                 }
3786         }
3787
3788         return NULL;
3789 }
3790
3791 void
3792 mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow)
3793 {
3794         struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3795         struct mlx5_flow_attr *attr;
3796
3797         list_for_each_entry(attr, &flow->attrs, list) {
3798                 if (list_is_last(&attr->list, &flow->attrs))
3799                         break;
3800
3801                 mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle);
3802         }
3803 }
3804
3805 static void
3806 free_flow_post_acts(struct mlx5e_tc_flow *flow)
3807 {
3808         struct mlx5_flow_attr *attr, *tmp;
3809
3810         list_for_each_entry_safe(attr, tmp, &flow->attrs, list) {
3811                 if (list_is_last(&attr->list, &flow->attrs))
3812                         break;
3813
3814                 mlx5_free_flow_attr(flow, attr);
3815                 free_branch_attr(flow, attr->branch_true);
3816                 free_branch_attr(flow, attr->branch_false);
3817
3818                 list_del(&attr->list);
3819                 kvfree(attr->parse_attr);
3820                 kfree(attr);
3821         }
3822 }
3823
3824 int
3825 mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow)
3826 {
3827         struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3828         struct mlx5_flow_attr *attr;
3829         int err = 0;
3830
3831         list_for_each_entry(attr, &flow->attrs, list) {
3832                 if (list_is_last(&attr->list, &flow->attrs))
3833                         break;
3834
3835                 err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle);
3836                 if (err)
3837                         break;
3838         }
3839
3840         return err;
3841 }
3842
3843 /* TC filter rule HW translation:
3844  *
3845  * +---------------------+
3846  * + ft prio (tc chain)  +
3847  * + original match      +
3848  * +---------------------+
3849  *           |
3850  *           | if multi table action
3851  *           |
3852  *           v
3853  * +---------------------+
3854  * + post act ft         |<----.
3855  * + match fte id        |     | split on multi table action
3856  * + do actions          |-----'
3857  * +---------------------+
3858  *           |
3859  *           |
3860  *           v
3861  * Do rest of the actions after last multi table action.
3862  */
3863 static int
3864 alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
3865 {
3866         struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3867         struct mlx5_flow_attr *attr, *next_attr = NULL;
3868         struct mlx5e_post_act_handle *handle;
3869         int err;
3870
3871         /* This is going in reverse order as needed.
3872          * The first entry is the last attribute.
3873          */
3874         list_for_each_entry(attr, &flow->attrs, list) {
3875                 if (!next_attr) {
3876                         /* Set counter action on last post act rule. */
3877                         attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3878                 }
3879
3880                 if (next_attr && !(attr->flags & MLX5_ATTR_FLAG_TERMINATING)) {
3881                         err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr);
3882                         if (err)
3883                                 goto out_free;
3884                 }
3885
3886                 /* Don't add post_act rule for first attr (last in the list).
3887                  * It's being handled by the caller.
3888                  */
3889                 if (list_is_last(&attr->list, &flow->attrs))
3890                         break;
3891
3892                 err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
3893                 if (err)
3894                         goto out_free;
3895
3896                 err = post_process_attr(flow, attr, extack);
3897                 if (err)
3898                         goto out_free;
3899
3900                 handle = mlx5e_tc_post_act_add(post_act, attr);
3901                 if (IS_ERR(handle)) {
3902                         err = PTR_ERR(handle);
3903                         goto out_free;
3904                 }
3905
3906                 attr->post_act_handle = handle;
3907
3908                 if (attr->jumping_attr) {
3909                         err = mlx5e_tc_act_set_next_post_act(flow, attr->jumping_attr, attr);
3910                         if (err)
3911                                 goto out_free;
3912                 }
3913
3914                 next_attr = attr;
3915         }
3916
3917         if (flow_flag_test(flow, SLOW))
3918                 goto out;
3919
3920         err = mlx5e_tc_offload_flow_post_acts(flow);
3921         if (err)
3922                 goto out_free;
3923
3924 out:
3925         return 0;
3926
3927 out_free:
3928         free_flow_post_acts(flow);
3929         return err;
3930 }
3931
3932 static int
3933 alloc_branch_attr(struct mlx5e_tc_flow *flow,
3934                   struct mlx5e_tc_act_branch_ctrl *cond,
3935                   struct mlx5_flow_attr **cond_attr,
3936                   u32 *jump_count,
3937                   struct netlink_ext_ack *extack)
3938 {
3939         struct mlx5_flow_attr *attr;
3940         int err = 0;
3941
3942         *cond_attr = mlx5e_clone_flow_attr_for_post_act(flow->attr,
3943                                                         mlx5e_get_flow_namespace(flow));
3944         if (!(*cond_attr))
3945                 return -ENOMEM;
3946
3947         attr = *cond_attr;
3948
3949         switch (cond->act_id) {
3950         case FLOW_ACTION_DROP:
3951                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
3952                 break;
3953         case FLOW_ACTION_ACCEPT:
3954         case FLOW_ACTION_PIPE:
3955                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3956                 attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv));
3957                 break;
3958         case FLOW_ACTION_JUMP:
3959                 if (*jump_count) {
3960                         NL_SET_ERR_MSG_MOD(extack, "Cannot offload flows with nested jumps");
3961                         err = -EOPNOTSUPP;
3962                         goto out_err;
3963                 }
3964                 *jump_count = cond->extval;
3965                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3966                 attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv));
3967                 break;
3968         default:
3969                 err = -EOPNOTSUPP;
3970                 goto out_err;
3971         }
3972
3973         return err;
3974 out_err:
3975         kfree(*cond_attr);
3976         *cond_attr = NULL;
3977         return err;
3978 }
3979
3980 static void
3981 dec_jump_count(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act,
3982                struct mlx5_flow_attr *attr, struct mlx5e_priv *priv,
3983                struct mlx5e_tc_jump_state *jump_state)
3984 {
3985         if (!jump_state->jump_count)
3986                 return;
3987
3988         /* Single tc action can instantiate multiple offload actions (e.g. pedit)
3989          * Jump only over a tc action
3990          */
3991         if (act->id == jump_state->last_id && act->hw_index == jump_state->last_index)
3992                 return;
3993
3994         jump_state->last_id = act->id;
3995         jump_state->last_index = act->hw_index;
3996
3997         /* nothing to do for intermediate actions */
3998         if (--jump_state->jump_count > 1)
3999                 return;
4000
4001         if (jump_state->jump_count == 1) { /* last action in the jump action list */
4002
4003                 /* create a new attribute after this action */
4004                 jump_state->jump_target = true;
4005
4006                 if (tc_act->is_terminating_action) { /* the branch ends here */
4007                         attr->flags |= MLX5_ATTR_FLAG_TERMINATING;
4008                         attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
4009                 } else { /* the branch continues executing the rest of the actions */
4010                         struct mlx5e_post_act *post_act;
4011
4012                         attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
4013                         post_act = get_post_action(priv);
4014                         attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act);
4015                 }
4016         } else if (jump_state->jump_count == 0) { /* first attr after the jump action list */
4017                 /* This is the post action for the jumping attribute (either red or green)
4018                  * Use the stored jumping_attr to set the post act id on the jumping attribute
4019                  */
4020                 attr->jumping_attr = jump_state->jumping_attr;
4021         }
4022 }
4023
4024 static int
4025 parse_branch_ctrl(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act,
4026                   struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr,
4027                   struct mlx5e_tc_jump_state *jump_state,
4028                   struct netlink_ext_ack *extack)
4029 {
4030         struct mlx5e_tc_act_branch_ctrl cond_true, cond_false;
4031         u32 jump_count = jump_state->jump_count;
4032         int err;
4033
4034         if (!tc_act->get_branch_ctrl)
4035                 return 0;
4036
4037         tc_act->get_branch_ctrl(act, &cond_true, &cond_false);
4038
4039         err = alloc_branch_attr(flow, &cond_true,
4040                                 &attr->branch_true, &jump_count, extack);
4041         if (err)
4042                 goto out_err;
4043
4044         if (jump_count)
4045                 jump_state->jumping_attr = attr->branch_true;
4046
4047         err = alloc_branch_attr(flow, &cond_false,
4048                                 &attr->branch_false, &jump_count, extack);
4049         if (err)
4050                 goto err_branch_false;
4051
4052         if (jump_count && !jump_state->jumping_attr)
4053                 jump_state->jumping_attr = attr->branch_false;
4054
4055         jump_state->jump_count = jump_count;
4056
4057         /* branching action requires its own counter */
4058         attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
4059         flow_flag_set(flow, USE_ACT_STATS);
4060
4061         return 0;
4062
4063 err_branch_false:
4064         free_branch_attr(flow, attr->branch_true);
4065 out_err:
4066         return err;
4067 }
4068
4069 static int
4070 parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
4071                  struct flow_action *flow_action)
4072 {
4073         struct netlink_ext_ack *extack = parse_state->extack;
4074         struct mlx5e_tc_flow_action flow_action_reorder;
4075         struct mlx5e_tc_flow *flow = parse_state->flow;
4076         struct mlx5e_tc_jump_state jump_state = {};
4077         struct mlx5_flow_attr *attr = flow->attr;
4078         enum mlx5_flow_namespace_type ns_type;
4079         struct mlx5e_priv *priv = flow->priv;
4080         struct flow_action_entry *act, **_act;
4081         struct mlx5e_tc_act *tc_act;
4082         int err, i;
4083
4084         flow_action_reorder.num_entries = flow_action->num_entries;
4085         flow_action_reorder.entries = kcalloc(flow_action->num_entries,
4086                                               sizeof(flow_action), GFP_KERNEL);
4087         if (!flow_action_reorder.entries)
4088                 return -ENOMEM;
4089
4090         mlx5e_tc_act_reorder_flow_actions(flow_action, &flow_action_reorder);
4091
4092         ns_type = mlx5e_get_flow_namespace(flow);
4093         list_add(&attr->list, &flow->attrs);
4094
4095         flow_action_for_each(i, _act, &flow_action_reorder) {
4096                 jump_state.jump_target = false;
4097                 act = *_act;
4098                 tc_act = mlx5e_tc_act_get(act->id, ns_type);
4099                 if (!tc_act) {
4100                         NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
4101                         err = -EOPNOTSUPP;
4102                         goto out_free;
4103                 }
4104
4105                 if (!tc_act->can_offload(parse_state, act, i, attr)) {
4106                         err = -EOPNOTSUPP;
4107                         goto out_free;
4108                 }
4109
4110                 err = tc_act->parse_action(parse_state, act, priv, attr);
4111                 if (err)
4112                         goto out_free;
4113
4114                 dec_jump_count(act, tc_act, attr, priv, &jump_state);
4115
4116                 err = parse_branch_ctrl(act, tc_act, flow, attr, &jump_state, extack);
4117                 if (err)
4118                         goto out_free;
4119
4120                 parse_state->actions |= attr->action;
4121                 if (!tc_act->stats_action)
4122                         attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie;
4123
4124                 /* Split attr for multi table act if not the last act. */
4125                 if (jump_state.jump_target ||
4126                     (tc_act->is_multi_table_act &&
4127                     tc_act->is_multi_table_act(priv, act, attr) &&
4128                     i < flow_action_reorder.num_entries - 1)) {
4129                         err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
4130                         if (err)
4131                                 goto out_free;
4132
4133                         attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
4134                         if (!attr) {
4135                                 err = -ENOMEM;
4136                                 goto out_free;
4137                         }
4138
4139                         list_add(&attr->list, &flow->attrs);
4140                 }
4141         }
4142
4143         kfree(flow_action_reorder.entries);
4144
4145         err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
4146         if (err)
4147                 goto out_free_post_acts;
4148
4149         err = alloc_flow_post_acts(flow, extack);
4150         if (err)
4151                 goto out_free_post_acts;
4152
4153         return 0;
4154
4155 out_free:
4156         kfree(flow_action_reorder.entries);
4157 out_free_post_acts:
4158         free_flow_post_acts(flow);
4159
4160         return err;
4161 }
4162
4163 static int
4164 flow_action_supported(struct flow_action *flow_action,
4165                       struct netlink_ext_ack *extack)
4166 {
4167         if (!flow_action_has_entries(flow_action)) {
4168                 NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
4169                 return -EINVAL;
4170         }
4171
4172         if (!flow_action_hw_stats_check(flow_action, extack,
4173                                         FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
4174                 NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
4175                 return -EOPNOTSUPP;
4176         }
4177
4178         return 0;
4179 }
4180
4181 static int
4182 parse_tc_nic_actions(struct mlx5e_priv *priv,
4183                      struct flow_action *flow_action,
4184                      struct mlx5e_tc_flow *flow,
4185                      struct netlink_ext_ack *extack)
4186 {
4187         struct mlx5e_tc_act_parse_state *parse_state;
4188         struct mlx5e_tc_flow_parse_attr *parse_attr;
4189         struct mlx5_flow_attr *attr = flow->attr;
4190         int err;
4191
4192         err = flow_action_supported(flow_action, extack);
4193         if (err)
4194                 return err;
4195
4196         attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
4197         parse_attr = attr->parse_attr;
4198         parse_state = &parse_attr->parse_state;
4199         mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
4200         parse_state->ct_priv = get_ct_priv(priv);
4201
4202         err = parse_tc_actions(parse_state, flow_action);
4203         if (err)
4204                 return err;
4205
4206         err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
4207         if (err)
4208                 return err;
4209
4210         err = verify_attr_actions(attr->action, extack);
4211         if (err)
4212                 return err;
4213
4214         if (!actions_match_supported(priv, flow_action, parse_state->actions,
4215                                      parse_attr, flow, extack))
4216                 return -EOPNOTSUPP;
4217
4218         return 0;
4219 }
4220
4221 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
4222                                   struct net_device *peer_netdev)
4223 {
4224         struct mlx5e_priv *peer_priv;
4225
4226         peer_priv = netdev_priv(peer_netdev);
4227
4228         return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
4229                 mlx5e_eswitch_vf_rep(priv->netdev) &&
4230                 mlx5e_eswitch_vf_rep(peer_netdev) &&
4231                 mlx5e_same_hw_devs(priv, peer_priv));
4232 }
4233
4234 static bool same_hw_reps(struct mlx5e_priv *priv,
4235                          struct net_device *peer_netdev)
4236 {
4237         struct mlx5e_priv *peer_priv;
4238
4239         peer_priv = netdev_priv(peer_netdev);
4240
4241         return mlx5e_eswitch_rep(priv->netdev) &&
4242                mlx5e_eswitch_rep(peer_netdev) &&
4243                mlx5e_same_hw_devs(priv, peer_priv);
4244 }
4245
4246 static bool is_lag_dev(struct mlx5e_priv *priv,
4247                        struct net_device *peer_netdev)
4248 {
4249         return ((mlx5_lag_is_sriov(priv->mdev) ||
4250                  mlx5_lag_is_multipath(priv->mdev)) &&
4251                  same_hw_reps(priv, peer_netdev));
4252 }
4253
4254 static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
4255 {
4256         return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(priv->mdev);
4257 }
4258
4259 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
4260                                     struct net_device *out_dev)
4261 {
4262         if (is_merged_eswitch_vfs(priv, out_dev))
4263                 return true;
4264
4265         if (is_multiport_eligible(priv, out_dev))
4266                 return true;
4267
4268         if (is_lag_dev(priv, out_dev))
4269                 return true;
4270
4271         return mlx5e_eswitch_rep(out_dev) &&
4272                same_port_devs(priv, netdev_priv(out_dev));
4273 }
4274
4275 int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
4276                                       struct mlx5_flow_attr *attr,
4277                                       int ifindex,
4278                                       enum mlx5e_tc_int_port_type type,
4279                                       u32 *action,
4280                                       int out_index)
4281 {
4282         struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4283         struct mlx5e_tc_int_port_priv *int_port_priv;
4284         struct mlx5e_tc_flow_parse_attr *parse_attr;
4285         struct mlx5e_tc_int_port *dest_int_port;
4286         int err;
4287
4288         parse_attr = attr->parse_attr;
4289         int_port_priv = mlx5e_get_int_port_priv(priv);
4290
4291         dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type);
4292         if (IS_ERR(dest_int_port))
4293                 return PTR_ERR(dest_int_port);
4294
4295         err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
4296                                         MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
4297                                         mlx5e_tc_int_port_get_metadata(dest_int_port));
4298         if (err) {
4299                 mlx5e_tc_int_port_put(int_port_priv, dest_int_port);
4300                 return err;
4301         }
4302
4303         *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4304
4305         esw_attr->dest_int_port = dest_int_port;
4306         esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
4307         esw_attr->split_count = out_index;
4308
4309         /* Forward to root fdb for matching against the new source vport */
4310         attr->dest_chain = 0;
4311
4312         return 0;
4313 }
4314
4315 static int
4316 parse_tc_fdb_actions(struct mlx5e_priv *priv,
4317                      struct flow_action *flow_action,
4318                      struct mlx5e_tc_flow *flow,
4319                      struct netlink_ext_ack *extack)
4320 {
4321         struct mlx5e_tc_act_parse_state *parse_state;
4322         struct mlx5e_tc_flow_parse_attr *parse_attr;
4323         struct mlx5_flow_attr *attr = flow->attr;
4324         struct mlx5_esw_flow_attr *esw_attr;
4325         struct net_device *filter_dev;
4326         int err;
4327
4328         err = flow_action_supported(flow_action, extack);
4329         if (err)
4330                 return err;
4331
4332         esw_attr = attr->esw_attr;
4333         parse_attr = attr->parse_attr;
4334         filter_dev = parse_attr->filter_dev;
4335         parse_state = &parse_attr->parse_state;
4336         mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
4337         parse_state->ct_priv = get_ct_priv(priv);
4338
4339         err = parse_tc_actions(parse_state, flow_action);
4340         if (err)
4341                 return err;
4342
4343         /* Forward to/from internal port can only have 1 dest */
4344         if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) &&
4345             esw_attr->out_count > 1) {
4346                 NL_SET_ERR_MSG_MOD(extack,
4347                                    "Rules with internal port can have only one destination");
4348                 return -EOPNOTSUPP;
4349         }
4350
4351         /* Forward from tunnel/internal port to internal port is not supported */
4352         if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) &&
4353             esw_attr->dest_int_port) {
4354                 NL_SET_ERR_MSG_MOD(extack,
4355                                    "Forwarding from tunnel/internal port to internal port is not supported");
4356                 return -EOPNOTSUPP;
4357         }
4358
4359         err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
4360         if (err)
4361                 return err;
4362
4363         if (!actions_match_supported(priv, flow_action, parse_state->actions,
4364                                      parse_attr, flow, extack))
4365                 return -EOPNOTSUPP;
4366
4367         return 0;
4368 }
4369
4370 static void get_flags(int flags, unsigned long *flow_flags)
4371 {
4372         unsigned long __flow_flags = 0;
4373
4374         if (flags & MLX5_TC_FLAG(INGRESS))
4375                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
4376         if (flags & MLX5_TC_FLAG(EGRESS))
4377                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
4378
4379         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
4380                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4381         if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
4382                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4383         if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
4384                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
4385
4386         *flow_flags = __flow_flags;
4387 }
4388
4389 static const struct rhashtable_params tc_ht_params = {
4390         .head_offset = offsetof(struct mlx5e_tc_flow, node),
4391         .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
4392         .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
4393         .automatic_shrinking = true,
4394 };
4395
4396 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
4397                                     unsigned long flags)
4398 {
4399         struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
4400         struct mlx5e_rep_priv *rpriv;
4401
4402         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
4403                 rpriv = priv->ppriv;
4404                 return &rpriv->tc_ht;
4405         } else /* NIC offload */
4406                 return &tc->ht;
4407 }
4408
4409 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
4410 {
4411         struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
4412         struct mlx5_flow_attr *attr = flow->attr;
4413         bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4414                 flow_flag_test(flow, INGRESS);
4415         bool act_is_encap = !!(attr->action &
4416                                MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4417         bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
4418                                                 MLX5_DEVCOM_ESW_OFFLOADS);
4419
4420         if (!esw_paired)
4421                 return false;
4422
4423         if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
4424              mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
4425             (is_rep_ingress || act_is_encap))
4426                 return true;
4427
4428         if (mlx5_lag_is_mpesw(esw_attr->in_mdev))
4429                 return true;
4430
4431         return false;
4432 }
4433
4434 struct mlx5_flow_attr *
4435 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
4436 {
4437         u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB)  ?
4438                                 sizeof(struct mlx5_esw_flow_attr) :
4439                                 sizeof(struct mlx5_nic_flow_attr);
4440         struct mlx5_flow_attr *attr;
4441
4442         attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
4443         if (!attr)
4444                 return attr;
4445
4446         INIT_LIST_HEAD(&attr->list);
4447         return attr;
4448 }
4449
4450 static void
4451 mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
4452 {
4453         struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
4454         bool vf_tun;
4455
4456         if (!attr)
4457                 return;
4458
4459         if (attr->post_act_handle)
4460                 mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle);
4461
4462         clean_encap_dests(flow->priv, flow, attr, &vf_tun);
4463
4464         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
4465                 mlx5_fc_destroy(counter_dev, attr->counter);
4466
4467         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
4468                 mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
4469                 mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr);
4470         }
4471 }
4472
4473 static int
4474 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4475                  struct flow_cls_offload *f, unsigned long flow_flags,
4476                  struct mlx5e_tc_flow_parse_attr **__parse_attr,
4477                  struct mlx5e_tc_flow **__flow)
4478 {
4479         struct mlx5e_tc_flow_parse_attr *parse_attr;
4480         struct mlx5_flow_attr *attr;
4481         struct mlx5e_tc_flow *flow;
4482         int err = -ENOMEM;
4483         int out_index;
4484
4485         flow = kzalloc(sizeof(*flow), GFP_KERNEL);
4486         parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4487         if (!parse_attr || !flow)
4488                 goto err_free;
4489
4490         flow->flags = flow_flags;
4491         flow->cookie = f->cookie;
4492         flow->priv = priv;
4493
4494         attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
4495         if (!attr)
4496                 goto err_free;
4497
4498         flow->attr = attr;
4499
4500         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4501                 INIT_LIST_HEAD(&flow->encaps[out_index].list);
4502         INIT_LIST_HEAD(&flow->hairpin);
4503         INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4504         INIT_LIST_HEAD(&flow->attrs);
4505         refcount_set(&flow->refcnt, 1);
4506         init_completion(&flow->init_done);
4507         init_completion(&flow->del_hw_done);
4508
4509         *__flow = flow;
4510         *__parse_attr = parse_attr;
4511
4512         return 0;
4513
4514 err_free:
4515         kfree(flow);
4516         kvfree(parse_attr);
4517         return err;
4518 }
4519
4520 static void
4521 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4522                      struct mlx5e_tc_flow_parse_attr *parse_attr,
4523                      struct flow_cls_offload *f)
4524 {
4525         attr->parse_attr = parse_attr;
4526         attr->chain = f->common.chain_index;
4527         attr->prio = f->common.prio;
4528 }
4529
4530 static void
4531 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4532                          struct mlx5e_priv *priv,
4533                          struct mlx5e_tc_flow_parse_attr *parse_attr,
4534                          struct flow_cls_offload *f,
4535                          struct mlx5_eswitch_rep *in_rep,
4536                          struct mlx5_core_dev *in_mdev)
4537 {
4538         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4539         struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4540
4541         mlx5e_flow_attr_init(attr, parse_attr, f);
4542
4543         esw_attr->in_rep = in_rep;
4544         esw_attr->in_mdev = in_mdev;
4545
4546         if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4547             MLX5_COUNTER_SOURCE_ESWITCH)
4548                 esw_attr->counter_dev = in_mdev;
4549         else
4550                 esw_attr->counter_dev = priv->mdev;
4551 }
4552
4553 static struct mlx5e_tc_flow *
4554 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4555                      struct flow_cls_offload *f,
4556                      unsigned long flow_flags,
4557                      struct net_device *filter_dev,
4558                      struct mlx5_eswitch_rep *in_rep,
4559                      struct mlx5_core_dev *in_mdev)
4560 {
4561         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4562         struct netlink_ext_ack *extack = f->common.extack;
4563         struct mlx5e_tc_flow_parse_attr *parse_attr;
4564         struct mlx5e_tc_flow *flow;
4565         int attr_size, err;
4566
4567         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4568         attr_size  = sizeof(struct mlx5_esw_flow_attr);
4569         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4570                                &parse_attr, &flow);
4571         if (err)
4572                 goto out;
4573
4574         parse_attr->filter_dev = filter_dev;
4575         mlx5e_flow_esw_attr_init(flow->attr,
4576                                  priv, parse_attr,
4577                                  f, in_rep, in_mdev);
4578
4579         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4580                                f, filter_dev);
4581         if (err)
4582                 goto err_free;
4583
4584         /* actions validation depends on parsing the ct matches first */
4585         err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4586                                    &flow->attr->ct_attr, extack);
4587         if (err)
4588                 goto err_free;
4589
4590         err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
4591         if (err)
4592                 goto err_free;
4593
4594         err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4595         complete_all(&flow->init_done);
4596         if (err) {
4597                 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4598                         goto err_free;
4599
4600                 add_unready_flow(flow);
4601         }
4602
4603         return flow;
4604
4605 err_free:
4606         mlx5e_flow_put(priv, flow);
4607 out:
4608         return ERR_PTR(err);
4609 }
4610
4611 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4612                                       struct mlx5e_tc_flow *flow,
4613                                       unsigned long flow_flags)
4614 {
4615         struct mlx5e_priv *priv = flow->priv, *peer_priv;
4616         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4617         struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4618         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4619         struct mlx5e_tc_flow_parse_attr *parse_attr;
4620         struct mlx5e_rep_priv *peer_urpriv;
4621         struct mlx5e_tc_flow *peer_flow;
4622         struct mlx5_core_dev *in_mdev;
4623         int err = 0;
4624
4625         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4626         if (!peer_esw)
4627                 return -ENODEV;
4628
4629         peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4630         peer_priv = netdev_priv(peer_urpriv->netdev);
4631
4632         /* in_mdev is assigned of which the packet originated from.
4633          * So packets redirected to uplink use the same mdev of the
4634          * original flow and packets redirected from uplink use the
4635          * peer mdev.
4636          * In multiport eswitch it's a special case that we need to
4637          * keep the original mdev.
4638          */
4639         if (attr->in_rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(priv->mdev))
4640                 in_mdev = peer_priv->mdev;
4641         else
4642                 in_mdev = priv->mdev;
4643
4644         parse_attr = flow->attr->parse_attr;
4645         peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4646                                          parse_attr->filter_dev,
4647                                          attr->in_rep, in_mdev);
4648         if (IS_ERR(peer_flow)) {
4649                 err = PTR_ERR(peer_flow);
4650                 goto out;
4651         }
4652
4653         flow->peer_flow = peer_flow;
4654         flow_flag_set(flow, DUP);
4655         mutex_lock(&esw->offloads.peer_mutex);
4656         list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4657         mutex_unlock(&esw->offloads.peer_mutex);
4658
4659 out:
4660         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4661         return err;
4662 }
4663
4664 static int
4665 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4666                    struct flow_cls_offload *f,
4667                    unsigned long flow_flags,
4668                    struct net_device *filter_dev,
4669                    struct mlx5e_tc_flow **__flow)
4670 {
4671         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4672         struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4673         struct mlx5_core_dev *in_mdev = priv->mdev;
4674         struct mlx5e_tc_flow *flow;
4675         int err;
4676
4677         flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4678                                     in_mdev);
4679         if (IS_ERR(flow))
4680                 return PTR_ERR(flow);
4681
4682         if (is_peer_flow_needed(flow)) {
4683                 err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4684                 if (err) {
4685                         mlx5e_tc_del_fdb_flow(priv, flow);
4686                         goto out;
4687                 }
4688         }
4689
4690         *__flow = flow;
4691
4692         return 0;
4693
4694 out:
4695         return err;
4696 }
4697
4698 static int
4699 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4700                    struct flow_cls_offload *f,
4701                    unsigned long flow_flags,
4702                    struct net_device *filter_dev,
4703                    struct mlx5e_tc_flow **__flow)
4704 {
4705         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4706         struct netlink_ext_ack *extack = f->common.extack;
4707         struct mlx5e_tc_flow_parse_attr *parse_attr;
4708         struct mlx5e_tc_flow *flow;
4709         int attr_size, err;
4710
4711         if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4712                 if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4713                         return -EOPNOTSUPP;
4714         } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4715                 return -EOPNOTSUPP;
4716         }
4717
4718         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4719         attr_size  = sizeof(struct mlx5_nic_flow_attr);
4720         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4721                                &parse_attr, &flow);
4722         if (err)
4723                 goto out;
4724
4725         parse_attr->filter_dev = filter_dev;
4726         mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4727
4728         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4729                                f, filter_dev);
4730         if (err)
4731                 goto err_free;
4732
4733         err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4734                                    &flow->attr->ct_attr, extack);
4735         if (err)
4736                 goto err_free;
4737
4738         err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
4739         if (err)
4740                 goto err_free;
4741
4742         err = mlx5e_tc_add_nic_flow(priv, flow, extack);
4743         if (err)
4744                 goto err_free;
4745
4746         flow_flag_set(flow, OFFLOADED);
4747         *__flow = flow;
4748
4749         return 0;
4750
4751 err_free:
4752         flow_flag_set(flow, FAILED);
4753         mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
4754         mlx5e_flow_put(priv, flow);
4755 out:
4756         return err;
4757 }
4758
4759 static int
4760 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4761                   struct flow_cls_offload *f,
4762                   unsigned long flags,
4763                   struct net_device *filter_dev,
4764                   struct mlx5e_tc_flow **flow)
4765 {
4766         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4767         unsigned long flow_flags;
4768         int err;
4769
4770         get_flags(flags, &flow_flags);
4771
4772         if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4773                 return -EOPNOTSUPP;
4774
4775         if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4776                 err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4777                                          filter_dev, flow);
4778         else
4779                 err = mlx5e_add_nic_flow(priv, f, flow_flags,
4780                                          filter_dev, flow);
4781
4782         return err;
4783 }
4784
4785 static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4786                                            struct mlx5e_rep_priv *rpriv)
4787 {
4788         /* Offloaded flow rule is allowed to duplicate on non-uplink representor
4789          * sharing tc block with other slaves of a lag device. Rpriv can be NULL if this
4790          * function is called from NIC mode.
4791          */
4792         return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4793 }
4794
4795 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4796                            struct flow_cls_offload *f, unsigned long flags)
4797 {
4798         struct netlink_ext_ack *extack = f->common.extack;
4799         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4800         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4801         struct mlx5e_tc_flow *flow;
4802         int err = 0;
4803
4804         if (!mlx5_esw_hold(priv->mdev))
4805                 return -EBUSY;
4806
4807         mlx5_esw_get(priv->mdev);
4808
4809         rcu_read_lock();
4810         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4811         if (flow) {
4812                 /* Same flow rule offloaded to non-uplink representor sharing tc block,
4813                  * just return 0.
4814                  */
4815                 if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4816                         goto rcu_unlock;
4817
4818                 NL_SET_ERR_MSG_MOD(extack,
4819                                    "flow cookie already exists, ignoring");
4820                 netdev_warn_once(priv->netdev,
4821                                  "flow cookie %lx already exists, ignoring\n",
4822                                  f->cookie);
4823                 err = -EEXIST;
4824                 goto rcu_unlock;
4825         }
4826 rcu_unlock:
4827         rcu_read_unlock();
4828         if (flow)
4829                 goto out;
4830
4831         trace_mlx5e_configure_flower(f);
4832         err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4833         if (err)
4834                 goto out;
4835
4836         /* Flow rule offloaded to non-uplink representor sharing tc block,
4837          * set the flow's owner dev.
4838          */
4839         if (is_flow_rule_duplicate_allowed(dev, rpriv))
4840                 flow->orig_dev = dev;
4841
4842         err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4843         if (err)
4844                 goto err_free;
4845
4846         mlx5_esw_release(priv->mdev);
4847         return 0;
4848
4849 err_free:
4850         mlx5e_flow_put(priv, flow);
4851 out:
4852         mlx5_esw_put(priv->mdev);
4853         mlx5_esw_release(priv->mdev);
4854         return err;
4855 }
4856
4857 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
4858 {
4859         bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
4860         bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
4861
4862         return flow_flag_test(flow, INGRESS) == dir_ingress &&
4863                 flow_flag_test(flow, EGRESS) == dir_egress;
4864 }
4865
4866 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
4867                         struct flow_cls_offload *f, unsigned long flags)
4868 {
4869         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4870         struct mlx5e_tc_flow *flow;
4871         int err;
4872
4873         rcu_read_lock();
4874         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4875         if (!flow || !same_flow_direction(flow, flags)) {
4876                 err = -EINVAL;
4877                 goto errout;
4878         }
4879
4880         /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
4881          * set.
4882          */
4883         if (flow_flag_test_and_set(flow, DELETED)) {
4884                 err = -EINVAL;
4885                 goto errout;
4886         }
4887         rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4888         rcu_read_unlock();
4889
4890         trace_mlx5e_delete_flower(f);
4891         mlx5e_flow_put(priv, flow);
4892
4893         mlx5_esw_put(priv->mdev);
4894         return 0;
4895
4896 errout:
4897         rcu_read_unlock();
4898         return err;
4899 }
4900
/* Fill @fl_act's statistics from the action stats handle that belongs to
 * @priv; returns whatever mlx5e_tc_act_stats_fill_stats() returns.
 */
int mlx5e_tc_fill_action_stats(struct mlx5e_priv *priv,
			       struct flow_offload_action *fl_act)
{
	return mlx5e_tc_act_stats_fill_stats(get_act_stats_handle(priv),
					     fl_act);
}
4906
4907 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4908                        struct flow_cls_offload *f, unsigned long flags)
4909 {
4910         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4911         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4912         struct mlx5_eswitch *peer_esw;
4913         struct mlx5e_tc_flow *flow;
4914         struct mlx5_fc *counter;
4915         u64 lastuse = 0;
4916         u64 packets = 0;
4917         u64 bytes = 0;
4918         int err = 0;
4919
4920         rcu_read_lock();
4921         flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4922                                                 tc_ht_params));
4923         rcu_read_unlock();
4924         if (IS_ERR(flow))
4925                 return PTR_ERR(flow);
4926
4927         if (!same_flow_direction(flow, flags)) {
4928                 err = -EINVAL;
4929                 goto errout;
4930         }
4931
4932         if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
4933                 if (flow_flag_test(flow, USE_ACT_STATS)) {
4934                         f->use_act_stats = true;
4935                 } else {
4936                         counter = mlx5e_tc_get_counter(flow);
4937                         if (!counter)
4938                                 goto errout;
4939
4940                         mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4941                 }
4942         }
4943
4944         /* Under multipath it's possible for one rule to be currently
4945          * un-offloaded while the other rule is offloaded.
4946          */
4947         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4948         if (!peer_esw)
4949                 goto out;
4950
4951         if (flow_flag_test(flow, DUP) &&
4952             flow_flag_test(flow->peer_flow, OFFLOADED)) {
4953                 u64 bytes2;
4954                 u64 packets2;
4955                 u64 lastuse2;
4956
4957                 if (flow_flag_test(flow, USE_ACT_STATS)) {
4958                         f->use_act_stats = true;
4959                 } else {
4960                         counter = mlx5e_tc_get_counter(flow->peer_flow);
4961                         if (!counter)
4962                                 goto no_peer_counter;
4963                         mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
4964
4965                         bytes += bytes2;
4966                         packets += packets2;
4967                         lastuse = max_t(u64, lastuse, lastuse2);
4968                 }
4969         }
4970
4971 no_peer_counter:
4972         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4973 out:
4974         flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
4975                           FLOW_ACTION_HW_STATS_DELAYED);
4976         trace_mlx5e_stats_flower(f);
4977 errout:
4978         mlx5e_flow_put(priv, flow);
4979         return err;
4980 }
4981
4982 static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
4983                                struct netlink_ext_ack *extack)
4984 {
4985         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4986         struct mlx5_eswitch *esw;
4987         u32 rate_mbps = 0;
4988         u16 vport_num;
4989         int err;
4990
4991         vport_num = rpriv->rep->vport;
4992         if (vport_num >= MLX5_VPORT_ECPF) {
4993                 NL_SET_ERR_MSG_MOD(extack,
4994                                    "Ingress rate limit is supported only for Eswitch ports connected to VFs");
4995                 return -EOPNOTSUPP;
4996         }
4997
4998         esw = priv->mdev->priv.eswitch;
4999         /* rate is given in bytes/sec.
5000          * First convert to bits/sec and then round to the nearest mbit/secs.
5001          * mbit means million bits.
5002          * Moreover, if rate is non zero we choose to configure to a minimum of
5003          * 1 mbit/sec.
5004          */
5005         if (rate) {
5006                 rate = (rate * BITS_PER_BYTE) + 500000;
5007                 do_div(rate, 1000000);
5008                 rate_mbps = max_t(u32, rate, 1);
5009         }
5010
5011         err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
5012         if (err)
5013                 NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
5014
5015         return err;
5016 }
5017
5018 static int
5019 tc_matchall_police_validate(const struct flow_action *action,
5020                             const struct flow_action_entry *act,
5021                             struct netlink_ext_ack *extack)
5022 {
5023         if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
5024                 NL_SET_ERR_MSG_MOD(extack,
5025                                    "Offload not supported when conform action is not continue");
5026                 return -EOPNOTSUPP;
5027         }
5028
5029         if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
5030                 NL_SET_ERR_MSG_MOD(extack,
5031                                    "Offload not supported when exceed action is not drop");
5032                 return -EOPNOTSUPP;
5033         }
5034
5035         if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
5036             !flow_action_is_last_entry(action, act)) {
5037                 NL_SET_ERR_MSG_MOD(extack,
5038                                    "Offload not supported when conform action is ok, but action is not last");
5039                 return -EOPNOTSUPP;
5040         }
5041
5042         if (act->police.peakrate_bytes_ps ||
5043             act->police.avrate || act->police.overhead) {
5044                 NL_SET_ERR_MSG_MOD(extack,
5045                                    "Offload not supported when peakrate/avrate/overhead is configured");
5046                 return -EOPNOTSUPP;
5047         }
5048
5049         return 0;
5050 }
5051
5052 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
5053                                         struct flow_action *flow_action,
5054                                         struct netlink_ext_ack *extack)
5055 {
5056         struct mlx5e_rep_priv *rpriv = priv->ppriv;
5057         const struct flow_action_entry *act;
5058         int err;
5059         int i;
5060
5061         if (!flow_action_has_entries(flow_action)) {
5062                 NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
5063                 return -EINVAL;
5064         }
5065
5066         if (!flow_offload_has_one_action(flow_action)) {
5067                 NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
5068                 return -EOPNOTSUPP;
5069         }
5070
5071         if (!flow_action_basic_hw_stats_check(flow_action, extack)) {
5072                 NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
5073                 return -EOPNOTSUPP;
5074         }
5075
5076         flow_action_for_each(i, act, flow_action) {
5077                 switch (act->id) {
5078                 case FLOW_ACTION_POLICE:
5079                         err = tc_matchall_police_validate(flow_action, act, extack);
5080                         if (err)
5081                                 return err;
5082
5083                         err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
5084                         if (err)
5085                                 return err;
5086
5087                         rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
5088                         break;
5089                 default:
5090                         NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
5091                         return -EOPNOTSUPP;
5092                 }
5093         }
5094
5095         return 0;
5096 }
5097
5098 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
5099                                 struct tc_cls_matchall_offload *ma)
5100 {
5101         struct netlink_ext_ack *extack = ma->common.extack;
5102
5103         if (ma->common.prio != 1) {
5104                 NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
5105                 return -EINVAL;
5106         }
5107
5108         return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
5109 }
5110
5111 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
5112                              struct tc_cls_matchall_offload *ma)
5113 {
5114         struct netlink_ext_ack *extack = ma->common.extack;
5115
5116         return apply_police_params(priv, 0, extack);
5117 }
5118
5119 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
5120                              struct tc_cls_matchall_offload *ma)
5121 {
5122         struct mlx5e_rep_priv *rpriv = priv->ppriv;
5123         struct rtnl_link_stats64 cur_stats;
5124         u64 dbytes;
5125         u64 dpkts;
5126
5127         cur_stats = priv->stats.vf_vport;
5128         dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
5129         dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
5130         rpriv->prev_vf_vport_stats = cur_stats;
5131         flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
5132                           FLOW_ACTION_HW_STATS_DELAYED);
5133 }
5134
5135 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
5136                                               struct mlx5e_priv *peer_priv)
5137 {
5138         struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5139         struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
5140         struct mlx5e_hairpin_entry *hpe, *tmp;
5141         LIST_HEAD(init_wait_list);
5142         u16 peer_vhca_id;
5143         int bkt;
5144
5145         if (!mlx5e_same_hw_devs(priv, peer_priv))
5146                 return;
5147
5148         peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
5149
5150         mutex_lock(&tc->hairpin_tbl_lock);
5151         hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
5152                 if (refcount_inc_not_zero(&hpe->refcnt))
5153                         list_add(&hpe->dead_peer_wait_list, &init_wait_list);
5154         mutex_unlock(&tc->hairpin_tbl_lock);
5155
5156         list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
5157                 wait_for_completion(&hpe->res_ready);
5158                 if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
5159                         mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
5160
5161                 mlx5e_hairpin_put(priv, hpe);
5162         }
5163 }
5164
5165 static int mlx5e_tc_netdev_event(struct notifier_block *this,
5166                                  unsigned long event, void *ptr)
5167 {
5168         struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
5169         struct mlx5e_priv *peer_priv;
5170         struct mlx5e_tc_table *tc;
5171         struct mlx5e_priv *priv;
5172
5173         if (ndev->netdev_ops != &mlx5e_netdev_ops ||
5174             event != NETDEV_UNREGISTER ||
5175             ndev->reg_state == NETREG_REGISTERED)
5176                 return NOTIFY_DONE;
5177
5178         tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
5179         priv = tc->priv;
5180         peer_priv = netdev_priv(ndev);
5181         if (priv == peer_priv ||
5182             !(priv->netdev->features & NETIF_F_HW_TC))
5183                 return NOTIFY_DONE;
5184
5185         mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
5186
5187         return NOTIFY_DONE;
5188 }
5189
5190 static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
5191 {
5192         int tc_grp_size, tc_tbl_size;
5193         u32 max_flow_counter;
5194
5195         max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
5196                             MLX5_CAP_GEN(dev, max_flow_counter_15_0);
5197
5198         tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
5199
5200         tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
5201                             BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
5202
5203         return tc_tbl_size;
5204 }
5205
5206 static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
5207 {
5208         struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5209         struct mlx5_flow_table **ft = &tc->miss_t;
5210         struct mlx5_flow_table_attr ft_attr = {};
5211         struct mlx5_flow_namespace *ns;
5212         int err = 0;
5213
5214         ft_attr.max_fte = 1;
5215         ft_attr.autogroup.max_num_groups = 1;
5216         ft_attr.level = MLX5E_TC_MISS_LEVEL;
5217         ft_attr.prio = 0;
5218         ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
5219
5220         *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
5221         if (IS_ERR(*ft)) {
5222                 err = PTR_ERR(*ft);
5223                 netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
5224         }
5225
5226         return err;
5227 }
5228
5229 static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
5230 {
5231         struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5232
5233         mlx5_destroy_flow_table(tc->miss_t);
5234 }
5235
5236 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
5237 {
5238         struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5239         struct mlx5_core_dev *dev = priv->mdev;
5240         struct mapping_ctx *chains_mapping;
5241         struct mlx5_chains_attr attr = {};
5242         u64 mapping_id;
5243         int err;
5244
5245         mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
5246         mutex_init(&tc->t_lock);
5247         mutex_init(&tc->hairpin_tbl_lock);
5248         hash_init(tc->hairpin_tbl);
5249         tc->priv = priv;
5250
5251         err = rhashtable_init(&tc->ht, &tc_ht_params);
5252         if (err)
5253                 return err;
5254
5255         lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
5256         lockdep_init_map(&tc->ht.run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);
5257
5258         mapping_id = mlx5_query_nic_system_image_guid(dev);
5259
5260         chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
5261                                                sizeof(struct mlx5_mapped_obj),
5262                                                MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
5263
5264         if (IS_ERR(chains_mapping)) {
5265                 err = PTR_ERR(chains_mapping);
5266                 goto err_mapping;
5267         }
5268         tc->mapping = chains_mapping;
5269
5270         err = mlx5e_tc_nic_create_miss_table(priv);
5271         if (err)
5272                 goto err_chains;
5273
5274         if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
5275                 attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
5276                         MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
5277         attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
5278         attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
5279         attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
5280         attr.default_ft = tc->miss_t;
5281         attr.mapping = chains_mapping;
5282
5283         tc->chains = mlx5_chains_create(dev, &attr);
5284         if (IS_ERR(tc->chains)) {
5285                 err = PTR_ERR(tc->chains);
5286                 goto err_miss;
5287         }
5288
5289         tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
5290         tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
5291                                  MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
5292
5293         mlx5e_hairpin_params_init(&tc->hairpin_params, dev);
5294
5295         tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
5296         err = register_netdevice_notifier_dev_net(priv->netdev,
5297                                                   &tc->netdevice_nb,
5298                                                   &tc->netdevice_nn);
5299         if (err) {
5300                 tc->netdevice_nb.notifier_call = NULL;
5301                 mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
5302                 goto err_reg;
5303         }
5304
5305         mlx5e_tc_debugfs_init(tc, mlx5e_fs_get_debugfs_root(priv->fs));
5306
5307         tc->action_stats_handle = mlx5e_tc_act_stats_create();
5308         if (IS_ERR(tc->action_stats_handle)) {
5309                 err = PTR_ERR(tc->action_stats_handle);
5310                 goto err_act_stats;
5311         }
5312
5313         return 0;
5314
5315 err_act_stats:
5316         unregister_netdevice_notifier_dev_net(priv->netdev,
5317                                               &tc->netdevice_nb,
5318                                               &tc->netdevice_nn);
5319 err_reg:
5320         mlx5_tc_ct_clean(tc->ct);
5321         mlx5e_tc_post_act_destroy(tc->post_act);
5322         mlx5_chains_destroy(tc->chains);
5323 err_miss:
5324         mlx5e_tc_nic_destroy_miss_table(priv);
5325 err_chains:
5326         mapping_destroy(chains_mapping);
5327 err_mapping:
5328         rhashtable_destroy(&tc->ht);
5329         return err;
5330 }
5331
5332 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
5333 {
5334         struct mlx5e_tc_flow *flow = ptr;
5335         struct mlx5e_priv *priv = flow->priv;
5336
5337         mlx5e_tc_del_flow(priv, flow);
5338         kfree(flow);
5339 }
5340
5341 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
5342 {
5343         struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5344
5345         debugfs_remove_recursive(tc->dfs_root);
5346
5347         if (tc->netdevice_nb.notifier_call)
5348                 unregister_netdevice_notifier_dev_net(priv->netdev,
5349                                                       &tc->netdevice_nb,
5350                                                       &tc->netdevice_nn);
5351
5352         mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
5353         mutex_destroy(&tc->hairpin_tbl_lock);
5354
5355         rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
5356
5357         if (!IS_ERR_OR_NULL(tc->t)) {
5358                 mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
5359                 tc->t = NULL;
5360         }
5361         mutex_destroy(&tc->t_lock);
5362
5363         mlx5_tc_ct_clean(tc->ct);
5364         mlx5e_tc_post_act_destroy(tc->post_act);
5365         mapping_destroy(tc->mapping);
5366         mlx5_chains_destroy(tc->chains);
5367         mlx5e_tc_nic_destroy_miss_table(priv);
5368         mlx5e_tc_act_stats_free(tc->action_stats_handle);
5369 }
5370
5371 int mlx5e_tc_ht_init(struct rhashtable *tc_ht)
5372 {
5373         int err;
5374
5375         err = rhashtable_init(tc_ht, &tc_ht_params);
5376         if (err)
5377                 return err;
5378
5379         lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
5380         lockdep_init_map(&tc_ht->run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);
5381
5382         return 0;
5383 }
5384
5385 void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht)
5386 {
5387         rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
5388 }
5389
5390 int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
5391 {
5392         const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
5393         struct mlx5e_rep_priv *rpriv;
5394         struct mapping_ctx *mapping;
5395         struct mlx5_eswitch *esw;
5396         struct mlx5e_priv *priv;
5397         u64 mapping_id;
5398         int err = 0;
5399
5400         rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5401         priv = netdev_priv(rpriv->netdev);
5402         esw = priv->mdev->priv.eswitch;
5403
5404         uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
5405                                                        MLX5_FLOW_NAMESPACE_FDB);
5406         uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
5407                                                esw_chains(esw),
5408                                                &esw->offloads.mod_hdr,
5409                                                MLX5_FLOW_NAMESPACE_FDB,
5410                                                uplink_priv->post_act);
5411
5412         uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));
5413
5414         uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
5415
5416         mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
5417
5418         mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
5419                                         sizeof(struct tunnel_match_key),
5420                                         TUNNEL_INFO_BITS_MASK, true);
5421
5422         if (IS_ERR(mapping)) {
5423                 err = PTR_ERR(mapping);
5424                 goto err_tun_mapping;
5425         }
5426         uplink_priv->tunnel_mapping = mapping;
5427
5428         /* Two last values are reserved for stack devices slow path table mark
5429          * and bridge ingress push mark.
5430          */
5431         mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
5432                                         sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true);
5433         if (IS_ERR(mapping)) {
5434                 err = PTR_ERR(mapping);
5435                 goto err_enc_opts_mapping;
5436         }
5437         uplink_priv->tunnel_enc_opts_mapping = mapping;
5438
5439         uplink_priv->encap = mlx5e_tc_tun_init(priv);
5440         if (IS_ERR(uplink_priv->encap)) {
5441                 err = PTR_ERR(uplink_priv->encap);
5442                 goto err_register_fib_notifier;
5443         }
5444
5445         uplink_priv->action_stats_handle = mlx5e_tc_act_stats_create();
5446         if (IS_ERR(uplink_priv->action_stats_handle)) {
5447                 err = PTR_ERR(uplink_priv->action_stats_handle);
5448                 goto err_action_counter;
5449         }
5450
5451         return 0;
5452
5453 err_action_counter:
5454         mlx5e_tc_tun_cleanup(uplink_priv->encap);
5455 err_register_fib_notifier:
5456         mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5457 err_enc_opts_mapping:
5458         mapping_destroy(uplink_priv->tunnel_mapping);
5459 err_tun_mapping:
5460         mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
5461         mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
5462         mlx5_tc_ct_clean(uplink_priv->ct_priv);
5463         netdev_warn(priv->netdev,
5464                     "Failed to initialize tc (eswitch), err: %d", err);
5465         mlx5e_tc_post_act_destroy(uplink_priv->post_act);
5466         return err;
5467 }
5468
5469 void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
5470 {
5471         struct mlx5e_rep_priv *rpriv;
5472         struct mlx5_eswitch *esw;
5473         struct mlx5e_priv *priv;
5474
5475         rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5476         priv = netdev_priv(rpriv->netdev);
5477         esw = priv->mdev->priv.eswitch;
5478
5479         mlx5e_tc_clean_fdb_peer_flows(esw);
5480
5481         mlx5e_tc_tun_cleanup(uplink_priv->encap);
5482
5483         mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5484         mapping_destroy(uplink_priv->tunnel_mapping);
5485
5486         mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
5487         mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
5488         mlx5_tc_ct_clean(uplink_priv->ct_priv);
5489         mlx5e_flow_meters_cleanup(uplink_priv->flow_meters);
5490         mlx5e_tc_post_act_destroy(uplink_priv->post_act);
5491         mlx5e_tc_act_stats_free(uplink_priv->action_stats_handle);
5492 }
5493
5494 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
5495 {
5496         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5497
5498         return atomic_read(&tc_ht->nelems);
5499 }
5500
5501 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
5502 {
5503         struct mlx5e_tc_flow *flow, *tmp;
5504
5505         list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
5506                 __mlx5e_tc_del_fdb_peer_flow(flow);
5507 }
5508
5509 void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
5510 {
5511         struct mlx5_rep_uplink_priv *rpriv =
5512                 container_of(work, struct mlx5_rep_uplink_priv,
5513                              reoffload_flows_work);
5514         struct mlx5e_tc_flow *flow, *tmp;
5515
5516         mutex_lock(&rpriv->unready_flows_lock);
5517         list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
5518                 if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
5519                         unready_flow_del(flow);
5520         }
5521         mutex_unlock(&rpriv->unready_flows_lock);
5522 }
5523
5524 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
5525                                      struct flow_cls_offload *cls_flower,
5526                                      unsigned long flags)
5527 {
5528         switch (cls_flower->command) {
5529         case FLOW_CLS_REPLACE:
5530                 return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
5531                                               flags);
5532         case FLOW_CLS_DESTROY:
5533                 return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
5534                                            flags);
5535         case FLOW_CLS_STATS:
5536                 return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
5537                                           flags);
5538         default:
5539                 return -EOPNOTSUPP;
5540         }
5541 }
5542
5543 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
5544                             void *cb_priv)
5545 {
5546         unsigned long flags = MLX5_TC_FLAG(INGRESS);
5547         struct mlx5e_priv *priv = cb_priv;
5548
5549         if (!priv->netdev || !netif_device_present(priv->netdev))
5550                 return -EOPNOTSUPP;
5551
5552         if (mlx5e_is_uplink_rep(priv))
5553                 flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
5554         else
5555                 flags |= MLX5_TC_FLAG(NIC_OFFLOAD);
5556
5557         switch (type) {
5558         case TC_SETUP_CLSFLOWER:
5559                 return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
5560         default:
5561                 return -EOPNOTSUPP;
5562         }
5563 }
5564
5565 static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
5566                                     struct mlx5e_tc_update_priv *tc_priv,
5567                                     u32 tunnel_id)
5568 {
5569         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5570         struct tunnel_match_enc_opts enc_opts = {};
5571         struct mlx5_rep_uplink_priv *uplink_priv;
5572         struct mlx5e_rep_priv *uplink_rpriv;
5573         struct metadata_dst *tun_dst;
5574         struct tunnel_match_key key;
5575         u32 tun_id, enc_opts_id;
5576         struct net_device *dev;
5577         int err;
5578
5579         enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
5580         tun_id = tunnel_id >> ENC_OPTS_BITS;
5581
5582         if (!tun_id)
5583                 return true;
5584
5585         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
5586         uplink_priv = &uplink_rpriv->uplink_priv;
5587
5588         err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
5589         if (err) {
5590                 netdev_dbg(priv->netdev,
5591                            "Couldn't find tunnel for tun_id: %d, err: %d\n",
5592                            tun_id, err);
5593                 return false;
5594         }
5595
5596         if (enc_opts_id) {
5597                 err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
5598                                    enc_opts_id, &enc_opts);
5599                 if (err) {
5600                         netdev_dbg(priv->netdev,
5601                                    "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
5602                                    enc_opts_id, err);
5603                         return false;
5604                 }
5605         }
5606
5607         switch (key.enc_control.addr_type) {
5608         case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
5609                 tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
5610                                            key.enc_ip.tos, key.enc_ip.ttl,
5611                                            key.enc_tp.dst, TUNNEL_KEY,
5612                                            key32_to_tunnel_id(key.enc_key_id.keyid),
5613                                            enc_opts.key.len);
5614                 break;
5615         case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
5616                 tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
5617                                              key.enc_ip.tos, key.enc_ip.ttl,
5618                                              key.enc_tp.dst, 0, TUNNEL_KEY,
5619                                              key32_to_tunnel_id(key.enc_key_id.keyid),
5620                                              enc_opts.key.len);
5621                 break;
5622         default:
5623                 netdev_dbg(priv->netdev,
5624                            "Couldn't restore tunnel, unsupported addr_type: %d\n",
5625                            key.enc_control.addr_type);
5626                 return false;
5627         }
5628
5629         if (!tun_dst) {
5630                 netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
5631                 return false;
5632         }
5633
5634         tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;
5635
5636         if (enc_opts.key.len)
5637                 ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
5638                                         enc_opts.key.data,
5639                                         enc_opts.key.len,
5640                                         enc_opts.key.dst_opt_type);
5641
5642         skb_dst_set(skb, (struct dst_entry *)tun_dst);
5643         dev = dev_get_by_index(&init_net, key.filter_ifindex);
5644         if (!dev) {
5645                 netdev_dbg(priv->netdev,
5646                            "Couldn't find tunnel device with ifindex: %d\n",
5647                            key.filter_ifindex);
5648                 return false;
5649         }
5650
5651         /* Set fwd_dev so we do dev_put() after datapath */
5652         tc_priv->fwd_dev = dev;
5653
5654         skb->dev = dev;
5655
5656         return true;
5657 }
5658
5659 static bool mlx5e_tc_restore_skb_tc_meta(struct sk_buff *skb, struct mlx5_tc_ct_priv *ct_priv,
5660                                          struct mlx5_mapped_obj *mapped_obj, u32 zone_restore_id,
5661                                          u32 tunnel_id,  struct mlx5e_tc_update_priv *tc_priv)
5662 {
5663         struct mlx5e_priv *priv = netdev_priv(skb->dev);
5664         struct tc_skb_ext *tc_skb_ext;
5665         u64 act_miss_cookie;
5666         u32 chain;
5667
5668         chain = mapped_obj->type == MLX5_MAPPED_OBJ_CHAIN ? mapped_obj->chain : 0;
5669         act_miss_cookie = mapped_obj->type == MLX5_MAPPED_OBJ_ACT_MISS ?
5670                           mapped_obj->act_miss_cookie : 0;
5671         if (chain || act_miss_cookie) {
5672                 if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id))
5673                         return false;
5674
5675                 tc_skb_ext = tc_skb_ext_alloc(skb);
5676                 if (!tc_skb_ext) {
5677                         WARN_ON(1);
5678                         return false;
5679                 }
5680
5681                 if (act_miss_cookie) {
5682                         tc_skb_ext->act_miss_cookie = act_miss_cookie;
5683                         tc_skb_ext->act_miss = 1;
5684                 } else {
5685                         tc_skb_ext->chain = chain;
5686                 }
5687         }
5688
5689         if (tc_priv)
5690                 return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id);
5691
5692         return true;
5693 }
5694
5695 static void mlx5e_tc_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
5696                                         struct mlx5_mapped_obj *mapped_obj,
5697                                         struct mlx5e_tc_update_priv *tc_priv)
5698 {
5699         if (!mlx5e_tc_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
5700                 netdev_dbg(priv->netdev,
5701                            "Failed to restore tunnel info for sampled packet\n");
5702                 return;
5703         }
5704         mlx5e_tc_sample_skb(skb, mapped_obj);
5705 }
5706
5707 static bool mlx5e_tc_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
5708                                           struct mlx5_mapped_obj *mapped_obj,
5709                                           struct mlx5e_tc_update_priv *tc_priv,
5710                                           u32 tunnel_id)
5711 {
5712         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5713         struct mlx5_rep_uplink_priv *uplink_priv;
5714         struct mlx5e_rep_priv *uplink_rpriv;
5715         bool forward_tx = false;
5716
5717         /* Tunnel restore takes precedence over int port restore */
5718         if (tunnel_id)
5719                 return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id);
5720
5721         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
5722         uplink_priv = &uplink_rpriv->uplink_priv;
5723
5724         if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb,
5725                                       mapped_obj->int_port_metadata, &forward_tx)) {
5726                 /* Set fwd_dev for future dev_put */
5727                 tc_priv->fwd_dev = skb->dev;
5728                 tc_priv->forward_tx = forward_tx;
5729
5730                 return true;
5731         }
5732
5733         return false;
5734 }
5735
5736 bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb,
5737                          struct mapping_ctx *mapping_ctx, u32 mapped_obj_id,
5738                          struct mlx5_tc_ct_priv *ct_priv,
5739                          u32 zone_restore_id, u32 tunnel_id,
5740                          struct mlx5e_tc_update_priv *tc_priv)
5741 {
5742         struct mlx5e_priv *priv = netdev_priv(skb->dev);
5743         struct mlx5_mapped_obj mapped_obj;
5744         int err;
5745
5746         err = mapping_find(mapping_ctx, mapped_obj_id, &mapped_obj);
5747         if (err) {
5748                 netdev_dbg(skb->dev,
5749                            "Couldn't find mapped object for mapped_obj_id: %d, err: %d\n",
5750                            mapped_obj_id, err);
5751                 return false;
5752         }
5753
5754         switch (mapped_obj.type) {
5755         case MLX5_MAPPED_OBJ_CHAIN:
5756         case MLX5_MAPPED_OBJ_ACT_MISS:
5757                 return mlx5e_tc_restore_skb_tc_meta(skb, ct_priv, &mapped_obj, zone_restore_id,
5758                                                     tunnel_id, tc_priv);
5759         case MLX5_MAPPED_OBJ_SAMPLE:
5760                 mlx5e_tc_restore_skb_sample(priv, skb, &mapped_obj, tc_priv);
5761                 tc_priv->skb_done = true;
5762                 return true;
5763         case MLX5_MAPPED_OBJ_INT_PORT_METADATA:
5764                 return mlx5e_tc_restore_skb_int_port(priv, skb, &mapped_obj, tc_priv, tunnel_id);
5765         default:
5766                 netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
5767                 return false;
5768         }
5769
5770         return false;
5771 }
5772
5773 bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
5774 {
5775         struct mlx5e_priv *priv = netdev_priv(skb->dev);
5776         u32 mapped_obj_id, reg_b, zone_restore_id;
5777         struct mlx5_tc_ct_priv *ct_priv;
5778         struct mapping_ctx *mapping_ctx;
5779         struct mlx5e_tc_table *tc;
5780
5781         reg_b = be32_to_cpu(cqe->ft_metadata);
5782         tc = mlx5e_fs_get_tc(priv->fs);
5783         mapped_obj_id = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5784         zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
5785                           ESW_ZONE_ID_MASK;
5786         ct_priv = tc->ct;
5787         mapping_ctx = tc->mapping;
5788
5789         return mlx5e_tc_update_skb(cqe, skb, mapping_ctx, mapped_obj_id, ct_priv, zone_restore_id,
5790                                    0, NULL);
5791 }
5792
5793 int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
5794                                      u64 act_miss_cookie, u32 *act_miss_mapping)
5795 {
5796         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5797         struct mlx5_mapped_obj mapped_obj = {};
5798         struct mapping_ctx *ctx;
5799         int err;
5800
5801         ctx = esw->offloads.reg_c0_obj_pool;
5802
5803         mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS;
5804         mapped_obj.act_miss_cookie = act_miss_cookie;
5805         err = mapping_add(ctx, &mapped_obj, act_miss_mapping);
5806         if (err)
5807                 return err;
5808
5809         attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping);
5810         if (IS_ERR(attr->act_id_restore_rule))
5811                 goto err_rule;
5812
5813         return 0;
5814
5815 err_rule:
5816         mapping_remove(ctx, *act_miss_mapping);
5817         return err;
5818 }
5819
5820 void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
5821                                       u32 act_miss_mapping)
5822 {
5823         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5824         struct mapping_ctx *ctx;
5825
5826         ctx = esw->offloads.reg_c0_obj_pool;
5827         mlx5_del_flow_rules(attr->act_id_restore_rule);
5828         mapping_remove(ctx, act_miss_mapping);
5829 }