ipv6: Remove in6addr_any alternatives.
[platform/kernel/linux-rpi.git] drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3
4 #include <net/fib_notifier.h>
5 #include <net/nexthop.h>
6 #include <net/ip_tunnels.h>
7 #include "tc_tun_encap.h"
8 #include "en_tc.h"
9 #include "tc_tun.h"
10 #include "rep/tc.h"
11 #include "diag/en_tc_tracepoint.h"
12
13 enum {
14         MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
15 };
16
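/* If the tunnel route device is an OVS internal port (ovs master), set up
 * forwarding of the encapsulated packet to that internal port on egress;
 * otherwise leave the flow actions untouched.
 */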
17 static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
18                                      struct mlx5_flow_attr *attr,
19                                      struct mlx5e_encap_entry *e,
20                                      int out_index)
21 {
22         struct net_device *route_dev;
23         int err = 0;
24
25         route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
26
27         if (!route_dev || !netif_is_ovs_master(route_dev))
28                 goto out;
29
30         err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
31                                                 MLX5E_TC_INT_PORT_EGRESS,
32                                                 &attr->action, out_index);
33
34 out:
35         if (route_dev)
36                 dev_put(route_dev);
37
38         return err;
39 }
40
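/* Tunnel route bookkeeping: a route entry caches the tunnel endpoint address
 * together with the encap entries and decap flows that depend on it, so the
 * FIB notifier can re-evaluate only the offloads affected by a route change.
 */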
41 struct mlx5e_route_key {
42         int ip_version;
43         union {
44                 __be32 v4;
45                 struct in6_addr v6;
46         } endpoint_ip;
47 };
48
49 struct mlx5e_route_entry {
50         struct mlx5e_route_key key;
51         struct list_head encap_entries;
52         struct list_head decap_flows;
53         u32 flags;
54         struct hlist_node hlist;
55         refcount_t refcnt;
56         int tunnel_dev_index;
57         struct rcu_head rcu;
58 };
59
60 struct mlx5e_tc_tun_encap {
61         struct mlx5e_priv *priv;
62         struct notifier_block fib_nb;
63         spinlock_t route_lock; /* protects route_tbl */
64         unsigned long route_tbl_last_update;
65         DECLARE_HASHTABLE(route_tbl, 8);
66 };
67
68 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
69 {
70         return r->flags & MLX5E_ROUTE_ENTRY_VALID;
71 }
72
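/* Copy the outer source/destination IP addresses from the match spec into
 * rx_tun_attr and, only when both are present, mark the flow as tunnel RX
 * so a decap route can later be established for it.
 */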
73 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
74                              struct mlx5_flow_spec *spec)
75 {
76         struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
77         struct mlx5_rx_tun_attr *tun_attr;
78         void *daddr, *saddr;
79         u8 ip_version;
80
81         tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
82         if (!tun_attr)
83                 return -ENOMEM;
84
85         esw_attr->rx_tun_attr = tun_attr;
86         ip_version = mlx5e_tc_get_ip_version(spec, true);
87
88         if (ip_version == 4) {
89                 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
90                                      outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
91                 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
92                                      outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
93                 tun_attr->dst_ip.v4 = *(__be32 *)daddr;
94                 tun_attr->src_ip.v4 = *(__be32 *)saddr;
95                 if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
96                         return 0;
97         }
98 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
99         else if (ip_version == 6) {
100                 int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
101
102                 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
103                                      outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
104                 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
105                                      outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
106                 memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
107                 memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
108                 if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
109                     ipv6_addr_any(&tun_attr->src_ip.v6))
110                         return 0;
111         }
112 #endif
113         /* Only set the flag if both src and dst ip addresses exist. They are
114          * required to establish routing.
115          */
116         flow_flag_set(flow, TUN_RX);
117         flow->attr->tun_ip_version = ip_version;
118         return 0;
119 }
120
121 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
122 {
123         bool all_flow_encaps_valid = true;
124         int i;
125
126         /* Flow can be associated with multiple encap entries.
127          * Before offloading the flow verify that all of them have
128          * a valid neighbour.
129          */
130         for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
131                 if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
132                         continue;
133                 if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
134                         all_flow_encaps_valid = false;
135                         break;
136                 }
137         }
138
139         return all_flow_encaps_valid;
140 }
141
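/* Called from the neigh update path once the neighbour of an encap entry
 * becomes reachable: offload the cached encap header as a packet reformat
 * object and move the attached flows from the slow path rule to their encap
 * (fast path) rules.
 */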
142 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
143                               struct mlx5e_encap_entry *e,
144                               struct list_head *flow_list)
145 {
146         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
147         struct mlx5_pkt_reformat_params reformat_params;
148         struct mlx5_esw_flow_attr *esw_attr;
149         struct mlx5_flow_handle *rule;
150         struct mlx5_flow_attr *attr;
151         struct mlx5_flow_spec *spec;
152         struct mlx5e_tc_flow *flow;
153         int err;
154
155         if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
156                 return;
157
158         memset(&reformat_params, 0, sizeof(reformat_params));
159         reformat_params.type = e->reformat_type;
160         reformat_params.size = e->encap_size;
161         reformat_params.data = e->encap_header;
162         e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
163                                                      &reformat_params,
164                                                      MLX5_FLOW_NAMESPACE_FDB);
165         if (IS_ERR(e->pkt_reformat)) {
166                 mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
167                                PTR_ERR(e->pkt_reformat));
168                 return;
169         }
170         e->flags |= MLX5_ENCAP_ENTRY_VALID;
171         mlx5e_rep_queue_neigh_stats_work(priv);
172
173         list_for_each_entry(flow, flow_list, tmp_list) {
174                 if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
175                         continue;
176
177                 spec = &flow->attr->parse_attr->spec;
178
179                 attr = mlx5e_tc_get_encap_attr(flow);
180                 esw_attr = attr->esw_attr;
181                 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
182                 esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
183
184                 /* Do not offload flows with unresolved neighbors */
185                 if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
186                         continue;
187
188                 err = mlx5e_tc_offload_flow_post_acts(flow);
189                 if (err) {
190                         mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
191                                        err);
192                         continue;
193                 }
194
195                 /* update from slow path rule to encap rule */
196                 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
197                 if (IS_ERR(rule)) {
198                         mlx5e_tc_unoffload_flow_post_acts(flow);
199                         err = PTR_ERR(rule);
200                         mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
201                                        err);
202                         continue;
203                 }
204
205                 mlx5e_tc_unoffload_from_slow_path(esw, flow);
206                 flow->rule[0] = rule;
207                 /* was unset when slow path rule removed */
208                 flow_flag_set(flow, OFFLOADED);
209         }
210 }
211
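/* Counterpart of mlx5e_tc_encap_flows_add(): the neighbour is no longer
 * usable, so demote the attached flows to slow path rules and release the
 * encap entry's packet reformat object.
 */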
212 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
213                               struct mlx5e_encap_entry *e,
214                               struct list_head *flow_list)
215 {
216         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
217         struct mlx5_esw_flow_attr *esw_attr;
218         struct mlx5_flow_handle *rule;
219         struct mlx5_flow_attr *attr;
220         struct mlx5_flow_spec *spec;
221         struct mlx5e_tc_flow *flow;
222         int err;
223
224         list_for_each_entry(flow, flow_list, tmp_list) {
225                 if (!mlx5e_is_offloaded_flow(flow))
226                         continue;
227
228                 attr = mlx5e_tc_get_encap_attr(flow);
229                 esw_attr = attr->esw_attr;
230                 /* mark the flow's encap dest as non-valid */
231                 esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
232                 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
233
234         /* Clear pkt_reformat before checking the slow path flag: when this
235          * flow is visited again it will already have the slow path flag set,
236          * but its pkt_reformat still needs to be cleared.
237          */
238                 if (flow_flag_test(flow, SLOW))
239                         continue;
240
241                 /* update from encap rule to slow path rule */
242                 spec = &flow->attr->parse_attr->spec;
243                 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
244
245                 if (IS_ERR(rule)) {
246                         err = PTR_ERR(rule);
247                         mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
248                                        err);
249                         continue;
250                 }
251
252                 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
253                 mlx5e_tc_unoffload_flow_post_acts(flow);
254                 flow->rule[0] = rule;
255                 /* was unset when fast path rule removed */
256                 flow_flag_set(flow, OFFLOADED);
257         }
258
259         /* the encap must have been valid here (its flows were offloaded), so the reformat can be freed */
260         e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
261         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
262         e->pkt_reformat = NULL;
263 }
264
265 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
266                                 struct list_head *flow_list,
267                                 int index)
268 {
269         if (IS_ERR(mlx5e_flow_get(flow))) {
270                 /* Flow is being deleted concurrently. Wait for it to be
271                  * unoffloaded from hardware, otherwise deleting encap will
272                  * fail.
273                  */
274                 wait_for_completion(&flow->del_hw_done);
275                 return;
276         }
277         wait_for_completion(&flow->init_done);
278
279         flow->tmp_entry_index = index;
280         list_add(&flow->tmp_list, flow_list);
281 }
282
283 /* Takes reference to all flows attached to encap and adds the flows to
284  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
285  */
286 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
287 {
288         struct encap_flow_item *efi;
289         struct mlx5e_tc_flow *flow;
290
291         list_for_each_entry(efi, &e->flows, list) {
292                 flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
293                 mlx5e_take_tmp_flow(flow, flow_list, efi->index);
294         }
295 }
296
297 /* Takes reference to all flows attached to route and adds the flows to
298  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
299  */
300 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
301                                              struct list_head *flow_list)
302 {
303         struct mlx5e_tc_flow *flow;
304
305         list_for_each_entry(flow, &r->decap_flows, decap_routes)
306                 mlx5e_take_tmp_flow(flow, flow_list, 0);
307 }
308
309 typedef bool (match_cb)(struct mlx5e_encap_entry *);
310
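/* Iterate nhe->encap_list under RCU and return the next entry that passes
 * 'match' with a reference taken; the previously returned entry (if any) is
 * released, so this can be used as a simple iterator.
 */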
311 static struct mlx5e_encap_entry *
312 mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
313                               struct mlx5e_encap_entry *e,
314                               match_cb match)
315 {
316         struct mlx5e_encap_entry *next = NULL;
317
318 retry:
319         rcu_read_lock();
320
321         /* find encap with non-zero reference counter value */
322         for (next = e ?
323                      list_next_or_null_rcu(&nhe->encap_list,
324                                            &e->encap_list,
325                                            struct mlx5e_encap_entry,
326                                            encap_list) :
327                      list_first_or_null_rcu(&nhe->encap_list,
328                                             struct mlx5e_encap_entry,
329                                             encap_list);
330              next;
331              next = list_next_or_null_rcu(&nhe->encap_list,
332                                           &next->encap_list,
333                                           struct mlx5e_encap_entry,
334                                           encap_list))
335                 if (mlx5e_encap_take(next))
336                         break;
337
338         rcu_read_unlock();
339
340         /* release starting encap */
341         if (e)
342                 mlx5e_encap_put(netdev_priv(e->out_dev), e);
343         if (!next)
344                 return next;
345
346         /* wait for encap to be fully initialized */
347         wait_for_completion(&next->res_ready);
348         /* continue searching if encap entry is not in valid state after completion */
349         if (!match(next)) {
350                 e = next;
351                 goto retry;
352         }
353
354         return next;
355 }
356
357 static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
358 {
359         return e->flags & MLX5_ENCAP_ENTRY_VALID;
360 }
361
362 static struct mlx5e_encap_entry *
363 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
364                            struct mlx5e_encap_entry *e)
365 {
366         return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
367 }
368
369 static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
370 {
371         return e->compl_result >= 0;
372 }
373
374 struct mlx5e_encap_entry *
375 mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
376                           struct mlx5e_encap_entry *e)
377 {
378         return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
379 }
380
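/* Check whether any offloaded flow behind this neigh hash entry saw traffic
 * since the last report (via the flow counters' lastuse) and, if so, poke
 * the neighbour with neigh_event_send() so it is kept reachable.
 */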
381 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
382 {
383         struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
384         struct mlx5e_encap_entry *e = NULL;
385         struct mlx5e_tc_flow *flow;
386         struct mlx5_fc *counter;
387         struct neigh_table *tbl;
388         bool neigh_used = false;
389         struct neighbour *n;
390         u64 lastuse;
391
392         if (m_neigh->family == AF_INET)
393                 tbl = &arp_tbl;
394 #if IS_ENABLED(CONFIG_IPV6)
395         else if (m_neigh->family == AF_INET6)
396                 tbl = ipv6_stub->nd_tbl;
397 #endif
398         else
399                 return;
400
401         /* mlx5e_get_next_valid_encap() releases the previous encap entry
402          * before returning the next one.
403          */
404         while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
405                 struct mlx5e_priv *priv = netdev_priv(e->out_dev);
406                 struct encap_flow_item *efi, *tmp;
407                 struct mlx5_eswitch *esw;
408                 LIST_HEAD(flow_list);
409
410                 esw = priv->mdev->priv.eswitch;
411                 mutex_lock(&esw->offloads.encap_tbl_lock);
412                 list_for_each_entry_safe(efi, tmp, &e->flows, list) {
413                         flow = container_of(efi, struct mlx5e_tc_flow,
414                                             encaps[efi->index]);
415                         if (IS_ERR(mlx5e_flow_get(flow)))
416                                 continue;
417                         list_add(&flow->tmp_list, &flow_list);
418
419                         if (mlx5e_is_offloaded_flow(flow)) {
420                                 counter = mlx5e_tc_get_counter(flow);
421                                 lastuse = mlx5_fc_query_lastuse(counter);
422                                 if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
423                                         neigh_used = true;
424                                         break;
425                                 }
426                         }
427                 }
428                 mutex_unlock(&esw->offloads.encap_tbl_lock);
429
430                 mlx5e_put_flow_list(priv, &flow_list);
431                 if (neigh_used) {
432                         /* release current encap before breaking the loop */
433                         mlx5e_encap_put(priv, e);
434                         break;
435                 }
436         }
437
438         trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
439
440         if (neigh_used) {
441                 nhe->reported_lastuse = jiffies;
442
443                 /* find the relevant neigh according to the cached device and
444                  * dst ip pair
445                  */
446                 n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
447                 if (!n)
448                         return;
449
450                 neigh_event_send(n, NULL);
451                 neigh_release(n);
452         }
453 }
454
455 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
456 {
457         WARN_ON(!list_empty(&e->flows));
458
459         if (e->compl_result > 0) {
460                 mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
461
462                 if (e->flags & MLX5_ENCAP_ENTRY_VALID)
463                         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
464         }
465
466         kfree(e->tun_info);
467         kfree(e->encap_header);
468         kfree_rcu(e, rcu);
469 }
470
471 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
472                                 struct mlx5e_decap_entry *d)
473 {
474         WARN_ON(!list_empty(&d->flows));
475
476         if (!d->compl_result)
477                 mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
478
479         kfree_rcu(d, rcu);
480 }
481
482 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
483 {
484         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
485
486         if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
487                 return;
488         list_del(&e->route_list);
489         hash_del_rcu(&e->encap_hlist);
490         mutex_unlock(&esw->offloads.encap_tbl_lock);
491
492         mlx5e_encap_dealloc(priv, e);
493 }
494
495 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
496 {
497         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
498
499         if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
500                 return;
501         hash_del_rcu(&d->hlist);
502         mutex_unlock(&esw->offloads.decap_tbl_lock);
503
504         mlx5e_decap_dealloc(priv, d);
505 }
506
507 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
508                                      struct mlx5e_tc_flow *flow,
509                                      int out_index);
510
511 void mlx5e_detach_encap(struct mlx5e_priv *priv,
512                         struct mlx5e_tc_flow *flow,
513                         struct mlx5_flow_attr *attr,
514                         int out_index)
515 {
516         struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
517         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
518
519         if (!mlx5e_is_eswitch_flow(flow))
520                 return;
521
522         if (attr->esw_attr->dests[out_index].flags &
523             MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
524                 mlx5e_detach_encap_route(priv, flow, out_index);
525
526         /* flow wasn't fully initialized */
527         if (!e)
528                 return;
529
530         mutex_lock(&esw->offloads.encap_tbl_lock);
531         list_del(&flow->encaps[out_index].list);
532         flow->encaps[out_index].e = NULL;
533         if (!refcount_dec_and_test(&e->refcnt)) {
534                 mutex_unlock(&esw->offloads.encap_tbl_lock);
535                 return;
536         }
537         list_del(&e->route_list);
538         hash_del_rcu(&e->encap_hlist);
539         mutex_unlock(&esw->offloads.encap_tbl_lock);
540
541         mlx5e_encap_dealloc(priv, e);
542 }
543
544 void mlx5e_detach_decap(struct mlx5e_priv *priv,
545                         struct mlx5e_tc_flow *flow)
546 {
547         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
548         struct mlx5e_decap_entry *d = flow->decap_reformat;
549
550         if (!d)
551                 return;
552
553         mutex_lock(&esw->offloads.decap_tbl_lock);
554         list_del(&flow->l3_to_l2_reformat);
555         flow->decap_reformat = NULL;
556
557         if (!refcount_dec_and_test(&d->refcnt)) {
558                 mutex_unlock(&esw->offloads.decap_tbl_lock);
559                 return;
560         }
561         hash_del_rcu(&d->hlist);
562         mutex_unlock(&esw->offloads.decap_tbl_lock);
563
564         mlx5e_decap_dealloc(priv, d);
565 }
566
567 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
568                                            struct mlx5e_encap_key *b)
569 {
570         return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
571                 a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
572 }
573
574 bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
575                                            struct mlx5e_encap_key *b,
576                                            __be16 tun_flags)
577 {
578         struct ip_tunnel_info *a_info;
579         struct ip_tunnel_info *b_info;
580         bool a_has_opts, b_has_opts;
581
582         if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
583                 return false;
584
585         a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags);
586         b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags);
587
588         /* keys are equal when both don't have any options attached */
589         if (!a_has_opts && !b_has_opts)
590                 return true;
591
592         if (a_has_opts != b_has_opts)
593                 return false;
594
595         /* options stored in memory next to ip_tunnel_info struct */
596         a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
597         b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
598
599         return a_info->options_len == b_info->options_len &&
600                !memcmp(ip_tunnel_info_opts(a_info),
601                        ip_tunnel_info_opts(b_info),
602                        a_info->options_len);
603 }
604
605 static int cmp_decap_info(struct mlx5e_decap_key *a,
606                           struct mlx5e_decap_key *b)
607 {
608         return memcmp(&a->key, &b->key, sizeof(b->key));
609 }
610
611 static int hash_encap_info(struct mlx5e_encap_key *key)
612 {
613         return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
614                      key->tc_tunnel->tunnel_type);
615 }
616
617 static int hash_decap_info(struct mlx5e_decap_key *key)
618 {
619         return jhash(&key->key, sizeof(key->key), 0);
620 }
621
622 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
623 {
624         return refcount_inc_not_zero(&e->refcnt);
625 }
626
627 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
628 {
629         return refcount_inc_not_zero(&e->refcnt);
630 }
631
632 static struct mlx5e_encap_entry *
633 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
634                 uintptr_t hash_key)
635 {
636         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
637         struct mlx5e_encap_key e_key;
638         struct mlx5e_encap_entry *e;
639
640         hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
641                                    encap_hlist, hash_key) {
642                 e_key.ip_tun_key = &e->tun_info->key;
643                 e_key.tc_tunnel = e->tunnel;
644                 if (e->tunnel->encap_info_equal(&e_key, key) &&
645                     mlx5e_encap_take(e))
646                         return e;
647         }
648
649         return NULL;
650 }
651
652 static struct mlx5e_decap_entry *
653 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
654                 uintptr_t hash_key)
655 {
656         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
657         struct mlx5e_decap_key r_key;
658         struct mlx5e_decap_entry *e;
659
660         hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
661                                    hlist, hash_key) {
662                 r_key = e->key;
663                 if (!cmp_decap_info(&r_key, key) &&
664                     mlx5e_decap_take(e))
665                         return e;
666         }
667         return NULL;
668 }
669
670 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
671 {
672         size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
673
674         return kmemdup(tun_info, tun_size, GFP_KERNEL);
675 }
676
677 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
678                                       struct mlx5e_tc_flow *flow,
679                                       int out_index,
680                                       struct mlx5e_encap_entry *e,
681                                       struct netlink_ext_ack *extack)
682 {
683         int i;
684
685         for (i = 0; i < out_index; i++) {
686                 if (flow->encaps[i].e != e)
687                         continue;
688                 NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
689                 netdev_err(priv->netdev, "can't duplicate encap action\n");
690                 return true;
691         }
692
693         return false;
694 }
695
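/* When the tunnel's route device is an mlx5 device and this is a VF tunnel,
 * rewrite the vport metadata register (VPORT_TO_REG) to the route vport and
 * record the action id; mlx5e_update_vf_tunnel() later patches it in place.
 */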
696 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
697                                struct mlx5_flow_attr *attr,
698                                struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
699                                struct net_device *out_dev,
700                                int route_dev_ifindex,
701                                int out_index)
702 {
703         struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
704         struct net_device *route_dev;
705         u16 vport_num;
706         int err = 0;
707         u32 data;
708
709         route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
710
711         if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
712             !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
713                 goto out;
714
715         err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
716         if (err)
717                 goto out;
718
719         attr->dest_chain = 0;
720         attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
721         esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
722         data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
723                                                        vport_num);
724         err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
725                                                    MLX5_FLOW_NAMESPACE_FDB,
726                                                    VPORT_TO_REG, data);
727         if (err >= 0) {
728                 esw_attr->dests[out_index].src_port_rewrite_act_id = err;
729                 err = 0;
730         }
731
732 out:
733         if (route_dev)
734                 dev_put(route_dev);
735         return err;
736 }
737
738 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
739                                   struct mlx5_esw_flow_attr *attr,
740                                   struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
741                                   struct net_device *out_dev,
742                                   int route_dev_ifindex,
743                                   int out_index)
744 {
745         int act_id = attr->dests[out_index].src_port_rewrite_act_id;
746         struct net_device *route_dev;
747         u16 vport_num;
748         int err = 0;
749         u32 data;
750
751         route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
752
753         if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
754             !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
755                 err = -ENODEV;
756                 goto out;
757         }
758
759         err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
760         if (err)
761                 goto out;
762
763         data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
764                                                        vport_num);
765         mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
766
767 out:
768         if (route_dev)
769                 dev_put(route_dev);
770         return err;
771 }
772
773 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
774 {
775         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
776         struct mlx5_rep_uplink_priv *uplink_priv;
777         struct mlx5e_rep_priv *uplink_rpriv;
778         struct mlx5e_tc_tun_encap *encap;
779         unsigned int ret;
780
781         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
782         uplink_priv = &uplink_rpriv->uplink_priv;
783         encap = uplink_priv->encap;
784
785         spin_lock_bh(&encap->route_lock);
786         ret = encap->route_tbl_last_update;
787         spin_unlock_bh(&encap->route_lock);
788         return ret;
789 }
790
791 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
792                                     struct mlx5e_tc_flow *flow,
793                                     struct mlx5_flow_attr *attr,
794                                     struct mlx5e_encap_entry *e,
795                                     bool new_encap_entry,
796                                     unsigned long tbl_time_before,
797                                     int out_index);
798
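/* Look up (or create) an encap entry shared by flows with the same tunnel
 * key, build its encap header if it is new, and attach the flow to it. If
 * the entry is not valid yet (neighbour unresolved) the flow is marked SLOW
 * and offloaded through the slow path until the encap becomes valid.
 */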
799 int mlx5e_attach_encap(struct mlx5e_priv *priv,
800                        struct mlx5e_tc_flow *flow,
801                        struct mlx5_flow_attr *attr,
802                        struct net_device *mirred_dev,
803                        int out_index,
804                        struct netlink_ext_ack *extack,
805                        struct net_device **encap_dev)
806 {
807         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
808         struct mlx5e_tc_flow_parse_attr *parse_attr;
809         const struct ip_tunnel_info *tun_info;
810         const struct mlx5e_mpls_info *mpls_info;
811         unsigned long tbl_time_before = 0;
812         struct mlx5e_encap_entry *e;
813         struct mlx5e_encap_key key;
814         bool entry_created = false;
815         unsigned short family;
816         uintptr_t hash_key;
817         int err = 0;
818
819         parse_attr = attr->parse_attr;
820         tun_info = parse_attr->tun_info[out_index];
821         mpls_info = &parse_attr->mpls_info[out_index];
822         family = ip_tunnel_info_af(tun_info);
823         key.ip_tun_key = &tun_info->key;
824         key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
825         if (!key.tc_tunnel) {
826                 NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
827                 return -EOPNOTSUPP;
828         }
829
830         hash_key = hash_encap_info(&key);
831
832         mutex_lock(&esw->offloads.encap_tbl_lock);
833         e = mlx5e_encap_get(priv, &key, hash_key);
834
835         /* an existing entry may still be initializing or may have failed; verify before use */
836         if (e) {
837                 /* Check that entry was not already attached to this flow */
838                 if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
839                         err = -EOPNOTSUPP;
840                         goto out_err;
841                 }
842
843                 mutex_unlock(&esw->offloads.encap_tbl_lock);
844                 wait_for_completion(&e->res_ready);
845
846                 /* Protect against concurrent neigh update. */
847                 mutex_lock(&esw->offloads.encap_tbl_lock);
848                 if (e->compl_result < 0) {
849                         err = -EREMOTEIO;
850                         goto out_err;
851                 }
852                 goto attach_flow;
853         }
854
855         e = kzalloc(sizeof(*e), GFP_KERNEL);
856         if (!e) {
857                 err = -ENOMEM;
858                 goto out_err;
859         }
860
861         refcount_set(&e->refcnt, 1);
862         init_completion(&e->res_ready);
863         entry_created = true;
864         INIT_LIST_HEAD(&e->route_list);
865
866         tun_info = mlx5e_dup_tun_info(tun_info);
867         if (!tun_info) {
868                 err = -ENOMEM;
869                 goto out_err_init;
870         }
871         e->tun_info = tun_info;
872         memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
873         err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
874         if (err)
875                 goto out_err_init;
876
877         INIT_LIST_HEAD(&e->flows);
878         hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
879         tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
880         mutex_unlock(&esw->offloads.encap_tbl_lock);
881
882         if (family == AF_INET)
883                 err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
884         else if (family == AF_INET6)
885                 err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
886
887         /* Protect against concurrent neigh update. */
888         mutex_lock(&esw->offloads.encap_tbl_lock);
889         complete_all(&e->res_ready);
890         if (err) {
891                 e->compl_result = err;
892                 goto out_err;
893         }
894         e->compl_result = 1;
895
896 attach_flow:
897         err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
898                                        tbl_time_before, out_index);
899         if (err)
900                 goto out_err;
901
902         err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
903         if (err == -EOPNOTSUPP) {
904                 /* If device doesn't support int port offload,
905                  * redirect to uplink vport.
906                  */
907                 mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
908                 err = 0;
909         } else if (err) {
910                 goto out_err;
911         }
912
913         flow->encaps[out_index].e = e;
914         list_add(&flow->encaps[out_index].list, &e->flows);
915         flow->encaps[out_index].index = out_index;
916         *encap_dev = e->out_dev;
917         if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
918                 attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
919                 attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
920         } else {
921                 flow_flag_set(flow, SLOW);
922         }
923         mutex_unlock(&esw->offloads.encap_tbl_lock);
924
925         return err;
926
927 out_err:
928         mutex_unlock(&esw->offloads.encap_tbl_lock);
929         if (e)
930                 mlx5e_encap_put(priv, e);
931         return err;
932
933 out_err_init:
934         mutex_unlock(&esw->offloads.encap_tbl_lock);
935         kfree(tun_info);
936         kfree(e);
937         return err;
938 }
939
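/* Attach a shared decap entry keyed on the Ethernet header that has to be
 * re-created when an L3 tunnel is decapsulated (L3_TUNNEL_TO_L2 reformat),
 * allocating the packet reformat object the first time the key is seen.
 */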
940 int mlx5e_attach_decap(struct mlx5e_priv *priv,
941                        struct mlx5e_tc_flow *flow,
942                        struct netlink_ext_ack *extack)
943 {
944         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
945         struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
946         struct mlx5_pkt_reformat_params reformat_params;
947         struct mlx5e_decap_entry *d;
948         struct mlx5e_decap_key key;
949         uintptr_t hash_key;
950         int err = 0;
951
952         if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
953                 NL_SET_ERR_MSG_MOD(extack,
954                                    "encap header larger than max supported");
955                 return -EOPNOTSUPP;
956         }
957
958         key.key = attr->eth;
959         hash_key = hash_decap_info(&key);
960         mutex_lock(&esw->offloads.decap_tbl_lock);
961         d = mlx5e_decap_get(priv, &key, hash_key);
962         if (d) {
963                 mutex_unlock(&esw->offloads.decap_tbl_lock);
964                 wait_for_completion(&d->res_ready);
965                 mutex_lock(&esw->offloads.decap_tbl_lock);
966                 if (d->compl_result) {
967                         err = -EREMOTEIO;
968                         goto out_free;
969                 }
970                 goto found;
971         }
972
973         d = kzalloc(sizeof(*d), GFP_KERNEL);
974         if (!d) {
975                 err = -ENOMEM;
976                 goto out_err;
977         }
978
979         d->key = key;
980         refcount_set(&d->refcnt, 1);
981         init_completion(&d->res_ready);
982         INIT_LIST_HEAD(&d->flows);
983         hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
984         mutex_unlock(&esw->offloads.decap_tbl_lock);
985
986         memset(&reformat_params, 0, sizeof(reformat_params));
987         reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
988         reformat_params.size = sizeof(attr->eth);
989         reformat_params.data = &attr->eth;
990         d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
991                                                      &reformat_params,
992                                                      MLX5_FLOW_NAMESPACE_FDB);
993         if (IS_ERR(d->pkt_reformat)) {
994                 err = PTR_ERR(d->pkt_reformat);
995                 d->compl_result = err;
996         }
997         mutex_lock(&esw->offloads.decap_tbl_lock);
998         complete_all(&d->res_ready);
999         if (err)
1000                 goto out_free;
1001
1002 found:
1003         flow->decap_reformat = d;
1004         attr->decap_pkt_reformat = d->pkt_reformat;
1005         list_add(&flow->l3_to_l2_reformat, &d->flows);
1006         mutex_unlock(&esw->offloads.decap_tbl_lock);
1007         return 0;
1008
1009 out_free:
1010         mutex_unlock(&esw->offloads.decap_tbl_lock);
1011         mlx5e_decap_put(priv, d);
1012         return err;
1013
1014 out_err:
1015         mutex_unlock(&esw->offloads.decap_tbl_lock);
1016         return err;
1017 }
1018
1019 static int cmp_route_info(struct mlx5e_route_key *a,
1020                           struct mlx5e_route_key *b)
1021 {
1022         if (a->ip_version == 4 && b->ip_version == 4)
1023                 return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
1024                               sizeof(a->endpoint_ip.v4));
1025         else if (a->ip_version == 6 && b->ip_version == 6)
1026                 return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
1027                               sizeof(a->endpoint_ip.v6));
1028         return 1;
1029 }
1030
1031 static u32 hash_route_info(struct mlx5e_route_key *key)
1032 {
1033         if (key->ip_version == 4)
1034                 return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
1035         return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
1036 }
1037
1038 static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
1039                                 struct mlx5e_route_entry *r)
1040 {
1041         WARN_ON(!list_empty(&r->decap_flows));
1042         WARN_ON(!list_empty(&r->encap_entries));
1043
1044         kfree_rcu(r, rcu);
1045 }
1046
1047 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1048 {
1049         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1050
1051         if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
1052                 return;
1053
1054         hash_del_rcu(&r->hlist);
1055         mutex_unlock(&esw->offloads.encap_tbl_lock);
1056
1057         mlx5e_route_dealloc(priv, r);
1058 }
1059
1060 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1061 {
1062         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1063
1064         lockdep_assert_held(&esw->offloads.encap_tbl_lock);
1065
1066         if (!refcount_dec_and_test(&r->refcnt))
1067                 return;
1068         hash_del_rcu(&r->hlist);
1069         mlx5e_route_dealloc(priv, r);
1070 }
1071
1072 static struct mlx5e_route_entry *
1073 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
1074                 u32 hash_key)
1075 {
1076         struct mlx5e_route_key r_key;
1077         struct mlx5e_route_entry *r;
1078
1079         hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
1080                 r_key = r->key;
1081                 if (!cmp_route_info(&r_key, key) &&
1082                     refcount_inc_not_zero(&r->refcnt))
1083                         return r;
1084         }
1085         return NULL;
1086 }
1087
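/* Find an existing route entry for the tunnel endpoint or create a new one.
 * The route table timestamp is sampled under route_lock so callers can tell
 * whether a FIB update raced with the creation and needs to be replayed.
 */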
1088 static struct mlx5e_route_entry *
1089 mlx5e_route_get_create(struct mlx5e_priv *priv,
1090                        struct mlx5e_route_key *key,
1091                        int tunnel_dev_index,
1092                        unsigned long *route_tbl_change_time)
1093 {
1094         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1095         struct mlx5_rep_uplink_priv *uplink_priv;
1096         struct mlx5e_rep_priv *uplink_rpriv;
1097         struct mlx5e_tc_tun_encap *encap;
1098         struct mlx5e_route_entry *r;
1099         u32 hash_key;
1100
1101         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1102         uplink_priv = &uplink_rpriv->uplink_priv;
1103         encap = uplink_priv->encap;
1104
1105         hash_key = hash_route_info(key);
1106         spin_lock_bh(&encap->route_lock);
1107         r = mlx5e_route_get(encap, key, hash_key);
1108         spin_unlock_bh(&encap->route_lock);
1109         if (r) {
1110                 if (!mlx5e_route_entry_valid(r)) {
1111                         mlx5e_route_put_locked(priv, r);
1112                         return ERR_PTR(-EINVAL);
1113                 }
1114                 return r;
1115         }
1116
1117         r = kzalloc(sizeof(*r), GFP_KERNEL);
1118         if (!r)
1119                 return ERR_PTR(-ENOMEM);
1120
1121         r->key = *key;
1122         r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1123         r->tunnel_dev_index = tunnel_dev_index;
1124         refcount_set(&r->refcnt, 1);
1125         INIT_LIST_HEAD(&r->decap_flows);
1126         INIT_LIST_HEAD(&r->encap_entries);
1127
1128         spin_lock_bh(&encap->route_lock);
1129         *route_tbl_change_time = encap->route_tbl_last_update;
1130         hash_add(encap->route_tbl, &r->hlist, hash_key);
1131         spin_unlock_bh(&encap->route_lock);
1132
1133         return r;
1134 }
1135
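/* FIB-event side of the race detection: bump route_tbl_last_update before
 * looking the entry up, so a concurrent mlx5e_route_get_create() sees the
 * change and schedules an update for the entry it has just created.
 */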
1136 static struct mlx5e_route_entry *
1137 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1138 {
1139         u32 hash_key = hash_route_info(key);
1140         struct mlx5e_route_entry *r;
1141
1142         spin_lock_bh(&encap->route_lock);
1143         encap->route_tbl_last_update = jiffies;
1144         r = mlx5e_route_get(encap, key, hash_key);
1145         spin_unlock_bh(&encap->route_lock);
1146
1147         return r;
1148 }
1149
1150 struct mlx5e_tc_fib_event_data {
1151         struct work_struct work;
1152         unsigned long event;
1153         struct mlx5e_route_entry *r;
1154         struct net_device *ul_dev;
1155 };
1156
1157 static void mlx5e_tc_fib_event_work(struct work_struct *work);
1158 static struct mlx5e_tc_fib_event_data *
1159 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1160 {
1161         struct mlx5e_tc_fib_event_data *fib_work;
1162
1163         fib_work = kzalloc(sizeof(*fib_work), flags);
1164         if (WARN_ON(!fib_work))
1165                 return NULL;
1166
1167         INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1168         fib_work->event = event;
1169         fib_work->ul_dev = ul_dev;
1170
1171         return fib_work;
1172 }
1173
1174 static int
1175 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1176                            struct mlx5e_route_entry *r,
1177                            unsigned long event)
1178 {
1179         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1180         struct mlx5e_tc_fib_event_data *fib_work;
1181         struct mlx5e_rep_priv *uplink_rpriv;
1182         struct net_device *ul_dev;
1183
1184         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1185         ul_dev = uplink_rpriv->netdev;
1186
1187         fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1188         if (!fib_work)
1189                 return -ENOMEM;
1190
1191         dev_hold(ul_dev);
1192         refcount_inc(&r->refcnt);
1193         fib_work->r = r;
1194         queue_work(priv->wq, &fib_work->work);
1195
1196         return 0;
1197 }
1198
1199 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1200                              struct mlx5e_tc_flow *flow)
1201 {
1202         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1203         unsigned long tbl_time_before, tbl_time_after;
1204         struct mlx5e_tc_flow_parse_attr *parse_attr;
1205         struct mlx5_flow_attr *attr = flow->attr;
1206         struct mlx5_esw_flow_attr *esw_attr;
1207         struct mlx5e_route_entry *r;
1208         struct mlx5e_route_key key;
1209         int err = 0;
1210
1211         esw_attr = attr->esw_attr;
1212         parse_attr = attr->parse_attr;
1213         mutex_lock(&esw->offloads.encap_tbl_lock);
1214         if (!esw_attr->rx_tun_attr)
1215                 goto out;
1216
1217         tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1218         tbl_time_after = tbl_time_before;
1219         err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
1220         if (err || !esw_attr->rx_tun_attr->decap_vport)
1221                 goto out;
1222
1223         key.ip_version = attr->tun_ip_version;
1224         if (key.ip_version == 4)
1225                 key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1226         else
1227                 key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1228
1229         r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1230                                    &tbl_time_after);
1231         if (IS_ERR(r)) {
1232                 err = PTR_ERR(r);
1233                 goto out;
1234         }
1235         /* Routing changed concurrently. FIB event handler might have missed new
1236          * entry, schedule update.
1237          */
1238         if (tbl_time_before != tbl_time_after) {
1239                 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1240                 if (err) {
1241                         mlx5e_route_put_locked(priv, r);
1242                         goto out;
1243                 }
1244         }
1245
1246         flow->decap_route = r;
1247         list_add(&flow->decap_routes, &r->decap_flows);
1248         mutex_unlock(&esw->offloads.encap_tbl_lock);
1249         return 0;
1250
1251 out:
1252         mutex_unlock(&esw->offloads.encap_tbl_lock);
1253         return err;
1254 }
1255
1256 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1257                                     struct mlx5e_tc_flow *flow,
1258                                     struct mlx5_flow_attr *attr,
1259                                     struct mlx5e_encap_entry *e,
1260                                     bool new_encap_entry,
1261                                     unsigned long tbl_time_before,
1262                                     int out_index)
1263 {
1264         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1265         unsigned long tbl_time_after = tbl_time_before;
1266         struct mlx5e_tc_flow_parse_attr *parse_attr;
1267         const struct ip_tunnel_info *tun_info;
1268         struct mlx5_esw_flow_attr *esw_attr;
1269         struct mlx5e_route_entry *r;
1270         struct mlx5e_route_key key;
1271         unsigned short family;
1272         int err = 0;
1273
1274         esw_attr = attr->esw_attr;
1275         parse_attr = attr->parse_attr;
1276         tun_info = parse_attr->tun_info[out_index];
1277         family = ip_tunnel_info_af(tun_info);
1278
1279         if (family == AF_INET) {
1280                 key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1281                 key.ip_version = 4;
1282         } else if (family == AF_INET6) {
1283                 key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1284                 key.ip_version = 6;
1285         }
1286
1287         err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1288                                   e->route_dev_ifindex, out_index);
1289         if (err || !(esw_attr->dests[out_index].flags &
1290                      MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1291                 return err;
1292
1293         r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1294                                    &tbl_time_after);
1295         if (IS_ERR(r))
1296                 return PTR_ERR(r);
1297         /* Routing changed concurrently. FIB event handler might have missed new
1298          * entry, schedule update.
1299          */
1300         if (tbl_time_before != tbl_time_after) {
1301                 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1302                 if (err) {
1303                         mlx5e_route_put_locked(priv, r);
1304                         return err;
1305                 }
1306         }
1307
1308         flow->encap_routes[out_index].r = r;
1309         if (new_encap_entry)
1310                 list_add(&e->route_list, &r->encap_entries);
1311         flow->encap_routes[out_index].index = out_index;
1312         return 0;
1313 }
1314
1315 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1316                               struct mlx5e_tc_flow *flow)
1317 {
1318         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1319         struct mlx5e_route_entry *r = flow->decap_route;
1320
1321         if (!r)
1322                 return;
1323
1324         mutex_lock(&esw->offloads.encap_tbl_lock);
1325         list_del(&flow->decap_routes);
1326         flow->decap_route = NULL;
1327
1328         if (!refcount_dec_and_test(&r->refcnt)) {
1329                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1330                 return;
1331         }
1332         hash_del_rcu(&r->hlist);
1333         mutex_unlock(&esw->offloads.encap_tbl_lock);
1334
1335         mlx5e_route_dealloc(priv, r);
1336 }
1337
1338 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1339                                      struct mlx5e_tc_flow *flow,
1340                                      int out_index)
1341 {
1342         struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1343         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1344         struct mlx5e_encap_entry *e, *tmp;
1345
1346         if (!r)
1347                 return;
1348
1349         mutex_lock(&esw->offloads.encap_tbl_lock);
1350         flow->encap_routes[out_index].r = NULL;
1351
1352         if (!refcount_dec_and_test(&r->refcnt)) {
1353                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1354                 return;
1355         }
1356         list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1357                 list_del_init(&e->route_list);
1358         hash_del_rcu(&r->hlist);
1359         mutex_unlock(&esw->offloads.encap_tbl_lock);
1360
1361         mlx5e_route_dealloc(priv, r);
1362 }
1363
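/* The route used by this encap entry went away: unoffload every flow that
 * uses it, drop the flows' mod_hdr and the entry's packet reformat objects,
 * and mark the entry MLX5_ENCAP_ENTRY_NO_ROUTE so it is skipped until a new
 * route shows up.
 */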
1364 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1365                                    struct mlx5e_encap_entry *e,
1366                                    struct list_head *encap_flows)
1367 {
1368         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1369         struct mlx5e_tc_flow *flow;
1370
1371         list_for_each_entry(flow, encap_flows, tmp_list) {
1372                 struct mlx5_flow_attr *attr = flow->attr;
1373                 struct mlx5_esw_flow_attr *esw_attr;
1374
1375                 if (!mlx5e_is_offloaded_flow(flow))
1376                         continue;
1377                 esw_attr = attr->esw_attr;
1378
1379                 if (flow_flag_test(flow, SLOW))
1380                         mlx5e_tc_unoffload_from_slow_path(esw, flow);
1381                 else
1382                         mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1383
1384                 mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1385                 attr->modify_hdr = NULL;
1386
1387                 esw_attr->dests[flow->tmp_entry_index].flags &=
1388                         ~MLX5_ESW_DEST_ENCAP_VALID;
1389                 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1390         }
1391
1392         e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1393         if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1394                 e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1395                 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1396                 e->pkt_reformat = NULL;
1397         }
1398 }
1399
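/* A route is available again: rebuild the encap header against the current
 * tunnel device, refresh each flow's vport rewrite and mod_hdr, and try to
 * move the flows back to encap rules, falling back to the slow path while
 * the encap entry is still not valid.
 */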
1400 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1401                                   struct net_device *tunnel_dev,
1402                                   struct mlx5e_encap_entry *e,
1403                                   struct list_head *encap_flows)
1404 {
1405         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1406         struct mlx5e_tc_flow *flow;
1407         int err;
1408
1409         err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1410                 mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1411                 mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1412         if (err)
1413                 mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1414         e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1415
1416         list_for_each_entry(flow, encap_flows, tmp_list) {
1417                 struct mlx5e_tc_flow_parse_attr *parse_attr;
1418                 struct mlx5_esw_flow_attr *esw_attr;
1419                 struct mlx5_flow_handle *rule;
1420                 struct mlx5_flow_attr *attr;
1421                 struct mlx5_flow_spec *spec;
1422
1423                 if (flow_flag_test(flow, FAILED))
1424                         continue;
1425
1426                 spec = &flow->attr->parse_attr->spec;
1427
1428                 attr = mlx5e_tc_get_encap_attr(flow);
1429                 esw_attr = attr->esw_attr;
1430                 parse_attr = attr->parse_attr;
1431
1432                 err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1433                                              e->out_dev, e->route_dev_ifindex,
1434                                              flow->tmp_entry_index);
1435                 if (err) {
1436                         mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d\n", err);
1437                         continue;
1438                 }
1439
1440                 err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1441                 if (err) {
1442                         mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d\n",
1443                                        err);
1444                         continue;
1445                 }
1446
1447                 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1448                         esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1449                         esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1450                         if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1451                                 goto offload_to_slow_path;
1452
1453                         err = mlx5e_tc_offload_flow_post_acts(flow);
1454                         if (err) {
1455                                 mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1456                                                err);
1457                                 goto offload_to_slow_path;
1458                         }
1459
1460                         /* update from slow path rule to encap rule */
1461                         rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1462                         if (IS_ERR(rule)) {
1463                                 mlx5e_tc_unoffload_flow_post_acts(flow);
1464                                 err = PTR_ERR(rule);
1465                                 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1466                                                err);
1467                         } else {
1468                                 flow->rule[0] = rule;
1469                         }
1470                 } else {
1471 offload_to_slow_path:
1472                         rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1473                         /* mark the flow's encap dest as invalid */
1474                         esw_attr->dests[flow->tmp_entry_index].flags &=
1475                                 ~MLX5_ESW_DEST_ENCAP_VALID;
1476
1477                         if (IS_ERR(rule)) {
1478                                 err = PTR_ERR(rule);
1479                                 mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1480                                                err);
1481                         } else {
1482                                 flow->rule[0] = rule;
1483                         }
1484                 }
1485                 flow_flag_set(flow, OFFLOADED);
1486         }
1487 }
1488
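/* Propagate a FIB event to all encap entries of route @r: invalidate their
 * flows if the route was previously valid and, on a route replace, re-offload
 * them through the current tunnel device. Processed flows are collected on
 * @flow_list so the caller can drop the temporary references.
 */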
1489 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1490                                      struct mlx5e_route_entry *r,
1491                                      struct list_head *flow_list,
1492                                      bool replace)
1493 {
1494         struct net_device *tunnel_dev;
1495         struct mlx5e_encap_entry *e;
1496
1497         tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1498         if (!tunnel_dev)
1499                 return -ENODEV;
1500
1501         list_for_each_entry(e, &r->encap_entries, route_list) {
1502                 LIST_HEAD(encap_flows);
1503
1504                 mlx5e_take_all_encap_flows(e, &encap_flows);
1505                 if (list_empty(&encap_flows))
1506                         continue;
1507
1508                 if (mlx5e_route_entry_valid(r))
1509                         mlx5e_invalidate_encap(priv, e, &encap_flows);
1510
1511                 if (!replace) {
1512                         list_splice(&encap_flows, flow_list);
1513                         continue;
1514                 }
1515
1516                 mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1517                 list_splice(&encap_flows, flow_list);
1518         }
1519
1520         return 0;
1521 }
1522
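/* Remove the FDB rules of every currently offloaded flow on @flow_list. */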
1523 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1524                                       struct list_head *flow_list)
1525 {
1526         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1527         struct mlx5e_tc_flow *flow;
1528
1529         list_for_each_entry(flow, flow_list, tmp_list)
1530                 if (mlx5e_is_offloaded_flow(flow))
1531                         mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1532 }
1533
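/* Re-offload decap flows after a route change: redo the tunnel route lookup
 * for each flow and re-install its FDB rule.
 */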
1534 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1535                                   struct list_head *decap_flows)
1536 {
1537         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1538         struct mlx5e_tc_flow *flow;
1539
1540         list_for_each_entry(flow, decap_flows, tmp_list) {
1541                 struct mlx5e_tc_flow_parse_attr *parse_attr;
1542                 struct mlx5_flow_attr *attr = flow->attr;
1543                 struct mlx5_flow_handle *rule;
1544                 struct mlx5_flow_spec *spec;
1545                 int err;
1546
1547                 if (flow_flag_test(flow, FAILED))
1548                         continue;
1549
1550                 parse_attr = attr->parse_attr;
1551                 spec = &parse_attr->spec;
1552                 err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1553                 if (err) {
1554                         mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1555                                        err);
1556                         continue;
1557                 }
1558
1559                 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1560                 if (IS_ERR(rule)) {
1561                         err = PTR_ERR(rule);
1562                         mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1563                                        err);
1564                 } else {
1565                         flow->rule[0] = rule;
1566                         flow_flag_set(flow, OFFLOADED);
1567                 }
1568         }
1569 }
1570
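/* Propagate a FIB event to the decap flows of route @r: unoffload them if the
 * route was previously valid and re-offload them when the event is a route
 * replace. Processed flows are collected on @flow_list.
 */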
1571 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1572                                           struct mlx5e_route_entry *r,
1573                                           struct list_head *flow_list,
1574                                           bool replace)
1575 {
1576         struct net_device *tunnel_dev;
1577         LIST_HEAD(decap_flows);
1578
1579         tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1580         if (!tunnel_dev)
1581                 return -ENODEV;
1582
1583         mlx5e_take_all_route_decap_flows(r, &decap_flows);
1584         if (mlx5e_route_entry_valid(r))
1585                 mlx5e_unoffload_flow_list(priv, &decap_flows);
1586         if (replace)
1587                 mlx5e_reoffload_decap(priv, &decap_flows);
1588
1589         list_splice(&decap_flows, flow_list);
1590
1591         return 0;
1592 }
1593
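/* Deferred handler for FIB events. Under RTNL and the encap table lock,
 * update all encap and decap flows of the affected route entry and mark the
 * route valid again on a route replace. The references taken by the notifier
 * are released here.
 */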
1594 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1595 {
1596         struct mlx5e_tc_fib_event_data *event_data =
1597                 container_of(work, struct mlx5e_tc_fib_event_data, work);
1598         struct net_device *ul_dev = event_data->ul_dev;
1599         struct mlx5e_priv *priv = netdev_priv(ul_dev);
1600         struct mlx5e_route_entry *r = event_data->r;
1601         struct mlx5_eswitch *esw;
1602         LIST_HEAD(flow_list);
1603         bool replace;
1604         int err;
1605
1606         /* sync with concurrent neigh updates */
1607         rtnl_lock();
1608         esw = priv->mdev->priv.eswitch;
1609         mutex_lock(&esw->offloads.encap_tbl_lock);
1610         replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1611
1612         if (!mlx5e_route_entry_valid(r) && !replace)
1613                 goto out;
1614
1615         err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1616         if (err)
1617                 mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1618                                err);
1619
1620         err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1621         if (err)
1622                 mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1623                                err);
1624
1625         if (replace)
1626                 r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1627 out:
1628         mutex_unlock(&esw->offloads.encap_tbl_lock);
1629         rtnl_unlock();
1630
1631         mlx5e_put_flow_list(priv, &flow_list);
1632         mlx5e_route_put(priv, event_data->r);
1633         dev_put(event_data->ul_dev);
1634         kfree(event_data);
1635 }
1636
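/* Prepare deferred work for an IPv4 FIB event. Only /32 routes with a legacy
 * (non-nexthop-object) nexthop over an mlx5e uplink are of interest. Called
 * in atomic context, so the route entry reference is taken here and released
 * from the work handler.
 */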
1637 static struct mlx5e_tc_fib_event_data *
1638 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1639                          struct net_device *ul_dev,
1640                          struct mlx5e_tc_tun_encap *encap,
1641                          unsigned long event,
1642                          struct fib_notifier_info *info)
1643 {
1644         struct fib_entry_notifier_info *fen_info;
1645         struct mlx5e_tc_fib_event_data *fib_work;
1646         struct mlx5e_route_entry *r;
1647         struct mlx5e_route_key key;
1648         struct net_device *fib_dev;
1649
1650         fen_info = container_of(info, struct fib_entry_notifier_info, info);
1651         if (fen_info->fi->nh)
1652                 return NULL;
1653         fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1654         if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1655             fen_info->dst_len != 32)
1656                 return NULL;
1657
1658         fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1659         if (!fib_work)
1660                 return ERR_PTR(-ENOMEM);
1661
1662         key.endpoint_ip.v4 = htonl(fen_info->dst);
1663         key.ip_version = 4;
1664
1665         /* Can't fail after this point because releasing the reference to r
1666          * requires taking a sleeping mutex, which we can't do in atomic
1667          * context.
1668          */
1669         r = mlx5e_route_lookup_for_update(encap, &key);
1670         if (!r)
1671                 goto out;
1672         fib_work->r = r;
1673         dev_hold(ul_dev);
1674
1675         return fib_work;
1676
1677 out:
1678         kfree(fib_work);
1679         return NULL;
1680 }
1681
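/* IPv6 counterpart of mlx5e_init_fib_work_ipv4(): only /128 routes over an
 * mlx5e uplink are handled.
 */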
1682 static struct mlx5e_tc_fib_event_data *
1683 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1684                          struct net_device *ul_dev,
1685                          struct mlx5e_tc_tun_encap *encap,
1686                          unsigned long event,
1687                          struct fib_notifier_info *info)
1688 {
1689         struct fib6_entry_notifier_info *fen_info;
1690         struct mlx5e_tc_fib_event_data *fib_work;
1691         struct mlx5e_route_entry *r;
1692         struct mlx5e_route_key key;
1693         struct net_device *fib_dev;
1694
1695         fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1696         fib_dev = fib6_info_nh_dev(fen_info->rt);
1697         if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1698             fen_info->rt->fib6_dst.plen != 128)
1699                 return NULL;
1700
1701         fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1702         if (!fib_work)
1703                 return ERR_PTR(-ENOMEM);
1704
1705         memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1706                sizeof(fen_info->rt->fib6_dst.addr));
1707         key.ip_version = 6;
1708
1709         /* Can't fail after this point because releasing the reference to r
1710          * requires taking a sleeping mutex, which we can't do in atomic
1711          * context.
1712          */
1713         r = mlx5e_route_lookup_for_update(encap, &key);
1714         if (!r)
1715                 goto out;
1716         fib_work->r = r;
1717         dev_hold(ul_dev);
1718
1719         return fib_work;
1720
1721 out:
1722         kfree(fib_work);
1723         return NULL;
1724 }
1725
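/* FIB notifier callback. For route replace and delete events, build a
 * family-specific work item and queue it for deferred processing; all other
 * events are ignored.
 */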
1726 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1727 {
1728         struct mlx5e_tc_fib_event_data *fib_work;
1729         struct fib_notifier_info *info = ptr;
1730         struct mlx5e_tc_tun_encap *encap;
1731         struct net_device *ul_dev;
1732         struct mlx5e_priv *priv;
1733
1734         encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1735         priv = encap->priv;
1736         ul_dev = priv->netdev;
1737         priv = netdev_priv(ul_dev);
1738
1739         switch (event) {
1740         case FIB_EVENT_ENTRY_REPLACE:
1741         case FIB_EVENT_ENTRY_DEL:
1742                 if (info->family == AF_INET)
1743                         fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1744                 else if (info->family == AF_INET6)
1745                         fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1746                 else
1747                         return NOTIFY_DONE;
1748
1749                 if (!IS_ERR_OR_NULL(fib_work)) {
1750                         queue_work(priv->wq, &fib_work->work);
1751                 } else if (IS_ERR(fib_work)) {
1752                         NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1753                         mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1754                                        PTR_ERR(fib_work));
1755                 }
1756
1757                 break;
1758         default:
1759                 return NOTIFY_DONE;
1760         }
1761
1762         return NOTIFY_DONE;
1763 }
1764
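/* Allocate the tunnel encap context for @priv and register its FIB notifier. */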
1765 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1766 {
1767         struct mlx5e_tc_tun_encap *encap;
1768         int err;
1769
1770         encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1771         if (!encap)
1772                 return ERR_PTR(-ENOMEM);
1773
1774         encap->priv = priv;
1775         encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1776         spin_lock_init(&encap->route_lock);
1777         hash_init(encap->route_tbl);
1778         err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1779                                     NULL, NULL);
1780         if (err) {
1781                 kvfree(encap);
1782                 return ERR_PTR(err);
1783         }
1784
1785         return encap;
1786 }
1787
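/* Unregister the FIB notifier, flush any pending FIB event work and free the
 * tunnel encap context.
 */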
1788 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1789 {
1790         if (!encap)
1791                 return;
1792
1793         unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1794         flush_workqueue(encap->priv->wq); /* flush fib event works */
1795         kvfree(encap);
1796 }