drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3
4 #include <net/fib_notifier.h>
5 #include <net/nexthop.h>
6 #include <net/ip_tunnels.h>
7 #include "tc_tun_encap.h"
8 #include "en_tc.h"
9 #include "tc_tun.h"
10 #include "rep/tc.h"
11 #include "diag/en_tc_tracepoint.h"
12
13 enum {
14         MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
15 };
16
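/* If the tunnel route device is an OVS bridge (internal port master), set up
 * forwarding to the egress internal port; otherwise the flow attributes are
 * left unchanged.
 */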
17 static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
18                                      struct mlx5_flow_attr *attr,
19                                      struct mlx5e_encap_entry *e,
20                                      int out_index)
21 {
22         struct net_device *route_dev;
23         int err = 0;
24
25         route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
26
27         if (!route_dev || !netif_is_ovs_master(route_dev))
28                 goto out;
29
30         err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
31                                                 MLX5E_TC_INT_PORT_EGRESS,
32                                                 &attr->action, out_index);
33
34 out:
35         if (route_dev)
36                 dev_put(route_dev);
37
38         return err;
39 }
40
41 struct mlx5e_route_key {
42         int ip_version;
43         union {
44                 __be32 v4;
45                 struct in6_addr v6;
46         } endpoint_ip;
47 };
48
49 struct mlx5e_route_entry {
50         struct mlx5e_route_key key;
51         struct list_head encap_entries;
52         struct list_head decap_flows;
53         u32 flags;
54         struct hlist_node hlist;
55         refcount_t refcnt;
56         int tunnel_dev_index;
57         struct rcu_head rcu;
58 };
59
60 struct mlx5e_tc_tun_encap {
61         struct mlx5e_priv *priv;
62         struct notifier_block fib_nb;
63         spinlock_t route_lock; /* protects route_tbl */
64         unsigned long route_tbl_last_update;
65         DECLARE_HASHTABLE(route_tbl, 8);
66 };
67
68 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
69 {
70         return r->flags & MLX5E_ROUTE_ENTRY_VALID;
71 }
72
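/* Allocate the RX tunnel attributes for a decap flow and copy the outer
 * source and destination IP addresses from the match spec. TUN_RX is only
 * set when both addresses are present, as they are needed for route lookup.
 */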
73 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
74                              struct mlx5_flow_spec *spec)
75 {
76         struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
77         struct mlx5_rx_tun_attr *tun_attr;
78         void *daddr, *saddr;
79         u8 ip_version;
80
81         tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
82         if (!tun_attr)
83                 return -ENOMEM;
84
85         esw_attr->rx_tun_attr = tun_attr;
86         ip_version = mlx5e_tc_get_ip_version(spec, true);
87
88         if (ip_version == 4) {
89                 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
90                                      outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
91                 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
92                                      outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
93                 tun_attr->dst_ip.v4 = *(__be32 *)daddr;
94                 tun_attr->src_ip.v4 = *(__be32 *)saddr;
95                 if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
96                         return 0;
97         }
98 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
99         else if (ip_version == 6) {
100                 int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
101
102                 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
103                                      outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
104                 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
105                                      outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
106                 memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
107                 memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
108                 if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
109                     ipv6_addr_any(&tun_attr->src_ip.v6))
110                         return 0;
111         }
112 #endif
113         /* Only set the flag if both src and dst ip addresses exist. They are
114          * required to establish routing.
115          */
116         flow_flag_set(flow, TUN_RX);
117         flow->attr->tun_ip_version = ip_version;
118         return 0;
119 }
120
121 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
122 {
123         bool all_flow_encaps_valid = true;
124         int i;
125
126         /* A flow can be associated with multiple encap entries.
127          * Before offloading the flow, verify that all of them have
128          * a valid neighbour.
129          */
130         for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
131                 if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
132                         continue;
133                 if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
134                         all_flow_encaps_valid = false;
135                         break;
136                 }
137         }
138
139         return all_flow_encaps_valid;
140 }
141
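/* Offload the cached encapsulation header of @e as a packet reformat and move
 * every attached flow whose encap destinations are all valid from its slow
 * path rule to the FDB encap rule.
 */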
142 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
143                               struct mlx5e_encap_entry *e,
144                               struct list_head *flow_list)
145 {
146         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
147         struct mlx5_pkt_reformat_params reformat_params;
148         struct mlx5_esw_flow_attr *esw_attr;
149         struct mlx5_flow_handle *rule;
150         struct mlx5_flow_attr *attr;
151         struct mlx5_flow_spec *spec;
152         struct mlx5e_tc_flow *flow;
153         int err;
154
155         if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
156                 return;
157
158         memset(&reformat_params, 0, sizeof(reformat_params));
159         reformat_params.type = e->reformat_type;
160         reformat_params.size = e->encap_size;
161         reformat_params.data = e->encap_header;
162         e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
163                                                      &reformat_params,
164                                                      MLX5_FLOW_NAMESPACE_FDB);
165         if (IS_ERR(e->pkt_reformat)) {
166                 mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
167                                PTR_ERR(e->pkt_reformat));
168                 return;
169         }
170         e->flags |= MLX5_ENCAP_ENTRY_VALID;
171         mlx5e_rep_queue_neigh_stats_work(priv);
172
173         list_for_each_entry(flow, flow_list, tmp_list) {
174                 if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
175                         continue;
176
177                 spec = &flow->attr->parse_attr->spec;
178
179                 attr = mlx5e_tc_get_encap_attr(flow);
180                 esw_attr = attr->esw_attr;
181                 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
182                 esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
183
184                 /* Do not offload flows with unresolved neighbors */
185                 if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
186                         continue;
187
188                 err = mlx5e_tc_offload_flow_post_acts(flow);
189                 if (err) {
190                         mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
191                                        err);
192                         continue;
193                 }
194
195                 /* update from slow path rule to encap rule */
196                 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
197                 if (IS_ERR(rule)) {
198                         mlx5e_tc_unoffload_flow_post_acts(flow);
199                         err = PTR_ERR(rule);
200                         mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
201                                        err);
202                         continue;
203                 }
204
205                 mlx5e_tc_unoffload_from_slow_path(esw, flow);
206                 flow->rule[0] = rule;
207                 /* was unset when slow path rule removed */
208                 flow_flag_set(flow, OFFLOADED);
209         }
210 }
211
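/* Invalidate encap entry @e: mark the encap destination of every attached
 * flow as invalid, move offloaded flows back to slow path rules and release
 * the entry's packet reformat.
 */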
212 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
213                               struct mlx5e_encap_entry *e,
214                               struct list_head *flow_list)
215 {
216         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
217         struct mlx5_esw_flow_attr *esw_attr;
218         struct mlx5_flow_handle *rule;
219         struct mlx5_flow_attr *attr;
220         struct mlx5_flow_spec *spec;
221         struct mlx5e_tc_flow *flow;
222         int err;
223
224         list_for_each_entry(flow, flow_list, tmp_list) {
225                 if (!mlx5e_is_offloaded_flow(flow))
226                         continue;
227
228                 attr = mlx5e_tc_get_encap_attr(flow);
229                 esw_attr = attr->esw_attr;
230                 /* mark the flow's encap dest as non-valid */
231                 esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
232                 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
233
234                 /* Clear pkt_reformat before checking the slow path flag: on a
235                  * later iteration the same flow may already have the slow path
236                  * flag set, but its pkt_reformat still needs to be cleared.
237                  */
238                 if (flow_flag_test(flow, SLOW))
239                         continue;
240
241                 /* update from encap rule to slow path rule */
242                 spec = &flow->attr->parse_attr->spec;
243                 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
244
245                 if (IS_ERR(rule)) {
246                         err = PTR_ERR(rule);
247                         mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
248                                        err);
249                         continue;
250                 }
251
252                 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
253                 mlx5e_tc_unoffload_flow_post_acts(flow);
254                 flow->rule[0] = rule;
255                 /* was unset when fast path rule removed */
256                 flow_flag_set(flow, OFFLOADED);
257         }
258
259         /* the encap entry is known to be valid here; release its packet reformat */
260         e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
261         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
262         e->pkt_reformat = NULL;
263 }
264
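/* Take a temporary reference to @flow, wait for its initialization to finish,
 * record @index as its tmp_entry_index and add it to @flow_list. If the flow
 * is already being deleted, wait for it to be unoffloaded from hardware
 * instead.
 */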
265 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
266                                 struct list_head *flow_list,
267                                 int index)
268 {
269         if (IS_ERR(mlx5e_flow_get(flow))) {
270                 /* Flow is being deleted concurrently. Wait for it to be
271                  * unoffloaded from hardware, otherwise deleting encap will
272                  * fail.
273                  */
274                 wait_for_completion(&flow->del_hw_done);
275                 return;
276         }
277         wait_for_completion(&flow->init_done);
278
279         flow->tmp_entry_index = index;
280         list_add(&flow->tmp_list, flow_list);
281 }
282
283 /* Takes a reference to all flows attached to the encap entry and adds them
284  * to flow_list using the 'tmp_list' list_head in mlx5e_tc_flow.
285  */
286 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
287 {
288         struct encap_flow_item *efi;
289         struct mlx5e_tc_flow *flow;
290
291         list_for_each_entry(efi, &e->flows, list) {
292                 flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
293                 mlx5e_take_tmp_flow(flow, flow_list, efi->index);
294         }
295 }
296
297 /* Takes a reference to all flows attached to the route entry and adds them
298  * to flow_list using the 'tmp_list' list_head in mlx5e_tc_flow.
299  */
300 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
301                                              struct list_head *flow_list)
302 {
303         struct mlx5e_tc_flow *flow;
304
305         list_for_each_entry(flow, &r->decap_flows, decap_routes)
306                 mlx5e_take_tmp_flow(flow, flow_list, 0);
307 }
308
309 typedef bool (match_cb)(struct mlx5e_encap_entry *);
310
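/* Walk nhe->encap_list under RCU starting after @e, take a reference to the
 * next live entry, wait for it to finish initialization and return the first
 * entry accepted by @match. The reference to the starting entry @e is
 * released.
 */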
311 static struct mlx5e_encap_entry *
312 mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
313                               struct mlx5e_encap_entry *e,
314                               match_cb match)
315 {
316         struct mlx5e_encap_entry *next = NULL;
317
318 retry:
319         rcu_read_lock();
320
321         /* find encap with non-zero reference counter value */
322         for (next = e ?
323                      list_next_or_null_rcu(&nhe->encap_list,
324                                            &e->encap_list,
325                                            struct mlx5e_encap_entry,
326                                            encap_list) :
327                      list_first_or_null_rcu(&nhe->encap_list,
328                                             struct mlx5e_encap_entry,
329                                             encap_list);
330              next;
331              next = list_next_or_null_rcu(&nhe->encap_list,
332                                           &next->encap_list,
333                                           struct mlx5e_encap_entry,
334                                           encap_list))
335                 if (mlx5e_encap_take(next))
336                         break;
337
338         rcu_read_unlock();
339
340         /* release starting encap */
341         if (e)
342                 mlx5e_encap_put(netdev_priv(e->out_dev), e);
343         if (!next)
344                 return next;
345
346         /* wait for encap to be fully initialized */
347         wait_for_completion(&next->res_ready);
348         /* continue searching if encap entry is not in valid state after completion */
349         if (!match(next)) {
350                 e = next;
351                 goto retry;
352         }
353
354         return next;
355 }
356
357 static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
358 {
359         return e->flags & MLX5_ENCAP_ENTRY_VALID;
360 }
361
362 static struct mlx5e_encap_entry *
363 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
364                            struct mlx5e_encap_entry *e)
365 {
366         return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
367 }
368
369 static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
370 {
371         return e->compl_result >= 0;
372 }
373
374 struct mlx5e_encap_entry *
375 mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
376                           struct mlx5e_encap_entry *e)
377 {
378         return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
379 }
380
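/* Check whether any offloaded flow using this neighbour has passed traffic
 * since the last report, based on its flow counter lastuse, and if so send a
 * neigh event so the kernel refreshes the neighbour entry.
 */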
381 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
382 {
383         struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
384         struct mlx5e_encap_entry *e = NULL;
385         struct mlx5e_tc_flow *flow;
386         struct mlx5_fc *counter;
387         struct neigh_table *tbl;
388         bool neigh_used = false;
389         struct neighbour *n;
390         u64 lastuse;
391
392         if (m_neigh->family == AF_INET)
393                 tbl = &arp_tbl;
394 #if IS_ENABLED(CONFIG_IPV6)
395         else if (m_neigh->family == AF_INET6)
396                 tbl = ipv6_stub->nd_tbl;
397 #endif
398         else
399                 return;
400
401         /* mlx5e_get_next_valid_encap() releases previous encap before returning
402          * next one.
403          */
404         while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
405                 struct mlx5e_priv *priv = netdev_priv(e->out_dev);
406                 struct encap_flow_item *efi, *tmp;
407                 struct mlx5_eswitch *esw;
408                 LIST_HEAD(flow_list);
409
410                 esw = priv->mdev->priv.eswitch;
411                 mutex_lock(&esw->offloads.encap_tbl_lock);
412                 list_for_each_entry_safe(efi, tmp, &e->flows, list) {
413                         flow = container_of(efi, struct mlx5e_tc_flow,
414                                             encaps[efi->index]);
415                         if (IS_ERR(mlx5e_flow_get(flow)))
416                                 continue;
417                         list_add(&flow->tmp_list, &flow_list);
418
419                         if (mlx5e_is_offloaded_flow(flow)) {
420                                 counter = mlx5e_tc_get_counter(flow);
421                                 lastuse = mlx5_fc_query_lastuse(counter);
422                                 if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
423                                         neigh_used = true;
424                                         break;
425                                 }
426                         }
427                 }
428                 mutex_unlock(&esw->offloads.encap_tbl_lock);
429
430                 mlx5e_put_flow_list(priv, &flow_list);
431                 if (neigh_used) {
432                         /* release current encap before breaking the loop */
433                         mlx5e_encap_put(priv, e);
434                         break;
435                 }
436         }
437
438         trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
439
440         if (neigh_used) {
441                 nhe->reported_lastuse = jiffies;
442
443                 /* find the relevant neigh according to the cached device and
444                  * dst ip pair
445                  */
446                 n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
447                 if (!n)
448                         return;
449
450                 neigh_event_send(n, NULL);
451                 neigh_release(n);
452         }
453 }
454
455 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
456 {
457         WARN_ON(!list_empty(&e->flows));
458
459         if (e->compl_result > 0) {
460                 mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
461
462                 if (e->flags & MLX5_ENCAP_ENTRY_VALID)
463                         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
464         }
465
466         kfree(e->tun_info);
467         kfree(e->encap_header);
468         kfree_rcu(e, rcu);
469 }
470
471 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
472                                 struct mlx5e_decap_entry *d)
473 {
474         WARN_ON(!list_empty(&d->flows));
475
476         if (!d->compl_result)
477                 mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
478
479         kfree_rcu(d, rcu);
480 }
481
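/* Release a reference to the encap entry. The last reference removes the
 * entry from its route list and the encap hash table under encap_tbl_lock
 * and frees it.
 */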
482 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
483 {
484         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
485
486         if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
487                 return;
488         list_del(&e->route_list);
489         hash_del_rcu(&e->encap_hlist);
490         mutex_unlock(&esw->offloads.encap_tbl_lock);
491
492         mlx5e_encap_dealloc(priv, e);
493 }
494
495 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
496 {
497         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
498
499         if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
500                 return;
501         hash_del_rcu(&d->hlist);
502         mutex_unlock(&esw->offloads.decap_tbl_lock);
503
504         mlx5e_decap_dealloc(priv, d);
505 }
506
507 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
508                                      struct mlx5e_tc_flow *flow,
509                                      int out_index);
510
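/* Detach the flow from the encap entry at @out_index: release the encap route
 * if source port rewrite was used, drop the flow from the entry's flow list
 * and free the entry when the last reference goes away.
 */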
511 void mlx5e_detach_encap(struct mlx5e_priv *priv,
512                         struct mlx5e_tc_flow *flow,
513                         struct mlx5_flow_attr *attr,
514                         int out_index)
515 {
516         struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
517         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
518
519         if (!mlx5e_is_eswitch_flow(flow))
520                 return;
521
522         if (attr->esw_attr->dests[out_index].flags &
523             MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
524                 mlx5e_detach_encap_route(priv, flow, out_index);
525
526         /* flow wasn't fully initialized */
527         if (!e)
528                 return;
529
530         mutex_lock(&esw->offloads.encap_tbl_lock);
531         list_del(&flow->encaps[out_index].list);
532         flow->encaps[out_index].e = NULL;
533         if (!refcount_dec_and_test(&e->refcnt)) {
534                 mutex_unlock(&esw->offloads.encap_tbl_lock);
535                 return;
536         }
537         list_del(&e->route_list);
538         hash_del_rcu(&e->encap_hlist);
539         mutex_unlock(&esw->offloads.encap_tbl_lock);
540
541         mlx5e_encap_dealloc(priv, e);
542 }
543
544 void mlx5e_detach_decap(struct mlx5e_priv *priv,
545                         struct mlx5e_tc_flow *flow)
546 {
547         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
548         struct mlx5e_decap_entry *d = flow->decap_reformat;
549
550         if (!d)
551                 return;
552
553         mutex_lock(&esw->offloads.decap_tbl_lock);
554         list_del(&flow->l3_to_l2_reformat);
555         flow->decap_reformat = NULL;
556
557         if (!refcount_dec_and_test(&d->refcnt)) {
558                 mutex_unlock(&esw->offloads.decap_tbl_lock);
559                 return;
560         }
561         hash_del_rcu(&d->hlist);
562         mutex_unlock(&esw->offloads.decap_tbl_lock);
563
564         mlx5e_decap_dealloc(priv, d);
565 }
566
567 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
568                                            struct mlx5e_encap_key *b)
569 {
570         return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
571                 a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
572 }
573
574 bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
575                                            struct mlx5e_encap_key *b,
576                                            __be16 tun_flags)
577 {
578         struct ip_tunnel_info *a_info;
579         struct ip_tunnel_info *b_info;
580         bool a_has_opts, b_has_opts;
581
582         if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
583                 return false;
584
585         a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags);
586         b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags);
587
588         /* keys are equal when both don't have any options attached */
589         if (!a_has_opts && !b_has_opts)
590                 return true;
591
592         if (a_has_opts != b_has_opts)
593                 return false;
594
595         /* tunnel options are stored in memory right after the ip_tunnel_info struct */
596         a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
597         b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
598
599         return a_info->options_len == b_info->options_len &&
600                !memcmp(ip_tunnel_info_opts(a_info),
601                        ip_tunnel_info_opts(b_info),
602                        a_info->options_len);
603 }
604
605 static int cmp_decap_info(struct mlx5e_decap_key *a,
606                           struct mlx5e_decap_key *b)
607 {
608         return memcmp(&a->key, &b->key, sizeof(b->key));
609 }
610
611 static int hash_encap_info(struct mlx5e_encap_key *key)
612 {
613         return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
614                      key->tc_tunnel->tunnel_type);
615 }
616
617 static int hash_decap_info(struct mlx5e_decap_key *key)
618 {
619         return jhash(&key->key, sizeof(key->key), 0);
620 }
621
622 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
623 {
624         return refcount_inc_not_zero(&e->refcnt);
625 }
626
627 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
628 {
629         return refcount_inc_not_zero(&e->refcnt);
630 }
631
632 static struct mlx5e_encap_entry *
633 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
634                 uintptr_t hash_key)
635 {
636         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
637         struct mlx5e_encap_key e_key;
638         struct mlx5e_encap_entry *e;
639
640         hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
641                                    encap_hlist, hash_key) {
642                 e_key.ip_tun_key = &e->tun_info->key;
643                 e_key.tc_tunnel = e->tunnel;
644                 if (e->tunnel->encap_info_equal(&e_key, key) &&
645                     mlx5e_encap_take(e))
646                         return e;
647         }
648
649         return NULL;
650 }
651
652 static struct mlx5e_decap_entry *
653 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
654                 uintptr_t hash_key)
655 {
656         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
657         struct mlx5e_decap_key r_key;
658         struct mlx5e_decap_entry *e;
659
660         hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
661                                    hlist, hash_key) {
662                 r_key = e->key;
663                 if (!cmp_decap_info(&r_key, key) &&
664                     mlx5e_decap_take(e))
665                         return e;
666         }
667         return NULL;
668 }
669
670 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
671 {
672         size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
673
674         return kmemdup(tun_info, tun_size, GFP_KERNEL);
675 }
676
677 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
678                                       struct mlx5e_tc_flow *flow,
679                                       int out_index,
680                                       struct mlx5e_encap_entry *e,
681                                       struct netlink_ext_ack *extack)
682 {
683         int i;
684
685         for (i = 0; i < out_index; i++) {
686                 if (flow->encaps[i].e != e)
687                         continue;
688                 NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
689                 netdev_err(priv->netdev, "can't duplicate encap action\n");
690                 return true;
691         }
692
693         return false;
694 }
695
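/* When the tunnel route device is another mlx5 netdev (VF tunnel), redirect
 * through chain 0 with a source port rewrite: add a mod-header action that
 * writes the route vport metadata to VPORT_TO_REG, mark the destination with
 * MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE and save the action id so it can
 * be updated later.
 */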
696 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
697                                struct mlx5_flow_attr *attr,
698                                struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
699                                struct net_device *out_dev,
700                                int route_dev_ifindex,
701                                int out_index)
702 {
703         struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
704         struct net_device *route_dev;
705         u16 vport_num;
706         int err = 0;
707         u32 data;
708
709         route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
710
711         if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
712             !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
713                 goto out;
714
715         err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
716         if (err)
717                 goto out;
718
719         attr->dest_chain = 0;
720         attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
721         esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
722         data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
723                                                        vport_num);
724         err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
725                                                    MLX5_FLOW_NAMESPACE_FDB,
726                                                    VPORT_TO_REG, data);
727         if (err >= 0) {
728                 esw_attr->dests[out_index].src_port_rewrite_act_id = err;
729                 err = 0;
730         }
731
732 out:
733         if (route_dev)
734                 dev_put(route_dev);
735         return err;
736 }
737
738 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
739                                   struct mlx5_esw_flow_attr *attr,
740                                   struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
741                                   struct net_device *out_dev,
742                                   int route_dev_ifindex,
743                                   int out_index)
744 {
745         int act_id = attr->dests[out_index].src_port_rewrite_act_id;
746         struct net_device *route_dev;
747         u16 vport_num;
748         int err = 0;
749         u32 data;
750
751         route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
752
753         if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
754             !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
755                 err = -ENODEV;
756                 goto out;
757         }
758
759         err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
760         if (err)
761                 goto out;
762
763         data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
764                                                        vport_num);
765         mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
766
767 out:
768         if (route_dev)
769                 dev_put(route_dev);
770         return err;
771 }
772
773 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
774 {
775         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
776         struct mlx5_rep_uplink_priv *uplink_priv;
777         struct mlx5e_rep_priv *uplink_rpriv;
778         struct mlx5e_tc_tun_encap *encap;
779         unsigned int ret;
780
781         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
782         uplink_priv = &uplink_rpriv->uplink_priv;
783         encap = uplink_priv->encap;
784
785         spin_lock_bh(&encap->route_lock);
786         ret = encap->route_tbl_last_update;
787         spin_unlock_bh(&encap->route_lock);
788         return ret;
789 }
790
791 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
792                                     struct mlx5e_tc_flow *flow,
793                                     struct mlx5_flow_attr *attr,
794                                     struct mlx5e_encap_entry *e,
795                                     bool new_encap_entry,
796                                     unsigned long tbl_time_before,
797                                     int out_index);
798
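/* Find or create an encap entry for the tunnel described by the flow's parse
 * attributes. New entries build their IPv4/IPv6 encap header outside of
 * encap_tbl_lock. The flow is then attached to the entry and either uses the
 * entry's packet reformat (neighbour resolved) or is marked SLOW until the
 * neighbour becomes valid.
 */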
799 int mlx5e_attach_encap(struct mlx5e_priv *priv,
800                        struct mlx5e_tc_flow *flow,
801                        struct mlx5_flow_attr *attr,
802                        struct net_device *mirred_dev,
803                        int out_index,
804                        struct netlink_ext_ack *extack,
805                        struct net_device **encap_dev)
806 {
807         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
808         struct mlx5e_tc_flow_parse_attr *parse_attr;
809         const struct ip_tunnel_info *tun_info;
810         const struct mlx5e_mpls_info *mpls_info;
811         unsigned long tbl_time_before = 0;
812         struct mlx5e_encap_entry *e;
813         struct mlx5e_encap_key key;
814         bool entry_created = false;
815         unsigned short family;
816         uintptr_t hash_key;
817         int err = 0;
818
819         parse_attr = attr->parse_attr;
820         tun_info = parse_attr->tun_info[out_index];
821         mpls_info = &parse_attr->mpls_info[out_index];
822         family = ip_tunnel_info_af(tun_info);
823         key.ip_tun_key = &tun_info->key;
824         key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
825         if (!key.tc_tunnel) {
826                 NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
827                 return -EOPNOTSUPP;
828         }
829
830         hash_key = hash_encap_info(&key);
831
832         mutex_lock(&esw->offloads.encap_tbl_lock);
833         e = mlx5e_encap_get(priv, &key, hash_key);
834
835         /* an existing entry must be checked for validity before it is reused */
836         if (e) {
837                 /* Check that entry was not already attached to this flow */
838                 if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
839                         err = -EOPNOTSUPP;
840                         goto out_err;
841                 }
842
843                 mutex_unlock(&esw->offloads.encap_tbl_lock);
844                 wait_for_completion(&e->res_ready);
845
846                 /* Protect against concurrent neigh update. */
847                 mutex_lock(&esw->offloads.encap_tbl_lock);
848                 if (e->compl_result < 0) {
849                         err = -EREMOTEIO;
850                         goto out_err;
851                 }
852                 goto attach_flow;
853         }
854
855         e = kzalloc(sizeof(*e), GFP_KERNEL);
856         if (!e) {
857                 err = -ENOMEM;
858                 goto out_err;
859         }
860
861         refcount_set(&e->refcnt, 1);
862         init_completion(&e->res_ready);
863         entry_created = true;
864         INIT_LIST_HEAD(&e->route_list);
865
866         tun_info = mlx5e_dup_tun_info(tun_info);
867         if (!tun_info) {
868                 err = -ENOMEM;
869                 goto out_err_init;
870         }
871         e->tun_info = tun_info;
872         memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
873         err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
874         if (err)
875                 goto out_err_init;
876
877         INIT_LIST_HEAD(&e->flows);
878         hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
879         tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
880         mutex_unlock(&esw->offloads.encap_tbl_lock);
881
882         if (family == AF_INET)
883                 err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
884         else if (family == AF_INET6)
885                 err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
886
887         /* Protect against concurrent neigh update. */
888         mutex_lock(&esw->offloads.encap_tbl_lock);
889         complete_all(&e->res_ready);
890         if (err) {
891                 e->compl_result = err;
892                 goto out_err;
893         }
894         e->compl_result = 1;
895
896 attach_flow:
897         err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
898                                        tbl_time_before, out_index);
899         if (err)
900                 goto out_err;
901
902         err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
903         if (err == -EOPNOTSUPP) {
904                 /* If device doesn't support int port offload,
905                  * redirect to uplink vport.
906                  */
907                 mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
908                 err = 0;
909         } else if (err) {
910                 goto out_err;
911         }
912
913         flow->encaps[out_index].e = e;
914         list_add(&flow->encaps[out_index].list, &e->flows);
915         flow->encaps[out_index].index = out_index;
916         *encap_dev = e->out_dev;
917         if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
918                 attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
919                 attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
920         } else {
921                 flow_flag_set(flow, SLOW);
922         }
923         mutex_unlock(&esw->offloads.encap_tbl_lock);
924
925         return err;
926
927 out_err:
928         mutex_unlock(&esw->offloads.encap_tbl_lock);
929         if (e)
930                 mlx5e_encap_put(priv, e);
931         return err;
932
933 out_err_init:
934         mutex_unlock(&esw->offloads.encap_tbl_lock);
935         kfree(tun_info);
936         kfree(e);
937         return err;
938 }
939
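/* Find or create a decap (L3-to-L2 reformat) entry matching the flow's
 * rebuilt Ethernet header and attach it to the flow. New entries allocate
 * the packet reformat outside of decap_tbl_lock.
 */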
940 int mlx5e_attach_decap(struct mlx5e_priv *priv,
941                        struct mlx5e_tc_flow *flow,
942                        struct netlink_ext_ack *extack)
943 {
944         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
945         struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
946         struct mlx5_pkt_reformat_params reformat_params;
947         struct mlx5e_decap_entry *d;
948         struct mlx5e_decap_key key;
949         uintptr_t hash_key;
950         int err = 0;
951
952         if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
953                 NL_SET_ERR_MSG_MOD(extack,
954                                    "encap header larger than max supported");
955                 return -EOPNOTSUPP;
956         }
957
958         key.key = attr->eth;
959         hash_key = hash_decap_info(&key);
960         mutex_lock(&esw->offloads.decap_tbl_lock);
961         d = mlx5e_decap_get(priv, &key, hash_key);
962         if (d) {
963                 mutex_unlock(&esw->offloads.decap_tbl_lock);
964                 wait_for_completion(&d->res_ready);
965                 mutex_lock(&esw->offloads.decap_tbl_lock);
966                 if (d->compl_result) {
967                         err = -EREMOTEIO;
968                         goto out_free;
969                 }
970                 goto found;
971         }
972
973         d = kzalloc(sizeof(*d), GFP_KERNEL);
974         if (!d) {
975                 err = -ENOMEM;
976                 goto out_err;
977         }
978
979         d->key = key;
980         refcount_set(&d->refcnt, 1);
981         init_completion(&d->res_ready);
982         INIT_LIST_HEAD(&d->flows);
983         hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
984         mutex_unlock(&esw->offloads.decap_tbl_lock);
985
986         memset(&reformat_params, 0, sizeof(reformat_params));
987         reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
988         reformat_params.size = sizeof(attr->eth);
989         reformat_params.data = &attr->eth;
990         d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
991                                                      &reformat_params,
992                                                      MLX5_FLOW_NAMESPACE_FDB);
993         if (IS_ERR(d->pkt_reformat)) {
994                 err = PTR_ERR(d->pkt_reformat);
995                 d->compl_result = err;
996         }
997         mutex_lock(&esw->offloads.decap_tbl_lock);
998         complete_all(&d->res_ready);
999         if (err)
1000                 goto out_free;
1001
1002 found:
1003         flow->decap_reformat = d;
1004         attr->decap_pkt_reformat = d->pkt_reformat;
1005         list_add(&flow->l3_to_l2_reformat, &d->flows);
1006         mutex_unlock(&esw->offloads.decap_tbl_lock);
1007         return 0;
1008
1009 out_free:
1010         mutex_unlock(&esw->offloads.decap_tbl_lock);
1011         mlx5e_decap_put(priv, d);
1012         return err;
1013
1014 out_err:
1015         mutex_unlock(&esw->offloads.decap_tbl_lock);
1016         return err;
1017 }
1018
1019 static int cmp_route_info(struct mlx5e_route_key *a,
1020                           struct mlx5e_route_key *b)
1021 {
1022         if (a->ip_version == 4 && b->ip_version == 4)
1023                 return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
1024                               sizeof(a->endpoint_ip.v4));
1025         else if (a->ip_version == 6 && b->ip_version == 6)
1026                 return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
1027                               sizeof(a->endpoint_ip.v6));
1028         return 1;
1029 }
1030
1031 static u32 hash_route_info(struct mlx5e_route_key *key)
1032 {
1033         if (key->ip_version == 4)
1034                 return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
1035         return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
1036 }
1037
1038 static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
1039                                 struct mlx5e_route_entry *r)
1040 {
1041         WARN_ON(!list_empty(&r->decap_flows));
1042         WARN_ON(!list_empty(&r->encap_entries));
1043
1044         kfree_rcu(r, rcu);
1045 }
1046
1047 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1048 {
1049         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1050
1051         if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
1052                 return;
1053
1054         hash_del_rcu(&r->hlist);
1055         mutex_unlock(&esw->offloads.encap_tbl_lock);
1056
1057         mlx5e_route_dealloc(priv, r);
1058 }
1059
1060 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1061 {
1062         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1063
1064         lockdep_assert_held(&esw->offloads.encap_tbl_lock);
1065
1066         if (!refcount_dec_and_test(&r->refcnt))
1067                 return;
1068         hash_del_rcu(&r->hlist);
1069         mlx5e_route_dealloc(priv, r);
1070 }
1071
1072 static struct mlx5e_route_entry *
1073 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
1074                 u32 hash_key)
1075 {
1076         struct mlx5e_route_key r_key;
1077         struct mlx5e_route_entry *r;
1078
1079         hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
1080                 r_key = r->key;
1081                 if (!cmp_route_info(&r_key, key) &&
1082                     refcount_inc_not_zero(&r->refcnt))
1083                         return r;
1084         }
1085         return NULL;
1086 }
1087
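/* Look up a route entry by tunnel endpoint IP, taking a reference. If no
 * entry exists, allocate a new one, record the current route table update
 * time for the caller and insert it into the route table. An existing but
 * invalid entry yields -EINVAL.
 */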
1088 static struct mlx5e_route_entry *
1089 mlx5e_route_get_create(struct mlx5e_priv *priv,
1090                        struct mlx5e_route_key *key,
1091                        int tunnel_dev_index,
1092                        unsigned long *route_tbl_change_time)
1093 {
1094         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1095         struct mlx5_rep_uplink_priv *uplink_priv;
1096         struct mlx5e_rep_priv *uplink_rpriv;
1097         struct mlx5e_tc_tun_encap *encap;
1098         struct mlx5e_route_entry *r;
1099         u32 hash_key;
1100
1101         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1102         uplink_priv = &uplink_rpriv->uplink_priv;
1103         encap = uplink_priv->encap;
1104
1105         hash_key = hash_route_info(key);
1106         spin_lock_bh(&encap->route_lock);
1107         r = mlx5e_route_get(encap, key, hash_key);
1108         spin_unlock_bh(&encap->route_lock);
1109         if (r) {
1110                 if (!mlx5e_route_entry_valid(r)) {
1111                         mlx5e_route_put_locked(priv, r);
1112                         return ERR_PTR(-EINVAL);
1113                 }
1114                 return r;
1115         }
1116
1117         r = kzalloc(sizeof(*r), GFP_KERNEL);
1118         if (!r)
1119                 return ERR_PTR(-ENOMEM);
1120
1121         r->key = *key;
1122         r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1123         r->tunnel_dev_index = tunnel_dev_index;
1124         refcount_set(&r->refcnt, 1);
1125         INIT_LIST_HEAD(&r->decap_flows);
1126         INIT_LIST_HEAD(&r->encap_entries);
1127
1128         spin_lock_bh(&encap->route_lock);
1129         *route_tbl_change_time = encap->route_tbl_last_update;
1130         hash_add(encap->route_tbl, &r->hlist, hash_key);
1131         spin_unlock_bh(&encap->route_lock);
1132
1133         return r;
1134 }
1135
1136 static struct mlx5e_route_entry *
1137 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1138 {
1139         u32 hash_key = hash_route_info(key);
1140         struct mlx5e_route_entry *r;
1141
1142         spin_lock_bh(&encap->route_lock);
1143         encap->route_tbl_last_update = jiffies;
1144         r = mlx5e_route_get(encap, key, hash_key);
1145         spin_unlock_bh(&encap->route_lock);
1146
1147         return r;
1148 }
1149
1150 struct mlx5e_tc_fib_event_data {
1151         struct work_struct work;
1152         unsigned long event;
1153         struct mlx5e_route_entry *r;
1154         struct net_device *ul_dev;
1155 };
1156
1157 static void mlx5e_tc_fib_event_work(struct work_struct *work);
1158 static struct mlx5e_tc_fib_event_data *
1159 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1160 {
1161         struct mlx5e_tc_fib_event_data *fib_work;
1162
1163         fib_work = kzalloc(sizeof(*fib_work), flags);
1164         if (WARN_ON(!fib_work))
1165                 return NULL;
1166
1167         INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1168         fib_work->event = event;
1169         fib_work->ul_dev = ul_dev;
1170
1171         return fib_work;
1172 }
1173
1174 static int
1175 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1176                            struct mlx5e_route_entry *r,
1177                            unsigned long event)
1178 {
1179         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1180         struct mlx5e_tc_fib_event_data *fib_work;
1181         struct mlx5e_rep_priv *uplink_rpriv;
1182         struct net_device *ul_dev;
1183
1184         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1185         ul_dev = uplink_rpriv->netdev;
1186
1187         fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1188         if (!fib_work)
1189                 return -ENOMEM;
1190
1191         dev_hold(ul_dev);
1192         refcount_inc(&r->refcnt);
1193         fib_work->r = r;
1194         queue_work(priv->wq, &fib_work->work);
1195
1196         return 0;
1197 }
1198
1199 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1200                              struct mlx5e_tc_flow *flow)
1201 {
1202         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1203         unsigned long tbl_time_before, tbl_time_after;
1204         struct mlx5e_tc_flow_parse_attr *parse_attr;
1205         struct mlx5_flow_attr *attr = flow->attr;
1206         struct mlx5_esw_flow_attr *esw_attr;
1207         struct mlx5e_route_entry *r;
1208         struct mlx5e_route_key key;
1209         int err = 0;
1210
1211         esw_attr = attr->esw_attr;
1212         parse_attr = attr->parse_attr;
1213         mutex_lock(&esw->offloads.encap_tbl_lock);
1214         if (!esw_attr->rx_tun_attr)
1215                 goto out;
1216
1217         tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1218         tbl_time_after = tbl_time_before;
1219         err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
1220         if (err || !esw_attr->rx_tun_attr->decap_vport)
1221                 goto out;
1222
1223         key.ip_version = attr->tun_ip_version;
1224         if (key.ip_version == 4)
1225                 key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1226         else
1227                 key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1228
1229         r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1230                                    &tbl_time_after);
1231         if (IS_ERR(r)) {
1232                 err = PTR_ERR(r);
1233                 goto out;
1234         }
1235         /* Routing changed concurrently. The FIB event handler might have
1236          * missed the new entry, so schedule an update.
1237          */
1238         if (tbl_time_before != tbl_time_after) {
1239                 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1240                 if (err) {
1241                         mlx5e_route_put_locked(priv, r);
1242                         goto out;
1243                 }
1244         }
1245
1246         flow->decap_route = r;
1247         list_add(&flow->decap_routes, &r->decap_flows);
1248         mutex_unlock(&esw->offloads.encap_tbl_lock);
1249         return 0;
1250
1251 out:
1252         mutex_unlock(&esw->offloads.encap_tbl_lock);
1253         return err;
1254 }
1255
1256 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1257                                     struct mlx5e_tc_flow *flow,
1258                                     struct mlx5_flow_attr *attr,
1259                                     struct mlx5e_encap_entry *e,
1260                                     bool new_encap_entry,
1261                                     unsigned long tbl_time_before,
1262                                     int out_index)
1263 {
1264         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1265         unsigned long tbl_time_after = tbl_time_before;
1266         struct mlx5e_tc_flow_parse_attr *parse_attr;
1267         const struct ip_tunnel_info *tun_info;
1268         struct mlx5_esw_flow_attr *esw_attr;
1269         struct mlx5e_route_entry *r;
1270         struct mlx5e_route_key key;
1271         unsigned short family;
1272         int err = 0;
1273
1274         esw_attr = attr->esw_attr;
1275         parse_attr = attr->parse_attr;
1276         tun_info = parse_attr->tun_info[out_index];
1277         family = ip_tunnel_info_af(tun_info);
1278
1279         if (family == AF_INET) {
1280                 key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1281                 key.ip_version = 4;
1282         } else if (family == AF_INET6) {
1283                 key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1284                 key.ip_version = 6;
1285         }
1286
1287         err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1288                                   e->route_dev_ifindex, out_index);
1289         if (err || !(esw_attr->dests[out_index].flags &
1290                      MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1291                 return err;
1292
1293         r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1294                                    &tbl_time_after);
1295         if (IS_ERR(r))
1296                 return PTR_ERR(r);
1297         /* Routing changed concurrently. The FIB event handler might have
1298          * missed the new entry, so schedule an update.
1299          */
1300         if (tbl_time_before != tbl_time_after) {
1301                 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1302                 if (err) {
1303                         mlx5e_route_put_locked(priv, r);
1304                         return err;
1305                 }
1306         }
1307
1308         flow->encap_routes[out_index].r = r;
1309         if (new_encap_entry)
1310                 list_add(&e->route_list, &r->encap_entries);
1311         flow->encap_routes[out_index].index = out_index;
1312         return 0;
1313 }
1314
1315 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1316                               struct mlx5e_tc_flow *flow)
1317 {
1318         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1319         struct mlx5e_route_entry *r = flow->decap_route;
1320
1321         if (!r)
1322                 return;
1323
1324         mutex_lock(&esw->offloads.encap_tbl_lock);
1325         list_del(&flow->decap_routes);
1326         flow->decap_route = NULL;
1327
1328         if (!refcount_dec_and_test(&r->refcnt)) {
1329                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1330                 return;
1331         }
1332         hash_del_rcu(&r->hlist);
1333         mutex_unlock(&esw->offloads.encap_tbl_lock);
1334
1335         mlx5e_route_dealloc(priv, r);
1336 }
1337
1338 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1339                                      struct mlx5e_tc_flow *flow,
1340                                      int out_index)
1341 {
1342         struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1343         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1344         struct mlx5e_encap_entry *e, *tmp;
1345
1346         if (!r)
1347                 return;
1348
1349         mutex_lock(&esw->offloads.encap_tbl_lock);
1350         flow->encap_routes[out_index].r = NULL;
1351
1352         if (!refcount_dec_and_test(&r->refcnt)) {
1353                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1354                 return;
1355         }
1356         list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1357                 list_del_init(&e->route_list);
1358         hash_del_rcu(&r->hlist);
1359         mutex_unlock(&esw->offloads.encap_tbl_lock);
1360
1361         mlx5e_route_dealloc(priv, r);
1362 }
1363
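/* The route used by this encap entry is gone: unoffload every flow that uses
 * it, detach their mod-header actions, clear the encap destinations, mark the
 * entry as having no route and release its packet reformat.
 */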
1364 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1365                                    struct mlx5e_encap_entry *e,
1366                                    struct list_head *encap_flows)
1367 {
1368         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1369         struct mlx5e_tc_flow *flow;
1370
1371         list_for_each_entry(flow, encap_flows, tmp_list) {
1372                 struct mlx5_esw_flow_attr *esw_attr;
1373                 struct mlx5_flow_attr *attr;
1374
1375                 if (!mlx5e_is_offloaded_flow(flow))
1376                         continue;
1377
1378                 attr = mlx5e_tc_get_encap_attr(flow);
1379                 esw_attr = attr->esw_attr;
1380
1381                 if (flow_flag_test(flow, SLOW))
1382                         mlx5e_tc_unoffload_from_slow_path(esw, flow);
1383                 else
1384                         mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1385
1386                 mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1387                 attr->modify_hdr = NULL;
1388
1389                 esw_attr->dests[flow->tmp_entry_index].flags &=
1390                         ~MLX5_ESW_DEST_ENCAP_VALID;
1391                 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1392         }
1393
1394         e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1395         if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1396                 e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1397                 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1398                 e->pkt_reformat = NULL;
1399         }
1400 }
1401
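/* The route used by this encap entry is available again: rebuild the encap
 * header, update the source port rewrite and mod-header of each flow, then
 * re-offload flows to FDB encap rules, falling back to slow path rules while
 * any encap destination is still unresolved.
 */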
1402 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1403                                   struct net_device *tunnel_dev,
1404                                   struct mlx5e_encap_entry *e,
1405                                   struct list_head *encap_flows)
1406 {
1407         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1408         struct mlx5e_tc_flow *flow;
1409         int err;
1410
1411         err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1412                 mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1413                 mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1414         if (err)
1415                 mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1416         e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1417
1418         list_for_each_entry(flow, encap_flows, tmp_list) {
1419                 struct mlx5e_tc_flow_parse_attr *parse_attr;
1420                 struct mlx5_esw_flow_attr *esw_attr;
1421                 struct mlx5_flow_handle *rule;
1422                 struct mlx5_flow_attr *attr;
1423                 struct mlx5_flow_spec *spec;
1424
1425                 if (flow_flag_test(flow, FAILED))
1426                         continue;
1427
1428                 spec = &flow->attr->parse_attr->spec;
1429
1430                 attr = mlx5e_tc_get_encap_attr(flow);
1431                 esw_attr = attr->esw_attr;
1432                 parse_attr = attr->parse_attr;
1433
1434                 err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1435                                              e->out_dev, e->route_dev_ifindex,
1436                                              flow->tmp_entry_index);
1437                 if (err) {
1438                         mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d\n", err);
1439                         continue;
1440                 }
1441
1442                 err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1443                 if (err) {
1444                         mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d\n",
1445                                        err);
1446                         continue;
1447                 }
1448
1449                 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1450                         esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1451                         esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1452                         if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1453                                 goto offload_to_slow_path;
1454
1455                         err = mlx5e_tc_offload_flow_post_acts(flow);
1456                         if (err) {
1457                                 mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1458                                                err);
1459                                 goto offload_to_slow_path;
1460                         }
1461
1462                         /* update from slow path rule to encap rule */
1463                         rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1464                         if (IS_ERR(rule)) {
1465                                 mlx5e_tc_unoffload_flow_post_acts(flow);
1466                                 err = PTR_ERR(rule);
1467                                 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1468                                                err);
1469                         } else {
1470                                 flow->rule[0] = rule;
1471                         }
1472                 } else {
1473 offload_to_slow_path:
1474                         rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1475                         /* mark the flow's encap dest as invalid */
1476                         esw_attr->dests[flow->tmp_entry_index].flags &=
1477                                 ~MLX5_ESW_DEST_ENCAP_VALID;
1478
1479                         if (IS_ERR(rule)) {
1480                                 err = PTR_ERR(rule);
1481                                 mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1482                                                err);
1483                         } else {
1484                                 flow->rule[0] = rule;
1485                         }
1486                 }
1487                 flow_flag_set(flow, OFFLOADED);
1488         }
1489 }
1490
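     /* Walk the encap entries attached to route entry @r. Their flows are
      * invalidated if the route entry was valid and, on a FIB replace event,
      * re-offloaded through the updated tunnel device. All affected flows end
      * up on @flow_list so the caller can put them.
      */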
1491 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1492                                      struct mlx5e_route_entry *r,
1493                                      struct list_head *flow_list,
1494                                      bool replace)
1495 {
1496         struct net_device *tunnel_dev;
1497         struct mlx5e_encap_entry *e;
1498
1499         tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1500         if (!tunnel_dev)
1501                 return -ENODEV;
1502
1503         list_for_each_entry(e, &r->encap_entries, route_list) {
1504                 LIST_HEAD(encap_flows);
1505
1506                 mlx5e_take_all_encap_flows(e, &encap_flows);
1507                 if (list_empty(&encap_flows))
1508                         continue;
1509
1510                 if (mlx5e_route_entry_valid(r))
1511                         mlx5e_invalidate_encap(priv, e, &encap_flows);
1512
1513                 if (!replace) {
1514                         list_splice(&encap_flows, flow_list);
1515                         continue;
1516                 }
1517
1518                 mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1519                 list_splice(&encap_flows, flow_list);
1520         }
1521
1522         return 0;
1523 }
1524
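     /* Remove the FDB rules of every currently offloaded flow on @flow_list. */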
1525 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1526                                       struct list_head *flow_list)
1527 {
1528         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1529         struct mlx5e_tc_flow *flow;
1530
1531         list_for_each_entry(flow, flow_list, tmp_list)
1532                 if (mlx5e_is_offloaded_flow(flow))
1533                         mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1534 }
1535
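     /* Re-offload tunnel decap flows after a route replace: redo the tunnel
      * route lookup for each flow and reinstall its FDB rule, skipping flows
      * that previously failed to offload.
      */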
1536 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1537                                   struct list_head *decap_flows)
1538 {
1539         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1540         struct mlx5e_tc_flow *flow;
1541
1542         list_for_each_entry(flow, decap_flows, tmp_list) {
1543                 struct mlx5e_tc_flow_parse_attr *parse_attr;
1544                 struct mlx5_flow_attr *attr = flow->attr;
1545                 struct mlx5_flow_handle *rule;
1546                 struct mlx5_flow_spec *spec;
1547                 int err;
1548
1549                 if (flow_flag_test(flow, FAILED))
1550                         continue;
1551
1552                 parse_attr = attr->parse_attr;
1553                 spec = &parse_attr->spec;
1554                 err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1555                 if (err) {
1556                         mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1557                                        err);
1558                         continue;
1559                 }
1560
1561                 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1562                 if (IS_ERR(rule)) {
1563                         err = PTR_ERR(rule);
1564                         mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1565                                        err);
1566                 } else {
1567                         flow->rule[0] = rule;
1568                         flow_flag_set(flow, OFFLOADED);
1569                 }
1570         }
1571 }
1572
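     /* Handle the decap flows attached to route entry @r: unoffload them if
      * the route entry was valid, re-offload them on a FIB replace event, and
      * move them to @flow_list for the caller to put.
      */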
1573 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1574                                           struct mlx5e_route_entry *r,
1575                                           struct list_head *flow_list,
1576                                           bool replace)
1577 {
1578         struct net_device *tunnel_dev;
1579         LIST_HEAD(decap_flows);
1580
1581         tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1582         if (!tunnel_dev)
1583                 return -ENODEV;
1584
1585         mlx5e_take_all_route_decap_flows(r, &decap_flows);
1586         if (mlx5e_route_entry_valid(r))
1587                 mlx5e_unoffload_flow_list(priv, &decap_flows);
1588         if (replace)
1589                 mlx5e_reoffload_decap(priv, &decap_flows);
1590
1591         list_splice(&decap_flows, flow_list);
1592
1593         return 0;
1594 }
1595
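     /* Work item that applies a single FIB replace/delete event to route
      * entry event_data->r. Runs under rtnl_lock (to sync with neigh updates)
      * and encap_tbl_lock, updates every encap and decap flow using the
      * route, and marks the route entry valid again on a replace. Finally
      * drops the flow, route entry and uplink netdev references taken when
      * the work was queued.
      */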
1596 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1597 {
1598         struct mlx5e_tc_fib_event_data *event_data =
1599                 container_of(work, struct mlx5e_tc_fib_event_data, work);
1600         struct net_device *ul_dev = event_data->ul_dev;
1601         struct mlx5e_priv *priv = netdev_priv(ul_dev);
1602         struct mlx5e_route_entry *r = event_data->r;
1603         struct mlx5_eswitch *esw;
1604         LIST_HEAD(flow_list);
1605         bool replace;
1606         int err;
1607
1608         /* sync with concurrent neigh updates */
1609         rtnl_lock();
1610         esw = priv->mdev->priv.eswitch;
1611         mutex_lock(&esw->offloads.encap_tbl_lock);
1612         replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1613
1614         if (!mlx5e_route_entry_valid(r) && !replace)
1615                 goto out;
1616
1617         err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1618         if (err)
1619                 mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1620                                err);
1621
1622         err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1623         if (err)
1624                 mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1625                                err);
1626
1627         if (replace)
1628                 r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1629 out:
1630         mutex_unlock(&esw->offloads.encap_tbl_lock);
1631         rtnl_unlock();
1632
1633         mlx5e_put_flow_list(priv, &flow_list);
1634         mlx5e_route_put(priv, event_data->r);
1635         dev_put(event_data->ul_dev);
1636         kfree(event_data);
1637 }
1638
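     /* Build the work item for an IPv4 FIB event. Returns NULL when the event
      * is not relevant (nexthop object route, not a /32 route via an mlx5e
      * netdev, or no matching offloaded route entry), an ERR_PTR on allocation
      * failure, or the work item to queue. Runs in atomic context, so only a
      * reference to the route entry is taken here; it is released later by the
      * work handler.
      */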
1639 static struct mlx5e_tc_fib_event_data *
1640 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1641                          struct net_device *ul_dev,
1642                          struct mlx5e_tc_tun_encap *encap,
1643                          unsigned long event,
1644                          struct fib_notifier_info *info)
1645 {
1646         struct fib_entry_notifier_info *fen_info;
1647         struct mlx5e_tc_fib_event_data *fib_work;
1648         struct mlx5e_route_entry *r;
1649         struct mlx5e_route_key key;
1650         struct net_device *fib_dev;
1651
1652         fen_info = container_of(info, struct fib_entry_notifier_info, info);
1653         if (fen_info->fi->nh)
1654                 return NULL;
1655         fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1656         if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1657             fen_info->dst_len != 32)
1658                 return NULL;
1659
1660         fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1661         if (!fib_work)
1662                 return ERR_PTR(-ENOMEM);
1663
1664         key.endpoint_ip.v4 = htonl(fen_info->dst);
1665         key.ip_version = 4;
1666
1667         /* Can't fail after this point because releasing the reference to r
1668          * requires taking a sleeping mutex, which we can't do in atomic
1669          * context.
1670          */
1671         r = mlx5e_route_lookup_for_update(encap, &key);
1672         if (!r)
1673                 goto out;
1674         fib_work->r = r;
1675         dev_hold(ul_dev);
1676
1677         return fib_work;
1678
1679 out:
1680         kfree(fib_work);
1681         return NULL;
1682 }
1683
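     /* IPv6 counterpart of mlx5e_init_fib_work_ipv4(): only /128 routes via
      * an mlx5e netdev are handled, with the same return convention.
      */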
1684 static struct mlx5e_tc_fib_event_data *
1685 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1686                          struct net_device *ul_dev,
1687                          struct mlx5e_tc_tun_encap *encap,
1688                          unsigned long event,
1689                          struct fib_notifier_info *info)
1690 {
1691         struct fib6_entry_notifier_info *fen_info;
1692         struct mlx5e_tc_fib_event_data *fib_work;
1693         struct mlx5e_route_entry *r;
1694         struct mlx5e_route_key key;
1695         struct net_device *fib_dev;
1696
1697         fen_info = container_of(info, struct fib6_entry_notifier_info, info);
             if (fen_info->rt->nh)
                     return NULL;
1698         fib_dev = fib6_info_nh_dev(fen_info->rt);
1699         if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1700             fen_info->rt->fib6_dst.plen != 128)
1701                 return NULL;
1702
1703         fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1704         if (!fib_work)
1705                 return ERR_PTR(-ENOMEM);
1706
1707         memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1708                sizeof(fen_info->rt->fib6_dst.addr));
1709         key.ip_version = 6;
1710
1711         /* Can't fail after this point because releasing the reference to r
1712          * requires taking a sleeping mutex, which we can't do in atomic
1713          * context.
1714          */
1715         r = mlx5e_route_lookup_for_update(encap, &key);
1716         if (!r)
1717                 goto out;
1718         fib_work->r = r;
1719         dev_hold(ul_dev);
1720
1721         return fib_work;
1722
1723 out:
1724         kfree(fib_work);
1725         return NULL;
1726 }
1727
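     /* FIB notifier callback. For route replace/delete events, build a work
      * item in atomic context and queue it on the driver workqueue; the
      * actual flow updates happen in mlx5e_tc_fib_event_work().
      */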
1728 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1729 {
1730         struct mlx5e_tc_fib_event_data *fib_work;
1731         struct fib_notifier_info *info = ptr;
1732         struct mlx5e_tc_tun_encap *encap;
1733         struct net_device *ul_dev;
1734         struct mlx5e_priv *priv;
1735
1736         encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1737         priv = encap->priv;
1738         ul_dev = priv->netdev;
1740
1741         switch (event) {
1742         case FIB_EVENT_ENTRY_REPLACE:
1743         case FIB_EVENT_ENTRY_DEL:
1744                 if (info->family == AF_INET)
1745                         fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1746                 else if (info->family == AF_INET6)
1747                         fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1748                 else
1749                         return NOTIFY_DONE;
1750
1751                 if (!IS_ERR_OR_NULL(fib_work)) {
1752                         queue_work(priv->wq, &fib_work->work);
1753                 } else if (IS_ERR(fib_work)) {
1754                         NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1755                         mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1756                                        PTR_ERR(fib_work));
1757                 }
1758
1759                 break;
1760         default:
1761                 return NOTIFY_DONE;
1762         }
1763
1764         return NOTIFY_DONE;
1765 }
1766
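     /* Allocate the tunnel encap offload context for @priv: initialize the
      * route table and its lock and register the FIB notifier that drives
      * route change handling for offloaded tunnels.
      */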
1767 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1768 {
1769         struct mlx5e_tc_tun_encap *encap;
1770         int err;
1771
1772         encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1773         if (!encap)
1774                 return ERR_PTR(-ENOMEM);
1775
1776         encap->priv = priv;
1777         encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1778         spin_lock_init(&encap->route_lock);
1779         hash_init(encap->route_tbl);
1780         err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1781                                     NULL, NULL);
1782         if (err) {
1783                 kvfree(encap);
1784                 return ERR_PTR(err);
1785         }
1786
1787         return encap;
1788 }
1789
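     /* Unregister the FIB notifier, drain queued FIB event work and free the
      * encap context.
      */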
1790 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1791 {
1792         if (!encap)
1793                 return;
1794
1795         unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1796         flush_workqueue(encap->priv->wq); /* flush pending fib event work */
1797         kvfree(encap);
1798 }