powerpc/mm: Avoid calling arch_enter/leave_lazy_mmu() in set_ptes
[platform/kernel/linux-starfive.git] / drivers / net / ethernet / sfc / tc_conntrack.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3  * Driver for Solarflare network controllers and boards
4  * Copyright 2023, Advanced Micro Devices, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published
8  * by the Free Software Foundation, incorporated herein by reference.
9  */
10
11 #include "tc_conntrack.h"
12 #include "tc.h"
13 #include "mae.h"
14
/* Forward declaration: the zone teardown helper below passes this callback
 * to nf_flow_table_offload_del_cb() before its definition appears.
 */
static int efx_tc_flow_block(enum tc_setup_type type,
			     void *type_data, void *cb_priv);
17
18 static const struct rhashtable_params efx_tc_ct_zone_ht_params = {
19         .key_len        = offsetof(struct efx_tc_ct_zone, linkage),
20         .key_offset     = 0,
21         .head_offset    = offsetof(struct efx_tc_ct_zone, linkage),
22 };
23
24 static const struct rhashtable_params efx_tc_ct_ht_params = {
25         .key_len        = offsetof(struct efx_tc_ct_entry, linkage),
26         .key_offset     = 0,
27         .head_offset    = offsetof(struct efx_tc_ct_entry, linkage),
28 };
29
30 static void efx_tc_ct_zone_free(void *ptr, void *arg)
31 {
32         struct efx_tc_ct_zone *zone = ptr;
33         struct efx_nic *efx = zone->efx;
34
35         netif_err(efx, drv, efx->net_dev,
36                   "tc ct_zone %u still present at teardown, removing\n",
37                   zone->zone);
38
39         nf_flow_table_offload_del_cb(zone->nf_ft, efx_tc_flow_block, zone);
40         kfree(zone);
41 }
42
43 static void efx_tc_ct_free(void *ptr, void *arg)
44 {
45         struct efx_tc_ct_entry *conn = ptr;
46         struct efx_nic *efx = arg;
47
48         netif_err(efx, drv, efx->net_dev,
49                   "tc ct_entry %lx still present at teardown\n",
50                   conn->cookie);
51
52         /* We can release the counter, but we can't remove the CT itself
53          * from hardware because the table meta is already gone.
54          */
55         efx_tc_flower_release_counter(efx, conn->cnt);
56         kfree(conn);
57 }
58
59 int efx_tc_init_conntrack(struct efx_nic *efx)
60 {
61         int rc;
62
63         rc = rhashtable_init(&efx->tc->ct_zone_ht, &efx_tc_ct_zone_ht_params);
64         if (rc < 0)
65                 goto fail_ct_zone_ht;
66         rc = rhashtable_init(&efx->tc->ct_ht, &efx_tc_ct_ht_params);
67         if (rc < 0)
68                 goto fail_ct_ht;
69         return 0;
70 fail_ct_ht:
71         rhashtable_destroy(&efx->tc->ct_zone_ht);
72 fail_ct_zone_ht:
73         return rc;
74 }
75
76 /* Only call this in init failure teardown.
77  * Normal exit should fini instead as there may be entries in the table.
78  */
79 void efx_tc_destroy_conntrack(struct efx_nic *efx)
80 {
81         rhashtable_destroy(&efx->tc->ct_ht);
82         rhashtable_destroy(&efx->tc->ct_zone_ht);
83 }
84
85 void efx_tc_fini_conntrack(struct efx_nic *efx)
86 {
87         rhashtable_free_and_destroy(&efx->tc->ct_zone_ht, efx_tc_ct_zone_free, NULL);
88         rhashtable_free_and_destroy(&efx->tc->ct_ht, efx_tc_ct_free, efx);
89 }
90
/* Narrow a 32-bit big-endian TCP_FLAG_* constant to a 16-bit big-endian
 * value made of its upper 16 bits (in CPU order), for comparison against
 * the 16-bit flags of a flow_match_tcp.
 */
#define EFX_NF_TCP_FLAG(flg)	\
	(cpu_to_be16(be32_to_cpu(TCP_FLAG_##flg) >> 16))
92
93 static int efx_tc_ct_parse_match(struct efx_nic *efx, struct flow_rule *fr,
94                                  struct efx_tc_ct_entry *conn)
95 {
96         struct flow_dissector *dissector = fr->match.dissector;
97         unsigned char ipv = 0;
98         bool tcp = false;
99
100         if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_CONTROL)) {
101                 struct flow_match_control fm;
102
103                 flow_rule_match_control(fr, &fm);
104                 if (IS_ALL_ONES(fm.mask->addr_type))
105                         switch (fm.key->addr_type) {
106                         case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
107                                 ipv = 4;
108                                 break;
109                         case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
110                                 ipv = 6;
111                                 break;
112                         default:
113                                 break;
114                         }
115         }
116
117         if (!ipv) {
118                 netif_dbg(efx, drv, efx->net_dev,
119                           "Conntrack missing ipv specification\n");
120                 return -EOPNOTSUPP;
121         }
122
123         if (dissector->used_keys &
124             ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
125               BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
126               BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
127               BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
128               BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
129               BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
130               BIT_ULL(FLOW_DISSECTOR_KEY_META))) {
131                 netif_dbg(efx, drv, efx->net_dev,
132                           "Unsupported conntrack keys %#llx\n",
133                           dissector->used_keys);
134                 return -EOPNOTSUPP;
135         }
136
137         if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_BASIC)) {
138                 struct flow_match_basic fm;
139
140                 flow_rule_match_basic(fr, &fm);
141                 if (!IS_ALL_ONES(fm.mask->n_proto)) {
142                         netif_dbg(efx, drv, efx->net_dev,
143                                   "Conntrack eth_proto is not exact-match; mask %04x\n",
144                                    ntohs(fm.mask->n_proto));
145                         return -EOPNOTSUPP;
146                 }
147                 conn->eth_proto = fm.key->n_proto;
148                 if (conn->eth_proto != (ipv == 4 ? htons(ETH_P_IP)
149                                                  : htons(ETH_P_IPV6))) {
150                         netif_dbg(efx, drv, efx->net_dev,
151                                   "Conntrack eth_proto is not IPv%u, is %04x\n",
152                                    ipv, ntohs(conn->eth_proto));
153                         return -EOPNOTSUPP;
154                 }
155                 if (!IS_ALL_ONES(fm.mask->ip_proto)) {
156                         netif_dbg(efx, drv, efx->net_dev,
157                                   "Conntrack ip_proto is not exact-match; mask %02x\n",
158                                    fm.mask->ip_proto);
159                         return -EOPNOTSUPP;
160                 }
161                 conn->ip_proto = fm.key->ip_proto;
162                 switch (conn->ip_proto) {
163                 case IPPROTO_TCP:
164                         tcp = true;
165                         break;
166                 case IPPROTO_UDP:
167                         break;
168                 default:
169                         netif_dbg(efx, drv, efx->net_dev,
170                                   "Conntrack ip_proto not TCP or UDP, is %02x\n",
171                                    conn->ip_proto);
172                         return -EOPNOTSUPP;
173                 }
174         } else {
175                 netif_dbg(efx, drv, efx->net_dev,
176                           "Conntrack missing eth_proto, ip_proto\n");
177                 return -EOPNOTSUPP;
178         }
179
180         if (ipv == 4 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
181                 struct flow_match_ipv4_addrs fm;
182
183                 flow_rule_match_ipv4_addrs(fr, &fm);
184                 if (!IS_ALL_ONES(fm.mask->src)) {
185                         netif_dbg(efx, drv, efx->net_dev,
186                                   "Conntrack ipv4.src is not exact-match; mask %08x\n",
187                                    ntohl(fm.mask->src));
188                         return -EOPNOTSUPP;
189                 }
190                 conn->src_ip = fm.key->src;
191                 if (!IS_ALL_ONES(fm.mask->dst)) {
192                         netif_dbg(efx, drv, efx->net_dev,
193                                   "Conntrack ipv4.dst is not exact-match; mask %08x\n",
194                                    ntohl(fm.mask->dst));
195                         return -EOPNOTSUPP;
196                 }
197                 conn->dst_ip = fm.key->dst;
198         } else if (ipv == 6 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
199                 struct flow_match_ipv6_addrs fm;
200
201                 flow_rule_match_ipv6_addrs(fr, &fm);
202                 if (!efx_ipv6_addr_all_ones(&fm.mask->src)) {
203                         netif_dbg(efx, drv, efx->net_dev,
204                                   "Conntrack ipv6.src is not exact-match; mask %pI6\n",
205                                    &fm.mask->src);
206                         return -EOPNOTSUPP;
207                 }
208                 conn->src_ip6 = fm.key->src;
209                 if (!efx_ipv6_addr_all_ones(&fm.mask->dst)) {
210                         netif_dbg(efx, drv, efx->net_dev,
211                                   "Conntrack ipv6.dst is not exact-match; mask %pI6\n",
212                                    &fm.mask->dst);
213                         return -EOPNOTSUPP;
214                 }
215                 conn->dst_ip6 = fm.key->dst;
216         } else {
217                 netif_dbg(efx, drv, efx->net_dev,
218                           "Conntrack missing IPv%u addrs\n", ipv);
219                 return -EOPNOTSUPP;
220         }
221
222         if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_PORTS)) {
223                 struct flow_match_ports fm;
224
225                 flow_rule_match_ports(fr, &fm);
226                 if (!IS_ALL_ONES(fm.mask->src)) {
227                         netif_dbg(efx, drv, efx->net_dev,
228                                   "Conntrack ports.src is not exact-match; mask %04x\n",
229                                    ntohs(fm.mask->src));
230                         return -EOPNOTSUPP;
231                 }
232                 conn->l4_sport = fm.key->src;
233                 if (!IS_ALL_ONES(fm.mask->dst)) {
234                         netif_dbg(efx, drv, efx->net_dev,
235                                   "Conntrack ports.dst is not exact-match; mask %04x\n",
236                                    ntohs(fm.mask->dst));
237                         return -EOPNOTSUPP;
238                 }
239                 conn->l4_dport = fm.key->dst;
240         } else {
241                 netif_dbg(efx, drv, efx->net_dev, "Conntrack missing L4 ports\n");
242                 return -EOPNOTSUPP;
243         }
244
245         if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_TCP)) {
246                 __be16 tcp_interesting_flags;
247                 struct flow_match_tcp fm;
248
249                 if (!tcp) {
250                         netif_dbg(efx, drv, efx->net_dev,
251                                   "Conntrack matching on TCP keys but ipproto is not tcp\n");
252                         return -EOPNOTSUPP;
253                 }
254                 flow_rule_match_tcp(fr, &fm);
255                 tcp_interesting_flags = EFX_NF_TCP_FLAG(SYN) |
256                                         EFX_NF_TCP_FLAG(RST) |
257                                         EFX_NF_TCP_FLAG(FIN);
258                 /* If any of the tcp_interesting_flags is set, we always
259                  * inhibit CT lookup in LHS (so SW can update CT table).
260                  */
261                 if (fm.key->flags & tcp_interesting_flags) {
262                         netif_dbg(efx, drv, efx->net_dev,
263                                   "Unsupported conntrack tcp.flags %04x/%04x\n",
264                                    ntohs(fm.key->flags), ntohs(fm.mask->flags));
265                         return -EOPNOTSUPP;
266                 }
267                 /* Other TCP flags cannot be filtered at CT */
268                 if (fm.mask->flags & ~tcp_interesting_flags) {
269                         netif_dbg(efx, drv, efx->net_dev,
270                                   "Unsupported conntrack tcp.flags %04x/%04x\n",
271                                    ntohs(fm.key->flags), ntohs(fm.mask->flags));
272                         return -EOPNOTSUPP;
273                 }
274         }
275
276         return 0;
277 }
278
279 static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone,
280                              struct flow_cls_offload *tc)
281 {
282         struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
283         struct efx_tc_ct_entry *conn, *old;
284         struct efx_nic *efx = ct_zone->efx;
285         const struct flow_action_entry *fa;
286         struct efx_tc_counter *cnt;
287         int rc, i;
288
289         if (WARN_ON(!efx->tc))
290                 return -ENETDOWN;
291         if (WARN_ON(!efx->tc->up))
292                 return -ENETDOWN;
293
294         conn = kzalloc(sizeof(*conn), GFP_USER);
295         if (!conn)
296                 return -ENOMEM;
297         conn->cookie = tc->cookie;
298         old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_ht,
299                                                 &conn->linkage,
300                                                 efx_tc_ct_ht_params);
301         if (old) {
302                 netif_dbg(efx, drv, efx->net_dev,
303                           "Already offloaded conntrack (cookie %lx)\n", tc->cookie);
304                 rc = -EEXIST;
305                 goto release;
306         }
307
308         /* Parse match */
309         conn->zone = ct_zone;
310         rc = efx_tc_ct_parse_match(efx, fr, conn);
311         if (rc)
312                 goto release;
313
314         /* Parse actions */
315         flow_action_for_each(i, fa, &fr->action) {
316                 switch (fa->id) {
317                 case FLOW_ACTION_CT_METADATA:
318                         conn->mark = fa->ct_metadata.mark;
319                         if (memchr_inv(fa->ct_metadata.labels, 0, sizeof(fa->ct_metadata.labels))) {
320                                 netif_dbg(efx, drv, efx->net_dev,
321                                           "Setting CT label not supported\n");
322                                 rc = -EOPNOTSUPP;
323                                 goto release;
324                         }
325                         break;
326                 default:
327                         netif_dbg(efx, drv, efx->net_dev,
328                                   "Unhandled action %u for conntrack\n", fa->id);
329                         rc = -EOPNOTSUPP;
330                         goto release;
331                 }
332         }
333
334         /* fill in defaults for unmangled values */
335         conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip;
336         conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport;
337
338         cnt = efx_tc_flower_allocate_counter(efx, EFX_TC_COUNTER_TYPE_CT);
339         if (IS_ERR(cnt)) {
340                 rc = PTR_ERR(cnt);
341                 goto release;
342         }
343         conn->cnt = cnt;
344
345         rc = efx_mae_insert_ct(efx, conn);
346         if (rc) {
347                 netif_dbg(efx, drv, efx->net_dev,
348                           "Failed to insert conntrack, %d\n", rc);
349                 goto release;
350         }
351         mutex_lock(&ct_zone->mutex);
352         list_add_tail(&conn->list, &ct_zone->cts);
353         mutex_unlock(&ct_zone->mutex);
354         return 0;
355 release:
356         if (conn->cnt)
357                 efx_tc_flower_release_counter(efx, conn->cnt);
358         if (!old)
359                 rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
360                                        efx_tc_ct_ht_params);
361         kfree(conn);
362         return rc;
363 }
364
365 /* Caller must follow with efx_tc_ct_remove_finish() after RCU grace period! */
366 static void efx_tc_ct_remove(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
367 {
368         int rc;
369
370         /* Remove it from HW */
371         rc = efx_mae_remove_ct(efx, conn);
372         /* Delete it from SW */
373         rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
374                                efx_tc_ct_ht_params);
375         if (rc) {
376                 netif_err(efx, drv, efx->net_dev,
377                           "Failed to remove conntrack %lx from hw, rc %d\n",
378                           conn->cookie, rc);
379         } else {
380                 netif_dbg(efx, drv, efx->net_dev, "Removed conntrack %lx\n",
381                           conn->cookie);
382         }
383 }
384
385 static void efx_tc_ct_remove_finish(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
386 {
387         /* Remove related CT counter.  This is delayed after the conn object we
388          * are working with has been successfully removed.  This protects the
389          * counter from being used-after-free inside efx_tc_ct_stats.
390          */
391         efx_tc_flower_release_counter(efx, conn->cnt);
392         kfree(conn);
393 }
394
395 static int efx_tc_ct_destroy(struct efx_tc_ct_zone *ct_zone,
396                              struct flow_cls_offload *tc)
397 {
398         struct efx_nic *efx = ct_zone->efx;
399         struct efx_tc_ct_entry *conn;
400
401         conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
402                                       efx_tc_ct_ht_params);
403         if (!conn) {
404                 netif_warn(efx, drv, efx->net_dev,
405                            "Conntrack %lx not found to remove\n", tc->cookie);
406                 return -ENOENT;
407         }
408
409         mutex_lock(&ct_zone->mutex);
410         list_del(&conn->list);
411         efx_tc_ct_remove(efx, conn);
412         mutex_unlock(&ct_zone->mutex);
413         synchronize_rcu();
414         efx_tc_ct_remove_finish(efx, conn);
415         return 0;
416 }
417
418 static int efx_tc_ct_stats(struct efx_tc_ct_zone *ct_zone,
419                            struct flow_cls_offload *tc)
420 {
421         struct efx_nic *efx = ct_zone->efx;
422         struct efx_tc_ct_entry *conn;
423         struct efx_tc_counter *cnt;
424
425         rcu_read_lock();
426         conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
427                                       efx_tc_ct_ht_params);
428         if (!conn) {
429                 netif_warn(efx, drv, efx->net_dev,
430                            "Conntrack %lx not found for stats\n", tc->cookie);
431                 rcu_read_unlock();
432                 return -ENOENT;
433         }
434
435         cnt = conn->cnt;
436         spin_lock_bh(&cnt->lock);
437         /* Report only last use */
438         flow_stats_update(&tc->stats, 0, 0, 0, cnt->touched,
439                           FLOW_ACTION_HW_STATS_DELAYED);
440         spin_unlock_bh(&cnt->lock);
441         rcu_read_unlock();
442
443         return 0;
444 }
445
446 static int efx_tc_flow_block(enum tc_setup_type type, void *type_data,
447                              void *cb_priv)
448 {
449         struct flow_cls_offload *tcb = type_data;
450         struct efx_tc_ct_zone *ct_zone = cb_priv;
451
452         if (type != TC_SETUP_CLSFLOWER)
453                 return -EOPNOTSUPP;
454
455         switch (tcb->command) {
456         case FLOW_CLS_REPLACE:
457                 return efx_tc_ct_replace(ct_zone, tcb);
458         case FLOW_CLS_DESTROY:
459                 return efx_tc_ct_destroy(ct_zone, tcb);
460         case FLOW_CLS_STATS:
461                 return efx_tc_ct_stats(ct_zone, tcb);
462         default:
463                 break;
464         }
465
466         return -EOPNOTSUPP;
467 }
468
469 struct efx_tc_ct_zone *efx_tc_ct_register_zone(struct efx_nic *efx, u16 zone,
470                                                struct nf_flowtable *ct_ft)
471 {
472         struct efx_tc_ct_zone *ct_zone, *old;
473         int rc;
474
475         ct_zone = kzalloc(sizeof(*ct_zone), GFP_USER);
476         if (!ct_zone)
477                 return ERR_PTR(-ENOMEM);
478         ct_zone->zone = zone;
479         old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_zone_ht,
480                                                 &ct_zone->linkage,
481                                                 efx_tc_ct_zone_ht_params);
482         if (old) {
483                 /* don't need our new entry */
484                 kfree(ct_zone);
485                 if (!refcount_inc_not_zero(&old->ref))
486                         return ERR_PTR(-EAGAIN);
487                 /* existing entry found */
488                 WARN_ON_ONCE(old->nf_ft != ct_ft);
489                 netif_dbg(efx, drv, efx->net_dev,
490                           "Found existing ct_zone for %u\n", zone);
491                 return old;
492         }
493         ct_zone->nf_ft = ct_ft;
494         ct_zone->efx = efx;
495         INIT_LIST_HEAD(&ct_zone->cts);
496         mutex_init(&ct_zone->mutex);
497         rc = nf_flow_table_offload_add_cb(ct_ft, efx_tc_flow_block, ct_zone);
498         netif_dbg(efx, drv, efx->net_dev, "Adding new ct_zone for %u, rc %d\n",
499                   zone, rc);
500         if (rc < 0)
501                 goto fail;
502         refcount_set(&ct_zone->ref, 1);
503         return ct_zone;
504 fail:
505         rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage,
506                                efx_tc_ct_zone_ht_params);
507         kfree(ct_zone);
508         return ERR_PTR(rc);
509 }
510
511 void efx_tc_ct_unregister_zone(struct efx_nic *efx,
512                                struct efx_tc_ct_zone *ct_zone)
513 {
514         struct efx_tc_ct_entry *conn, *next;
515
516         if (!refcount_dec_and_test(&ct_zone->ref))
517                 return; /* still in use */
518         nf_flow_table_offload_del_cb(ct_zone->nf_ft, efx_tc_flow_block, ct_zone);
519         rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage,
520                                efx_tc_ct_zone_ht_params);
521         mutex_lock(&ct_zone->mutex);
522         list_for_each_entry(conn, &ct_zone->cts, list)
523                 efx_tc_ct_remove(efx, conn);
524         synchronize_rcu();
525         /* need to use _safe because efx_tc_ct_remove_finish() frees conn */
526         list_for_each_entry_safe(conn, next, &ct_zone->cts, list)
527                 efx_tc_ct_remove_finish(efx, conn);
528         mutex_unlock(&ct_zone->mutex);
529         mutex_destroy(&ct_zone->mutex);
530         netif_dbg(efx, drv, efx->net_dev, "Removed ct_zone for %u\n",
531                   ct_zone->zone);
532         kfree(ct_zone);
533 }