// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2019 Solarflare Communications Inc.
 * Copyright 2020-2022 Xilinx Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */
#include <net/pkt_cls.h>
#include <net/vxlan.h>
#include <net/geneve.h>
#include <net/tc_act/tc_ct.h>
#include "tc.h"
#include "tc_bindings.h"
#include "tc_encap_actions.h"
#include "tc_conntrack.h"
#include "mae.h"
#include "ef100_rep.h"
#include "efx.h"
enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
{
	if (netif_is_vxlan(net_dev))
		return EFX_ENCAP_TYPE_VXLAN;
	if (netif_is_geneve(net_dev))
		return EFX_ENCAP_TYPE_GENEVE;

	return EFX_ENCAP_TYPE_NONE;
}
#define EFX_TC_HDR_TYPE_TTL_MASK ((u32)0xff)
/* Hoplimit is stored in the most significant byte in the pedit ipv6 header action */
#define EFX_TC_HDR_TYPE_HLIMIT_MASK ~((u32)0xff000000)
#define EFX_EFV_PF	NULL
/* Look up the representor information (efv) for a device.
 * May return NULL for the PF (us), or an error pointer for a device that
 * isn't supported as a TC offload endpoint
 */
struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
					 struct net_device *dev)
{
	struct efx_rep *efv;

	if (!dev)
		return ERR_PTR(-EOPNOTSUPP);
	/* Is it us (the PF)? */
	if (dev == efx->net_dev)
		return EFX_EFV_PF;
	/* Is it an efx vfrep at all? */
	if (dev->netdev_ops != &efx_ef100_rep_netdev_ops)
		return ERR_PTR(-EOPNOTSUPP);
	/* Is it ours? We don't support TC rules that include another
	 * EF100's netdevices (not even on another port of the same NIC).
	 */
	efv = netdev_priv(dev);
	if (efv->parent != efx)
		return ERR_PTR(-EOPNOTSUPP);
	return efv;
}
/* Convert a driver-internal vport ID into an internal device (PF or VF) */
static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv)
{
	u32 mport;

	if (IS_ERR(efv))
		return PTR_ERR(efv);
	if (!efv) /* device is PF (us) */
		efx_mae_mport_uplink(efx, &mport);
	else /* device is repr */
		efx_mae_mport_mport(efx, efv->mport, &mport);
	return mport;
}
/* Convert a driver-internal vport ID into an external device (wire or VF) */
s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
{
	u32 mport;

	if (IS_ERR(efv))
		return PTR_ERR(efv);
	if (!efv) /* device is PF (us) */
		efx_mae_mport_wire(efx, &mport);
	else /* device is repr */
		efx_mae_mport_mport(efx, efv->mport, &mport);
	return mport;
}
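/* Note on the rhashtable params below (added commentary): where .key_len is
 * an offsetof() of the struct's 'linkage' member (with .key_offset 0), the
 * lookup key is the entire region of the struct laid out before the
 * rhash_head, so all fields forming the key must be placed ahead of
 * 'linkage' in the struct definition.
 */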
static const struct rhashtable_params efx_tc_mac_ht_params = {
	.key_len	= offsetofend(struct efx_tc_mac_pedit_action, h_addr),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_mac_pedit_action, linkage),
};
static const struct rhashtable_params efx_tc_encap_match_ht_params = {
	.key_len	= offsetof(struct efx_tc_encap_match, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_encap_match, linkage),
};
static const struct rhashtable_params efx_tc_match_action_ht_params = {
	.key_len	= sizeof(unsigned long),
	.key_offset	= offsetof(struct efx_tc_flow_rule, cookie),
	.head_offset	= offsetof(struct efx_tc_flow_rule, linkage),
};
static const struct rhashtable_params efx_tc_lhs_rule_ht_params = {
	.key_len	= sizeof(unsigned long),
	.key_offset	= offsetof(struct efx_tc_lhs_rule, cookie),
	.head_offset	= offsetof(struct efx_tc_lhs_rule, linkage),
};
static const struct rhashtable_params efx_tc_recirc_ht_params = {
	.key_len	= offsetof(struct efx_tc_recirc_id, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_recirc_id, linkage),
};
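/* Added commentary: many of the helpers below follow a common get/put
 * pattern. A _get() allocates a candidate entry and uses
 * rhashtable_lookup_get_insert_fast() to either insert it or find an
 * existing entry (taking a reference via refcount_inc_not_zero()), and
 * only then programs the hardware; the matching _put() drops the
 * reference and, on the last ref, tears down the hardware state and
 * removes the hashtable entry.
 */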
static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx,
							     unsigned char h_addr[ETH_ALEN],
							     struct netlink_ext_ack *extack)
{
	struct efx_tc_mac_pedit_action *ped, *old;
	int rc;

	ped = kzalloc(sizeof(*ped), GFP_USER);
	if (!ped)
		return ERR_PTR(-ENOMEM);
	memcpy(ped->h_addr, h_addr, ETH_ALEN);
	old = rhashtable_lookup_get_insert_fast(&efx->tc->mac_ht,
						&ped->linkage,
						efx_tc_mac_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(ped);
		if (!refcount_inc_not_zero(&old->ref))
			return ERR_PTR(-EAGAIN);
		/* existing entry found, ref taken */
		return old;
	}

	rc = efx_mae_allocate_pedit_mac(efx, ped);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to store pedit MAC address in hw");
		goto out_remove;
	}
	/* ref and return */
	refcount_set(&ped->ref, 1);
	return ped;
out_remove:
	rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
			       efx_tc_mac_ht_params);
	kfree(ped);
	return ERR_PTR(rc);
}
static void efx_tc_flower_put_mac(struct efx_nic *efx,
				  struct efx_tc_mac_pedit_action *ped)
{
	if (!refcount_dec_and_test(&ped->ref))
		return; /* still in use */
	rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
			       efx_tc_mac_ht_params);
	efx_mae_free_pedit_mac(efx, ped);
	kfree(ped);
}
static void efx_tc_free_action_set(struct efx_nic *efx,
				   struct efx_tc_action_set *act, bool in_hw)
{
	/* Failure paths calling this on the 'cursor' action set in_hw=false,
	 * because if the alloc had succeeded we'd've put it in acts.list and
	 * not still have it in act.
	 */
	if (in_hw) {
		efx_mae_free_action_set(efx, act->fw_id);
		/* in_hw is true iff we are on an acts.list; make sure to
		 * remove ourselves from that list before we are freed.
		 */
		list_del(&act->list);
	}
	if (act->count) {
		spin_lock_bh(&act->count->cnt->lock);
		if (!list_empty(&act->count_user))
			list_del(&act->count_user);
		spin_unlock_bh(&act->count->cnt->lock);
		efx_tc_flower_put_counter_index(efx, act->count);
	}
	if (act->encap_md) {
		list_del(&act->encap_user);
		efx_tc_flower_release_encap_md(efx, act->encap_md);
	}
	if (act->src_mac)
		efx_tc_flower_put_mac(efx, act->src_mac);
	if (act->dst_mac)
		efx_tc_flower_put_mac(efx, act->dst_mac);
	kfree(act);
}
static void efx_tc_free_action_set_list(struct efx_nic *efx,
					struct efx_tc_action_set_list *acts,
					bool in_hw)
{
	struct efx_tc_action_set *act, *next;

	/* Failure paths set in_hw=false, because usually the acts didn't get
	 * to efx_mae_alloc_action_set_list(); if they did, the failure tree
	 * has a separate efx_mae_free_action_set_list() before calling us.
	 */
	if (in_hw)
		efx_mae_free_action_set_list(efx, acts);
	/* Any act that's on the list will be in_hw even if the list isn't */
	list_for_each_entry_safe(act, next, &acts->list, list)
		efx_tc_free_action_set(efx, act, true);
	/* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */
}
/* Boilerplate for the simple 'copy a field' cases */
#define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) {		\
	struct flow_match_##_type fm;					\
									\
	flow_rule_match_##_tcget(rule, &fm);				\
	match->value._field = fm.key->_tcfield;				\
	match->mask._field = fm.mask->_tcfield;				\
}
#define MAP_KEY_AND_MASK(_name, _type, _tcfield, _field)	\
	_MAP_KEY_AND_MASK(_name, _type, _type, _tcfield, _field)
#define MAP_ENC_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
	_MAP_KEY_AND_MASK(ENC_##_name, _type, _tcget, _tcfield, _field)
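/* Added commentary: for example, MAP_KEY_AND_MASK(BASIC, basic, n_proto,
 * eth_proto) expands to:
 *	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
 *		struct flow_match_basic fm;
 *
 *		flow_rule_match_basic(rule, &fm);
 *		match->value.eth_proto = fm.key->n_proto;
 *		match->mask.eth_proto = fm.mask->n_proto;
 *	}
 */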
static int efx_tc_flower_parse_match(struct efx_nic *efx,
				     struct flow_rule *rule,
				     struct efx_tc_match *match,
				     struct netlink_ext_ack *extack)
{
	struct flow_dissector *dissector = rule->match.dissector;
	unsigned char ipv = 0;

	/* Owing to internal TC infelicities, the IPV6_ADDRS key might be set
	 * even on IPv4 filters; so rather than relying on dissector->used_keys
	 * we check the addr_type in the CONTROL key. If we don't find it (or
	 * it's masked, which should never happen), we treat both IPV4_ADDRS
	 * and IPV6_ADDRS as absent.
	 */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control fm;

		flow_rule_match_control(rule, &fm);
		if (IS_ALL_ONES(fm.mask->addr_type))
			switch (fm.key->addr_type) {
			case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
				ipv = 4;
				break;
			case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
				ipv = 6;
				break;
			default:
				break;
			}

		if (fm.mask->flags & FLOW_DIS_IS_FRAGMENT) {
			match->value.ip_frag = fm.key->flags & FLOW_DIS_IS_FRAGMENT;
			match->mask.ip_frag = true;
		}
		if (fm.mask->flags & FLOW_DIS_FIRST_FRAG) {
			match->value.ip_firstfrag = fm.key->flags & FLOW_DIS_FIRST_FRAG;
			match->mask.ip_firstfrag = true;
		}
		if (fm.mask->flags & ~(FLOW_DIS_IS_FRAGMENT | FLOW_DIS_FIRST_FRAG)) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on control.flags %#x",
					       fm.mask->flags);
			return -EOPNOTSUPP;
		}
	}
	if (dissector->used_keys &
	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_CT) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IP))) {
		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported flower keys %#llx",
				       dissector->used_keys);
		return -EOPNOTSUPP;
	}
	MAP_KEY_AND_MASK(BASIC, basic, n_proto, eth_proto);
	/* Make sure we're IP if any L3/L4 keys used. */
	if (!IS_ALL_ONES(match->mask.eth_proto) ||
	    !(match->value.eth_proto == htons(ETH_P_IP) ||
	      match->value.eth_proto == htons(ETH_P_IPV6)))
		if (dissector->used_keys &
		    (BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_IP) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "L3/L4 flower keys %#llx require protocol ipv[46]",
					       dissector->used_keys);
			return -EINVAL;
		}
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
		struct flow_match_vlan fm;

		flow_rule_match_vlan(rule, &fm);
		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
			match->value.vlan_proto[0] = fm.key->vlan_tpid;
			match->mask.vlan_proto[0] = fm.mask->vlan_tpid;
			match->value.vlan_tci[0] = cpu_to_be16(fm.key->vlan_priority << 13 |
							       fm.key->vlan_id);
			match->mask.vlan_tci[0] = cpu_to_be16(fm.mask->vlan_priority << 13 |
							      fm.mask->vlan_id);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
		struct flow_match_vlan fm;

		flow_rule_match_cvlan(rule, &fm);
		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
			match->value.vlan_proto[1] = fm.key->vlan_tpid;
			match->mask.vlan_proto[1] = fm.mask->vlan_tpid;
			match->value.vlan_tci[1] = cpu_to_be16(fm.key->vlan_priority << 13 |
							       fm.key->vlan_id);
			match->mask.vlan_tci[1] = cpu_to_be16(fm.mask->vlan_priority << 13 |
							      fm.mask->vlan_id);
		}
	}
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_match_eth_addrs fm;

		flow_rule_match_eth_addrs(rule, &fm);
		ether_addr_copy(match->value.eth_saddr, fm.key->src);
		ether_addr_copy(match->value.eth_daddr, fm.key->dst);
		ether_addr_copy(match->mask.eth_saddr, fm.mask->src);
		ether_addr_copy(match->mask.eth_daddr, fm.mask->dst);
	}
	MAP_KEY_AND_MASK(BASIC, basic, ip_proto, ip_proto);
	/* Make sure we're TCP/UDP if any L4 keys used. */
	if ((match->value.ip_proto != IPPROTO_UDP &&
	     match->value.ip_proto != IPPROTO_TCP) || !IS_ALL_ONES(match->mask.ip_proto))
		if (dissector->used_keys &
		    (BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
		     BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "L4 flower keys %#llx require ipproto udp or tcp",
					       dissector->used_keys);
			return -EINVAL;
		}
	MAP_KEY_AND_MASK(IP, ip, tos, ip_tos);
	MAP_KEY_AND_MASK(IP, ip, ttl, ip_ttl);
	if (ipv == 4) {
		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, src, src_ip);
		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, dst, dst_ip);
	}
#ifdef CONFIG_IPV6
	else if (ipv == 6) {
		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, src, src_ip6);
		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, dst, dst_ip6);
	}
#endif
	MAP_KEY_AND_MASK(PORTS, ports, src, l4_sport);
	MAP_KEY_AND_MASK(PORTS, ports, dst, l4_dport);
	MAP_KEY_AND_MASK(TCP, tcp, flags, tcp_flags);
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_match_control fm;

		flow_rule_match_enc_control(rule, &fm);
		if (fm.mask->flags) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on enc_control.flags %#x",
					       fm.mask->flags);
			return -EOPNOTSUPP;
		}
		if (!IS_ALL_ONES(fm.mask->addr_type)) {
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported enc addr_type mask %u (key %u)",
					       fm.mask->addr_type,
					       fm.key->addr_type);
			return -EOPNOTSUPP;
		}
		switch (fm.key->addr_type) {
		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
					     src, enc_src_ip);
			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
					     dst, enc_dst_ip);
			break;
#ifdef CONFIG_IPV6
		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
					     src, enc_src_ip6);
			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
					     dst, enc_dst_ip6);
			break;
#endif
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Unsupported enc addr_type %u (supported are IPv4, IPv6)",
					       fm.key->addr_type);
			return -EOPNOTSUPP;
		}
		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, tos, enc_ip_tos);
		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, ttl, enc_ip_ttl);
		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, src, enc_sport);
		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, dst, enc_dport);
		MAP_ENC_KEY_AND_MASK(KEYID, enc_keyid, enc_keyid, keyid, enc_keyid);
	} else if (dissector->used_keys &
		   (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
		NL_SET_ERR_MSG_FMT_MOD(extack,
				       "Flower enc keys require enc_control (keys: %#llx)",
				       dissector->used_keys);
		return -EOPNOTSUPP;
	}
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) {
		struct flow_match_ct fm;

		flow_rule_match_ct(rule, &fm);
		match->value.ct_state_trk = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
		match->mask.ct_state_trk = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
		match->value.ct_state_est = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
		match->mask.ct_state_est = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
		if (fm.mask->ct_state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
					  TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Unsupported ct_state match %#x",
					       fm.mask->ct_state);
			return -EOPNOTSUPP;
		}
		match->value.ct_mark = fm.key->ct_mark;
		match->mask.ct_mark = fm.mask->ct_mark;
		match->value.ct_zone = fm.key->ct_zone;
		match->mask.ct_zone = fm.mask->ct_zone;

		if (memchr_inv(fm.mask->ct_labels, 0, sizeof(fm.mask->ct_labels))) {
			NL_SET_ERR_MSG_MOD(extack, "Matching on ct_label not supported");
			return -EOPNOTSUPP;
		}
	}

	return 0;
}
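/* Added commentary: an illustrative example of a match this parser handles
 * (hypothetical tc usage): "tc filter add dev $PF ingress protocol ip
 * flower ip_proto tcp dst_port 80" populates eth_proto, ip_proto and
 * l4_dport via the MAP_KEY_AND_MASK() calls above.
 */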
static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
					      struct efx_tc_encap_match *encap)
{
	int rc;

	if (!refcount_dec_and_test(&encap->ref))
		return; /* still in use */

	if (encap->type == EFX_TC_EM_DIRECT) {
		rc = efx_mae_unregister_encap_match(efx, encap);
		if (rc)
			/* Display message but carry on and remove entry from our
			 * SW tables, because there's not much we can do about it.
			 */
			netif_err(efx, drv, efx->net_dev,
				  "Failed to release encap match %#x, rc %d\n",
				  encap->fw_id, rc);
	}
	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
			       efx_tc_encap_match_ht_params);
	if (encap->pseudo)
		efx_tc_flower_release_encap_match(efx, encap->pseudo);
	kfree(encap);
}
static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
					    struct efx_tc_match *match,
					    enum efx_encap_type type,
					    enum efx_tc_em_pseudo_type em_type,
					    u8 child_ip_tos_mask,
					    __be16 child_udp_sport_mask,
					    struct netlink_ext_ack *extack)
{
	struct efx_tc_encap_match *encap, *old, *pseudo = NULL;
	bool ipv6 = false;
	int rc;

	/* We require that the socket-defining fields (IP addrs and UDP dest
	 * port) are present and exact-match. Other fields may only be used
	 * if the field-set (and any masks) are the same for all encap
	 * matches on the same <sip,dip,dport> tuple; this is enforced by
	 * pseudo encap matches.
	 */
	if (match->mask.enc_dst_ip | match->mask.enc_src_ip) {
		if (!IS_ALL_ONES(match->mask.enc_dst_ip)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on dst IP address");
			return -EOPNOTSUPP;
		}
		if (!IS_ALL_ONES(match->mask.enc_src_ip)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on src IP address");
			return -EOPNOTSUPP;
		}
#ifdef CONFIG_IPV6
		if (!ipv6_addr_any(&match->mask.enc_dst_ip6) ||
		    !ipv6_addr_any(&match->mask.enc_src_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match on both IPv4 and IPv6, don't understand");
			return -EOPNOTSUPP;
		}
	} else {
		ipv6 = true;
		if (!efx_ipv6_addr_all_ones(&match->mask.enc_dst_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on dst IP address");
			return -EOPNOTSUPP;
		}
		if (!efx_ipv6_addr_all_ones(&match->mask.enc_src_ip6)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match is not exact on src IP address");
			return -EOPNOTSUPP;
		}
#endif
	}
	if (!IS_ALL_ONES(match->mask.enc_dport)) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port");
		return -EOPNOTSUPP;
	}
	if (match->mask.enc_sport || match->mask.enc_ip_tos) {
		struct efx_tc_match pmatch = *match;

		if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */
			NL_SET_ERR_MSG_MOD(extack, "Bad recursion in egress encap match handler");
			return -EOPNOTSUPP;
		}
		pmatch.value.enc_ip_tos = 0;
		pmatch.mask.enc_ip_tos = 0;
		pmatch.value.enc_sport = 0;
		pmatch.mask.enc_sport = 0;
		rc = efx_tc_flower_record_encap_match(efx, &pmatch, type,
						      EFX_TC_EM_PSEUDO_MASK,
						      match->mask.enc_ip_tos,
						      match->mask.enc_sport,
						      extack);
		if (rc)
			return rc;
		pseudo = pmatch.encap;
	}
	if (match->mask.enc_ip_ttl) {
		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported");
		rc = -EOPNOTSUPP;
		goto fail_pseudo;
	}

	rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos,
					    match->mask.enc_sport, extack);
	if (rc)
		goto fail_pseudo;
	encap = kzalloc(sizeof(*encap), GFP_USER);
	if (!encap) {
		rc = -ENOMEM;
		goto fail_pseudo;
	}
	encap->src_ip = match->value.enc_src_ip;
	encap->dst_ip = match->value.enc_dst_ip;
#ifdef CONFIG_IPV6
	encap->src_ip6 = match->value.enc_src_ip6;
	encap->dst_ip6 = match->value.enc_dst_ip6;
#endif
	encap->udp_dport = match->value.enc_dport;
	encap->tun_type = type;
	encap->ip_tos = match->value.enc_ip_tos;
	encap->ip_tos_mask = match->mask.enc_ip_tos;
	encap->child_ip_tos_mask = child_ip_tos_mask;
	encap->udp_sport = match->value.enc_sport;
	encap->udp_sport_mask = match->mask.enc_sport;
	encap->child_udp_sport_mask = child_udp_sport_mask;
	encap->type = em_type;
	encap->pseudo = pseudo;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht,
						&encap->linkage,
						efx_tc_encap_match_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(encap);
		if (pseudo) /* don't need our new pseudo either */
			efx_tc_flower_release_encap_match(efx, pseudo);
		/* check old and new em_types are compatible */
		switch (old->type) {
		case EFX_TC_EM_DIRECT:
			/* old EM is in hardware, so mustn't overlap with a
			 * pseudo, but may be shared with another direct EM
			 */
			if (em_type == EFX_TC_EM_DIRECT)
				break;
			NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry");
			return -EEXIST;
		case EFX_TC_EM_PSEUDO_MASK:
			/* old EM is protecting a ToS- or src port-qualified
			 * filter, so may only be shared with another pseudo
			 * for the same ToS and src port masks.
			 */
			if (em_type != EFX_TC_EM_PSEUDO_MASK) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "%s encap match conflicts with existing pseudo(MASK) entry",
						       em_type ? "Pseudo" : "Direct");
				return -EEXIST;
			}
			if (child_ip_tos_mask != old->child_ip_tos_mask) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Pseudo encap match for TOS mask %#04x conflicts with existing pseudo(MASK) entry for TOS mask %#04x",
						       child_ip_tos_mask,
						       old->child_ip_tos_mask);
				return -EEXIST;
			}
			if (child_udp_sport_mask != old->child_udp_sport_mask) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Pseudo encap match for UDP src port mask %#x conflicts with existing pseudo(MASK) entry for mask %#x",
						       child_udp_sport_mask,
						       old->child_udp_sport_mask);
				return -EEXIST;
			}
			break;
		default: /* Unrecognised pseudo-type. Just say no */
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "%s encap match conflicts with existing pseudo(%d) entry",
					       em_type ? "Pseudo" : "Direct",
					       old->type);
			return -EEXIST;
		}
		/* check old and new tun_types are compatible */
		if (old->tun_type != type) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Egress encap match with conflicting tun_type %u != %u",
					       old->tun_type, type);
			return -EEXIST;
		}
		if (!refcount_inc_not_zero(&old->ref))
			return -EAGAIN;
		/* existing entry found */
		match->encap = old;
		return 0;
	}

	if (em_type == EFX_TC_EM_DIRECT) {
		rc = efx_mae_register_encap_match(efx, encap);
		if (rc) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW");
			goto fail;
		}
	}
	refcount_set(&encap->ref, 1);
	match->encap = encap;
	return 0;
fail:
	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
			       efx_tc_encap_match_ht_params);
	kfree(encap);
fail_pseudo:
	if (pseudo)
		efx_tc_flower_release_encap_match(efx, pseudo);
	return rc;
}
static struct efx_tc_recirc_id *efx_tc_get_recirc_id(struct efx_nic *efx,
						     u32 chain_index,
						     struct net_device *net_dev)
{
	struct efx_tc_recirc_id *rid, *old;
	int rc;

	rid = kzalloc(sizeof(*rid), GFP_USER);
	if (!rid)
		return ERR_PTR(-ENOMEM);
	rid->chain_index = chain_index;
	/* We don't take a reference here, because it's implied - if there's
	 * a rule on the net_dev that's been offloaded to us, then the net_dev
	 * can't go away until the rule has been deoffloaded.
	 */
	rid->net_dev = net_dev;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->recirc_ht,
						&rid->linkage,
						efx_tc_recirc_ht_params);
	if (old) {
		/* don't need our new entry */
		kfree(rid);
		if (!refcount_inc_not_zero(&old->ref))
			return ERR_PTR(-EAGAIN);
		/* existing entry found */
		return old;
	}

	rc = ida_alloc_range(&efx->tc->recirc_ida, 1, U8_MAX, GFP_USER);
	if (rc < 0) {
		rhashtable_remove_fast(&efx->tc->recirc_ht,
				       &rid->linkage,
				       efx_tc_recirc_ht_params);
		kfree(rid);
		return ERR_PTR(rc);
	}
	rid->fw_id = rc;
	refcount_set(&rid->ref, 1);
	return rid;
}
static void efx_tc_put_recirc_id(struct efx_nic *efx, struct efx_tc_recirc_id *rid)
{
	if (!refcount_dec_and_test(&rid->ref))
		return; /* still in use */
	rhashtable_remove_fast(&efx->tc->recirc_ht, &rid->linkage,
			       efx_tc_recirc_ht_params);
	ida_free(&efx->tc->recirc_ida, rid->fw_id);
	kfree(rid);
}
static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
{
	efx_mae_delete_rule(efx, rule->fw_id);

	/* Release entries in subsidiary tables */
	efx_tc_free_action_set_list(efx, &rule->acts, true);
	if (rule->match.rid)
		efx_tc_put_recirc_id(efx, rule->match.rid);
	if (rule->match.encap)
		efx_tc_flower_release_encap_match(efx, rule->match.encap);
	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
}
static const char *efx_tc_encap_type_name(enum efx_encap_type typ)
{
	switch (typ) {
	case EFX_ENCAP_TYPE_NONE:
		return "none";
	case EFX_ENCAP_TYPE_VXLAN:
		return "vxlan";
	case EFX_ENCAP_TYPE_GENEVE:
		return "geneve";
	default:
		pr_warn_once("Unknown efx_encap_type %d encountered\n", typ);
		return "unknown";
	}
}
/* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */
enum efx_tc_action_order {
	EFX_TC_AO_DECAP,
	EFX_TC_AO_DEC_TTL,
	EFX_TC_AO_PEDIT_MAC_ADDRS,
	EFX_TC_AO_VLAN_POP,
	EFX_TC_AO_VLAN_PUSH,
	EFX_TC_AO_COUNT,
	EFX_TC_AO_ENCAP,
	EFX_TC_AO_DELIVER
};
/* Determine whether we can add @new action without violating order */
static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
					  enum efx_tc_action_order new)
{
	switch (new) {
	case EFX_TC_AO_DECAP:
		if (act->decap)
			return false;
		/* PEDIT_MAC_ADDRS must not happen before DECAP, though it
		 * can wait until much later
		 */
		if (act->dst_mac || act->src_mac)
			return false;

		/* Decrementing ttl must not happen before DECAP */
		if (act->do_ttl_dec)
			return false;
		fallthrough;
	case EFX_TC_AO_VLAN_POP:
		if (act->vlan_pop >= 2)
			return false;
		/* If we've already pushed a VLAN, we can't then pop it;
		 * the hardware would instead try to pop an existing VLAN
		 * before pushing the new one.
		 */
		if (act->vlan_push)
			return false;
		fallthrough;
	case EFX_TC_AO_VLAN_PUSH:
		if (act->vlan_push >= 2)
			return false;
		fallthrough;
	case EFX_TC_AO_COUNT:
		if (act->count)
			return false;
		fallthrough;
	case EFX_TC_AO_PEDIT_MAC_ADDRS:
	case EFX_TC_AO_ENCAP:
		if (act->encap_md)
			return false;
		fallthrough;
	case EFX_TC_AO_DELIVER:
		return !act->deliver;
	case EFX_TC_AO_DEC_TTL:
		if (act->encap_md)
			return false;
		return !act->do_ttl_dec;
	}
	/* Bad caller. Whatever they wanted to do, say they can't. */
	WARN_ON_ONCE(1);
	return false;
}
/**
 * DOC: TC conntrack sequences
 *
 * The MAE hardware can handle at most two rounds of action rule matching,
 * consequently we support conntrack through the notion of a "left-hand side
 * rule". This is a rule which typically contains only the actions "ct" and
 * "goto chain N", and corresponds to one or more "right-hand side rules" in
 * chain N, which typically match on +trk+est, and may perform ct(nat) actions.
 * RHS rules go in the Action Rule table as normal but with a nonzero recirc_id
 * (the hardware equivalent of chain_index), while LHS rules may go in either
 * the Action Rule or the Outer Rule table, the latter being preferred for
 * performance reasons, and set both DO_CT and a recirc_id in their response.
 *
 * Besides the RHS rules, there are often also similar rules matching on
 * +trk+new which perform the ct(commit) action. These are not offloaded.
 */
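/* Added commentary: an illustrative sequence of this shape (hypothetical tc
 * usage, not taken from the original source):
 *	tc filter add dev $PF ingress chain 0 protocol ip flower \
 *		ip_proto tcp action ct action goto chain 1
 *	tc filter add dev $PF ingress chain 1 protocol ip flower \
 *		ct_state +trk+est action mirred egress redirect dev $REP
 * The first (LHS) rule performs the conntrack lookup and recirculates; the
 * second (RHS) rule matches the lookup result in chain 1.
 */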
static bool efx_tc_rule_is_lhs_rule(struct flow_rule *fr,
				    struct efx_tc_match *match)
{
	const struct flow_action_entry *fa;
	int i;

	flow_action_for_each(i, fa, &fr->action) {
		switch (fa->id) {
		case FLOW_ACTION_GOTO:
			return true;
		case FLOW_ACTION_CT:
			/* If rule is -trk, or doesn't mention trk at all, then
			 * a CT action implies a conntrack lookup (hence it's an
			 * LHS rule). If rule is +trk, then a CT action could
			 * just be ct(nat) or even ct(commit) (though the latter
			 * can't be offloaded).
			 */
			if (!match->mask.ct_state_trk || !match->value.ct_state_trk)
				return true;
			break;
		default:
			break;
		}
	}
	return false;
}
static int efx_tc_flower_handle_lhs_actions(struct efx_nic *efx,
					    struct flow_cls_offload *tc,
					    struct flow_rule *fr,
					    struct net_device *net_dev,
					    struct efx_tc_lhs_rule *rule)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_action *act = &rule->lhs_act;
	const struct flow_action_entry *fa;
	bool pipe = true;
	int i;

	flow_action_for_each(i, fa, &fr->action) {
		struct efx_tc_ct_zone *ct_zone;
		struct efx_tc_recirc_id *rid;

		if (!pipe) {
			/* more actions after a non-pipe action */
			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
			return -EINVAL;
		}
		switch (fa->id) {
		case FLOW_ACTION_GOTO:
			if (!fa->chain_index) {
				NL_SET_ERR_MSG_MOD(extack, "Can't goto chain 0, no looping in hw");
				return -EOPNOTSUPP;
			}
			rid = efx_tc_get_recirc_id(efx, fa->chain_index,
						   net_dev);
			if (IS_ERR(rid)) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to allocate a hardware recirculation ID for this chain_index");
				return PTR_ERR(rid);
			}
			act->rid = rid;
			if (fa->hw_stats) {
				struct efx_tc_counter_index *cnt;

				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
					NL_SET_ERR_MSG_FMT_MOD(extack,
							       "hw_stats_type %u not supported (only 'delayed')",
							       fa->hw_stats);
					return -EOPNOTSUPP;
				}
				cnt = efx_tc_flower_get_counter_index(efx, tc->cookie,
								      EFX_TC_COUNTER_TYPE_OR);
				if (IS_ERR(cnt)) {
					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
					return PTR_ERR(cnt);
				}
				WARN_ON(act->count); /* can't happen */
				act->count = cnt;
			}
			pipe = false;
			break;
		case FLOW_ACTION_CT:
			if (act->zone) {
				NL_SET_ERR_MSG_MOD(extack, "Can't offload multiple ct actions");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action & (TCA_CT_ACT_COMMIT |
					     TCA_CT_ACT_FORCE)) {
				NL_SET_ERR_MSG_MOD(extack, "Can't offload ct commit/force");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action & TCA_CT_ACT_CLEAR) {
				NL_SET_ERR_MSG_MOD(extack, "Can't clear ct in LHS rule");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action & (TCA_CT_ACT_NAT |
					     TCA_CT_ACT_NAT_SRC |
					     TCA_CT_ACT_NAT_DST)) {
				NL_SET_ERR_MSG_MOD(extack, "Can't perform NAT in LHS rule - packet isn't conntracked yet");
				return -EOPNOTSUPP;
			}
			if (fa->ct.action) {
				NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled ct.action %u for LHS rule\n",
						       fa->ct.action);
				return -EOPNOTSUPP;
			}
			ct_zone = efx_tc_ct_register_zone(efx, fa->ct.zone,
							  fa->ct.flow_table);
			if (IS_ERR(ct_zone)) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to register for CT updates");
				return PTR_ERR(ct_zone);
			}
			act->zone = ct_zone;
			break;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u for LHS rule\n",
					       fa->id);
			return -EOPNOTSUPP;
		}
	}

	if (pipe) {
		NL_SET_ERR_MSG_MOD(extack, "Missing goto chain in LHS rule");
		return -EOPNOTSUPP;
	}
	return 0;
}
static void efx_tc_flower_release_lhs_actions(struct efx_nic *efx,
					      struct efx_tc_lhs_action *act)
{
	if (act->rid)
		efx_tc_put_recirc_id(efx, act->rid);
	if (act->zone)
		efx_tc_ct_unregister_zone(efx, act->zone);
	if (act->count)
		efx_tc_flower_put_counter_index(efx, act->count);
}
/**
 * struct efx_tc_mangler_state - accumulates 32-bit pedits into fields
 *
 * @dst_mac_32: dst_mac[0:3] has been populated
 * @dst_mac_16: dst_mac[4:5] has been populated
 * @src_mac_16: src_mac[0:1] has been populated
 * @src_mac_32: src_mac[2:5] has been populated
 * @dst_mac: h_dest field of ethhdr
 * @src_mac: h_source field of ethhdr
 *
 * Since FLOW_ACTION_MANGLE comes in 32-bit chunks that do not
 * necessarily equate to whole fields of the packet header, this
 * structure is used to hold the cumulative effect of the partial
 * field pedits that have been processed so far.
 */
struct efx_tc_mangler_state {
	u8 dst_mac_32:1; /* eth->h_dest[0:3] */
	u8 dst_mac_16:1; /* eth->h_dest[4:5] */
	u8 src_mac_16:1; /* eth->h_source[0:1] */
	u8 src_mac_32:1; /* eth->h_source[2:5] */
	unsigned char dst_mac[ETH_ALEN];
	unsigned char src_mac[ETH_ALEN];
};
/**
 * efx_tc_complete_mac_mangle() - pull complete field pedits out of @mung
 * @efx: NIC we're installing a flow rule on
 * @act: action set (cursor) to update
 * @mung: accumulated partial mangles
 * @extack: netlink extended ack for reporting errors
 *
 * Check @mung to find any combinations of partial mangles that can be
 * combined into a complete packet field edit, add that edit to @act,
 * and consume the partial mangles from @mung.
 */
static int efx_tc_complete_mac_mangle(struct efx_nic *efx,
				      struct efx_tc_action_set *act,
				      struct efx_tc_mangler_state *mung,
				      struct netlink_ext_ack *extack)
{
	struct efx_tc_mac_pedit_action *ped;

	if (mung->dst_mac_32 && mung->dst_mac_16) {
		ped = efx_tc_flower_get_mac(efx, mung->dst_mac, extack);
		if (IS_ERR(ped))
			return PTR_ERR(ped);

		/* Check that we have not already populated dst_mac */
		if (act->dst_mac)
			efx_tc_flower_put_mac(efx, act->dst_mac);

		act->dst_mac = ped;

		/* consume the incomplete state */
		mung->dst_mac_32 = 0;
		mung->dst_mac_16 = 0;
	}
	if (mung->src_mac_16 && mung->src_mac_32) {
		ped = efx_tc_flower_get_mac(efx, mung->src_mac, extack);
		if (IS_ERR(ped))
			return PTR_ERR(ped);

		/* Check that we have not already populated src_mac */
		if (act->src_mac)
			efx_tc_flower_put_mac(efx, act->src_mac);

		act->src_mac = ped;

		/* consume the incomplete state */
		mung->src_mac_32 = 0;
		mung->src_mac_16 = 0;
	}
	return 0;
}
static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act,
			    const struct flow_action_entry *fa,
			    struct netlink_ext_ack *extack)
{
	switch (fa->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		switch (fa->mangle.offset) {
		case offsetof(struct iphdr, ttl):
			/* check that pedit applies to ttl only */
			if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK)
				break;

			/* Adding 0xff is equivalent to decrementing the ttl.
			 * Other added values are not supported.
			 */
			if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) != U8_MAX)
				break;

			/* check that we do not decrement ttl twice */
			if (!efx_tc_flower_action_order_ok(act,
							   EFX_TC_AO_DEC_TTL)) {
				NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl");
				return -EOPNOTSUPP;
			}
			act->do_ttl_dec = 1;
			return 0;
		default:
			break;
		}
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		switch (fa->mangle.offset) {
		case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
			/* check that pedit applies to hoplimit only */
			if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK)
				break;

			/* Adding 0xff is equivalent to decrementing the hoplimit.
			 * Other added values are not supported.
			 */
			if ((fa->mangle.val >> 24) != U8_MAX)
				break;

			/* check that we do not decrement hoplimit twice */
			if (!efx_tc_flower_action_order_ok(act,
							   EFX_TC_AO_DEC_TTL)) {
				NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl");
				return -EOPNOTSUPP;
			}
			act->do_ttl_dec = 1;
			return 0;
		default:
			break;
		}
		break;
	default:
		break;
	}

	NL_SET_ERR_MSG_FMT_MOD(extack,
			       "Unsupported: ttl add action type %x %x %x/%x",
			       fa->mangle.htype, fa->mangle.offset,
			       fa->mangle.val, fa->mangle.mask);
	return -EOPNOTSUPP;
}
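/* Added commentary: the kind of action handled above (illustrative tc
 * usage, assuming iproute2 pedit syntax): "action pedit ex munge ip ttl
 * add 0xff". Since TTL is 8 bits wide, adding 0xff is equivalent to
 * decrementing it, which is how this is mapped onto the MAE dec-ttl action.
 */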
/**
 * efx_tc_mangle() - handle a single 32-bit (or less) pedit
 * @efx: NIC we're installing a flow rule on
 * @act: action set (cursor) to update
 * @fa: FLOW_ACTION_MANGLE action metadata
 * @mung: accumulator for partial mangles
 * @extack: netlink extended ack for reporting errors
 * @match: original match used along with the mangle action
 *
 * Identify the fields written by a FLOW_ACTION_MANGLE, and record
 * the partial mangle state in @mung. If this mangle completes an
 * earlier partial mangle, consume and apply to @act by calling
 * efx_tc_complete_mac_mangle().
 */
static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act,
			 const struct flow_action_entry *fa,
			 struct efx_tc_mangler_state *mung,
			 struct netlink_ext_ack *extack,
			 struct efx_tc_match *match)
{
	__le32 mac32;
	__le16 mac16;
	u8 tr_ttl;

	switch (fa->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
		BUILD_BUG_ON(offsetof(struct ethhdr, h_dest) != 0);
		BUILD_BUG_ON(offsetof(struct ethhdr, h_source) != 6);
		if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_PEDIT_MAC_ADDRS)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Pedit mangle mac action violates action order");
			return -EOPNOTSUPP;
		}
		switch (fa->mangle.offset) {
		case 0:
			if (fa->mangle.mask) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Unsupported: mask (%#x) of eth.dst32 mangle",
						       fa->mangle.mask);
				return -EOPNOTSUPP;
			}
			/* Ethernet address is little-endian */
			mac32 = cpu_to_le32(fa->mangle.val);
			memcpy(mung->dst_mac, &mac32, sizeof(mac32));
			mung->dst_mac_32 = 1;
			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
		case 4:
			if (fa->mangle.mask == 0xffff) {
				mac16 = cpu_to_le16(fa->mangle.val >> 16);
				memcpy(mung->src_mac, &mac16, sizeof(mac16));
				mung->src_mac_16 = 1;
			} else if (fa->mangle.mask == 0xffff0000) {
				mac16 = cpu_to_le16((u16)fa->mangle.val);
				memcpy(mung->dst_mac + 4, &mac16, sizeof(mac16));
				mung->dst_mac_16 = 1;
			} else {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Unsupported: mask (%#x) of eth+4 mangle is not high or low 16b",
						       fa->mangle.mask);
				return -EOPNOTSUPP;
			}
			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
		case 8:
			if (fa->mangle.mask) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Unsupported: mask (%#x) of eth.src32 mangle",
						       fa->mangle.mask);
				return -EOPNOTSUPP;
			}
			mac32 = cpu_to_le32(fa->mangle.val);
			memcpy(mung->src_mac + 2, &mac32, sizeof(mac32));
			mung->src_mac_32 = 1;
			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported: mangle eth+%u %x/%x",
					       fa->mangle.offset, fa->mangle.val, fa->mangle.mask);
			return -EOPNOTSUPP;
		}
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		switch (fa->mangle.offset) {
		case offsetof(struct iphdr, ttl):
			/* we currently only support pedit IP4 when it applies
			 * to TTL and then only when it can be achieved with a
			 * decrement ttl action
			 */

			/* check that pedit applies to ttl only */
			if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Unsupported: mask (%#x) out of range, only support mangle action on ipv4.ttl",
						       fa->mangle.mask);
				return -EOPNOTSUPP;
			}

			/* we can only convert to a dec ttl when we have an
			 * exact match on the ttl field
			 */
			if (match->mask.ip_ttl != U8_MAX) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Unsupported: only support mangle ipv4.ttl when we have an exact match on ttl, mask used for match (%#x)",
						       match->mask.ip_ttl);
				return -EOPNOTSUPP;
			}

			/* check that we don't try to decrement 0, which equates
			 * to setting the ttl to 0xff
			 */
			if (match->value.ip_ttl == 0) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Unsupported: we cannot decrement ttl past 0");
				return -EOPNOTSUPP;
			}

			/* check that we do not decrement ttl twice */
			if (!efx_tc_flower_action_order_ok(act,
							   EFX_TC_AO_DEC_TTL)) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Unsupported: multiple dec ttl");
				return -EOPNOTSUPP;
			}

			/* check pedit can be achieved with decrement action */
			tr_ttl = match->value.ip_ttl - 1;
			if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) == tr_ttl) {
				act->do_ttl_dec = 1;
				return 0;
			}

			fallthrough;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Unsupported: only support mangle on the ttl field (offset is %u)",
					       fa->mangle.offset);
			return -EOPNOTSUPP;
		}
		break;
	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		switch (fa->mangle.offset) {
		case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
			/* we currently only support pedit IP6 when it applies
			 * to the hoplimit and then only when it can be achieved
			 * with a decrement hoplimit action
			 */

			/* check that pedit applies to hoplimit only */
			if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Unsupported: mask (%#x) out of range, only support mangle action on ipv6.hop_limit",
						       fa->mangle.mask);
				return -EOPNOTSUPP;
			}

			/* we can only convert to a dec ttl when we have an
			 * exact match on the ttl field
			 */
			if (match->mask.ip_ttl != U8_MAX) {
				NL_SET_ERR_MSG_FMT_MOD(extack,
						       "Unsupported: only support mangle ipv6.hop_limit when we have an exact match on ttl, mask used for match (%#x)",
						       match->mask.ip_ttl);
				return -EOPNOTSUPP;
			}

			/* check that we don't try to decrement 0, which equates
			 * to setting the ttl to 0xff
			 */
			if (match->value.ip_ttl == 0) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Unsupported: we cannot decrement hop_limit past 0");
				return -EOPNOTSUPP;
			}

			/* check that we do not decrement hoplimit twice */
			if (!efx_tc_flower_action_order_ok(act,
							   EFX_TC_AO_DEC_TTL)) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Unsupported: multiple dec ttl");
				return -EOPNOTSUPP;
			}

			/* check pedit can be achieved with decrement action */
			tr_ttl = match->value.ip_ttl - 1;
			if ((fa->mangle.val >> 24) == tr_ttl) {
				act->do_ttl_dec = 1;
				return 0;
			}

			fallthrough;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Unsupported: only support mangle on the hop_limit field");
			return -EOPNOTSUPP;
		}
		break;
	default:
		NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled mangle htype %u for action rule",
				       fa->mangle.htype);
		return -EOPNOTSUPP;
	}

	return 0;
}
/**
 * efx_tc_incomplete_mangle() - check for leftover partial pedits
 * @mung: accumulator for partial mangles
 * @extack: netlink extended ack for reporting errors
 *
 * Since the MAE can only overwrite whole fields, any partial
 * field mangle left over on reaching packet delivery (mirred or
 * end of TC actions) cannot be offloaded. Check for any such
 * and reject them with -%EOPNOTSUPP.
 */
static int efx_tc_incomplete_mangle(struct efx_tc_mangler_state *mung,
				    struct netlink_ext_ack *extack)
{
	if (mung->dst_mac_32 || mung->dst_mac_16) {
		NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of destination MAC address");
		return -EOPNOTSUPP;
	}
	if (mung->src_mac_16 || mung->src_mac_32) {
		NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of source MAC address");
		return -EOPNOTSUPP;
	}
	return 0;
}
static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
					 struct net_device *net_dev,
					 struct flow_cls_offload *tc)
{
	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_flow_rule *rule = NULL, *old = NULL;
	struct efx_tc_action_set *act = NULL;
	bool found = false, uplinked = false;
	const struct flow_action_entry *fa;
	struct efx_tc_match match;
	struct efx_rep *to_efv;
	s64 rc;
	int i;

	memset(&match, 0, sizeof(match));
	rc = efx_tc_flower_parse_match(efx, fr, &match, NULL);
	if (rc)
		return rc;
	/* The rule as given to us doesn't specify a source netdevice.
	 * But, determining whether packets from a VF should match it is
	 * complicated, so leave those to the software slowpath: qualify
	 * the filter with source m-port == wire.
	 */
	rc = efx_tc_flower_external_mport(efx, EFX_EFV_PF);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port for foreign filter");
		return rc;
	}
	match.value.ingress_port = rc;
	match.mask.ingress_port = ~0;

	if (tc->common.chain_index) {
		struct efx_tc_recirc_id *rid;

		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index, net_dev);
		if (IS_ERR(rid)) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Failed to allocate a hardware recirculation ID for chain_index %u",
					       tc->common.chain_index);
			return PTR_ERR(rid);
		}
		match.rid = rid;
		match.value.recirc_id = rid->fw_id;
	}
	match.mask.recirc_id = 0xff;
	/* AR table can't match on DO_CT (+trk). But a commonly used pattern is
	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
	 */
	if (match.mask.ct_state_trk && match.value.ct_state_trk &&
	    match.mask.ct_state_est && match.value.ct_state_est)
		match.mask.ct_state_trk = 0;
	/* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
	 * match +trk-est (CT_HIT=0) despite being on an established connection.
	 * So make -est imply -tcp_syn_fin_rst match to ensure these packets
	 * still hit the software path.
	 */
	if (match.mask.ct_state_est && !match.value.ct_state_est) {
		if (match.value.tcp_syn_fin_rst) {
			/* Can't offload this combination */
			rc = -EOPNOTSUPP;
			goto release;
		}
		match.mask.tcp_syn_fin_rst = true;
	}

	flow_action_for_each(i, fa, &fr->action) {
		switch (fa->id) {
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED: /* mirred means mirror here */
			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
			if (IS_ERR(to_efv))
				continue;
			found = true;
			break;
		default:
			break;
		}
	}
	if (!found) { /* We don't care. */
		netif_dbg(efx, drv, efx->net_dev,
			  "Ignoring foreign filter that doesn't egdev us\n");
		rc = -EOPNOTSUPP;
		goto release;
	}

	rc = efx_mae_match_check_caps(efx, &match.mask, NULL);
	if (rc)
		goto release;
	if (efx_tc_match_is_encap(&match.mask)) {
		enum efx_encap_type type;

		type = efx_tc_indr_netdev_type(net_dev);
		if (type == EFX_ENCAP_TYPE_NONE) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Egress encap match on unsupported tunnel device");
			rc = -EOPNOTSUPP;
			goto release;
		}

		rc = efx_mae_check_encap_type_supported(efx, type);
		if (rc) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Firmware reports no support for %s encap match",
					       efx_tc_encap_type_name(type));
			goto release;
		}

		rc = efx_tc_flower_record_encap_match(efx, &match, type,
						      EFX_TC_EM_DIRECT, 0, 0,
						      extack);
		if (rc)
			goto release;
	} else {
		/* This is not a tunnel decap rule, ignore it */
		netif_dbg(efx, drv, efx->net_dev,
			  "Ignoring foreign filter without encap match\n");
		rc = -EOPNOTSUPP;
		goto release;
	}
	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule) {
		rc = -ENOMEM;
		goto release;
	}
	INIT_LIST_HEAD(&rule->acts.list);
	rule->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
						&rule->linkage,
						efx_tc_match_action_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Ignoring already-offloaded rule (cookie %lx)\n",
			  tc->cookie);
		rc = -EEXIST;
		goto release;
	}

	act = kzalloc(sizeof(*act), GFP_USER);
	if (!act) {
		rc = -ENOMEM;
		goto release;
	}

	/* Parse actions. For foreign rules we only support decap & redirect.
	 * See corresponding code in efx_tc_flower_replace() for theory of
	 * operation & how 'act' cursor is used.
	 */
	flow_action_for_each(i, fa, &fr->action) {
		struct efx_tc_action_set save;

		switch (fa->id) {
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED:
			/* See corresponding code in efx_tc_flower_replace() for
			 * long explanations of what's going on here.
			 */
			save = *act;
			if (fa->hw_stats) {
				struct efx_tc_counter_index *ctr;

				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
					NL_SET_ERR_MSG_FMT_MOD(extack,
							       "hw_stats_type %u not supported (only 'delayed')",
							       fa->hw_stats);
					rc = -EOPNOTSUPP;
					goto release;
				}
				if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
					rc = -EOPNOTSUPP;
					goto release;
				}

				ctr = efx_tc_flower_get_counter_index(efx,
								      tc->cookie,
								      EFX_TC_COUNTER_TYPE_AR);
				if (IS_ERR(ctr)) {
					rc = PTR_ERR(ctr);
					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
					goto release;
				}
				act->count = ctr;
				INIT_LIST_HEAD(&act->count_user);
			}

			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
				/* can't happen */
				rc = -EOPNOTSUPP;
				NL_SET_ERR_MSG_MOD(extack,
						   "Deliver action violates action order (can't happen)");
				goto release;
			}
			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
			/* PF implies egdev is us, in which case we really
			 * want to deliver to the uplink (because this is an
			 * ingress filter). If we don't recognise the egdev
			 * at all, then we'd better trap so SW can handle it.
			 */
			if (IS_ERR(to_efv))
				to_efv = EFX_EFV_PF;
			if (to_efv == EFX_EFV_PF) {
				if (uplinked)
					break;
				uplinked = true;
			}
			rc = efx_tc_flower_internal_mport(efx, to_efv);
			if (rc < 0) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
				goto release;
			}
			act->dest_mport = rc;
			act->deliver = 1;
			rc = efx_mae_alloc_action_set(efx, act);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Failed to write action set to hw (mirred)");
				goto release;
			}
			list_add_tail(&act->list, &rule->acts.list);
			act = NULL;
			if (fa->id == FLOW_ACTION_REDIRECT)
				break; /* end of the line */
			/* Mirror, so continue on with saved act */
			act = kzalloc(sizeof(*act), GFP_USER);
			if (!act) {
				rc = -ENOMEM;
				goto release;
			}
			*act = save;
			break;
		case FLOW_ACTION_TUNNEL_DECAP:
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DECAP)) {
				rc = -EINVAL;
				NL_SET_ERR_MSG_MOD(extack, "Decap action violates action order");
				goto release;
			}
			act->decap = 1;
			/* If we previously delivered/trapped to uplink, now
			 * that we've decapped we'll want another copy if we
			 * try to deliver/trap to uplink again.
			 */
			uplinked = false;
			break;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
					       fa->id);
			rc = -EOPNOTSUPP;
			goto release;
		}
	}
	if (act) {
		/* Not shot/redirected, so deliver to default dest (which is
		 * the uplink, as this is an ingress filter)
		 */
		efx_mae_mport_uplink(efx, &act->dest_mport);
		act->deliver = 1;

		rc = efx_mae_alloc_action_set(efx, act);
		if (rc) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
			goto release;
		}
		list_add_tail(&act->list, &rule->acts.list);
		act = NULL; /* Prevent double-free in error path */
	}
	rule->match = match;

	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed foreign filter (cookie %lx)\n",
		  tc->cookie);

	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
		goto release;
	}
	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
				 rule->acts.fw_id, &rule->fw_id);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release_acts;
	}
	return 0;

release_acts:
	efx_mae_free_action_set_list(efx, &rule->acts);
release:
	/* We failed to insert the rule, so free up any entries we created in
	 * subsidiary tables.
	 */
	if (match.rid)
		efx_tc_put_recirc_id(efx, match.rid);
	if (act)
		efx_tc_free_action_set(efx, act, false);
	if (rule) {
		rhashtable_remove_fast(&efx->tc->match_action_ht,
				       &rule->linkage,
				       efx_tc_match_action_ht_params);
		efx_tc_free_action_set_list(efx, &rule->acts, false);
	}
	kfree(rule);
	if (match.encap)
		efx_tc_flower_release_encap_match(efx, match.encap);
	return rc;
}
static int efx_tc_flower_replace_lhs(struct efx_nic *efx,
				     struct flow_cls_offload *tc,
				     struct flow_rule *fr,
				     struct efx_tc_match *match,
				     struct efx_rep *efv,
				     struct net_device *net_dev)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_rule *rule, *old;
	int rc;

	if (tc->common.chain_index) {
		NL_SET_ERR_MSG_MOD(extack, "LHS rule only allowed in chain 0");
		return -EOPNOTSUPP;
	}

	if (match->mask.ct_state_trk && match->value.ct_state_trk) {
		NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk");
		return -EOPNOTSUPP;
	}
	/* LHS rules are always -trk, so we don't need to match on that */
	match->mask.ct_state_trk = 0;
	match->value.ct_state_trk = 0;

	rc = efx_mae_match_check_caps_lhs(efx, &match->mask, extack);
	if (rc)
		return rc;

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule)
		return -ENOMEM;
	rule->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
						&rule->linkage,
						efx_tc_lhs_rule_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
		kfree(rule);
		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
		return -EEXIST;
	}

	/* See note in efx_tc_flower_replace() regarding passed net_dev
	 * (used for efx_tc_get_recirc_id()).
	 */
	rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, efx->net_dev, rule);
	if (rc)
		goto release;

	rule->match = *match;

	rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release;
	}
	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed lhs rule (cookie %lx)\n",
		  tc->cookie);
	return 0;

release:
	efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act);
	rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage,
			       efx_tc_lhs_rule_ht_params);
	kfree(rule);
	return rc;
}
static int efx_tc_flower_replace(struct efx_nic *efx,
				 struct net_device *net_dev,
				 struct flow_cls_offload *tc,
				 struct efx_rep *efv)
{
	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
	struct netlink_ext_ack *extack = tc->common.extack;
	const struct ip_tunnel_info *encap_info = NULL;
	struct efx_tc_flow_rule *rule = NULL, *old;
	struct efx_tc_mangler_state mung = {};
	struct efx_tc_action_set *act = NULL;
	const struct flow_action_entry *fa;
	struct efx_rep *from_efv, *to_efv;
	struct efx_tc_match match;
	u32 acts_id;
	s64 rc;
	int i;

	if (!tc_can_offload_extack(efx->net_dev, extack))
		return -EOPNOTSUPP;
	if (WARN_ON(!efx->tc))
		return -ENETDOWN;
	if (WARN_ON(!efx->tc->up))
		return -ENETDOWN;

	from_efv = efx_tc_flower_lookup_efv(efx, net_dev);
	if (IS_ERR(from_efv)) {
		/* Not from our PF or representors, so probably a tunnel dev */
		return efx_tc_flower_replace_foreign(efx, net_dev, tc);
	}

	if (efv != from_efv) {
		/* can't happen */
		NL_SET_ERR_MSG_FMT_MOD(extack, "for %s efv is %snull but from_efv is %snull (can't happen)",
				       netdev_name(net_dev), efv ? "non-" : "",
				       from_efv ? "non-" : "");
		return -EINVAL;
	}
	memset(&match, 0, sizeof(match));
	rc = efx_tc_flower_external_mport(efx, from_efv);
	if (rc < 0) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port");
		return rc;
	}
	match.value.ingress_port = rc;
	match.mask.ingress_port = ~0;
	rc = efx_tc_flower_parse_match(efx, fr, &match, extack);
	if (rc)
		return rc;
	if (efx_tc_match_is_encap(&match.mask)) {
		NL_SET_ERR_MSG_MOD(extack, "Ingress enc_key matches not supported");
		return -EOPNOTSUPP;
	}

	if (efx_tc_rule_is_lhs_rule(fr, &match))
		return efx_tc_flower_replace_lhs(efx, tc, fr, &match, efv,
						 net_dev);

	/* chain_index 0 is always recirc_id 0 (and does not appear in recirc_ht).
	 * Conveniently, match.rid == NULL and match.value.recirc_id == 0 owing
	 * to the initial memset(), so we don't need to do anything in that case.
	 */
	if (tc->common.chain_index) {
		struct efx_tc_recirc_id *rid;

		/* Note regarding passed net_dev:
		 * VFreps and PF can share chain namespace, as they have
		 * distinct ingress_mports. So we don't need to burn an
		 * extra recirc_id if both use the same chain_index.
		 * (Strictly speaking, we could give each VFrep its own
		 * recirc_id namespace that doesn't take IDs away from the
		 * PF, but that would require a bunch of additional IDAs -
		 * one for each representor - and that's not likely to be
		 * the main cause of recirc_id exhaustion anyway.)
		 */
		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index,
					   efx->net_dev);
		if (IS_ERR(rid)) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "Failed to allocate a hardware recirculation ID for chain_index %u",
					       tc->common.chain_index);
			return PTR_ERR(rid);
		}
		match.rid = rid;
		match.value.recirc_id = rid->fw_id;
	}
	match.mask.recirc_id = 0xff;
	/* AR table can't match on DO_CT (+trk). But a commonly used pattern is
	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
	 */
	if (match.mask.ct_state_trk && match.value.ct_state_trk &&
	    match.mask.ct_state_est && match.value.ct_state_est)
		match.mask.ct_state_trk = 0;
	/* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
	 * match +trk-est (CT_HIT=0) despite being on an established connection.
	 * So make -est imply -tcp_syn_fin_rst match to ensure these packets
	 * still hit the software path.
	 */
	if (match.mask.ct_state_est && !match.value.ct_state_est) {
		if (match.value.tcp_syn_fin_rst) {
			/* Can't offload this combination */
			rc = -EOPNOTSUPP;
			goto release;
		}
		match.mask.tcp_syn_fin_rst = true;
	}

	rc = efx_mae_match_check_caps(efx, &match.mask, extack);
	if (rc)
		goto release;

	rule = kzalloc(sizeof(*rule), GFP_USER);
	if (!rule) {
		rc = -ENOMEM;
		goto release;
	}
	INIT_LIST_HEAD(&rule->acts.list);
	rule->cookie = tc->cookie;
	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
						&rule->linkage,
						efx_tc_match_action_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
		rc = -EEXIST;
		goto release;
	}

	act = kzalloc(sizeof(*act), GFP_USER);
	if (!act) {
		rc = -ENOMEM;
		goto release;
	}
	/**
	 * DOC: TC action translation
	 *
	 * Actions in TC are sequential and cumulative, with delivery actions
	 * potentially anywhere in the order. The EF100 MAE, however, takes
	 * an 'action set list' consisting of 'action sets', each of which is
	 * applied to the _original_ packet, and consists of a set of optional
	 * actions in a fixed order with delivery at the end.
	 * To translate between these two models, we maintain a 'cursor', @act,
	 * which describes the cumulative effect of all the packet-mutating
	 * actions encountered so far; on handling a delivery (mirred or drop)
	 * action, once the action-set has been inserted into hardware, we
	 * append @act to the action-set list (@rule->acts); if this is a pipe
	 * action (mirred mirror) we then allocate a new @act with a copy of
	 * the cursor state _before_ the delivery action, otherwise we set @act
	 * to %NULL.
	 * This ensures that every allocated action-set is either attached to
	 * @rule->acts or pointed to by @act (and never both), and that only
	 * those action-sets in @rule->acts exist in hardware. Consequently,
	 * in the failure path, @act only needs to be freed in memory, whereas
	 * for @rule->acts we remove each action-set from hardware before
	 * freeing it (efx_tc_free_action_set_list()), even if the action-set
	 * list itself is not in hardware.
	 */
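	/* Added commentary, a worked example of the above model: the TC
	 * action list "vlan pop, mirred mirror dev A, pedit eth dst, mirred
	 * redirect dev B" becomes two action sets, {vlan pop, deliver to A}
	 * and {vlan pop, pedit, deliver to B}, each applied to the original
	 * packet.
	 */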
	flow_action_for_each(i, fa, &fr->action) {
		struct efx_tc_action_set save;
		u16 tci;

		if (!act) {
			/* more actions after a non-pipe action */
			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
			rc = -EINVAL;
			goto release;
		}

		if ((fa->id == FLOW_ACTION_REDIRECT ||
		     fa->id == FLOW_ACTION_MIRRED ||
		     fa->id == FLOW_ACTION_DROP) && fa->hw_stats) {
			struct efx_tc_counter_index *ctr;

			/* Currently the only actions that want stats are
			 * mirred and gact (ok, shot, trap, goto-chain), which
			 * means we want stats just before delivery. Also,
			 * note that tunnel_key set shouldn't change the length
			 * — it's only the subsequent mirred that does that,
			 * and the stats are taken _before_ the mirred action
			 * happens.
			 */
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
				/* All supported actions that count either steal
				 * (gact shot, mirred redirect) or clone act
				 * (mirred mirror), so we should never get two
				 * count actions on one action_set.
				 */
				NL_SET_ERR_MSG_MOD(extack, "Count-action conflict (can't happen)");
				rc = -EOPNOTSUPP;
				goto release;
			}

			if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
				NL_SET_ERR_MSG_FMT_MOD(extack, "hw_stats_type %u not supported (only 'delayed')",
						       fa->hw_stats);
				rc = -EOPNOTSUPP;
				goto release;
			}

			ctr = efx_tc_flower_get_counter_index(efx, tc->cookie,
							      EFX_TC_COUNTER_TYPE_AR);
			if (IS_ERR(ctr)) {
				rc = PTR_ERR(ctr);
				NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
				goto release;
			}
			act->count = ctr;
			INIT_LIST_HEAD(&act->count_user);
		}

		switch (fa->id) {
		case FLOW_ACTION_DROP:
			rc = efx_mae_alloc_action_set(efx, act);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (drop)");
				goto release;
			}
			list_add_tail(&act->list, &rule->acts.list);
			act = NULL; /* end of the line */
			break;
		case FLOW_ACTION_REDIRECT:
		case FLOW_ACTION_MIRRED:
			save = *act;

			if (encap_info) {
				struct efx_tc_encap_action *encap;

				if (!efx_tc_flower_action_order_ok(act,
								   EFX_TC_AO_ENCAP)) {
					rc = -EOPNOTSUPP;
					NL_SET_ERR_MSG_MOD(extack, "Encap action violates action order");
					goto release;
				}
				encap = efx_tc_flower_create_encap_md(
						efx, encap_info, fa->dev, extack);
				if (IS_ERR_OR_NULL(encap)) {
					rc = PTR_ERR(encap);
					if (!rc)
						rc = -EIO; /* arbitrary */
					goto release;
				}
				act->encap_md = encap;
				list_add_tail(&act->encap_user, &encap->users);
				act->dest_mport = encap->dest_mport;
				act->deliver = 1;
				if (act->count && !WARN_ON(!act->count->cnt)) {
					/* This counter is used by an encap
					 * action, which needs a reference back
					 * so it can prod neighbouring whenever
					 * traffic is seen.
					 */
					spin_lock_bh(&act->count->cnt->lock);
					list_add_tail(&act->count_user,
						      &act->count->cnt->users);
					spin_unlock_bh(&act->count->cnt->lock);
				}
				rc = efx_mae_alloc_action_set(efx, act);
				if (rc) {
					NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (encap)");
					goto release;
				}
				list_add_tail(&act->list, &rule->acts.list);
				act->user = &rule->acts;
				act = NULL;
				if (fa->id == FLOW_ACTION_REDIRECT)
					break; /* end of the line */
				/* Mirror, so continue on with saved act */
				save.count = NULL;
				act = kzalloc(sizeof(*act), GFP_USER);
				if (!act) {
					rc = -ENOMEM;
					goto release;
				}
				*act = save;
				break;
			}
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
				/* can't happen */
				rc = -EOPNOTSUPP;
				NL_SET_ERR_MSG_MOD(extack, "Deliver action violates action order (can't happen)");
				goto release;
			}

			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
			if (IS_ERR(to_efv)) {
				NL_SET_ERR_MSG_MOD(extack, "Mirred egress device not on switch");
				rc = PTR_ERR(to_efv);
				goto release;
			}
			rc = efx_tc_flower_external_mport(efx, to_efv);
			if (rc < 0) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
				goto release;
			}
			act->dest_mport = rc;
			act->deliver = 1;
			rc = efx_mae_alloc_action_set(efx, act);
			if (rc) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (mirred)");
				goto release;
			}
			list_add_tail(&act->list, &rule->acts.list);
			act = NULL;
			if (fa->id == FLOW_ACTION_REDIRECT)
				break; /* end of the line */
			/* Mirror, so continue on with saved act */
			save.count = NULL;
			act = kzalloc(sizeof(*act), GFP_USER);
			if (!act) {
				rc = -ENOMEM;
				goto release;
			}
			*act = save;
			break;
		case FLOW_ACTION_VLAN_POP:
			if (act->vlan_push) {
				act->vlan_push--;
			} else if (efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_POP)) {
				act->vlan_pop++;
			} else {
				NL_SET_ERR_MSG_MOD(extack,
						   "More than two VLAN pops, or action order violated");
				rc = -EINVAL;
				goto release;
			}
			break;
		case FLOW_ACTION_VLAN_PUSH:
			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_PUSH)) {
				rc = -EINVAL;
				NL_SET_ERR_MSG_MOD(extack,
						   "More than two VLAN pushes, or action order violated");
				goto release;
			}
			tci = fa->vlan.vid & VLAN_VID_MASK;
			tci |= fa->vlan.prio << VLAN_PRIO_SHIFT;
			act->vlan_tci[act->vlan_push] = cpu_to_be16(tci);
			act->vlan_proto[act->vlan_push] = fa->vlan.proto;
			act->vlan_push++;
			break;
		case FLOW_ACTION_ADD:
			rc = efx_tc_pedit_add(efx, act, fa, extack);
			if (rc < 0)
				goto release;
			break;
		case FLOW_ACTION_MANGLE:
			rc = efx_tc_mangle(efx, act, fa, &mung, extack, &match);
			if (rc < 0)
				goto release;
			break;
		case FLOW_ACTION_TUNNEL_ENCAP:
			if (encap_info) {
				/* Can't specify encap multiple times.
				 * If you want to overwrite an existing
				 * encap_info, use an intervening
				 * FLOW_ACTION_TUNNEL_DECAP to clear it.
				 */
				NL_SET_ERR_MSG_MOD(extack, "Tunnel key set when already set");
				rc = -EINVAL;
				goto release;
			}
			if (!fa->tunnel) {
				NL_SET_ERR_MSG_MOD(extack, "Tunnel key set is missing key");
				rc = -EOPNOTSUPP;
				goto release;
			}
			encap_info = fa->tunnel;
			break;
		case FLOW_ACTION_TUNNEL_DECAP:
			if (encap_info) {
				encap_info = NULL;
				break;
			}
			/* Since we don't support enc_key matches on ingress
			 * (and if we did there'd be no tunnel-device to give
			 * us a type), we can't offload a decap that's not
			 * just undoing a previous encap action.
			 */
			NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device");
			rc = -EOPNOTSUPP;
			goto release;
		default:
			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
					       fa->id);
			rc = -EOPNOTSUPP;
			goto release;
		}
	}

	rc = efx_tc_incomplete_mangle(&mung, extack);
	if (rc < 0)
		goto release;
	if (act) {
		/* Not shot/redirected, so deliver to default dest */
		if (from_efv == EFX_EFV_PF)
			/* Rule applies to traffic from the wire,
			 * and default dest is thus the PF
			 */
			efx_mae_mport_uplink(efx, &act->dest_mport);
		else
			/* Representor, so rule applies to traffic from
			 * representee, and default dest is thus the rep.
			 * All reps use the same mport for delivery
			 */
			efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
					    &act->dest_mport);
		act->deliver = 1;
		rc = efx_mae_alloc_action_set(efx, act);
		if (rc) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
			goto release;
		}
		list_add_tail(&act->list, &rule->acts.list);
		act = NULL; /* Prevent double-free in error path */
	}

	netif_dbg(efx, drv, efx->net_dev,
		  "Successfully parsed filter (cookie %lx)\n",
		  tc->cookie);

	rule->match = match;

	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
		goto release;
	}
	if (from_efv == EFX_EFV_PF)
		/* PF netdev, so rule applies to traffic from wire */
		rule->fallback = &efx->tc->facts.pf;
	else
		/* repdev, so rule applies to traffic from representee */
		rule->fallback = &efx->tc->facts.reps;
	if (!efx_tc_check_ready(efx, rule)) {
		netif_dbg(efx, drv, efx->net_dev, "action not ready for hw\n");
		acts_id = rule->fallback->fw_id;
	} else {
		netif_dbg(efx, drv, efx->net_dev, "ready for hw\n");
		acts_id = rule->acts.fw_id;
	}
	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
				 acts_id, &rule->fw_id);
	if (rc) {
		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
		goto release_acts;
	}
	return 0;

release_acts:
	efx_mae_free_action_set_list(efx, &rule->acts);
release:
	/* We failed to insert the rule, so free up any entries we created in
	 * subsidiary tables.
	 */
	if (match.rid)
		efx_tc_put_recirc_id(efx, match.rid);
	if (act)
		efx_tc_free_action_set(efx, act, false);
	if (rule) {
		if (!old)
			rhashtable_remove_fast(&efx->tc->match_action_ht,
					       &rule->linkage,
					       efx_tc_match_action_ht_params);
		efx_tc_free_action_set_list(efx, &rule->acts, false);
	}
	kfree(rule);
	return rc;
}
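
/* Tear down the TC flower rule identified by tc->cookie.  LHS (left-hand
 * side, i.e. conntrack lookup) rules and ordinary match-action rules live
 * in separate hashtables, so try the LHS table first and fall back to the
 * match-action table.
 */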
static int efx_tc_flower_destroy(struct efx_nic *efx,
				 struct net_device *net_dev,
				 struct flow_cls_offload *tc)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_lhs_rule *lhs_rule;
	struct efx_tc_flow_rule *rule;

	lhs_rule = rhashtable_lookup_fast(&efx->tc->lhs_rule_ht, &tc->cookie,
					  efx_tc_lhs_rule_ht_params);
	if (lhs_rule) {
		/* Remove it from HW */
		efx_mae_remove_lhs_rule(efx, lhs_rule);
		/* Delete it from SW */
		efx_tc_flower_release_lhs_actions(efx, &lhs_rule->lhs_act);
		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &lhs_rule->linkage,
				       efx_tc_lhs_rule_ht_params);
		if (lhs_rule->match.encap)
			efx_tc_flower_release_encap_match(efx, lhs_rule->match.encap);
		netif_dbg(efx, drv, efx->net_dev, "Removed (lhs) filter %lx\n",
			  lhs_rule->cookie);
		kfree(lhs_rule);
		return 0;
	}

	rule = rhashtable_lookup_fast(&efx->tc->match_action_ht, &tc->cookie,
				      efx_tc_match_action_ht_params);
	if (!rule) {
		/* Only log a message if we're the ingress device.  Otherwise
		 * it's a foreign filter and we might just not have been
		 * interested (e.g. we might not have been the egress device
		 * either).
		 */
		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
			netif_warn(efx, drv, efx->net_dev,
				   "Filter %lx not found to remove\n",
				   tc->cookie);
		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
		return -ENOENT;
	}

	/* Remove it from HW */
	efx_tc_delete_rule(efx, rule);
	/* Delete it from SW */
	rhashtable_remove_fast(&efx->tc->match_action_ht, &rule->linkage,
			       efx_tc_match_action_ht_params);
	netif_dbg(efx, drv, efx->net_dev, "Removed filter %lx\n", rule->cookie);
	kfree(rule);
	return 0;
}
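
/* Hardware counters accumulate in the struct efx_tc_counter; TC wants
 * increments, so report only the delta since the previous query and
 * remember the totals we have already reported.
 */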
static int efx_tc_flower_stats(struct efx_nic *efx, struct net_device *net_dev,
			       struct flow_cls_offload *tc)
{
	struct netlink_ext_ack *extack = tc->common.extack;
	struct efx_tc_counter_index *ctr;
	struct efx_tc_counter *cnt;
	u64 packets, bytes;

	ctr = efx_tc_flower_find_counter_index(efx, tc->cookie);
	if (!ctr) {
		/* See comment in efx_tc_flower_destroy() */
		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
			if (net_ratelimit())
				netif_warn(efx, drv, efx->net_dev,
					   "Filter %lx not found for stats\n",
					   tc->cookie);
		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
		return -ENOENT;
	}
	if (WARN_ON(!ctr->cnt)) /* can't happen */
		return -EIO;
	cnt = ctr->cnt;

	spin_lock_bh(&cnt->lock);
	/* Report only new pkts/bytes since last time TC asked */
	packets = cnt->packets;
	bytes = cnt->bytes;
	flow_stats_update(&tc->stats, bytes - cnt->old_bytes,
			  packets - cnt->old_packets, 0, cnt->touched,
			  FLOW_ACTION_HW_STATS_DELAYED);
	cnt->old_packets = packets;
	cnt->old_bytes = bytes;
	spin_unlock_bh(&cnt->lock);
	return 0;
}
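
/* Single entry point for flower offload requests (replace, destroy,
 * stats), dispatched on tc->command.  All handlers run under
 * efx->tc->mutex, which serialises them against each other and against
 * teardown.
 */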
int efx_tc_flower(struct efx_nic *efx, struct net_device *net_dev,
		  struct flow_cls_offload *tc, struct efx_rep *efv)
{
	int rc;

	if (!efx->tc)
		return -EOPNOTSUPP;

	mutex_lock(&efx->tc->mutex);
	switch (tc->command) {
	case FLOW_CLS_REPLACE:
		rc = efx_tc_flower_replace(efx, net_dev, tc, efv);
		break;
	case FLOW_CLS_DESTROY:
		rc = efx_tc_flower_destroy(efx, net_dev, tc);
		break;
	case FLOW_CLS_STATS:
		rc = efx_tc_flower_stats(efx, net_dev, tc);
		break;
	default:
		rc = -EOPNOTSUPP;
		break;
	}
	mutex_unlock(&efx->tc->mutex);
	return rc;
}
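
/* Default rules catch traffic that no TC rule claims: each matches
 * everything from ing_port, at lower priority than TC rules
 * (EFX_TC_PRIO_DFLT vs EFX_TC_PRIO_TC), and delivers it to eg_port.
 */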
static int efx_tc_configure_default_rule(struct efx_nic *efx, u32 ing_port,
					 u32 eg_port, struct efx_tc_flow_rule *rule)
{
	struct efx_tc_action_set_list *acts = &rule->acts;
	struct efx_tc_match *match = &rule->match;
	struct efx_tc_action_set *act;
	int rc;

	match->value.ingress_port = ing_port;
	match->mask.ingress_port = ~0;
	act = kzalloc(sizeof(*act), GFP_KERNEL);
	if (!act)
		return -ENOMEM;
	act->deliver = 1;
	act->dest_mport = eg_port;
	rc = efx_mae_alloc_action_set(efx, act);
	if (rc)
		goto fail1;
	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
	list_add_tail(&act->list, &acts->list);
	rc = efx_mae_alloc_action_set_list(efx, acts);
	if (rc)
		goto fail2;
	rc = efx_mae_insert_rule(efx, match, EFX_TC_PRIO_DFLT,
				 acts->fw_id, &rule->fw_id);
	if (rc)
		goto fail3;
	return 0;
fail3:
	efx_mae_free_action_set_list(efx, acts);
fail2:
	list_del(&act->list);
	efx_mae_free_action_set(efx, act->fw_id);
fail1:
	kfree(act);
	return rc;
}
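
/* The wrappers below differ only in their m-port pairings: traffic from
 * the PF defaults to the wire, traffic from the wire defaults to the PF,
 * and traffic from each representee defaults to the shared representor
 * m-port.
 */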
static int efx_tc_configure_default_rule_pf(struct efx_nic *efx)
{
	struct efx_tc_flow_rule *rule = &efx->tc->dflt.pf;
	u32 ing_port, eg_port;

	efx_mae_mport_uplink(efx, &ing_port);
	efx_mae_mport_wire(efx, &eg_port);
	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
}

static int efx_tc_configure_default_rule_wire(struct efx_nic *efx)
{
	struct efx_tc_flow_rule *rule = &efx->tc->dflt.wire;
	u32 ing_port, eg_port;

	efx_mae_mport_wire(efx, &ing_port);
	efx_mae_mport_uplink(efx, &eg_port);
	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
}

int efx_tc_configure_default_rule_rep(struct efx_rep *efv)
{
	struct efx_tc_flow_rule *rule = &efv->dflt;
	struct efx_nic *efx = efv->parent;
	u32 ing_port, eg_port;

	efx_mae_mport_mport(efx, efv->mport, &ing_port);
	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
}

void efx_tc_deconfigure_default_rule(struct efx_nic *efx,
				     struct efx_tc_flow_rule *rule)
{
	if (rule->fw_id != MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL)
		efx_tc_delete_rule(efx, rule);
	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
}
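
/* Fallback action-set-lists deliver to the same default destinations as
 * the default rules.  A TC rule whose own action set list is not yet
 * usable in hardware (see efx_tc_check_ready(), e.g. an encap action
 * still waiting on neighbour resolution) points at one of these instead.
 */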
static int efx_tc_configure_fallback_acts(struct efx_nic *efx, u32 eg_port,
					  struct efx_tc_action_set_list *acts)
{
	struct efx_tc_action_set *act;
	int rc;

	act = kzalloc(sizeof(*act), GFP_KERNEL);
	if (!act)
		return -ENOMEM;
	act->deliver = 1;
	act->dest_mport = eg_port;
	rc = efx_mae_alloc_action_set(efx, act);
	if (rc)
		goto fail1;
	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
	list_add_tail(&act->list, &acts->list);
	rc = efx_mae_alloc_action_set_list(efx, acts);
	if (rc)
		goto fail2;
	return 0;
fail2:
	list_del(&act->list);
	efx_mae_free_action_set(efx, act->fw_id);
fail1:
	kfree(act);
	return rc;
}

static int efx_tc_configure_fallback_acts_pf(struct efx_nic *efx)
{
	struct efx_tc_action_set_list *acts = &efx->tc->facts.pf;
	u32 eg_port;

	efx_mae_mport_uplink(efx, &eg_port);
	return efx_tc_configure_fallback_acts(efx, eg_port, acts);
}

static int efx_tc_configure_fallback_acts_reps(struct efx_nic *efx)
{
	struct efx_tc_action_set_list *acts = &efx->tc->facts.reps;
	u32 eg_port;

	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
	return efx_tc_configure_fallback_acts(efx, eg_port, acts);
}

static void efx_tc_deconfigure_fallback_acts(struct efx_nic *efx,
					     struct efx_tc_action_set_list *acts)
{
	efx_tc_free_action_set_list(efx, acts, true);
}
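
/* Allocate a dedicated m-port to act as the delivery destination for all
 * representors.  Its selector doubles as the vport ID that the RX filters
 * in efx_tc_insert_rep_filters() use to steer representee traffic.
 */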
static int efx_tc_configure_rep_mport(struct efx_nic *efx)
{
	u32 rep_mport_label;
	int rc;

	rc = efx_mae_allocate_mport(efx, &efx->tc->reps_mport_id, &rep_mport_label);
	if (rc)
		return rc;
	pci_dbg(efx->pci_dev, "created rep mport 0x%08x (0x%04x)\n",
		efx->tc->reps_mport_id, rep_mport_label);
	/* Use mport *selector* as vport ID */
	efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
			    &efx->tc->reps_mport_vport_id);
	return 0;
}

static void efx_tc_deconfigure_rep_mport(struct efx_nic *efx)
{
	efx_mae_free_mport(efx, efx->tc->reps_mport_id);
	efx->tc->reps_mport_id = MAE_MPORT_SELECTOR_NULL;
}
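
/* Unknown-unicast and all-multicast filters on the representors' vport
 * catch everything delivered to the rep m-port, so it lands on the PF
 * (via the TC extra channel) for forwarding up the representor netdevs.
 */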
int efx_tc_insert_rep_filters(struct efx_nic *efx)
{
	struct efx_filter_spec promisc, allmulti;
	int rc;

	if (efx->type->is_vf)
		return 0;
	if (!efx->tc)
		return 0;
	efx_filter_init_rx(&promisc, EFX_FILTER_PRI_REQUIRED, 0, 0);
	efx_filter_set_uc_def(&promisc);
	efx_filter_set_vport_id(&promisc, efx->tc->reps_mport_vport_id);
	rc = efx_filter_insert_filter(efx, &promisc, false);
	if (rc < 0)
		return rc;
	efx->tc->reps_filter_uc = rc;
	efx_filter_init_rx(&allmulti, EFX_FILTER_PRI_REQUIRED, 0, 0);
	efx_filter_set_mc_def(&allmulti);
	efx_filter_set_vport_id(&allmulti, efx->tc->reps_mport_vport_id);
	rc = efx_filter_insert_filter(efx, &allmulti, false);
	if (rc < 0)
		return rc;
	efx->tc->reps_filter_mc = rc;
	return 0;
}

void efx_tc_remove_rep_filters(struct efx_nic *efx)
{
	if (efx->type->is_vf)
		return;
	if (!efx->tc)
		return;
	if (efx->tc->reps_filter_mc >= 0)
		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_mc);
	efx->tc->reps_filter_mc = -1;
	if (efx->tc->reps_filter_uc >= 0)
		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_uc);
	efx->tc->reps_filter_uc = -1;
}
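
/* Bring up the TC offload state that touches the hardware: check MAE
 * capabilities, install default rules and fallback action sets, allocate
 * the representor m-port, and register for indirect block (tunnel netdev)
 * callbacks.
 */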
int efx_init_tc(struct efx_nic *efx)
{
	int rc;

	rc = efx_mae_get_caps(efx, efx->tc->caps);
	if (rc)
		return rc;
	if (efx->tc->caps->match_field_count > MAE_NUM_FIELDS)
		/* Firmware supports some match fields the driver doesn't know
		 * about.  Not fatal, unless any of those fields are required
		 * (MAE_FIELD_SUPPORTED_MATCH_ALWAYS), but if so we don't know.
		 */
		netif_warn(efx, probe, efx->net_dev,
			   "FW reports additional match fields %u\n",
			   efx->tc->caps->match_field_count);
	if (efx->tc->caps->action_prios < EFX_TC_PRIO__NUM) {
		netif_err(efx, probe, efx->net_dev,
			  "Too few action prios supported (have %u, need %u)\n",
			  efx->tc->caps->action_prios, EFX_TC_PRIO__NUM);
		return -EIO;
	}
	rc = efx_tc_configure_default_rule_pf(efx);
	if (rc)
		return rc;
	rc = efx_tc_configure_default_rule_wire(efx);
	if (rc)
		return rc;
	rc = efx_tc_configure_rep_mport(efx);
	if (rc)
		return rc;
	rc = efx_tc_configure_fallback_acts_pf(efx);
	if (rc)
		return rc;
	rc = efx_tc_configure_fallback_acts_reps(efx);
	if (rc)
		return rc;
	rc = efx_mae_get_tables(efx);
	if (rc)
		return rc;
	rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx);
	if (rc)
		goto out_free;
	efx->tc->up = true;
	return 0;
out_free:
	efx_mae_free_tables(efx);
	return rc;
}
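
/* Undo efx_init_tc().  Tolerates partial initialisation: it can run even
 * if efx_init_struct_tc() or efx_init_tc() failed part-way.
 */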
void efx_fini_tc(struct efx_nic *efx)
{
	/* We can get called even if efx_init_struct_tc() failed */
	if (!efx->tc)
		return;
	if (efx->tc->up)
		flow_indr_dev_unregister(efx_tc_indr_setup_cb, efx, efx_tc_block_unbind);
	efx_tc_deconfigure_rep_mport(efx);
	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.pf);
	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.wire);
	efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.pf);
	efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.reps);
	efx->tc->up = false;
	efx_mae_free_tables(efx);
}

/* At teardown time, all TC filter rules (and thus all resources they created)
 * should already have been removed.  If we find any in our hashtables, make a
 * cursory attempt to clean up the software side.
 */
static void efx_tc_encap_match_free(void *ptr, void *__unused)
{
	struct efx_tc_encap_match *encap = ptr;

	WARN_ON(refcount_read(&encap->ref));
	kfree(encap);
}

static void efx_tc_recirc_free(void *ptr, void *arg)
{
	struct efx_tc_recirc_id *rid = ptr;
	struct efx_nic *efx = arg;

	WARN_ON(refcount_read(&rid->ref));
	ida_free(&efx->tc->recirc_ida, rid->fw_id);
	kfree(rid);
}

static void efx_tc_lhs_free(void *ptr, void *arg)
{
	struct efx_tc_lhs_rule *rule = ptr;
	struct efx_nic *efx = arg;

	netif_err(efx, drv, efx->net_dev,
		  "tc lhs_rule %lx still present at teardown, removing\n",
		  rule->cookie);

	if (rule->lhs_act.zone)
		efx_tc_ct_unregister_zone(efx, rule->lhs_act.zone);
	if (rule->lhs_act.count)
		efx_tc_flower_put_counter_index(efx, rule->lhs_act.count);
	efx_mae_remove_lhs_rule(efx, rule);

	kfree(rule);
}

static void efx_tc_mac_free(void *ptr, void *__unused)
{
	struct efx_tc_mac_pedit_action *ped = ptr;

	WARN_ON(refcount_read(&ped->ref));
	kfree(ped);
}

static void efx_tc_flow_free(void *ptr, void *arg)
{
	struct efx_tc_flow_rule *rule = ptr;
	struct efx_nic *efx = arg;

	netif_err(efx, drv, efx->net_dev,
		  "tc rule %lx still present at teardown, removing\n",
		  rule->cookie);

	/* Also releases entries in subsidiary tables */
	efx_tc_delete_rule(efx, rule);

	kfree(rule);
}
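
/* Allocate and initialise the driver's TC state: hashtables, the recirc
 * ID allocator, and the bookkeeping for default and fallback rules.
 * Hardware-facing setup happens later, in efx_init_tc().
 */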
int efx_init_struct_tc(struct efx_nic *efx)
{
	int rc;

	if (efx->type->is_vf)
		return 0;

	efx->tc = kzalloc(sizeof(*efx->tc), GFP_KERNEL);
	if (!efx->tc)
		return -ENOMEM;
	efx->tc->caps = kzalloc(sizeof(struct mae_caps), GFP_KERNEL);
	if (!efx->tc->caps) {
		rc = -ENOMEM;
		goto fail_alloc_caps;
	}
	INIT_LIST_HEAD(&efx->tc->block_list);

	mutex_init(&efx->tc->mutex);
	init_waitqueue_head(&efx->tc->flush_wq);
	rc = efx_tc_init_encap_actions(efx);
	if (rc < 0)
		goto fail_encap_actions;
	rc = efx_tc_init_counters(efx);
	if (rc < 0)
		goto fail_counters;
	rc = rhashtable_init(&efx->tc->mac_ht, &efx_tc_mac_ht_params);
	if (rc < 0)
		goto fail_mac_ht;
	rc = rhashtable_init(&efx->tc->encap_match_ht, &efx_tc_encap_match_ht_params);
	if (rc < 0)
		goto fail_encap_match_ht;
	rc = rhashtable_init(&efx->tc->match_action_ht, &efx_tc_match_action_ht_params);
	if (rc < 0)
		goto fail_match_action_ht;
	rc = rhashtable_init(&efx->tc->lhs_rule_ht, &efx_tc_lhs_rule_ht_params);
	if (rc < 0)
		goto fail_lhs_rule_ht;
	rc = efx_tc_init_conntrack(efx);
	if (rc < 0)
		goto fail_conntrack;
	rc = rhashtable_init(&efx->tc->recirc_ht, &efx_tc_recirc_ht_params);
	if (rc < 0)
		goto fail_recirc_ht;
	ida_init(&efx->tc->recirc_ida);
	efx->tc->reps_filter_uc = -1;
	efx->tc->reps_filter_mc = -1;
	INIT_LIST_HEAD(&efx->tc->dflt.pf.acts.list);
	efx->tc->dflt.pf.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
	INIT_LIST_HEAD(&efx->tc->dflt.wire.acts.list);
	efx->tc->dflt.wire.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
	INIT_LIST_HEAD(&efx->tc->facts.pf.list);
	efx->tc->facts.pf.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
	INIT_LIST_HEAD(&efx->tc->facts.reps.list);
	efx->tc->facts.reps.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
	efx->extra_channel_type[EFX_EXTRA_CHANNEL_TC] = &efx_tc_channel_type;
	return 0;
fail_recirc_ht:
	efx_tc_destroy_conntrack(efx);
fail_conntrack:
	rhashtable_destroy(&efx->tc->lhs_rule_ht);
fail_lhs_rule_ht:
	rhashtable_destroy(&efx->tc->match_action_ht);
fail_match_action_ht:
	rhashtable_destroy(&efx->tc->encap_match_ht);
fail_encap_match_ht:
	rhashtable_destroy(&efx->tc->mac_ht);
fail_mac_ht:
	efx_tc_destroy_counters(efx);
fail_counters:
	efx_tc_destroy_encap_actions(efx);
fail_encap_actions:
	mutex_destroy(&efx->tc->mutex);
	kfree(efx->tc->caps);
fail_alloc_caps:
	kfree(efx->tc);
	efx->tc = NULL;
	return rc;
}
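
/* Final teardown of TC state.  All rules should be gone by now; the
 * _free callbacks above warn (and make a cursory cleanup attempt) if any
 * are still present in the hashtables.
 */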
void efx_fini_struct_tc(struct efx_nic *efx)
{
	if (!efx->tc)
		return;

	mutex_lock(&efx->tc->mutex);
	EFX_WARN_ON_PARANOID(efx->tc->dflt.pf.fw_id !=
			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
	EFX_WARN_ON_PARANOID(efx->tc->dflt.wire.fw_id !=
			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
	EFX_WARN_ON_PARANOID(efx->tc->facts.pf.fw_id !=
			     MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
	EFX_WARN_ON_PARANOID(efx->tc->facts.reps.fw_id !=
			     MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
	rhashtable_free_and_destroy(&efx->tc->lhs_rule_ht, efx_tc_lhs_free, efx);
	rhashtable_free_and_destroy(&efx->tc->match_action_ht, efx_tc_flow_free,
				    efx);
	rhashtable_free_and_destroy(&efx->tc->encap_match_ht,
				    efx_tc_encap_match_free, NULL);
	efx_tc_fini_conntrack(efx);
	rhashtable_free_and_destroy(&efx->tc->recirc_ht, efx_tc_recirc_free, efx);
	WARN_ON(!ida_is_empty(&efx->tc->recirc_ida));
	ida_destroy(&efx->tc->recirc_ida);
	rhashtable_free_and_destroy(&efx->tc->mac_ht, efx_tc_mac_free, NULL);
	efx_tc_fini_counters(efx);
	efx_tc_fini_encap_actions(efx);
	mutex_unlock(&efx->tc->mutex);
	mutex_destroy(&efx->tc->mutex);
	kfree(efx->tc->caps);
	kfree(efx->tc);
	efx->tc = NULL;
}