drivers/net/ethernet/sfc/tc.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /****************************************************************************
   3  * Driver for Solarflare network controllers and boards
   4  * Copyright 2019 Solarflare Communications Inc.
   5  * Copyright 2020-2022 Xilinx Inc.
   6  *
   7  * This program is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 as published
   9  * by the Free Software Foundation, incorporated herein by reference.
  10  */
  11
  12 #include <net/pkt_cls.h>
  13 #include <net/vxlan.h>
  14 #include <net/geneve.h>
  15 #include <net/tc_act/tc_ct.h>
  16 #include "tc.h"
  17 #include "tc_bindings.h"
  18 #include "tc_encap_actions.h"
  19 #include "tc_conntrack.h"
  20 #include "mae.h"
  21 #include "ef100_rep.h"
  22 #include "efx.h"
  23
  24 enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
  25 {
  26         if (netif_is_vxlan(net_dev))
  27                 return EFX_ENCAP_TYPE_VXLAN;
  28         if (netif_is_geneve(net_dev))
  29                 return EFX_ENCAP_TYPE_GENEVE;
  30
  31         return EFX_ENCAP_TYPE_NONE;
  32 }
  33
  34 #define EFX_TC_HDR_TYPE_TTL_MASK ((u32)0xff)
  35 /* Hoplimit is stored in the most significant byte in the pedit ipv6 header action */
  36 #define EFX_TC_HDR_TYPE_HLIMIT_MASK ~((u32)0xff000000)
  37 #define EFX_EFV_PF      NULL
  38 /* Look up the representor information (efv) for a device.
  39  * May return NULL for the PF (us), or an error pointer for a device that
  40  * isn't supported as a TC offload endpoint
  41  */
  42 struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
  43                                          struct net_device *dev)
  44 {
  45         struct efx_rep *efv;
  46
  47         if (!dev)
  48                 return ERR_PTR(-EOPNOTSUPP);
  49         /* Is it us (the PF)? */
  50         if (dev == efx->net_dev)
  51                 return EFX_EFV_PF;
  52         /* Is it an efx vfrep at all? */
  53         if (dev->netdev_ops != &efx_ef100_rep_netdev_ops)
  54                 return ERR_PTR(-EOPNOTSUPP);
  55         /* Is it ours?  We don't support TC rules that include another
  56          * EF100's netdevices (not even on another port of the same NIC).
  57          */
  58         efv = netdev_priv(dev);
  59         if (efv->parent != efx)
  60                 return ERR_PTR(-EOPNOTSUPP);
  61         return efv;
  62 }
  63
  64 /* Convert a driver-internal vport ID into an internal device (PF or VF) */
  65 static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv)
  66 {
  67         u32 mport;
  68
  69         if (IS_ERR(efv))
  70                 return PTR_ERR(efv);
  71         if (!efv) /* device is PF (us) */
  72                 efx_mae_mport_uplink(efx, &mport);
  73         else /* device is repr */
  74                 efx_mae_mport_mport(efx, efv->mport, &mport);
  75         return mport;
  76 }
  77
  78 /* Convert a driver-internal vport ID into an external device (wire or VF) */
  79 s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
  80 {
  81         u32 mport;
  82
  83         if (IS_ERR(efv))
  84                 return PTR_ERR(efv);
  85         if (!efv) /* device is PF (us) */
  86                 efx_mae_mport_wire(efx, &mport);
  87         else /* device is repr */
  88                 efx_mae_mport_mport(efx, efv->mport, &mport);
  89         return mport;
  90 }
  91
  92 static const struct rhashtable_params efx_tc_mac_ht_params = {
  93         .key_len        = offsetofend(struct efx_tc_mac_pedit_action, h_addr),
  94         .key_offset     = 0,
  95         .head_offset    = offsetof(struct efx_tc_mac_pedit_action, linkage),
  96 };
  97
  98 static const struct rhashtable_params efx_tc_encap_match_ht_params = {
  99         .key_len        = offsetof(struct efx_tc_encap_match, linkage),
 100         .key_offset     = 0,
 101         .head_offset    = offsetof(struct efx_tc_encap_match, linkage),
 102 };
 103
 104 static const struct rhashtable_params efx_tc_match_action_ht_params = {
 105         .key_len        = sizeof(unsigned long),
 106         .key_offset     = offsetof(struct efx_tc_flow_rule, cookie),
 107         .head_offset    = offsetof(struct efx_tc_flow_rule, linkage),
 108 };
 109
 110 static const struct rhashtable_params efx_tc_lhs_rule_ht_params = {
 111         .key_len        = sizeof(unsigned long),
 112         .key_offset     = offsetof(struct efx_tc_lhs_rule, cookie),
 113         .head_offset    = offsetof(struct efx_tc_lhs_rule, linkage),
 114 };
 115
 116 static const struct rhashtable_params efx_tc_recirc_ht_params = {
 117         .key_len        = offsetof(struct efx_tc_recirc_id, linkage),
 118         .key_offset     = 0,
 119         .head_offset    = offsetof(struct efx_tc_recirc_id, linkage),
 120 };
 121
 122 static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx,
 123                                                              unsigned char h_addr[ETH_ALEN],
 124                                                              struct netlink_ext_ack *extack)
 125 {
 126         struct efx_tc_mac_pedit_action *ped, *old;
 127         int rc;
 128
 129         ped = kzalloc(sizeof(*ped), GFP_USER);
 130         if (!ped)
 131                 return ERR_PTR(-ENOMEM);
 132         memcpy(ped->h_addr, h_addr, ETH_ALEN);
 133         old = rhashtable_lookup_get_insert_fast(&efx->tc->mac_ht,
 134                                                 &ped->linkage,
 135                                                 efx_tc_mac_ht_params);
 136         if (old) {
 137                 /* don't need our new entry */
 138                 kfree(ped);
 139                 if (IS_ERR(old)) /* oh dear, it's actually an error */
 140                         return ERR_CAST(old);
 141                 if (!refcount_inc_not_zero(&old->ref))
 142                         return ERR_PTR(-EAGAIN);
 143                 /* existing entry found, ref taken */
 144                 return old;
 145         }
 146
 147         rc = efx_mae_allocate_pedit_mac(efx, ped);
 148         if (rc < 0) {
 149                 NL_SET_ERR_MSG_MOD(extack, "Failed to store pedit MAC address in hw");
 150                 goto out_remove;
 151         }
 152
 153         /* ref and return */
 154         refcount_set(&ped->ref, 1);
 155         return ped;
 156 out_remove:
 157         rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
 158                                efx_tc_mac_ht_params);
 159         kfree(ped);
 160         return ERR_PTR(rc);
 161 }
 162
 163 static void efx_tc_flower_put_mac(struct efx_nic *efx,
 164                                   struct efx_tc_mac_pedit_action *ped)
 165 {
 166         if (!refcount_dec_and_test(&ped->ref))
 167                 return; /* still in use */
 168         rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
 169                                efx_tc_mac_ht_params);
 170         efx_mae_free_pedit_mac(efx, ped);
 171         kfree(ped);
 172 }
 173
 174 static void efx_tc_free_action_set(struct efx_nic *efx,
 175                                    struct efx_tc_action_set *act, bool in_hw)
 176 {
 177         /* Failure paths calling this on the 'cursor' action set in_hw=false,
 178          * because if the alloc had succeeded we'd've put it in acts.list and
 179          * not still have it in act.
 180          */
 181         if (in_hw) {
 182                 efx_mae_free_action_set(efx, act->fw_id);
 183                 /* in_hw is true iff we are on an acts.list; make sure to
 184                  * remove ourselves from that list before we are freed.
 185                  */
 186                 list_del(&act->list);
 187         }
 188         if (act->count) {
 189                 spin_lock_bh(&act->count->cnt->lock);
 190                 if (!list_empty(&act->count_user))
 191                         list_del(&act->count_user);
 192                 spin_unlock_bh(&act->count->cnt->lock);
 193                 efx_tc_flower_put_counter_index(efx, act->count);
 194         }
 195         if (act->encap_md) {
 196                 list_del(&act->encap_user);
 197                 efx_tc_flower_release_encap_md(efx, act->encap_md);
 198         }
 199         if (act->src_mac)
 200                 efx_tc_flower_put_mac(efx, act->src_mac);
 201         if (act->dst_mac)
 202                 efx_tc_flower_put_mac(efx, act->dst_mac);
 203         kfree(act);
 204 }
 205
 206 static void efx_tc_free_action_set_list(struct efx_nic *efx,
 207                                         struct efx_tc_action_set_list *acts,
 208                                         bool in_hw)
 209 {
 210         struct efx_tc_action_set *act, *next;
 211
 212         /* Failure paths set in_hw=false, because usually the acts didn't get
 213          * to efx_mae_alloc_action_set_list(); if they did, the failure tree
 214          * has a separate efx_mae_free_action_set_list() before calling us.
 215          */
 216         if (in_hw)
 217                 efx_mae_free_action_set_list(efx, acts);
 218         /* Any act that's on the list will be in_hw even if the list isn't */
 219         list_for_each_entry_safe(act, next, &acts->list, list)
 220                 efx_tc_free_action_set(efx, act, true);
 221         /* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */
 222 }
 223
 224 /* Boilerplate for the simple 'copy a field' cases */
 225 #define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)       \
 226 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) {            \
 227         struct flow_match_##_type fm;                                   \
 228                                                                         \
 229         flow_rule_match_##_tcget(rule, &fm);                            \
 230         match->value._field = fm.key->_tcfield;                         \
 231         match->mask._field = fm.mask->_tcfield;                         \
 232 }
 233 #define MAP_KEY_AND_MASK(_name, _type, _tcfield, _field)        \
 234         _MAP_KEY_AND_MASK(_name, _type, _type, _tcfield, _field)
 235 #define MAP_ENC_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)    \
 236         _MAP_KEY_AND_MASK(ENC_##_name, _type, _tcget, _tcfield, _field)
 237
     /* Translate the flower match of @rule into the driver's value/mask form.
      *
      * Copies the dissector keys present in @rule into @match->value and
      * @match->mask.  @efx is not referenced by the body of this function.
      *
      * Returns 0 on success.  On failure a message is recorded in @extack and
      * the return value is:
      *   -EOPNOTSUPP  for flower keys outside the accepted set, unsupported
      *                control / enc_control flags, an enc addr_type that is not
      *                exact or not an accepted family, enc keys without
      *                enc_control, or unsupported ct_state / ct_label matches;
      *   -EINVAL      for L3/L4 keys without an exact IPv4/IPv6 ethertype match,
      *                or L4 keys without an exact TCP/UDP ip_proto match.
      *
      * Checks are interleaved with the copying, so @match may already have
      * been partially filled in when an error is returned.
      */
 238 static int efx_tc_flower_parse_match(struct efx_nic *efx,
 239                                      struct flow_rule *rule,
 240                                      struct efx_tc_match *match,
 241                                      struct netlink_ext_ack *extack)
 242 {
 243         struct flow_dissector *dissector = rule->match.dissector;
             /* 4 or 6 once the CONTROL key names an address family, else 0 */
 244         unsigned char ipv = 0;
 245
 246         /* Owing to internal TC infelicities, the IPV6_ADDRS key might be set
 247          * even on IPv4 filters; so rather than relying on dissector->used_keys
 248          * we check the addr_type in the CONTROL key.  If we don't find it (or
 249          * it's masked, which should never happen), we treat both IPV4_ADDRS
 250          * and IPV6_ADDRS as absent.
 251          */
 252         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
 253                 struct flow_match_control fm;
 254
 255                 flow_rule_match_control(rule, &fm);
 256                 if (IS_ALL_ONES(fm.mask->addr_type))
 257                         switch (fm.key->addr_type) {
 258                         case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
 259                                 ipv = 4;
 260                                 break;
 261                         case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
 262                                 ipv = 6;
 263                                 break;
 264                         default:
 265                                 break;
 266                         }
 267
                     /* Fragment flags become single-bit matches; any other
                      * masked control flag is rejected just below.
                      */
 268                 if (fm.mask->flags & FLOW_DIS_IS_FRAGMENT) {
 269                         match->value.ip_frag = fm.key->flags & FLOW_DIS_IS_FRAGMENT;
 270                         match->mask.ip_frag = true;
 271                 }
 272                 if (fm.mask->flags & FLOW_DIS_FIRST_FRAG) {
 273                         match->value.ip_firstfrag = fm.key->flags & FLOW_DIS_FIRST_FRAG;
 274                         match->mask.ip_firstfrag = true;
 275                 }
 276                 if (fm.mask->flags & ~(FLOW_DIS_IS_FRAGMENT | FLOW_DIS_FIRST_FRAG)) {
 277                         NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on control.flags %#x",
 278                                                fm.mask->flags);
 279                         return -EOPNOTSUPP;
 280                 }
 281         }
             /* Reject the rule if it uses any key outside the list below */
 282         if (dissector->used_keys &
 283             ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
 284               BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
 285               BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
 286               BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) |
 287               BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) |
 288               BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
 289               BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
 290               BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
 291               BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
 292               BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
 293               BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
 294               BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
 295               BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
 296               BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
 297               BIT_ULL(FLOW_DISSECTOR_KEY_CT) |
 298               BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
 299               BIT_ULL(FLOW_DISSECTOR_KEY_IP))) {
 300                 NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported flower keys %#llx",
 301                                        dissector->used_keys);
 302                 return -EOPNOTSUPP;
 303         }
 304
 305         MAP_KEY_AND_MASK(BASIC, basic, n_proto, eth_proto);
 306         /* Make sure we're IP if any L3/L4 keys used. */
 307         if (!IS_ALL_ONES(match->mask.eth_proto) ||
 308             !(match->value.eth_proto == htons(ETH_P_IP) ||
 309               match->value.eth_proto == htons(ETH_P_IPV6)))
 310                 if (dissector->used_keys &
 311                     (BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
 312                      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
 313                      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
 314                      BIT_ULL(FLOW_DISSECTOR_KEY_IP) |
 315                      BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
 316                         NL_SET_ERR_MSG_FMT_MOD(extack,
 317                                                "L3/L4 flower keys %#llx require protocol ipv[46]",
 318                                                dissector->used_keys);
 319                         return -EINVAL;
 320                 }
 321
             /* VLAN key fills slot [0].  The TCI is built from the priority
              * shifted up by 13 bits OR'd with the VLAN ID, and is stored
              * big-endian.  Nothing is written if all three masks are zero.
              */
 322         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
 323                 struct flow_match_vlan fm;
 324
 325                 flow_rule_match_vlan(rule, &fm);
 326                 if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
 327                         match->value.vlan_proto[0] = fm.key->vlan_tpid;
 328                         match->mask.vlan_proto[0] = fm.mask->vlan_tpid;
 329                         match->value.vlan_tci[0] = cpu_to_be16(fm.key->vlan_priority << 13 |
 330                                                                fm.key->vlan_id);
 331                         match->mask.vlan_tci[0] = cpu_to_be16(fm.mask->vlan_priority << 13 |
 332                                                               fm.mask->vlan_id);
 333                 }
 334         }
 335
             /* CVLAN key is handled identically but fills slot [1] */
 336         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
 337                 struct flow_match_vlan fm;
 338
 339                 flow_rule_match_cvlan(rule, &fm);
 340                 if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
 341                         match->value.vlan_proto[1] = fm.key->vlan_tpid;
 342                         match->mask.vlan_proto[1] = fm.mask->vlan_tpid;
 343                         match->value.vlan_tci[1] = cpu_to_be16(fm.key->vlan_priority << 13 |
 344                                                                fm.key->vlan_id);
 345                         match->mask.vlan_tci[1] = cpu_to_be16(fm.mask->vlan_priority << 13 |
 346                                                               fm.mask->vlan_id);
 347                 }
 348         }
 349
 350         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
 351                 struct flow_match_eth_addrs fm;
 352
 353                 flow_rule_match_eth_addrs(rule, &fm);
 354                 ether_addr_copy(match->value.eth_saddr, fm.key->src);
 355                 ether_addr_copy(match->value.eth_daddr, fm.key->dst);
 356                 ether_addr_copy(match->mask.eth_saddr, fm.mask->src);
 357                 ether_addr_copy(match->mask.eth_daddr, fm.mask->dst);
 358         }
 359
 360         MAP_KEY_AND_MASK(BASIC, basic, ip_proto, ip_proto);
 361         /* Make sure we're TCP/UDP if any L4 keys used. */
 362         if ((match->value.ip_proto != IPPROTO_UDP &&
 363              match->value.ip_proto != IPPROTO_TCP) || !IS_ALL_ONES(match->mask.ip_proto))
 364                 if (dissector->used_keys &
 365                     (BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
 366                      BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
 367                         NL_SET_ERR_MSG_FMT_MOD(extack,
 368                                                "L4 flower keys %#llx require ipproto udp or tcp",
 369                                                dissector->used_keys);
 370                         return -EINVAL;
 371                 }
 372         MAP_KEY_AND_MASK(IP, ip, tos, ip_tos);
 373         MAP_KEY_AND_MASK(IP, ip, ttl, ip_ttl);
             /* IP addresses are only copied for the family the CONTROL key
              * selected above; the IPv6 branch exists only with CONFIG_IPV6.
              */
 374         if (ipv == 4) {
 375                 MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, src, src_ip);
 376                 MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, dst, dst_ip);
 377         }
 378 #ifdef CONFIG_IPV6
 379         else if (ipv == 6) {
 380                 MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, src, src_ip6);
 381                 MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, dst, dst_ip6);
 382         }
 383 #endif
 384         MAP_KEY_AND_MASK(PORTS, ports, src, l4_sport);
 385         MAP_KEY_AND_MASK(PORTS, ports, dst, l4_dport);
 386         MAP_KEY_AND_MASK(TCP, tcp, flags, tcp_flags);
             /* Encap (enc_*) keys are only copied when ENC_CONTROL is present,
              * matches no flags, and gives an exact addr_type; if it is absent,
              * the presence of any other enc key is an error.
              */
 387         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
 388                 struct flow_match_control fm;
 389
 390                 flow_rule_match_enc_control(rule, &fm);
 391                 if (fm.mask->flags) {
 392                         NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on enc_control.flags %#x",
 393                                                fm.mask->flags);
 394                         return -EOPNOTSUPP;
 395                 }
 396                 if (!IS_ALL_ONES(fm.mask->addr_type)) {
 397                         NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported enc addr_type mask %u (key %u)",
 398                                                fm.mask->addr_type,
 399                                                fm.key->addr_type);
 400                         return -EOPNOTSUPP;
 401                 }
                     /* Without CONFIG_IPV6 an IPv6 addr_type has no case of its
                      * own and is rejected by the default branch.
                      */
 402                 switch (fm.key->addr_type) {
 403                 case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
 404                         MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
 405                                              src, enc_src_ip);
 406                         MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
 407                                              dst, enc_dst_ip);
 408                         break;
 409 #ifdef CONFIG_IPV6
 410                 case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
 411                         MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
 412                                              src, enc_src_ip6);
 413                         MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
 414                                              dst, enc_dst_ip6);
 415                         break;
 416 #endif
 417                 default:
 418                         NL_SET_ERR_MSG_FMT_MOD(extack,
 419                                                "Unsupported enc addr_type %u (supported are IPv4, IPv6)",
 420                                                fm.key->addr_type);
 421                         return -EOPNOTSUPP;
 422                 }
 423                 MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, tos, enc_ip_tos);
 424                 MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, ttl, enc_ip_ttl);
 425                 MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, src, enc_sport);
 426                 MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, dst, enc_dport);
 427                 MAP_ENC_KEY_AND_MASK(KEYID, enc_keyid, enc_keyid, keyid, enc_keyid);
 428         } else if (dissector->used_keys &
 429                    (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
 430                     BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
 431                     BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
 432                     BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
 433                     BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
 434                 NL_SET_ERR_MSG_FMT_MOD(extack,
 435                                        "Flower enc keys require enc_control (keys: %#llx)",
 436                                        dissector->used_keys);
 437                 return -EOPNOTSUPP;
 438         }
             /* Conntrack: only the 'tracked' and 'established' state bits are
              * translated, each into a boolean value/mask pair.  Any other
              * masked ct_state bit, or any masked ct_label bit, is rejected.
              */
 439         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) {
 440                 struct flow_match_ct fm;
 441
 442                 flow_rule_match_ct(rule, &fm);
 443                 match->value.ct_state_trk = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
 444                 match->mask.ct_state_trk = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
 445                 match->value.ct_state_est = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
 446                 match->mask.ct_state_est = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
 447                 if (fm.mask->ct_state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
 448                                           TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)) {
 449                         NL_SET_ERR_MSG_FMT_MOD(extack,
 450                                                "Unsupported ct_state match %#x",
 451                                                fm.mask->ct_state);
 452                         return -EOPNOTSUPP;
 453                 }
 454                 match->value.ct_mark = fm.key->ct_mark;
 455                 match->mask.ct_mark = fm.mask->ct_mark;
 456                 match->value.ct_zone = fm.key->ct_zone;
 457                 match->mask.ct_zone = fm.mask->ct_zone;
 458
 459                 if (memchr_inv(fm.mask->ct_labels, 0, sizeof(fm.mask->ct_labels))) {
 460                         NL_SET_ERR_MSG_MOD(extack, "Matching on ct_label not supported");
 461                         return -EOPNOTSUPP;
 462                 }
 463         }
 464
 465         return 0;
 466 }
 467
 468 static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
 469                                               struct efx_tc_encap_match *encap)
 470 {
 471         int rc;
 472
 473         if (!refcount_dec_and_test(&encap->ref))
 474                 return; /* still in use */
 475
 476         if (encap->type == EFX_TC_EM_DIRECT) {
 477                 rc = efx_mae_unregister_encap_match(efx, encap);
 478                 if (rc)
 479                         /* Display message but carry on and remove entry from our
 480                          * SW tables, because there's not much we can do about it.
 481                          */
 482                         netif_err(efx, drv, efx->net_dev,
 483                                   "Failed to release encap match %#x, rc %d\n",
 484                                   encap->fw_id, rc);
 485         }
 486         rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
 487                                efx_tc_encap_match_ht_params);
 488         if (encap->pseudo)
 489                 efx_tc_flower_release_encap_match(efx, encap->pseudo);
 490         kfree(encap);
 491 }
 492
 493 static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
 494                                             struct efx_tc_match *match,
 495                                             enum efx_encap_type type,
 496                                             enum efx_tc_em_pseudo_type em_type,
 497                                             u8 child_ip_tos_mask,
 498                                             __be16 child_udp_sport_mask,
 499                                             struct netlink_ext_ack *extack)
 500 {
 501         struct efx_tc_encap_match *encap, *old, *pseudo = NULL;
 502         bool ipv6 = false;
 503         int rc;
 504
 505         /* We require that the socket-defining fields (IP addrs and UDP dest
 506          * port) are present and exact-match.  Other fields may only be used
 507          * if the field-set (and any masks) are the same for all encap
 508          * matches on the same <sip,dip,dport> tuple; this is enforced by
 509          * pseudo encap matches.
 510          */
 511         if (match->mask.enc_dst_ip | match->mask.enc_src_ip) {
 512                 if (!IS_ALL_ONES(match->mask.enc_dst_ip)) {
 513                         NL_SET_ERR_MSG_MOD(extack,
 514                                            "Egress encap match is not exact on dst IP address");
 515                         return -EOPNOTSUPP;
 516                 }
 517                 if (!IS_ALL_ONES(match->mask.enc_src_ip)) {
 518                         NL_SET_ERR_MSG_MOD(extack,
 519                                            "Egress encap match is not exact on src IP address");
 520                         return -EOPNOTSUPP;
 521                 }
 522 #ifdef CONFIG_IPV6
 523                 if (!ipv6_addr_any(&match->mask.enc_dst_ip6) ||
 524                     !ipv6_addr_any(&match->mask.enc_src_ip6)) {
 525                         NL_SET_ERR_MSG_MOD(extack,
 526                                            "Egress encap match on both IPv4 and IPv6, don't understand");
 527                         return -EOPNOTSUPP;
 528                 }
 529         } else {
 530                 ipv6 = true;
 531                 if (!efx_ipv6_addr_all_ones(&match->mask.enc_dst_ip6)) {
 532                         NL_SET_ERR_MSG_MOD(extack,
 533                                            "Egress encap match is not exact on dst IP address");
 534                         return -EOPNOTSUPP;
 535                 }
 536                 if (!efx_ipv6_addr_all_ones(&match->mask.enc_src_ip6)) {
 537                         NL_SET_ERR_MSG_MOD(extack,
 538                                            "Egress encap match is not exact on src IP address");
 539                         return -EOPNOTSUPP;
 540                 }
 541 #endif
 542         }
 543         if (!IS_ALL_ONES(match->mask.enc_dport)) {
 544                 NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port");
 545                 return -EOPNOTSUPP;
 546         }
 547         if (match->mask.enc_sport || match->mask.enc_ip_tos) {
 548                 struct efx_tc_match pmatch = *match;
 549
 550                 if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */
 551                         NL_SET_ERR_MSG_MOD(extack, "Bad recursion in egress encap match handler");
 552                         return -EOPNOTSUPP;
 553                 }
 554                 pmatch.value.enc_ip_tos = 0;
 555                 pmatch.mask.enc_ip_tos = 0;
 556                 pmatch.value.enc_sport = 0;
 557                 pmatch.mask.enc_sport = 0;
 558                 rc = efx_tc_flower_record_encap_match(efx, &pmatch, type,
 559                                                       EFX_TC_EM_PSEUDO_MASK,
 560                                                       match->mask.enc_ip_tos,
 561                                                       match->mask.enc_sport,
 562                                                       extack);
 563                 if (rc)
 564                         return rc;
 565                 pseudo = pmatch.encap;
 566         }
 567         if (match->mask.enc_ip_ttl) {
 568                 NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported");
 569                 rc = -EOPNOTSUPP;
 570                 goto fail_pseudo;
 571         }
 572
 573         rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos,
 574                                             match->mask.enc_sport, extack);
 575         if (rc)
 576                 goto fail_pseudo;
 577
 578         encap = kzalloc(sizeof(*encap), GFP_USER);
 579         if (!encap) {
 580                 rc = -ENOMEM;
 581                 goto fail_pseudo;
 582         }
 583         encap->src_ip = match->value.enc_src_ip;
 584         encap->dst_ip = match->value.enc_dst_ip;
 585 #ifdef CONFIG_IPV6
 586         encap->src_ip6 = match->value.enc_src_ip6;
 587         encap->dst_ip6 = match->value.enc_dst_ip6;
 588 #endif
 589         encap->udp_dport = match->value.enc_dport;
 590         encap->tun_type = type;
 591         encap->ip_tos = match->value.enc_ip_tos;
 592         encap->ip_tos_mask = match->mask.enc_ip_tos;
 593         encap->child_ip_tos_mask = child_ip_tos_mask;
 594         encap->udp_sport = match->value.enc_sport;
 595         encap->udp_sport_mask = match->mask.enc_sport;
 596         encap->child_udp_sport_mask = child_udp_sport_mask;
 597         encap->type = em_type;
 598         encap->pseudo = pseudo;
 599         old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht,
 600                                                 &encap->linkage,
 601                                                 efx_tc_encap_match_ht_params);
 602         if (old) {
 603                 /* don't need our new entry */
 604                 kfree(encap);
 605                 if (pseudo) /* don't need our new pseudo either */
 606                         efx_tc_flower_release_encap_match(efx, pseudo);
 607                 if (IS_ERR(old)) /* oh dear, it's actually an error */
 608                         return PTR_ERR(old);
 609                 /* check old and new em_types are compatible */
 610                 switch (old->type) {
 611                 case EFX_TC_EM_DIRECT:
 612                         /* old EM is in hardware, so mustn't overlap with a
 613                          * pseudo, but may be shared with another direct EM
 614                          */
 615                         if (em_type == EFX_TC_EM_DIRECT)
 616                                 break;
 617                         NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry");
 618                         return -EEXIST;
 619                 case EFX_TC_EM_PSEUDO_MASK:
 620                         /* old EM is protecting a ToS- or src port-qualified
 621                          * filter, so may only be shared with another pseudo
 622                          * for the same ToS and src port masks.
 623                          */
 624                         if (em_type != EFX_TC_EM_PSEUDO_MASK) {
 625                                 NL_SET_ERR_MSG_FMT_MOD(extack,
 626                                                        "%s encap match conflicts with existing pseudo(MASK) entry",
 627                                                        em_type ? "Pseudo" : "Direct");
 628                                 return -EEXIST;
 629                         }
 630                         if (child_ip_tos_mask != old->child_ip_tos_mask) {
 631                                 NL_SET_ERR_MSG_FMT_MOD(extack,
 632                                                        "Pseudo encap match for TOS mask %#04x conflicts with existing mask %#04x",
 633                                                        child_ip_tos_mask,
 634                                                        old->child_ip_tos_mask);
 635                                 return -EEXIST;
 636                         }
 637                         if (child_udp_sport_mask != old->child_udp_sport_mask) {
 638                                 NL_SET_ERR_MSG_FMT_MOD(extack,
 639                                                        "Pseudo encap match for UDP src port mask %#x conflicts with existing mask %#x",
 640                                                        child_udp_sport_mask,
 641                                                        old->child_udp_sport_mask);
 642                                 return -EEXIST;
 643                         }
 644                         break;
 645                 default: /* Unrecognised pseudo-type.  Just say no */
 646                         NL_SET_ERR_MSG_FMT_MOD(extack,
 647                                                "%s encap match conflicts with existing pseudo(%d) entry",
 648                                                em_type ? "Pseudo" : "Direct",
 649                                                old->type);
 650                         return -EEXIST;
 651                 }
 652                 /* check old and new tun_types are compatible */
 653                 if (old->tun_type != type) {
 654                         NL_SET_ERR_MSG_FMT_MOD(extack,
 655                                                "Egress encap match with conflicting tun_type %u != %u",
 656                                                old->tun_type, type);
 657                         return -EEXIST;
 658                 }
 659                 if (!refcount_inc_not_zero(&old->ref))
 660                         return -EAGAIN;
 661                 /* existing entry found */
 662                 encap = old;
 663         } else {
 664                 if (em_type == EFX_TC_EM_DIRECT) {
 665                         rc = efx_mae_register_encap_match(efx, encap);
 666                         if (rc) {
 667                                 NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW");
 668                                 goto fail;
 669                         }
 670                 }
 671                 refcount_set(&encap->ref, 1);
 672         }
 673         match->encap = encap;
 674         return 0;
 675 fail:
 676         rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
 677                                efx_tc_encap_match_ht_params);
 678         kfree(encap);
 679 fail_pseudo:
 680         if (pseudo)
 681                 efx_tc_flower_release_encap_match(efx, pseudo);
 682         return rc;
 683 }
 684
 685 static struct efx_tc_recirc_id *efx_tc_get_recirc_id(struct efx_nic *efx,
 686                                                      u32 chain_index,
 687                                                      struct net_device *net_dev)
 688 {
 689         struct efx_tc_recirc_id *rid, *old;
 690         int rc;
 691
 692         rid = kzalloc(sizeof(*rid), GFP_USER);
 693         if (!rid)
 694                 return ERR_PTR(-ENOMEM);
 695         rid->chain_index = chain_index;
 696         /* We don't take a reference here, because it's implied - if there's
 697          * a rule on the net_dev that's been offloaded to us, then the net_dev
 698          * can't go away until the rule has been deoffloaded.
 699          */
 700         rid->net_dev = net_dev;
 701         old = rhashtable_lookup_get_insert_fast(&efx->tc->recirc_ht,
 702                                                 &rid->linkage,
 703                                                 efx_tc_recirc_ht_params);
 704         if (old) {
 705                 /* don't need our new entry */
 706                 kfree(rid);
 707                 if (IS_ERR(old)) /* oh dear, it's actually an error */
 708                         return ERR_CAST(old);
 709                 if (!refcount_inc_not_zero(&old->ref))
 710                         return ERR_PTR(-EAGAIN);
 711                 /* existing entry found */
 712                 rid = old;
 713         } else {
 714                 rc = ida_alloc_range(&efx->tc->recirc_ida, 1, U8_MAX, GFP_USER);
 715                 if (rc < 0) {
 716                         rhashtable_remove_fast(&efx->tc->recirc_ht,
 717                                                &rid->linkage,
 718                                                efx_tc_recirc_ht_params);
 719                         kfree(rid);
 720                         return ERR_PTR(rc);
 721                 }
 722                 rid->fw_id = rc;
 723                 refcount_set(&rid->ref, 1);
 724         }
 725         return rid;
 726 }
 727
 728 static void efx_tc_put_recirc_id(struct efx_nic *efx, struct efx_tc_recirc_id *rid)
 729 {
 730         if (!refcount_dec_and_test(&rid->ref))
 731                 return; /* still in use */
 732         rhashtable_remove_fast(&efx->tc->recirc_ht, &rid->linkage,
 733                                efx_tc_recirc_ht_params);
 734         ida_free(&efx->tc->recirc_ida, rid->fw_id);
 735         kfree(rid);
 736 }
 737
 738 static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
 739 {
 740         efx_mae_delete_rule(efx, rule->fw_id);
 741
 742         /* Release entries in subsidiary tables */
 743         efx_tc_free_action_set_list(efx, &rule->acts, true);
 744         if (rule->match.rid)
 745                 efx_tc_put_recirc_id(efx, rule->match.rid);
 746         if (rule->match.encap)
 747                 efx_tc_flower_release_encap_match(efx, rule->match.encap);
 748         rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
 749 }
 750
 751 static const char *efx_tc_encap_type_name(enum efx_encap_type typ)
 752 {
 753         switch (typ) {
 754         case EFX_ENCAP_TYPE_NONE:
 755                 return "none";
 756         case EFX_ENCAP_TYPE_VXLAN:
 757                 return "vxlan";
 758         case EFX_ENCAP_TYPE_GENEVE:
 759                 return "geneve";
 760         default:
 761                 pr_warn_once("Unknown efx_encap_type %d encountered\n", typ);
 762                 return "unknown";
 763         }
 764 }
 765
 766 /* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */
 767 enum efx_tc_action_order {
 768         EFX_TC_AO_DECAP,
 769         EFX_TC_AO_DEC_TTL,
 770         EFX_TC_AO_PEDIT_MAC_ADDRS,
 771         EFX_TC_AO_VLAN_POP,
 772         EFX_TC_AO_VLAN_PUSH,
 773         EFX_TC_AO_COUNT,
 774         EFX_TC_AO_ENCAP,
 775         EFX_TC_AO_DELIVER
 776 };
 777 /* Determine whether we can add @new action without violating order */
 778 static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
 779                                           enum efx_tc_action_order new)
 780 {
 781         switch (new) {
 782         case EFX_TC_AO_DECAP:
 783                 if (act->decap)
 784                         return false;
 785                 /* PEDIT_MAC_ADDRS must not happen before DECAP, though it
 786                  * can wait until much later
 787                  */
 788                 if (act->dst_mac || act->src_mac)
 789                         return false;
 790
 791                 /* Decrementing ttl must not happen before DECAP */
 792                 if (act->do_ttl_dec)
 793                         return false;
 794                 fallthrough;
 795         case EFX_TC_AO_VLAN_POP:
 796                 if (act->vlan_pop >= 2)
 797                         return false;
 798                 /* If we've already pushed a VLAN, we can't then pop it;
 799                  * the hardware would instead try to pop an existing VLAN
 800                  * before pushing the new one.
 801                  */
 802                 if (act->vlan_push)
 803                         return false;
 804                 fallthrough;
 805         case EFX_TC_AO_VLAN_PUSH:
 806                 if (act->vlan_push >= 2)
 807                         return false;
 808                 fallthrough;
 809         case EFX_TC_AO_COUNT:
 810                 if (act->count)
 811                         return false;
 812                 fallthrough;
 813         case EFX_TC_AO_PEDIT_MAC_ADDRS:
 814         case EFX_TC_AO_ENCAP:
 815                 if (act->encap_md)
 816                         return false;
 817                 fallthrough;
 818         case EFX_TC_AO_DELIVER:
 819                 return !act->deliver;
 820         case EFX_TC_AO_DEC_TTL:
 821                 if (act->encap_md)
 822                         return false;
 823                 return !act->do_ttl_dec;
 824         default:
 825                 /* Bad caller.  Whatever they wanted to do, say they can't. */
 826                 WARN_ON_ONCE(1);
 827                 return false;
 828         }
 829 }
 830
 831 /**
 832  * DOC: TC conntrack sequences
 833  *
 834  * The MAE hardware can handle at most two rounds of action rule matching,
 835  * consequently we support conntrack through the notion of a "left-hand side
 836  * rule".  This is a rule which typically contains only the actions "ct" and
 837  * "goto chain N", and corresponds to one or more "right-hand side rules" in
 838  * chain N, which typically match on +trk+est, and may perform ct(nat) actions.
 839  * RHS rules go in the Action Rule table as normal but with a nonzero recirc_id
 840  * (the hardware equivalent of chain_index), while LHS rules may go in either
 841  * the Action Rule or the Outer Rule table, the latter being preferred for
 842  * performance reasons, and set both DO_CT and a recirc_id in their response.
 843  *
 844  * Besides the RHS rules, there are often also similar rules matching on
 845  * +trk+new which perform the ct(commit) action.  These are not offloaded.
 846  */
 847
 848 static bool efx_tc_rule_is_lhs_rule(struct flow_rule *fr,
 849                                     struct efx_tc_match *match)
 850 {
 851         const struct flow_action_entry *fa;
 852         int i;
 853
 854         flow_action_for_each(i, fa, &fr->action) {
 855                 switch (fa->id) {
 856                 case FLOW_ACTION_GOTO:
 857                         return true;
 858                 case FLOW_ACTION_CT:
 859                         /* If rule is -trk, or doesn't mention trk at all, then
 860                          * a CT action implies a conntrack lookup (hence it's an
 861                          * LHS rule).  If rule is +trk, then a CT action could
 862                          * just be ct(nat) or even ct(commit) (though the latter
 863                          * can't be offloaded).
 864                          */
 865                         if (!match->mask.ct_state_trk || !match->value.ct_state_trk)
 866                                 return true;
 867                         break;
 868                 default:
 869                         break;
 870                 }
 871         }
 872         return false;
 873 }
 874
 875 static int efx_tc_flower_handle_lhs_actions(struct efx_nic *efx,
 876                                             struct flow_cls_offload *tc,
 877                                             struct flow_rule *fr,
 878                                             struct net_device *net_dev,
 879                                             struct efx_tc_lhs_rule *rule)
 880
 881 {
 882         struct netlink_ext_ack *extack = tc->common.extack;
 883         struct efx_tc_lhs_action *act = &rule->lhs_act;
 884         const struct flow_action_entry *fa;
 885         bool pipe = true;
 886         int i;
 887
 888         flow_action_for_each(i, fa, &fr->action) {
 889                 struct efx_tc_ct_zone *ct_zone;
 890                 struct efx_tc_recirc_id *rid;
 891
 892                 if (!pipe) {
 893                         /* more actions after a non-pipe action */
 894                         NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
 895                         return -EINVAL;
 896                 }
 897                 switch (fa->id) {
 898                 case FLOW_ACTION_GOTO:
 899                         if (!fa->chain_index) {
 900                                 NL_SET_ERR_MSG_MOD(extack, "Can't goto chain 0, no looping in hw");
 901                                 return -EOPNOTSUPP;
 902                         }
 903                         rid = efx_tc_get_recirc_id(efx, fa->chain_index,
 904                                                    net_dev);
 905                         if (IS_ERR(rid)) {
 906                                 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate a hardware recirculation ID for this chain_index");
 907                                 return PTR_ERR(rid);
 908                         }
 909                         act->rid = rid;
 910                         if (fa->hw_stats) {
 911                                 struct efx_tc_counter_index *cnt;
 912
 913                                 if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
 914                                         NL_SET_ERR_MSG_FMT_MOD(extack,
 915                                                                "hw_stats_type %u not supported (only 'delayed')",
 916                                                                fa->hw_stats);
 917                                         return -EOPNOTSUPP;
 918                                 }
 919                                 cnt = efx_tc_flower_get_counter_index(efx, tc->cookie,
 920                                                                       EFX_TC_COUNTER_TYPE_OR);
 921                                 if (IS_ERR(cnt)) {
 922                                         NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
 923                                         return PTR_ERR(cnt);
 924                                 }
 925                                 WARN_ON(act->count); /* can't happen */
 926                                 act->count = cnt;
 927                         }
 928                         pipe = false;
 929                         break;
 930                 case FLOW_ACTION_CT:
 931                         if (act->zone) {
 932                                 NL_SET_ERR_MSG_MOD(extack, "Can't offload multiple ct actions");
 933                                 return -EOPNOTSUPP;
 934                         }
 935                         if (fa->ct.action & (TCA_CT_ACT_COMMIT |
 936                                              TCA_CT_ACT_FORCE)) {
 937                                 NL_SET_ERR_MSG_MOD(extack, "Can't offload ct commit/force");
 938                                 return -EOPNOTSUPP;
 939                         }
 940                         if (fa->ct.action & TCA_CT_ACT_CLEAR) {
 941                                 NL_SET_ERR_MSG_MOD(extack, "Can't clear ct in LHS rule");
 942                                 return -EOPNOTSUPP;
 943                         }
 944                         if (fa->ct.action & (TCA_CT_ACT_NAT |
 945                                              TCA_CT_ACT_NAT_SRC |
 946                                              TCA_CT_ACT_NAT_DST)) {
 947                                 NL_SET_ERR_MSG_MOD(extack, "Can't perform NAT in LHS rule - packet isn't conntracked yet");
 948                                 return -EOPNOTSUPP;
 949                         }
 950                         if (fa->ct.action) {
 951                                 NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled ct.action %u for LHS rule\n",
 952                                                        fa->ct.action);
 953                                 return -EOPNOTSUPP;
 954                         }
 955                         ct_zone = efx_tc_ct_register_zone(efx, fa->ct.zone,
 956                                                           fa->ct.flow_table);
 957                         if (IS_ERR(ct_zone)) {
 958                                 NL_SET_ERR_MSG_MOD(extack, "Failed to register for CT updates");
 959                                 return PTR_ERR(ct_zone);
 960                         }
 961                         act->zone = ct_zone;
 962                         break;
 963                 default:
 964                         NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u for LHS rule\n",
 965                                                fa->id);
 966                         return -EOPNOTSUPP;
 967                 }
 968         }
 969
 970         if (pipe) {
 971                 NL_SET_ERR_MSG_MOD(extack, "Missing goto chain in LHS rule");
 972                 return -EOPNOTSUPP;
 973         }
 974         return 0;
 975 }
 976
 977 static void efx_tc_flower_release_lhs_actions(struct efx_nic *efx,
 978                                               struct efx_tc_lhs_action *act)
 979 {
 980         if (act->rid)
 981                 efx_tc_put_recirc_id(efx, act->rid);
 982         if (act->zone)
 983                 efx_tc_ct_unregister_zone(efx, act->zone);
 984         if (act->count)
 985                 efx_tc_flower_put_counter_index(efx, act->count);
 986 }
 987
 988 /**
 989  * struct efx_tc_mangler_state - accumulates 32-bit pedits into fields
 990  *
 991  * @dst_mac_32: dst_mac[0:3] has been populated
 992  * @dst_mac_16: dst_mac[4:5] has been populated
 993  * @src_mac_16: src_mac[0:1] has been populated
 994  * @src_mac_32: src_mac[2:5] has been populated
 995  * @dst_mac:    h_dest field of ethhdr
 996  * @src_mac:    h_source field of ethhdr
 997  *
 998  * Since FLOW_ACTION_MANGLE comes in 32-bit chunks that do not
 999  * necessarily equate to whole fields of the packet header, this
1000  * structure is used to hold the cumulative effect of the partial
1001  * field pedits that have been processed so far.
1002  */
1003 struct efx_tc_mangler_state {
1004         u8 dst_mac_32:1; /* eth->h_dest[0:3] */
1005         u8 dst_mac_16:1; /* eth->h_dest[4:5] */
1006         u8 src_mac_16:1; /* eth->h_source[0:1] */
1007         u8 src_mac_32:1; /* eth->h_source[2:5] */
1008         unsigned char dst_mac[ETH_ALEN];
1009         unsigned char src_mac[ETH_ALEN];
1010 };
1011
1012 /** efx_tc_complete_mac_mangle() - pull complete field pedits out of @mung
1013  * @efx:        NIC we're installing a flow rule on
1014  * @act:        action set (cursor) to update
1015  * @mung:       accumulated partial mangles
1016  * @extack:     netlink extended ack for reporting errors
1017  *
1018  * Check @mung to find any combinations of partial mangles that can be
1019  * combined into a complete packet field edit, add that edit to @act,
1020  * and consume the partial mangles from @mung.
1021  */
1022
1023 static int efx_tc_complete_mac_mangle(struct efx_nic *efx,
1024                                       struct efx_tc_action_set *act,
1025                                       struct efx_tc_mangler_state *mung,
1026                                       struct netlink_ext_ack *extack)
1027 {
1028         struct efx_tc_mac_pedit_action *ped;
1029
1030         if (mung->dst_mac_32 && mung->dst_mac_16) {
1031                 ped = efx_tc_flower_get_mac(efx, mung->dst_mac, extack);
1032                 if (IS_ERR(ped))
1033                         return PTR_ERR(ped);
1034
1035                 /* Check that we have not already populated dst_mac */
1036                 if (act->dst_mac)
1037                         efx_tc_flower_put_mac(efx, act->dst_mac);
1038
1039                 act->dst_mac = ped;
1040
1041                 /* consume the incomplete state */
1042                 mung->dst_mac_32 = 0;
1043                 mung->dst_mac_16 = 0;
1044         }
1045         if (mung->src_mac_16 && mung->src_mac_32) {
1046                 ped = efx_tc_flower_get_mac(efx, mung->src_mac, extack);
1047                 if (IS_ERR(ped))
1048                         return PTR_ERR(ped);
1049
1050                 /* Check that we have not already populated src_mac */
1051                 if (act->src_mac)
1052                         efx_tc_flower_put_mac(efx, act->src_mac);
1053
1054                 act->src_mac = ped;
1055
1056                 /* consume the incomplete state */
1057                 mung->src_mac_32 = 0;
1058                 mung->src_mac_16 = 0;
1059         }
1060         return 0;
1061 }
1062
1063 static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act,
1064                             const struct flow_action_entry *fa,
1065                             struct netlink_ext_ack *extack)
1066 {
1067         switch (fa->mangle.htype) {
1068         case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
1069                 switch (fa->mangle.offset) {
1070                 case offsetof(struct iphdr, ttl):
1071                         /* check that pedit applies to ttl only */
1072                         if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK)
1073                                 break;
1074
1075                         /* Adding 0xff is equivalent to decrementing the ttl.
1076                          * Other added values are not supported.
1077                          */
1078                         if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) != U8_MAX)
1079                                 break;
1080
1081                         /* check that we do not decrement ttl twice */
1082                         if (!efx_tc_flower_action_order_ok(act,
1083                                                            EFX_TC_AO_DEC_TTL)) {
1084                                 NL_SET_ERR_MSG_MOD(extack, "multiple dec ttl are not supported");
1085                                 return -EOPNOTSUPP;
1086                         }
1087                         act->do_ttl_dec = 1;
1088                         return 0;
1089                 default:
1090                         break;
1091                 }
1092                 break;
1093         case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
1094                 switch (fa->mangle.offset) {
1095                 case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
1096                         /* check that pedit applies to hoplimit only */
1097                         if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK)
1098                                 break;
1099
1100                         /* Adding 0xff is equivalent to decrementing the hoplimit.
1101                          * Other added values are not supported.
1102                          */
1103                         if ((fa->mangle.val >> 24) != U8_MAX)
1104                                 break;
1105
1106                         /* check that we do not decrement hoplimit twice */
1107                         if (!efx_tc_flower_action_order_ok(act,
1108                                                            EFX_TC_AO_DEC_TTL)) {
1109                                 NL_SET_ERR_MSG_MOD(extack, "multiple dec ttl are not supported");
1110                                 return -EOPNOTSUPP;
1111                         }
1112                         act->do_ttl_dec = 1;
1113                         return 0;
1114                 default:
1115                         break;
1116                 }
1117                 break;
1118         default:
1119                 break;
1120         }
1121
1122         NL_SET_ERR_MSG_FMT_MOD(extack,
1123                                "ttl add action type %x %x %x/%x is not supported",
1124                                fa->mangle.htype, fa->mangle.offset,
1125                                fa->mangle.val, fa->mangle.mask);
1126         return -EOPNOTSUPP;
1127 }
1128
1129 /**
1130  * efx_tc_mangle() - handle a single 32-bit (or less) pedit
1131  * @efx:        NIC we're installing a flow rule on
1132  * @act:        action set (cursor) to update
1133  * @fa:         FLOW_ACTION_MANGLE action metadata
1134  * @mung:       accumulator for partial mangles
1135  * @extack:     netlink extended ack for reporting errors
1136  * @match:      original match used along with the mangle action
1137  *
1138  * Identify the fields written by a FLOW_ACTION_MANGLE, and record
1139  * the partial mangle state in @mung.  If this mangle completes an
1140  * earlier partial mangle, consume and apply to @act by calling
1141  * efx_tc_complete_mac_mangle().
1142  */
1143
1144 static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act,
1145                          const struct flow_action_entry *fa,
1146                          struct efx_tc_mangler_state *mung,
1147                          struct netlink_ext_ack *extack,
1148                          struct efx_tc_match *match)
1149 {
1150         __le32 mac32;
1151         __le16 mac16;
1152         u8 tr_ttl;
1153
1154         switch (fa->mangle.htype) {
1155         case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
1156                 BUILD_BUG_ON(offsetof(struct ethhdr, h_dest) != 0);
1157                 BUILD_BUG_ON(offsetof(struct ethhdr, h_source) != 6);
1158                 if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_PEDIT_MAC_ADDRS)) {
1159                         NL_SET_ERR_MSG_MOD(extack,
1160                                            "Pedit mangle mac action violates action order");
1161                         return -EOPNOTSUPP;
1162                 }
1163                 switch (fa->mangle.offset) {
1164                 case 0:
1165                         if (fa->mangle.mask) {
1166                                 NL_SET_ERR_MSG_FMT_MOD(extack,
1167                                                        "mask (%#x) of eth.dst32 mangle is not supported",
1168                                                        fa->mangle.mask);
1169                                 return -EOPNOTSUPP;
1170                         }
1171                         /* Ethernet address is little-endian */
1172                         mac32 = cpu_to_le32(fa->mangle.val);
1173                         memcpy(mung->dst_mac, &mac32, sizeof(mac32));
1174                         mung->dst_mac_32 = 1;
1175                         return efx_tc_complete_mac_mangle(efx, act, mung, extack);
1176                 case 4:
1177                         if (fa->mangle.mask == 0xffff) {
1178                                 mac16 = cpu_to_le16(fa->mangle.val >> 16);
1179                                 memcpy(mung->src_mac, &mac16, sizeof(mac16));
1180                                 mung->src_mac_16 = 1;
1181                         } else if (fa->mangle.mask == 0xffff0000) {
1182                                 mac16 = cpu_to_le16((u16)fa->mangle.val);
1183                                 memcpy(mung->dst_mac + 4, &mac16, sizeof(mac16));
1184                                 mung->dst_mac_16 = 1;
1185                         } else {
1186                                 NL_SET_ERR_MSG_FMT_MOD(extack,
1187                                                        "mask (%#x) of eth+4 mangle is not high or low 16b",
1188                                                        fa->mangle.mask);
1189                                 return -EOPNOTSUPP;
1190                         }
1191                         return efx_tc_complete_mac_mangle(efx, act, mung, extack);
1192                 case 8:
1193                         if (fa->mangle.mask) {
1194                                 NL_SET_ERR_MSG_FMT_MOD(extack,
1195                                                        "mask (%#x) of eth.src32 mangle is not supported",
1196                                                        fa->mangle.mask);
1197                                 return -EOPNOTSUPP;
1198                         }
1199                         mac32 = cpu_to_le32(fa->mangle.val);
1200                         memcpy(mung->src_mac + 2, &mac32, sizeof(mac32));
1201                         mung->src_mac_32 = 1;
1202                         return efx_tc_complete_mac_mangle(efx, act, mung, extack);
1203                 default:
1204                         NL_SET_ERR_MSG_FMT_MOD(extack, "mangle eth+%u %x/%x is not supported",
1205                                                fa->mangle.offset, fa->mangle.val, fa->mangle.mask);
1206                         return -EOPNOTSUPP;
1207                 }
1208                 break;
1209         case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
1210                 switch (fa->mangle.offset) {
1211                 case offsetof(struct iphdr, ttl):
1212                         /* we currently only support pedit IP4 when it applies
1213                          * to TTL and then only when it can be achieved with a
1214                          * decrement ttl action
1215                          */
1216
1217                         /* check that pedit applies to ttl only */
1218                         if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) {
1219                                 NL_SET_ERR_MSG_FMT_MOD(extack,
1220                                                        "mask (%#x) out of range, only support mangle action on ipv4.ttl",
1221                                                        fa->mangle.mask);
1222                                 return -EOPNOTSUPP;
1223                         }
1224
1225                         /* we can only convert to a dec ttl when we have an
1226                          * exact match on the ttl field
1227                          */
1228                         if (match->mask.ip_ttl != U8_MAX) {
1229                                 NL_SET_ERR_MSG_FMT_MOD(extack,
1230                                                        "only support mangle ttl when we have an exact match, current mask (%#x)",
1231                                                        match->mask.ip_ttl);
1232                                 return -EOPNOTSUPP;
1233                         }
1234
1235                         /* check that we don't try to decrement 0, which equates
1236                          * to setting the ttl to 0xff
1237                          */
1238                         if (match->value.ip_ttl == 0) {
1239                                 NL_SET_ERR_MSG_MOD(extack,
1240                                                    "decrement ttl past 0 is not supported");
1241                                 return -EOPNOTSUPP;
1242                         }
1243
1244                         /* check that we do not decrement ttl twice */
1245                         if (!efx_tc_flower_action_order_ok(act,
1246                                                            EFX_TC_AO_DEC_TTL)) {
1247                                 NL_SET_ERR_MSG_MOD(extack,
1248                                                    "multiple dec ttl is not supported");
1249                                 return -EOPNOTSUPP;
1250                         }
1251
1252                         /* check pedit can be achieved with decrement action */
1253                         tr_ttl = match->value.ip_ttl - 1;
1254                         if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) == tr_ttl) {
1255                                 act->do_ttl_dec = 1;
1256                                 return 0;
1257                         }
1258
1259                         fallthrough;
1260                 default:
1261                         NL_SET_ERR_MSG_FMT_MOD(extack,
1262                                                "only support mangle on the ttl field (offset is %u)",
1263                                                fa->mangle.offset);
1264                         return -EOPNOTSUPP;
1265                 }
1266                 break;
1267         case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
1268                 switch (fa->mangle.offset) {
1269                 case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
1270                         /* we currently only support pedit IP6 when it applies
1271                          * to the hoplimit and then only when it can be achieved
1272                          * with a decrement hoplimit action
1273                          */
1274
1275                         /* check that pedit applies to ttl only */
1276                         if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) {
1277                                 NL_SET_ERR_MSG_FMT_MOD(extack,
1278                                                        "mask (%#x) out of range, only support mangle action on ipv6.hop_limit",
1279                                                        fa->mangle.mask);
1280
1281                                 return -EOPNOTSUPP;
1282                         }
1283
1284                         /* we can only convert to a dec ttl when we have an
1285                          * exact match on the ttl field
1286                          */
1287                         if (match->mask.ip_ttl != U8_MAX) {
1288                                 NL_SET_ERR_MSG_FMT_MOD(extack,
1289                                                        "only support hop_limit when we have an exact match, current mask (%#x)",
1290                                                        match->mask.ip_ttl);
1291                                 return -EOPNOTSUPP;
1292                         }
1293
1294                         /* check that we don't try to decrement 0, which equates
1295                          * to setting the ttl to 0xff
1296                          */
1297                         if (match->value.ip_ttl == 0) {
1298                                 NL_SET_ERR_MSG_MOD(extack,
1299                                                    "decrementing hop_limit past 0 is not supported");
1300                                 return -EOPNOTSUPP;
1301                         }
1302
1303                         /* check that we do not decrement hoplimit twice */
1304                         if (!efx_tc_flower_action_order_ok(act,
1305                                                            EFX_TC_AO_DEC_TTL)) {
1306                                 NL_SET_ERR_MSG_MOD(extack,
1307                                                    "multiple dec ttl is not supported");
1308                                 return -EOPNOTSUPP;
1309                         }
1310
1311                         /* check pedit can be achieved with decrement action */
1312                         tr_ttl = match->value.ip_ttl - 1;
1313                         if ((fa->mangle.val >> 24) == tr_ttl) {
1314                                 act->do_ttl_dec = 1;
1315                                 return 0;
1316                         }
1317
1318                         fallthrough;
1319                 default:
1320                         NL_SET_ERR_MSG_FMT_MOD(extack,
1321                                                "only support mangle on the hop_limit field");
1322                         return -EOPNOTSUPP;
1323                 }
1324         default:
1325                 NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled mangle htype %u for action rule",
1326                                        fa->mangle.htype);
1327                 return -EOPNOTSUPP;
1328         }
1329         return 0;
1330 }
1331
1332 /**
1333  * efx_tc_incomplete_mangle() - check for leftover partial pedits
1334  * @mung:       accumulator for partial mangles
1335  * @extack:     netlink extended ack for reporting errors
1336  *
1337  * Since the MAE can only overwrite whole fields, any partial
1338  * field mangle left over on reaching packet delivery (mirred or
1339  * end of TC actions) cannot be offloaded.  Check for any such
1340  * and reject them with -%EOPNOTSUPP.
1341  */
1342
1343 static int efx_tc_incomplete_mangle(struct efx_tc_mangler_state *mung,
1344                                     struct netlink_ext_ack *extack)
1345 {
1346         if (mung->dst_mac_32 || mung->dst_mac_16) {
1347                 NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of destination MAC address");
1348                 return -EOPNOTSUPP;
1349         }
1350         if (mung->src_mac_16 || mung->src_mac_32) {
1351                 NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of source MAC address");
1352                 return -EOPNOTSUPP;
1353         }
1354         return 0;
1355 }
1356
/**
 * efx_tc_flower_replace_foreign() - offload a flower rule from a foreign netdev
 * @efx:        NIC to offload the rule on
 * @net_dev:    device the rule was installed on; used to key the recirc_id
 *              lookup and to determine the tunnel (encap) type
 * @tc:         flower offload request (match, actions, cookie, chain, extack)
 *
 * Called by efx_tc_flower_replace() when @net_dev is neither our PF nor
 * one of our representors.  The rule is only accepted if
 *  - at least one mirred/redirect action targets a device that
 *    efx_tc_flower_lookup_efv() recognises, and
 *  - the match contains encap keys and @net_dev is a supported tunnel
 *    device type.
 * Anything else is rejected with -%EOPNOTSUPP.  The only actions handled
 * are redirect, mirred and tunnel_decap.
 *
 * Return: 0 on success, or a negative error code.  On failure, whatever
 * was acquired along the way (recirc_id, encap match, hashtable entry,
 * action sets) is released again before returning.
 */
static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
                                         struct net_device *net_dev,
                                         struct flow_cls_offload *tc)
{
        struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
        struct netlink_ext_ack *extack = tc->common.extack;
        struct efx_tc_flow_rule *rule = NULL, *old = NULL;
        struct efx_tc_action_set *act = NULL;
        bool found = false, uplinked = false;
        const struct flow_action_entry *fa;
        struct efx_tc_match match;
        struct efx_rep *to_efv;
        /* rc also carries the m-port value returned by the *_mport()
         * helpers (negative means error), hence it is wider than int.
         */
        s64 rc;
        int i;

        /* Parse match */
        memset(&match, 0, sizeof(match));
        /* No extack is passed here, so parse failures are not reported
         * back to the user for foreign rules.
         */
        rc = efx_tc_flower_parse_match(efx, fr, &match, NULL);
        if (rc)
                return rc;
        /* The rule as given to us doesn't specify a source netdevice.
         * But, determining whether packets from a VF should match it is
         * complicated, so leave those to the software slowpath: qualify
         * the filter with source m-port == wire.
         */
        rc = efx_tc_flower_external_mport(efx, EFX_EFV_PF);
        if (rc < 0) {
                NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port for foreign filter");
                return rc;
        }
        match.value.ingress_port = rc;
        match.mask.ingress_port = ~0;

        /* For chain 0, match.rid and match.value.recirc_id simply keep
         * the zeroes from the memset() above.
         */
        if (tc->common.chain_index) {
                struct efx_tc_recirc_id *rid;

                rid = efx_tc_get_recirc_id(efx, tc->common.chain_index, net_dev);
                if (IS_ERR(rid)) {
                        NL_SET_ERR_MSG_FMT_MOD(extack,
                                               "Failed to allocate a hardware recirculation ID for chain_index %u",
                                               tc->common.chain_index);
                        return PTR_ERR(rid);
                }
                match.rid = rid;
                match.value.recirc_id = rid->fw_id;
        }
        match.mask.recirc_id = 0xff;

        /* From here on, failures must go through the release label so that
         * match.rid (if any) is put back.
         */

        /* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
         * +trk+est, which is strictly implied by +est, so rewrite it to that.
         */
        if (match.mask.ct_state_trk && match.value.ct_state_trk &&
            match.mask.ct_state_est && match.value.ct_state_est)
                match.mask.ct_state_trk = 0;
        /* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
         * match +trk-est (CT_HIT=0) despite being on an established connection.
         * So make -est imply -tcp_syn_fin_rst match to ensure these packets
         * still hit the software path.
         */
        if (match.mask.ct_state_est && !match.value.ct_state_est) {
                if (match.value.tcp_syn_fin_rst) {
                        /* Can't offload this combination */
                        rc = -EOPNOTSUPP;
                        goto release;
                }
                match.mask.tcp_syn_fin_rst = true;
        }

        /* First pass over the actions: nothing is built yet, we only find
         * out whether any mirred/redirect targets a device we recognise.
         */
        flow_action_for_each(i, fa, &fr->action) {
                switch (fa->id) {
                case FLOW_ACTION_REDIRECT:
                case FLOW_ACTION_MIRRED: /* mirred means mirror here */
                        to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
                        /* 'continue' moves on to the next action */
                        if (IS_ERR(to_efv))
                                continue;
                        found = true;
                        break;
                default:
                        break;
                }
        }
        if (!found) { /* We don't care. */
                netif_dbg(efx, drv, efx->net_dev,
                          "Ignoring foreign filter that doesn't egdev us\n");
                rc = -EOPNOTSUPP;
                goto release;
        }

        rc = efx_mae_match_check_caps(efx, &match.mask, NULL);
        if (rc)
                goto release;

        if (efx_tc_match_is_encap(&match.mask)) {
                enum efx_encap_type type;

                /* Tunnel type is derived from the device the rule is on */
                type = efx_tc_indr_netdev_type(net_dev);
                if (type == EFX_ENCAP_TYPE_NONE) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "Egress encap match on unsupported tunnel device");
                        rc = -EOPNOTSUPP;
                        goto release;
                }

                rc = efx_mae_check_encap_type_supported(efx, type);
                if (rc) {
                        NL_SET_ERR_MSG_FMT_MOD(extack,
                                               "Firmware reports no support for %s encap match",
                                               efx_tc_encap_type_name(type));
                        goto release;
                }

                rc = efx_tc_flower_record_encap_match(efx, &match, type,
                                                      EFX_TC_EM_DIRECT, 0, 0,
                                                      extack);
                if (rc)
                        goto release;
        } else {
                /* This is not a tunnel decap rule, ignore it */
                netif_dbg(efx, drv, efx->net_dev,
                          "Ignoring foreign filter without encap match\n");
                rc = -EOPNOTSUPP;
                goto release;
        }

        rule = kzalloc(sizeof(*rule), GFP_USER);
        if (!rule) {
                rc = -ENOMEM;
                goto release;
        }
        INIT_LIST_HEAD(&rule->acts.list);
        rule->cookie = tc->cookie;
        /* A non-NULL 'old' (existing entry or error pointer) means our rule
         * was not inserted; the release path relies on that to decide
         * whether there is a hashtable entry to remove.
         */
        old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
                                                &rule->linkage,
                                                efx_tc_match_action_ht_params);
        if (IS_ERR(old)) {
                rc = PTR_ERR(old);
                goto release;
        } else if (old) {
                netif_dbg(efx, drv, efx->net_dev,
                          "Ignoring already-offloaded rule (cookie %lx)\n",
                          tc->cookie);
                rc = -EEXIST;
                goto release;
        }

        act = kzalloc(sizeof(*act), GFP_USER);
        if (!act) {
                rc = -ENOMEM;
                goto release;
        }

        /* Parse actions.  For foreign rules we only support decap & redirect.
         * See corresponding code in efx_tc_flower_replace() for theory of
         * operation & how 'act' cursor is used.
         */
        flow_action_for_each(i, fa, &fr->action) {
                struct efx_tc_action_set save;

                switch (fa->id) {
                case FLOW_ACTION_REDIRECT:
                case FLOW_ACTION_MIRRED:
                        /* See corresponding code in efx_tc_flower_replace() for
                         * long explanations of what's going on here.
                         */
                        /* Snapshot the cursor before count/deliver are added,
                         * so a mirror can carry on from this state below.
                         */
                        save = *act;
                        if (fa->hw_stats) {
                                struct efx_tc_counter_index *ctr;

                                if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
                                        NL_SET_ERR_MSG_FMT_MOD(extack,
                                                               "hw_stats_type %u not supported (only 'delayed')",
                                                               fa->hw_stats);
                                        rc = -EOPNOTSUPP;
                                        goto release;
                                }
                                if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
                                        rc = -EOPNOTSUPP;
                                        goto release;
                                }

                                ctr = efx_tc_flower_get_counter_index(efx,
                                                                      tc->cookie,
                                                                      EFX_TC_COUNTER_TYPE_AR);
                                if (IS_ERR(ctr)) {
                                        rc = PTR_ERR(ctr);
                                        NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
                                        goto release;
                                }
                                act->count = ctr;
                                INIT_LIST_HEAD(&act->count_user);
                        }

                        if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
                                /* can't happen */
                                rc = -EOPNOTSUPP;
                                NL_SET_ERR_MSG_MOD(extack,
                                                   "Deliver action violates action order (can't happen)");
                                goto release;
                        }
                        to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
                        /* PF implies egdev is us, in which case we really
                         * want to deliver to the uplink (because this is an
                         * ingress filter).  If we don't recognise the egdev
                         * at all, then we'd better trap so SW can handle it.
                         */
                        if (IS_ERR(to_efv))
                                to_efv = EFX_EFV_PF;
                        if (to_efv == EFX_EFV_PF) {
                                /* Already delivered to the PF since the last
                                 * decap: skip this action (leaves the switch
                                 * only, the loop carries on).
                                 */
                                if (uplinked)
                                        break;
                                uplinked = true;
                        }
                        rc = efx_tc_flower_internal_mport(efx, to_efv);
                        if (rc < 0) {
                                NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
                                goto release;
                        }
                        act->dest_mport = rc;
                        act->deliver = 1;
                        rc = efx_mae_alloc_action_set(efx, act);
                        if (rc) {
                                NL_SET_ERR_MSG_MOD(extack,
                                                   "Failed to write action set to hw (mirred)");
                                goto release;
                        }
                        /* The action set now belongs to rule->acts; drop the
                         * cursor so the error path doesn't free it as well.
                         */
                        list_add_tail(&act->list, &rule->acts.list);
                        act = NULL;
                        if (fa->id == FLOW_ACTION_REDIRECT)
                                break; /* end of the line */
                        /* Mirror, so continue on with saved act */
                        act = kzalloc(sizeof(*act), GFP_USER);
                        if (!act) {
                                rc = -ENOMEM;
                                goto release;
                        }
                        *act = save;
                        break;
                case FLOW_ACTION_TUNNEL_DECAP:
                        if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DECAP)) {
                                rc = -EINVAL;
                                NL_SET_ERR_MSG_MOD(extack, "Decap action violates action order");
                                goto release;
                        }
                        act->decap = 1;
                        /* If we previously delivered/trapped to uplink, now
                         * that we've decapped we'll want another copy if we
                         * try to deliver/trap to uplink again.
                         */
                        uplinked = false;
                        break;
                default:
                        NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
                                               fa->id);
                        rc = -EOPNOTSUPP;
                        goto release;
                }
        }

        /* A cursor is still pending if the last action wasn't a redirect */
        if (act) {
                if (!uplinked) {
                        /* Not shot/redirected, so deliver to default dest (which is
                         * the uplink, as this is an ingress filter)
                         */
                        efx_mae_mport_uplink(efx, &act->dest_mport);
                        act->deliver = 1;
                }
                rc = efx_mae_alloc_action_set(efx, act);
                if (rc) {
                        NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
                        goto release;
                }
                list_add_tail(&act->list, &rule->acts.list);
                act = NULL; /* Prevent double-free in error path */
        }

        rule->match = match;

        netif_dbg(efx, drv, efx->net_dev,
                  "Successfully parsed foreign filter (cookie %lx)\n",
                  tc->cookie);

        rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
        if (rc) {
                NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
                goto release;
        }
        rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
                                 rule->acts.fw_id, &rule->fw_id);
        if (rc) {
                NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
                goto release_acts;
        }
        return 0;

release_acts:
        /* Only reached once the action set list itself has been written to
         * hardware; remove it there before the common cleanup below.
         */
        efx_mae_free_action_set_list(efx, &rule->acts);
release:
        /* We failed to insert the rule, so free up any entries we created in
         * subsidiary tables.
         */
        if (match.rid)
                efx_tc_put_recirc_id(efx, match.rid);
        if (act)
                efx_tc_free_action_set(efx, act, false);
        if (rule) {
                /* Only take our entry out of the hashtable if it actually
                 * went in (see the insertion above).
                 */
                if (!old)
                        rhashtable_remove_fast(&efx->tc->match_action_ht,
                                               &rule->linkage,
                                               efx_tc_match_action_ht_params);
                efx_tc_free_action_set_list(efx, &rule->acts, false);
        }
        kfree(rule);
        if (match.encap)
                efx_tc_flower_release_encap_match(efx, match.encap);
        return rc;
}
1673
1674 static int efx_tc_flower_replace_lhs(struct efx_nic *efx,
1675                                      struct flow_cls_offload *tc,
1676                                      struct flow_rule *fr,
1677                                      struct efx_tc_match *match,
1678                                      struct efx_rep *efv,
1679                                      struct net_device *net_dev)
1680 {
1681         struct netlink_ext_ack *extack = tc->common.extack;
1682         struct efx_tc_lhs_rule *rule, *old;
1683         int rc;
1684
1685         if (tc->common.chain_index) {
1686                 NL_SET_ERR_MSG_MOD(extack, "LHS rule only allowed in chain 0");
1687                 return -EOPNOTSUPP;
1688         }
1689
1690         if (match->mask.ct_state_trk && match->value.ct_state_trk) {
1691                 NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk");
1692                 return -EOPNOTSUPP;
1693         }
1694         /* LHS rules are always -trk, so we don't need to match on that */
1695         match->mask.ct_state_trk = 0;
1696         match->value.ct_state_trk = 0;
1697
1698         rc = efx_mae_match_check_caps_lhs(efx, &match->mask, extack);
1699         if (rc)
1700                 return rc;
1701
1702         rule = kzalloc(sizeof(*rule), GFP_USER);
1703         if (!rule)
1704                 return -ENOMEM;
1705         rule->cookie = tc->cookie;
1706         old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
1707                                                 &rule->linkage,
1708                                                 efx_tc_lhs_rule_ht_params);
1709         if (IS_ERR(old)) {
1710                 rc = PTR_ERR(old);
1711                 goto release;
1712         } else if (old) {
1713                 netif_dbg(efx, drv, efx->net_dev,
1714                           "Already offloaded rule (cookie %lx)\n", tc->cookie);
1715                 rc = -EEXIST;
1716                 NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
1717                 goto release;
1718         }
1719
1720         /* Parse actions */
1721         /* See note in efx_tc_flower_replace() regarding passed net_dev
1722          * (used for efx_tc_get_recirc_id()).
1723          */
1724         rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, efx->net_dev, rule);
1725         if (rc)
1726                 goto release;
1727
1728         rule->match = *match;
1729
1730         rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC);
1731         if (rc) {
1732                 NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
1733                 goto release;
1734         }
1735         netif_dbg(efx, drv, efx->net_dev,
1736                   "Successfully parsed lhs rule (cookie %lx)\n",
1737                   tc->cookie);
1738         return 0;
1739
1740 release:
1741         efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act);
1742         if (!old)
1743                 rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage,
1744                                        efx_tc_lhs_rule_ht_params);
1745         kfree(rule);
1746         return rc;
1747 }
1748
1749 static int efx_tc_flower_replace(struct efx_nic *efx,
1750                                  struct net_device *net_dev,
1751                                  struct flow_cls_offload *tc,
1752                                  struct efx_rep *efv)
1753 {
1754         struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
1755         struct netlink_ext_ack *extack = tc->common.extack;
1756         const struct ip_tunnel_info *encap_info = NULL;
1757         struct efx_tc_flow_rule *rule = NULL, *old;
1758         struct efx_tc_mangler_state mung = {};
1759         struct efx_tc_action_set *act = NULL;
1760         const struct flow_action_entry *fa;
1761         struct efx_rep *from_efv, *to_efv;
1762         struct efx_tc_match match;
1763         u32 acts_id;
1764         s64 rc;
1765         int i;
1766
1767         if (!tc_can_offload_extack(efx->net_dev, extack))
1768                 return -EOPNOTSUPP;
1769         if (WARN_ON(!efx->tc))
1770                 return -ENETDOWN;
1771         if (WARN_ON(!efx->tc->up))
1772                 return -ENETDOWN;
1773
1774         from_efv = efx_tc_flower_lookup_efv(efx, net_dev);
1775         if (IS_ERR(from_efv)) {
1776                 /* Not from our PF or representors, so probably a tunnel dev */
1777                 return efx_tc_flower_replace_foreign(efx, net_dev, tc);
1778         }
1779
1780         if (efv != from_efv) {
1781                 /* can't happen */
1782                 NL_SET_ERR_MSG_FMT_MOD(extack, "for %s efv is %snull but from_efv is %snull (can't happen)",
1783                                        netdev_name(net_dev), efv ? "non-" : "",
1784                                        from_efv ? "non-" : "");
1785                 return -EINVAL;
1786         }
1787
1788         /* Parse match */
1789         memset(&match, 0, sizeof(match));
1790         rc = efx_tc_flower_external_mport(efx, from_efv);
1791         if (rc < 0) {
1792                 NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port");
1793                 return rc;
1794         }
1795         match.value.ingress_port = rc;
1796         match.mask.ingress_port = ~0;
1797         rc = efx_tc_flower_parse_match(efx, fr, &match, extack);
1798         if (rc)
1799                 return rc;
1800         if (efx_tc_match_is_encap(&match.mask)) {
1801                 NL_SET_ERR_MSG_MOD(extack, "Ingress enc_key matches not supported");
1802                 return -EOPNOTSUPP;
1803         }
1804
1805         if (efx_tc_rule_is_lhs_rule(fr, &match))
1806                 return efx_tc_flower_replace_lhs(efx, tc, fr, &match, efv,
1807                                                  net_dev);
1808
1809         /* chain_index 0 is always recirc_id 0 (and does not appear in recirc_ht).
1810          * Conveniently, match.rid == NULL and match.value.recirc_id == 0 owing
1811          * to the initial memset(), so we don't need to do anything in that case.
1812          */
1813         if (tc->common.chain_index) {
1814                 struct efx_tc_recirc_id *rid;
1815
1816                 /* Note regarding passed net_dev:
1817                  * VFreps and PF can share chain namespace, as they have
1818                  * distinct ingress_mports.  So we don't need to burn an
1819                  * extra recirc_id if both use the same chain_index.
1820                  * (Strictly speaking, we could give each VFrep its own
1821                  * recirc_id namespace that doesn't take IDs away from the
1822                  * PF, but that would require a bunch of additional IDAs -
1823                  * one for each representor - and that's not likely to be
1824                  * the main cause of recirc_id exhaustion anyway.)
1825                  */
1826                 rid = efx_tc_get_recirc_id(efx, tc->common.chain_index,
1827                                            efx->net_dev);
1828                 if (IS_ERR(rid)) {
1829                         NL_SET_ERR_MSG_FMT_MOD(extack,
1830                                                "Failed to allocate a hardware recirculation ID for chain_index %u",
1831                                                tc->common.chain_index);
1832                         return PTR_ERR(rid);
1833                 }
1834                 match.rid = rid;
1835                 match.value.recirc_id = rid->fw_id;
1836         }
1837         match.mask.recirc_id = 0xff;
1838
1839         /* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
1840          * +trk+est, which is strictly implied by +est, so rewrite it to that.
1841          */
1842         if (match.mask.ct_state_trk && match.value.ct_state_trk &&
1843             match.mask.ct_state_est && match.value.ct_state_est)
1844                 match.mask.ct_state_trk = 0;
1845         /* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
1846          * match +trk-est (CT_HIT=0) despite being on an established connection.
1847          * So make -est imply -tcp_syn_fin_rst match to ensure these packets
1848          * still hit the software path.
1849          */
1850         if (match.mask.ct_state_est && !match.value.ct_state_est) {
1851                 if (match.value.tcp_syn_fin_rst) {
1852                         /* Can't offload this combination */
1853                         rc = -EOPNOTSUPP;
1854                         goto release;
1855                 }
1856                 match.mask.tcp_syn_fin_rst = true;
1857         }
1858
1859         rc = efx_mae_match_check_caps(efx, &match.mask, extack);
1860         if (rc)
1861                 goto release;
1862
1863         rule = kzalloc(sizeof(*rule), GFP_USER);
1864         if (!rule) {
1865                 rc = -ENOMEM;
1866                 goto release;
1867         }
1868         INIT_LIST_HEAD(&rule->acts.list);
1869         rule->cookie = tc->cookie;
1870         old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
1871                                                 &rule->linkage,
1872                                                 efx_tc_match_action_ht_params);
1873         if (IS_ERR(old)) {
1874                 rc = PTR_ERR(old);
1875                 goto release;
1876         } else if (old) {
1877                 netif_dbg(efx, drv, efx->net_dev,
1878                           "Already offloaded rule (cookie %lx)\n", tc->cookie);
1879                 NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
1880                 rc = -EEXIST;
1881                 goto release;
1882         }
1883
1884         /* Parse actions */
1885         act = kzalloc(sizeof(*act), GFP_USER);
1886         if (!act) {
1887                 rc = -ENOMEM;
1888                 goto release;
1889         }
1890
1891         /**
1892          * DOC: TC action translation
1893          *
1894          * Actions in TC are sequential and cumulative, with delivery actions
1895          * potentially anywhere in the order.  The EF100 MAE, however, takes
1896          * an 'action set list' consisting of 'action sets', each of which is
1897          * applied to the _original_ packet, and consists of a set of optional
1898          * actions in a fixed order with delivery at the end.
1899          * To translate between these two models, we maintain a 'cursor', @act,
1900          * which describes the cumulative effect of all the packet-mutating
1901          * actions encountered so far; on handling a delivery (mirred or drop)
1902          * action, once the action-set has been inserted into hardware, we
1903          * append @act to the action-set list (@rule->acts); if this is a pipe
1904          * action (mirred mirror) we then allocate a new @act with a copy of
1905          * the cursor state _before_ the delivery action, otherwise we set @act
1906          * to %NULL.
1907          * This ensures that every allocated action-set is either attached to
1908          * @rule->acts or pointed to by @act (and never both), and that only
1909          * those action-sets in @rule->acts exist in hardware.  Consequently,
1910          * in the failure path, @act only needs to be freed in memory, whereas
1911          * for @rule->acts we remove each action-set from hardware before
1912          * freeing it (efx_tc_free_action_set_list()), even if the action-set
1913          * list itself is not in hardware.
1914          */
1915         flow_action_for_each(i, fa, &fr->action) {
1916                 struct efx_tc_action_set save;
1917                 u16 tci;
1918
1919                 if (!act) {
1920                         /* more actions after a non-pipe action */
1921                         NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
1922                         rc = -EINVAL;
1923                         goto release;
1924                 }
1925
1926                 if ((fa->id == FLOW_ACTION_REDIRECT ||
1927                      fa->id == FLOW_ACTION_MIRRED ||
1928                      fa->id == FLOW_ACTION_DROP) && fa->hw_stats) {
1929                         struct efx_tc_counter_index *ctr;
1930
1931                         /* Currently the only actions that want stats are
1932                          * mirred and gact (ok, shot, trap, goto-chain), which
1933                          * means we want stats just before delivery.  Also,
1934                          * note that tunnel_key set shouldn't change the length
1935                          * — it's only the subsequent mirred that does that,
1936                          * and the stats are taken _before_ the mirred action
1937                          * happens.
1938                          */
1939                         if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
1940                                 /* All supported actions that count either steal
1941                                  * (gact shot, mirred redirect) or clone act
1942                                  * (mirred mirror), so we should never get two
1943                                  * count actions on one action_set.
1944                                  */
1945                                 NL_SET_ERR_MSG_MOD(extack, "Count-action conflict (can't happen)");
1946                                 rc = -EOPNOTSUPP;
1947                                 goto release;
1948                         }
1949
1950                         if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
1951                                 NL_SET_ERR_MSG_FMT_MOD(extack, "hw_stats_type %u not supported (only 'delayed')",
1952                                                        fa->hw_stats);
1953                                 rc = -EOPNOTSUPP;
1954                                 goto release;
1955                         }
1956
1957                         ctr = efx_tc_flower_get_counter_index(efx, tc->cookie,
1958                                                               EFX_TC_COUNTER_TYPE_AR);
1959                         if (IS_ERR(ctr)) {
1960                                 rc = PTR_ERR(ctr);
1961                                 NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
1962                                 goto release;
1963                         }
1964                         act->count = ctr;
1965                         INIT_LIST_HEAD(&act->count_user);
1966                 }
1967
1968                 switch (fa->id) {
1969                 case FLOW_ACTION_DROP:
1970                         rc = efx_mae_alloc_action_set(efx, act);
1971                         if (rc) {
1972                                 NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (drop)");
1973                                 goto release;
1974                         }
1975                         list_add_tail(&act->list, &rule->acts.list);
1976                         act = NULL; /* end of the line */
1977                         break;
1978                 case FLOW_ACTION_REDIRECT:
1979                 case FLOW_ACTION_MIRRED:
1980                         save = *act;
1981
1982                         if (encap_info) {
1983                                 struct efx_tc_encap_action *encap;
1984
1985                                 if (!efx_tc_flower_action_order_ok(act,
1986                                                                    EFX_TC_AO_ENCAP)) {
1987                                         rc = -EOPNOTSUPP;
1988                                         NL_SET_ERR_MSG_MOD(extack, "Encap action violates action order");
1989                                         goto release;
1990                                 }
1991                                 encap = efx_tc_flower_create_encap_md(
1992                                                 efx, encap_info, fa->dev, extack);
1993                                 if (IS_ERR_OR_NULL(encap)) {
1994                                         rc = PTR_ERR(encap);
1995                                         if (!rc)
1996                                                 rc = -EIO; /* arbitrary */
1997                                         goto release;
1998                                 }
1999                                 act->encap_md = encap;
2000                                 list_add_tail(&act->encap_user, &encap->users);
2001                                 act->dest_mport = encap->dest_mport;
2002                                 act->deliver = 1;
2003                                 if (act->count && !WARN_ON(!act->count->cnt)) {
2004                                         /* This counter is used by an encap
2005                                          * action, which needs a reference back
2006                                          * so it can prod neighbouring whenever
2007                                          * traffic is seen.
2008                                          */
2009                                         spin_lock_bh(&act->count->cnt->lock);
2010                                         list_add_tail(&act->count_user,
2011                                                       &act->count->cnt->users);
2012                                         spin_unlock_bh(&act->count->cnt->lock);
2013                                 }
2014                                 rc = efx_mae_alloc_action_set(efx, act);
2015                                 if (rc) {
2016                                         NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (encap)");
2017                                         goto release;
2018                                 }
2019                                 list_add_tail(&act->list, &rule->acts.list);
2020                                 act->user = &rule->acts;
2021                                 act = NULL;
2022                                 if (fa->id == FLOW_ACTION_REDIRECT)
2023                                         break; /* end of the line */
2024                                 /* Mirror, so continue on with saved act */
2025                                 save.count = NULL;
2026                                 act = kzalloc(sizeof(*act), GFP_USER);
2027                                 if (!act) {
2028                                         rc = -ENOMEM;
2029                                         goto release;
2030                                 }
2031                                 *act = save;
2032                                 break;
2033                         }
2034
2035                         if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
2036                                 /* can't happen */
2037                                 rc = -EOPNOTSUPP;
2038                                 NL_SET_ERR_MSG_MOD(extack, "Deliver action violates action order (can't happen)");
2039                                 goto release;
2040                         }
2041
2042                         to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
2043                         if (IS_ERR(to_efv)) {
2044                                 NL_SET_ERR_MSG_MOD(extack, "Mirred egress device not on switch");
2045                                 rc = PTR_ERR(to_efv);
2046                                 goto release;
2047                         }
2048                         rc = efx_tc_flower_external_mport(efx, to_efv);
2049                         if (rc < 0) {
2050                                 NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
2051                                 goto release;
2052                         }
2053                         act->dest_mport = rc;
2054                         act->deliver = 1;
2055                         rc = efx_mae_alloc_action_set(efx, act);
2056                         if (rc) {
2057                                 NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (mirred)");
2058                                 goto release;
2059                         }
2060                         list_add_tail(&act->list, &rule->acts.list);
2061                         act = NULL;
2062                         if (fa->id == FLOW_ACTION_REDIRECT)
2063                                 break; /* end of the line */
2064                         /* Mirror, so continue on with saved act */
2065                         save.count = NULL;
2066                         act = kzalloc(sizeof(*act), GFP_USER);
2067                         if (!act) {
2068                                 rc = -ENOMEM;
2069                                 goto release;
2070                         }
2071                         *act = save;
2072                         break;
2073                 case FLOW_ACTION_VLAN_POP:
2074                         if (act->vlan_push) {
2075                                 act->vlan_push--;
2076                         } else if (efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_POP)) {
2077                                 act->vlan_pop++;
2078                         } else {
2079                                 NL_SET_ERR_MSG_MOD(extack,
2080                                                    "More than two VLAN pops, or action order violated");
2081                                 rc = -EINVAL;
2082                                 goto release;
2083                         }
2084                         break;
2085                 case FLOW_ACTION_VLAN_PUSH:
2086                         if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_PUSH)) {
2087                                 rc = -EINVAL;
2088                                 NL_SET_ERR_MSG_MOD(extack,
2089                                                    "More than two VLAN pushes, or action order violated");
2090                                 goto release;
2091                         }
2092                         tci = fa->vlan.vid & VLAN_VID_MASK;
2093                         tci |= fa->vlan.prio << VLAN_PRIO_SHIFT;
2094                         act->vlan_tci[act->vlan_push] = cpu_to_be16(tci);
2095                         act->vlan_proto[act->vlan_push] = fa->vlan.proto;
2096                         act->vlan_push++;
2097                         break;
2098                 case FLOW_ACTION_ADD:
2099                         rc = efx_tc_pedit_add(efx, act, fa, extack);
2100                         if (rc < 0)
2101                                 goto release;
2102                         break;
2103                 case FLOW_ACTION_MANGLE:
2104                         rc = efx_tc_mangle(efx, act, fa, &mung, extack, &match);
2105                         if (rc < 0)
2106                                 goto release;
2107                         break;
2108                 case FLOW_ACTION_TUNNEL_ENCAP:
2109                         if (encap_info) {
2110                                 /* Can't specify encap multiple times.
2111                                  * If you want to overwrite an existing
2112                                  * encap_info, use an intervening
2113                                  * FLOW_ACTION_TUNNEL_DECAP to clear it.
2114                                  */
2115                                 NL_SET_ERR_MSG_MOD(extack, "Tunnel key set when already set");
2116                                 rc = -EINVAL;
2117                                 goto release;
2118                         }
2119                         if (!fa->tunnel) {
2120                                 NL_SET_ERR_MSG_MOD(extack, "Tunnel key set is missing key");
2121                                 rc = -EOPNOTSUPP;
2122                                 goto release;
2123                         }
2124                         encap_info = fa->tunnel;
2125                         break;
2126                 case FLOW_ACTION_TUNNEL_DECAP:
2127                         if (encap_info) {
2128                                 encap_info = NULL;
2129                                 break;
2130                         }
2131                         /* Since we don't support enc_key matches on ingress
2132                          * (and if we did there'd be no tunnel-device to give
2133                          * us a type), we can't offload a decap that's not
2134                          * just undoing a previous encap action.
2135                          */
2136                         NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device");
2137                         rc = -EOPNOTSUPP;
2138                         goto release;
2139                 default:
2140                         NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
2141                                                fa->id);
2142                         rc = -EOPNOTSUPP;
2143                         goto release;
2144                 }
2145         }
2146
2147         rc = efx_tc_incomplete_mangle(&mung, extack);
2148         if (rc < 0)
2149                 goto release;
2150         if (act) {
2151                 /* Not shot/redirected, so deliver to default dest */
2152                 if (from_efv == EFX_EFV_PF)
2153                         /* Rule applies to traffic from the wire,
2154                          * and default dest is thus the PF
2155                          */
2156                         efx_mae_mport_uplink(efx, &act->dest_mport);
2157                 else
2158                         /* Representor, so rule applies to traffic from
2159                          * representee, and default dest is thus the rep.
2160                          * All reps use the same mport for delivery
2161                          */
2162                         efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
2163                                             &act->dest_mport);
2164                 act->deliver = 1;
2165                 rc = efx_mae_alloc_action_set(efx, act);
2166                 if (rc) {
2167                         NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
2168                         goto release;
2169                 }
2170                 list_add_tail(&act->list, &rule->acts.list);
2171                 act = NULL; /* Prevent double-free in error path */
2172         }
2173
2174         netif_dbg(efx, drv, efx->net_dev,
2175                   "Successfully parsed filter (cookie %lx)\n",
2176                   tc->cookie);
2177
2178         rule->match = match;
2179
2180         rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
2181         if (rc) {
2182                 NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
2183                 goto release;
2184         }
2185         if (from_efv == EFX_EFV_PF)
2186                 /* PF netdev, so rule applies to traffic from wire */
2187                 rule->fallback = &efx->tc->facts.pf;
2188         else
2189                 /* repdev, so rule applies to traffic from representee */
2190                 rule->fallback = &efx->tc->facts.reps;
2191         if (!efx_tc_check_ready(efx, rule)) {
2192                 netif_dbg(efx, drv, efx->net_dev, "action not ready for hw\n");
2193                 acts_id = rule->fallback->fw_id;
2194         } else {
2195                 netif_dbg(efx, drv, efx->net_dev, "ready for hw\n");
2196                 acts_id = rule->acts.fw_id;
2197         }
2198         rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
2199                                  acts_id, &rule->fw_id);
2200         if (rc) {
2201                 NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
2202                 goto release_acts;
2203         }
2204         return 0;
2205
2206 release_acts:
2207         efx_mae_free_action_set_list(efx, &rule->acts);
2208 release:
2209         /* We failed to insert the rule, so free up any entries we created in
2210          * subsidiary tables.
2211          */
2212         if (match.rid)
2213                 efx_tc_put_recirc_id(efx, match.rid);
2214         if (act)
2215                 efx_tc_free_action_set(efx, act, false);
2216         if (rule) {
2217                 if (!old)
2218                         rhashtable_remove_fast(&efx->tc->match_action_ht,
2219                                                &rule->linkage,
2220                                                efx_tc_match_action_ht_params);
2221                 efx_tc_free_action_set_list(efx, &rule->acts, false);
2222         }
2223         kfree(rule);
2224         return rc;
2225 }
2226
2227 static int efx_tc_flower_destroy(struct efx_nic *efx,
2228                                  struct net_device *net_dev,
2229                                  struct flow_cls_offload *tc)
2230 {
2231         struct netlink_ext_ack *extack = tc->common.extack;
2232         struct efx_tc_lhs_rule *lhs_rule;
2233         struct efx_tc_flow_rule *rule;
2234
2235         lhs_rule = rhashtable_lookup_fast(&efx->tc->lhs_rule_ht, &tc->cookie,
2236                                           efx_tc_lhs_rule_ht_params);
2237         if (lhs_rule) {
2238                 /* Remove it from HW */
2239                 efx_mae_remove_lhs_rule(efx, lhs_rule);
2240                 /* Delete it from SW */
2241                 efx_tc_flower_release_lhs_actions(efx, &lhs_rule->lhs_act);
2242                 rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &lhs_rule->linkage,
2243                                        efx_tc_lhs_rule_ht_params);
2244                 if (lhs_rule->match.encap)
2245                         efx_tc_flower_release_encap_match(efx, lhs_rule->match.encap);
2246                 netif_dbg(efx, drv, efx->net_dev, "Removed (lhs) filter %lx\n",
2247                           lhs_rule->cookie);
2248                 kfree(lhs_rule);
2249                 return 0;
2250         }
2251
2252         rule = rhashtable_lookup_fast(&efx->tc->match_action_ht, &tc->cookie,
2253                                       efx_tc_match_action_ht_params);
2254         if (!rule) {
2255                 /* Only log a message if we're the ingress device.  Otherwise
2256                  * it's a foreign filter and we might just not have been
2257                  * interested (e.g. we might not have been the egress device
2258                  * either).
2259                  */
2260                 if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
2261                         netif_warn(efx, drv, efx->net_dev,
2262                                    "Filter %lx not found to remove\n", tc->cookie);
2263                 NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
2264                 return -ENOENT;
2265         }
2266
2267         /* Remove it from HW */
2268         efx_tc_delete_rule(efx, rule);
2269         /* Delete it from SW */
2270         rhashtable_remove_fast(&efx->tc->match_action_ht, &rule->linkage,
2271                                efx_tc_match_action_ht_params);
2272         netif_dbg(efx, drv, efx->net_dev, "Removed filter %lx\n", rule->cookie);
2273         kfree(rule);
2274         return 0;
2275 }
2276
2277 static int efx_tc_flower_stats(struct efx_nic *efx, struct net_device *net_dev,
2278                                struct flow_cls_offload *tc)
2279 {
2280         struct netlink_ext_ack *extack = tc->common.extack;
2281         struct efx_tc_counter_index *ctr;
2282         struct efx_tc_counter *cnt;
2283         u64 packets, bytes;
2284
2285         ctr = efx_tc_flower_find_counter_index(efx, tc->cookie);
2286         if (!ctr) {
2287                 /* See comment in efx_tc_flower_destroy() */
2288                 if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
2289                         if (net_ratelimit())
2290                                 netif_warn(efx, drv, efx->net_dev,
2291                                            "Filter %lx not found for stats\n",
2292                                            tc->cookie);
2293                 NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
2294                 return -ENOENT;
2295         }
2296         if (WARN_ON(!ctr->cnt)) /* can't happen */
2297                 return -EIO;
2298         cnt = ctr->cnt;
2299
2300         spin_lock_bh(&cnt->lock);
2301         /* Report only new pkts/bytes since last time TC asked */
2302         packets = cnt->packets;
2303         bytes = cnt->bytes;
2304         flow_stats_update(&tc->stats, bytes - cnt->old_bytes,
2305                           packets - cnt->old_packets, 0, cnt->touched,
2306                           FLOW_ACTION_HW_STATS_DELAYED);
2307         cnt->old_packets = packets;
2308         cnt->old_bytes = bytes;
2309         spin_unlock_bh(&cnt->lock);
2310         return 0;
2311 }
2312
2313 int efx_tc_flower(struct efx_nic *efx, struct net_device *net_dev,
2314                   struct flow_cls_offload *tc, struct efx_rep *efv)
2315 {
2316         int rc;
2317
2318         if (!efx->tc)
2319                 return -EOPNOTSUPP;
2320
2321         mutex_lock(&efx->tc->mutex);
2322         switch (tc->command) {
2323         case FLOW_CLS_REPLACE:
2324                 rc = efx_tc_flower_replace(efx, net_dev, tc, efv);
2325                 break;
2326         case FLOW_CLS_DESTROY:
2327                 rc = efx_tc_flower_destroy(efx, net_dev, tc);
2328                 break;
2329         case FLOW_CLS_STATS:
2330                 rc = efx_tc_flower_stats(efx, net_dev, tc);
2331                 break;
2332         default:
2333                 rc = -EOPNOTSUPP;
2334                 break;
2335         }
2336         mutex_unlock(&efx->tc->mutex);
2337         return rc;
2338 }
2339
2340 static int efx_tc_configure_default_rule(struct efx_nic *efx, u32 ing_port,
2341                                          u32 eg_port, struct efx_tc_flow_rule *rule)
2342 {
2343         struct efx_tc_action_set_list *acts = &rule->acts;
2344         struct efx_tc_match *match = &rule->match;
2345         struct efx_tc_action_set *act;
2346         int rc;
2347
2348         match->value.ingress_port = ing_port;
2349         match->mask.ingress_port = ~0;
2350         act = kzalloc(sizeof(*act), GFP_KERNEL);
2351         if (!act)
2352                 return -ENOMEM;
2353         act->deliver = 1;
2354         act->dest_mport = eg_port;
2355         rc = efx_mae_alloc_action_set(efx, act);
2356         if (rc)
2357                 goto fail1;
2358         EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
2359         list_add_tail(&act->list, &acts->list);
2360         rc = efx_mae_alloc_action_set_list(efx, acts);
2361         if (rc)
2362                 goto fail2;
2363         rc = efx_mae_insert_rule(efx, match, EFX_TC_PRIO_DFLT,
2364                                  acts->fw_id, &rule->fw_id);
2365         if (rc)
2366                 goto fail3;
2367         return 0;
2368 fail3:
2369         efx_mae_free_action_set_list(efx, acts);
2370 fail2:
2371         list_del(&act->list);
2372         efx_mae_free_action_set(efx, act->fw_id);
2373 fail1:
2374         kfree(act);
2375         return rc;
2376 }
2377
2378 static int efx_tc_configure_default_rule_pf(struct efx_nic *efx)
2379 {
2380         struct efx_tc_flow_rule *rule = &efx->tc->dflt.pf;
2381         u32 ing_port, eg_port;
2382
2383         efx_mae_mport_uplink(efx, &ing_port);
2384         efx_mae_mport_wire(efx, &eg_port);
2385         return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
2386 }
2387
2388 static int efx_tc_configure_default_rule_wire(struct efx_nic *efx)
2389 {
2390         struct efx_tc_flow_rule *rule = &efx->tc->dflt.wire;
2391         u32 ing_port, eg_port;
2392
2393         efx_mae_mport_wire(efx, &ing_port);
2394         efx_mae_mport_uplink(efx, &eg_port);
2395         return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
2396 }
2397
2398 int efx_tc_configure_default_rule_rep(struct efx_rep *efv)
2399 {
2400         struct efx_tc_flow_rule *rule = &efv->dflt;
2401         struct efx_nic *efx = efv->parent;
2402         u32 ing_port, eg_port;
2403
2404         efx_mae_mport_mport(efx, efv->mport, &ing_port);
2405         efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
2406         return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
2407 }
2408
2409 void efx_tc_deconfigure_default_rule(struct efx_nic *efx,
2410                                      struct efx_tc_flow_rule *rule)
2411 {
2412         if (rule->fw_id != MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL)
2413                 efx_tc_delete_rule(efx, rule);
2414         rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
2415 }
2416
2417 static int efx_tc_configure_fallback_acts(struct efx_nic *efx, u32 eg_port,
2418                                           struct efx_tc_action_set_list *acts)
2419 {
2420         struct efx_tc_action_set *act;
2421         int rc;
2422
2423         act = kzalloc(sizeof(*act), GFP_KERNEL);
2424         if (!act)
2425                 return -ENOMEM;
2426         act->deliver = 1;
2427         act->dest_mport = eg_port;
2428         rc = efx_mae_alloc_action_set(efx, act);
2429         if (rc)
2430                 goto fail1;
2431         EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
2432         list_add_tail(&act->list, &acts->list);
2433         rc = efx_mae_alloc_action_set_list(efx, acts);
2434         if (rc)
2435                 goto fail2;
2436         return 0;
2437 fail2:
2438         list_del(&act->list);
2439         efx_mae_free_action_set(efx, act->fw_id);
2440 fail1:
2441         kfree(act);
2442         return rc;
2443 }
2444
2445 static int efx_tc_configure_fallback_acts_pf(struct efx_nic *efx)
2446 {
2447         struct efx_tc_action_set_list *acts = &efx->tc->facts.pf;
2448         u32 eg_port;
2449
2450         efx_mae_mport_uplink(efx, &eg_port);
2451         return efx_tc_configure_fallback_acts(efx, eg_port, acts);
2452 }
2453
2454 static int efx_tc_configure_fallback_acts_reps(struct efx_nic *efx)
2455 {
2456         struct efx_tc_action_set_list *acts = &efx->tc->facts.reps;
2457         u32 eg_port;
2458
2459         efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
2460         return efx_tc_configure_fallback_acts(efx, eg_port, acts);
2461 }
2462
2463 static void efx_tc_deconfigure_fallback_acts(struct efx_nic *efx,
2464                                              struct efx_tc_action_set_list *acts)
2465 {
2466         efx_tc_free_action_set_list(efx, acts, true);
2467 }
2468
2469 static int efx_tc_configure_rep_mport(struct efx_nic *efx)
2470 {
2471         u32 rep_mport_label;
2472         int rc;
2473
2474         rc = efx_mae_allocate_mport(efx, &efx->tc->reps_mport_id, &rep_mport_label);
2475         if (rc)
2476                 return rc;
2477         pci_dbg(efx->pci_dev, "created rep mport 0x%08x (0x%04x)\n",
2478                 efx->tc->reps_mport_id, rep_mport_label);
2479         /* Use mport *selector* as vport ID */
2480         efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
2481                             &efx->tc->reps_mport_vport_id);
2482         return 0;
2483 }
2484
2485 static void efx_tc_deconfigure_rep_mport(struct efx_nic *efx)
2486 {
2487         efx_mae_free_mport(efx, efx->tc->reps_mport_id);
2488         efx->tc->reps_mport_id = MAE_MPORT_SELECTOR_NULL;
2489 }
2490
2491 int efx_tc_insert_rep_filters(struct efx_nic *efx)
2492 {
2493         struct efx_filter_spec promisc, allmulti;
2494         int rc;
2495
2496         if (efx->type->is_vf)
2497                 return 0;
2498         if (!efx->tc)
2499                 return 0;
2500         efx_filter_init_rx(&promisc, EFX_FILTER_PRI_REQUIRED, 0, 0);
2501         efx_filter_set_uc_def(&promisc);
2502         efx_filter_set_vport_id(&promisc, efx->tc->reps_mport_vport_id);
2503         rc = efx_filter_insert_filter(efx, &promisc, false);
2504         if (rc < 0)
2505                 return rc;
2506         efx->tc->reps_filter_uc = rc;
2507         efx_filter_init_rx(&allmulti, EFX_FILTER_PRI_REQUIRED, 0, 0);
2508         efx_filter_set_mc_def(&allmulti);
2509         efx_filter_set_vport_id(&allmulti, efx->tc->reps_mport_vport_id);
2510         rc = efx_filter_insert_filter(efx, &allmulti, false);
2511         if (rc < 0)
2512                 return rc;
2513         efx->tc->reps_filter_mc = rc;
2514         return 0;
2515 }
2516
2517 void efx_tc_remove_rep_filters(struct efx_nic *efx)
2518 {
2519         if (efx->type->is_vf)
2520                 return;
2521         if (!efx->tc)
2522                 return;
2523         if (efx->tc->reps_filter_mc >= 0)
2524                 efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_mc);
2525         efx->tc->reps_filter_mc = -1;
2526         if (efx->tc->reps_filter_uc >= 0)
2527                 efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_uc);
2528         efx->tc->reps_filter_uc = -1;
2529 }
2530
2531 int efx_init_tc(struct efx_nic *efx)
2532 {
2533         int rc;
2534
2535         rc = efx_mae_get_caps(efx, efx->tc->caps);
2536         if (rc)
2537                 return rc;
2538         if (efx->tc->caps->match_field_count > MAE_NUM_FIELDS)
2539                 /* Firmware supports some match fields the driver doesn't know
2540                  * about.  Not fatal, unless any of those fields are required
2541                  * (MAE_FIELD_SUPPORTED_MATCH_ALWAYS) but if so we don't know.
2542                  */
2543                 netif_warn(efx, probe, efx->net_dev,
2544                            "FW reports additional match fields %u\n",
2545                            efx->tc->caps->match_field_count);
2546         if (efx->tc->caps->action_prios < EFX_TC_PRIO__NUM) {
2547                 netif_err(efx, probe, efx->net_dev,
2548                           "Too few action prios supported (have %u, need %u)\n",
2549                           efx->tc->caps->action_prios, EFX_TC_PRIO__NUM);
2550                 return -EIO;
2551         }
2552         rc = efx_tc_configure_default_rule_pf(efx);
2553         if (rc)
2554                 return rc;
2555         rc = efx_tc_configure_default_rule_wire(efx);
2556         if (rc)
2557                 return rc;
2558         rc = efx_tc_configure_rep_mport(efx);
2559         if (rc)
2560                 return rc;
2561         rc = efx_tc_configure_fallback_acts_pf(efx);
2562         if (rc)
2563                 return rc;
2564         rc = efx_tc_configure_fallback_acts_reps(efx);
2565         if (rc)
2566                 return rc;
2567         rc = efx_mae_get_tables(efx);
2568         if (rc)
2569                 return rc;
2570         rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx);
2571         if (rc)
2572                 goto out_free;
2573         efx->tc->up = true;
2574         return 0;
2575 out_free:
2576         efx_mae_free_tables(efx);
2577         return rc;
2578 }
2579
2580 void efx_fini_tc(struct efx_nic *efx)
2581 {
2582         /* We can get called even if efx_init_struct_tc() failed */
2583         if (!efx->tc)
2584                 return;
2585         if (efx->tc->up)
2586                 flow_indr_dev_unregister(efx_tc_indr_setup_cb, efx, efx_tc_block_unbind);
2587         efx_tc_deconfigure_rep_mport(efx);
2588         efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.pf);
2589         efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.wire);
2590         efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.pf);
2591         efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.reps);
2592         efx->tc->up = false;
2593         efx_mae_free_tables(efx);
2594 }
2595
2596 /* At teardown time, all TC filter rules (and thus all resources they created)
2597  * should already have been removed.  If we find any in our hashtables, make a
2598  * cursory attempt to clean up the software side.
2599  */
2600 static void efx_tc_encap_match_free(void *ptr, void *__unused)
2601 {
2602         struct efx_tc_encap_match *encap = ptr;
2603
2604         WARN_ON(refcount_read(&encap->ref));
2605         kfree(encap);
2606 }
2607
2608 static void efx_tc_recirc_free(void *ptr, void *arg)
2609 {
2610         struct efx_tc_recirc_id *rid = ptr;
2611         struct efx_nic *efx = arg;
2612
2613         WARN_ON(refcount_read(&rid->ref));
2614         ida_free(&efx->tc->recirc_ida, rid->fw_id);
2615         kfree(rid);
2616 }
2617
2618 static void efx_tc_lhs_free(void *ptr, void *arg)
2619 {
2620         struct efx_tc_lhs_rule *rule = ptr;
2621         struct efx_nic *efx = arg;
2622
2623         netif_err(efx, drv, efx->net_dev,
2624                   "tc lhs_rule %lx still present at teardown, removing\n",
2625                   rule->cookie);
2626
2627         if (rule->lhs_act.zone)
2628                 efx_tc_ct_unregister_zone(efx, rule->lhs_act.zone);
2629         if (rule->lhs_act.count)
2630                 efx_tc_flower_put_counter_index(efx, rule->lhs_act.count);
2631         efx_mae_remove_lhs_rule(efx, rule);
2632
2633         kfree(rule);
2634 }
2635
2636 static void efx_tc_mac_free(void *ptr, void *__unused)
2637 {
2638         struct efx_tc_mac_pedit_action *ped = ptr;
2639
2640         WARN_ON(refcount_read(&ped->ref));
2641         kfree(ped);
2642 }
2643
2644 static void efx_tc_flow_free(void *ptr, void *arg)
2645 {
2646         struct efx_tc_flow_rule *rule = ptr;
2647         struct efx_nic *efx = arg;
2648
2649         netif_err(efx, drv, efx->net_dev,
2650                   "tc rule %lx still present at teardown, removing\n",
2651                   rule->cookie);
2652
2653         /* Also releases entries in subsidiary tables */
2654         efx_tc_delete_rule(efx, rule);
2655
2656         kfree(rule);
2657 }
2658
2659 int efx_init_struct_tc(struct efx_nic *efx)
2660 {
2661         int rc;
2662
2663         if (efx->type->is_vf)
2664                 return 0;
2665
2666         efx->tc = kzalloc(sizeof(*efx->tc), GFP_KERNEL);
2667         if (!efx->tc)
2668                 return -ENOMEM;
2669         efx->tc->caps = kzalloc(sizeof(struct mae_caps), GFP_KERNEL);
2670         if (!efx->tc->caps) {
2671                 rc = -ENOMEM;
2672                 goto fail_alloc_caps;
2673         }
2674         INIT_LIST_HEAD(&efx->tc->block_list);
2675
2676         mutex_init(&efx->tc->mutex);
2677         init_waitqueue_head(&efx->tc->flush_wq);
2678         rc = efx_tc_init_encap_actions(efx);
2679         if (rc < 0)
2680                 goto fail_encap_actions;
2681         rc = efx_tc_init_counters(efx);
2682         if (rc < 0)
2683                 goto fail_counters;
2684         rc = rhashtable_init(&efx->tc->mac_ht, &efx_tc_mac_ht_params);
2685         if (rc < 0)
2686                 goto fail_mac_ht;
2687         rc = rhashtable_init(&efx->tc->encap_match_ht, &efx_tc_encap_match_ht_params);
2688         if (rc < 0)
2689                 goto fail_encap_match_ht;
2690         rc = rhashtable_init(&efx->tc->match_action_ht, &efx_tc_match_action_ht_params);
2691         if (rc < 0)
2692                 goto fail_match_action_ht;
2693         rc = rhashtable_init(&efx->tc->lhs_rule_ht, &efx_tc_lhs_rule_ht_params);
2694         if (rc < 0)
2695                 goto fail_lhs_rule_ht;
2696         rc = efx_tc_init_conntrack(efx);
2697         if (rc < 0)
2698                 goto fail_conntrack;
2699         rc = rhashtable_init(&efx->tc->recirc_ht, &efx_tc_recirc_ht_params);
2700         if (rc < 0)
2701                 goto fail_recirc_ht;
2702         ida_init(&efx->tc->recirc_ida);
2703         efx->tc->reps_filter_uc = -1;
2704         efx->tc->reps_filter_mc = -1;
2705         INIT_LIST_HEAD(&efx->tc->dflt.pf.acts.list);
2706         efx->tc->dflt.pf.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
2707         INIT_LIST_HEAD(&efx->tc->dflt.wire.acts.list);
2708         efx->tc->dflt.wire.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
2709         INIT_LIST_HEAD(&efx->tc->facts.pf.list);
2710         efx->tc->facts.pf.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
2711         INIT_LIST_HEAD(&efx->tc->facts.reps.list);
2712         efx->tc->facts.reps.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
2713         efx->extra_channel_type[EFX_EXTRA_CHANNEL_TC] = &efx_tc_channel_type;
2714         return 0;
2715 fail_recirc_ht:
2716         efx_tc_destroy_conntrack(efx);
2717 fail_conntrack:
2718         rhashtable_destroy(&efx->tc->lhs_rule_ht);
2719 fail_lhs_rule_ht:
2720         rhashtable_destroy(&efx->tc->match_action_ht);
2721 fail_match_action_ht:
2722         rhashtable_destroy(&efx->tc->encap_match_ht);
2723 fail_encap_match_ht:
2724         rhashtable_destroy(&efx->tc->mac_ht);
2725 fail_mac_ht:
2726         efx_tc_destroy_counters(efx);
2727 fail_counters:
2728         efx_tc_destroy_encap_actions(efx);
2729 fail_encap_actions:
2730         mutex_destroy(&efx->tc->mutex);
2731         kfree(efx->tc->caps);
2732 fail_alloc_caps:
2733         kfree(efx->tc);
2734         efx->tc = NULL;
2735         return rc;
2736 }
2737
2738 void efx_fini_struct_tc(struct efx_nic *efx)
2739 {
2740         if (!efx->tc)
2741                 return;
2742
2743         mutex_lock(&efx->tc->mutex);
2744         EFX_WARN_ON_PARANOID(efx->tc->dflt.pf.fw_id !=
2745                              MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
2746         EFX_WARN_ON_PARANOID(efx->tc->dflt.wire.fw_id !=
2747                              MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
2748         EFX_WARN_ON_PARANOID(efx->tc->facts.pf.fw_id !=
2749                              MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
2750         EFX_WARN_ON_PARANOID(efx->tc->facts.reps.fw_id !=
2751                              MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
2752         rhashtable_free_and_destroy(&efx->tc->lhs_rule_ht, efx_tc_lhs_free, efx);
2753         rhashtable_free_and_destroy(&efx->tc->match_action_ht, efx_tc_flow_free,
2754                                     efx);
2755         rhashtable_free_and_destroy(&efx->tc->encap_match_ht,
2756                                     efx_tc_encap_match_free, NULL);
2757         efx_tc_fini_conntrack(efx);
2758         rhashtable_free_and_destroy(&efx->tc->recirc_ht, efx_tc_recirc_free, efx);
2759         WARN_ON(!ida_is_empty(&efx->tc->recirc_ida));
2760         ida_destroy(&efx->tc->recirc_ida);
2761         rhashtable_free_and_destroy(&efx->tc->mac_ht, efx_tc_mac_free, NULL);
2762         efx_tc_fini_counters(efx);
2763         efx_tc_fini_encap_actions(efx);
2764         mutex_unlock(&efx->tc->mutex);
2765         mutex_destroy(&efx->tc->mutex);
2766         kfree(efx->tc->caps);
2767         kfree(efx->tc);
2768         efx->tc = NULL;
2769 }