sfc: conntrack state matches in TC rules
author Edward Cree <ecree.xilinx@gmail.com>
Mon, 7 Aug 2023 13:48:10 +0000 (14:48 +0100)
committer David S. Miller <davem@davemloft.net>
Wed, 9 Aug 2023 10:14:38 +0000 (11:14 +0100)
Parse ct_state trk/est, mark and zone out of flower keys, and plumb
 them through to the hardware, performing some minor translations.
Nothing can actually hit them yet as we're not offloading any DO_CT
 actions.

Reviewed-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/sfc/mae.c
drivers/net/ethernet/sfc/mcdi.h
drivers/net/ethernet/sfc/tc.c
drivers/net/ethernet/sfc/tc.h

index 8ebf71a..1fa0958 100644 (file)
@@ -695,8 +695,13 @@ int efx_mae_match_check_caps(struct efx_nic *efx,
            CHECK(L4_SPORT, l4_sport) ||
            CHECK(L4_DPORT, l4_dport) ||
            CHECK(TCP_FLAGS, tcp_flags) ||
+           CHECK_BIT(TCP_SYN_FIN_RST, tcp_syn_fin_rst) ||
            CHECK_BIT(IS_IP_FRAG, ip_frag) ||
            CHECK_BIT(IP_FIRST_FRAG, ip_firstfrag) ||
+           CHECK_BIT(DO_CT, ct_state_trk) ||
+           CHECK_BIT(CT_HIT, ct_state_est) ||
+           CHECK(CT_MARK, ct_mark) ||
+           CHECK(CT_DOMAIN, ct_zone) ||
            CHECK(RECIRC_ID, recirc_id))
                return rc;
        /* Matches on outer fields are done in a separate hardware table,
@@ -1672,20 +1677,40 @@ static int efx_mae_populate_match_criteria(MCDI_DECLARE_STRUCT_PTR(match_crit),
        }
        MCDI_STRUCT_SET_DWORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_INGRESS_MPORT_SELECTOR_MASK,
                              match->mask.ingress_port);
-       EFX_POPULATE_DWORD_2(*_MCDI_STRUCT_DWORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_FLAGS),
+       EFX_POPULATE_DWORD_5(*_MCDI_STRUCT_DWORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_FLAGS),
+                            MAE_FIELD_MASK_VALUE_PAIRS_V2_DO_CT,
+                            match->value.ct_state_trk,
+                            MAE_FIELD_MASK_VALUE_PAIRS_V2_CT_HIT,
+                            match->value.ct_state_est,
                             MAE_FIELD_MASK_VALUE_PAIRS_V2_IS_IP_FRAG,
                             match->value.ip_frag,
                             MAE_FIELD_MASK_VALUE_PAIRS_V2_IP_FIRST_FRAG,
-                            match->value.ip_firstfrag);
-       EFX_POPULATE_DWORD_2(*_MCDI_STRUCT_DWORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_FLAGS_MASK),
+                            match->value.ip_firstfrag,
+                            MAE_FIELD_MASK_VALUE_PAIRS_V2_TCP_SYN_FIN_RST,
+                            match->value.tcp_syn_fin_rst);
+       EFX_POPULATE_DWORD_5(*_MCDI_STRUCT_DWORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_FLAGS_MASK),
+                            MAE_FIELD_MASK_VALUE_PAIRS_V2_DO_CT,
+                            match->mask.ct_state_trk,
+                            MAE_FIELD_MASK_VALUE_PAIRS_V2_CT_HIT,
+                            match->mask.ct_state_est,
                             MAE_FIELD_MASK_VALUE_PAIRS_V2_IS_IP_FRAG,
                             match->mask.ip_frag,
                             MAE_FIELD_MASK_VALUE_PAIRS_V2_IP_FIRST_FRAG,
-                            match->mask.ip_firstfrag);
+                            match->mask.ip_firstfrag,
+                            MAE_FIELD_MASK_VALUE_PAIRS_V2_TCP_SYN_FIN_RST,
+                            match->mask.tcp_syn_fin_rst);
        MCDI_STRUCT_SET_BYTE(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_RECIRC_ID,
                             match->value.recirc_id);
        MCDI_STRUCT_SET_BYTE(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_RECIRC_ID_MASK,
                             match->mask.recirc_id);
+       MCDI_STRUCT_SET_DWORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_CT_MARK,
+                             match->value.ct_mark);
+       MCDI_STRUCT_SET_DWORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_CT_MARK_MASK,
+                             match->mask.ct_mark);
+       MCDI_STRUCT_SET_WORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_CT_DOMAIN,
+                            match->value.ct_zone);
+       MCDI_STRUCT_SET_WORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_CT_DOMAIN_MASK,
+                            match->mask.ct_zone);
        MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_ETHER_TYPE_BE,
                                match->value.eth_proto);
        MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_ETHER_TYPE_BE_MASK,
index 995a266..700d025 100644 (file)
@@ -229,6 +229,11 @@ void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev);
        BUILD_BUG_ON(MC_CMD_ ## _field ## _OFST & 1);                   \
        *(__force __le16 *)MCDI_PTR(_buf, _field) = cpu_to_le16(_value);\
        } while (0)
+#define MCDI_STRUCT_SET_WORD(_buf, _field, _value) do {                        \
+       BUILD_BUG_ON(_field ## _LEN != 2);                              \
+       BUILD_BUG_ON(_field ## _OFST & 1);                              \
+       *(__force __le16 *)MCDI_STRUCT_PTR(_buf, _field) = cpu_to_le16(_value);\
+       } while (0)
 #define MCDI_WORD(_buf, _field)                                                \
        ((u16)BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 2) +       \
         le16_to_cpu(*(__force const __le16 *)MCDI_PTR(_buf, _field)))
index 181636d..a9f4bfa 100644 (file)
@@ -222,6 +222,7 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx,
              BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
              BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
              BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
+             BIT_ULL(FLOW_DISSECTOR_KEY_CT) |
              BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
              BIT_ULL(FLOW_DISSECTOR_KEY_IP))) {
                NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported flower keys %#llx",
@@ -363,6 +364,31 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx,
                                       dissector->used_keys);
                return -EOPNOTSUPP;
        }
+       if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) {
+               struct flow_match_ct fm;
+
+               flow_rule_match_ct(rule, &fm);
+               match->value.ct_state_trk = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
+               match->mask.ct_state_trk = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
+               match->value.ct_state_est = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
+               match->mask.ct_state_est = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
+               if (fm.mask->ct_state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+                                         TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)) {
+                       NL_SET_ERR_MSG_FMT_MOD(extack,
+                                              "Unsupported ct_state match %#x",
+                                              fm.mask->ct_state);
+                       return -EOPNOTSUPP;
+               }
+               match->value.ct_mark = fm.key->ct_mark;
+               match->mask.ct_mark = fm.mask->ct_mark;
+               match->value.ct_zone = fm.key->ct_zone;
+               match->mask.ct_zone = fm.mask->ct_zone;
+
+               if (memchr_inv(fm.mask->ct_labels, 0, sizeof(fm.mask->ct_labels))) {
+                       NL_SET_ERR_MSG_MOD(extack, "Matching on ct_label not supported");
+                       return -EOPNOTSUPP;
+               }
+       }
 
        return 0;
 }
@@ -758,6 +784,26 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
        }
        match.mask.recirc_id = 0xff;
 
+       /* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
+        * +trk+est, which is strictly implied by +est, so rewrite it to that.
+        */
+       if (match.mask.ct_state_trk && match.value.ct_state_trk &&
+           match.mask.ct_state_est && match.value.ct_state_est)
+               match.mask.ct_state_trk = 0;
+       /* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
+        * match +trk-est (CT_HIT=0) despite being on an established connection.
+        * So make -est imply -tcp_syn_fin_rst match to ensure these packets
+        * still hit the software path.
+        */
+       if (match.mask.ct_state_est && !match.value.ct_state_est) {
+               if (match.value.tcp_syn_fin_rst) {
+                       /* Can't offload this combination */
+                       rc = -EOPNOTSUPP;
+                       goto release;
+               }
+               match.mask.tcp_syn_fin_rst = true;
+       }
+
        flow_action_for_each(i, fa, &fr->action) {
                switch (fa->id) {
                case FLOW_ACTION_REDIRECT:
@@ -1089,6 +1135,26 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
        }
        match.mask.recirc_id = 0xff;
 
+       /* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
+        * +trk+est, which is strictly implied by +est, so rewrite it to that.
+        */
+       if (match.mask.ct_state_trk && match.value.ct_state_trk &&
+           match.mask.ct_state_est && match.value.ct_state_est)
+               match.mask.ct_state_trk = 0;
+       /* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
+        * match +trk-est (CT_HIT=0) despite being on an established connection.
+        * So make -est imply -tcp_syn_fin_rst match to ensure these packets
+        * still hit the software path.
+        */
+       if (match.mask.ct_state_est && !match.value.ct_state_est) {
+               if (match.value.tcp_syn_fin_rst) {
+                       /* Can't offload this combination */
+                       rc = -EOPNOTSUPP;
+                       goto release;
+               }
+               match.mask.tcp_syn_fin_rst = true;
+       }
+
        rc = efx_mae_match_check_caps(efx, &match.mask, extack);
        if (rc)
                goto release;
index af15020..ce8e307 100644 (file)
@@ -60,6 +60,7 @@ struct efx_tc_match_fields {
        /* L4 */
        __be16 l4_sport, l4_dport; /* Ports (UDP, TCP) */
        __be16 tcp_flags;
+       bool tcp_syn_fin_rst; /* true if ANY of SYN/FIN/RST are set */
        /* Encap.  The following are *outer* fields.  Note that there are no
         * outer eth (L2) fields; this is because TC doesn't have them.
         */
@@ -68,6 +69,10 @@ struct efx_tc_match_fields {
        u8 enc_ip_tos, enc_ip_ttl;
        __be16 enc_sport, enc_dport;
        __be32 enc_keyid; /* e.g. VNI, VSID */
+       /* Conntrack. */
+       u16 ct_state_trk:1, ct_state_est:1;
+       u32 ct_mark;
+       u16 ct_zone;
 };
 
 static inline bool efx_tc_match_is_encap(const struct efx_tc_match_fields *mask)