cls_flower: Support filtering on multiple MPLS Label Stack Entries
author Guillaume Nault <gnault@redhat.com>
Tue, 26 May 2020 12:29:04 +0000 (14:29 +0200)
committer David S. Miller <davem@davemloft.net>
Tue, 26 May 2020 22:22:58 +0000 (15:22 -0700)
With struct flow_dissector_key_mpls now recording the first
FLOW_DIS_MPLS_MAX labels, we can extend Flower to filter on any of
these LSEs independently.

In order to avoid creating new netlink attributes for every possible
depth, let's define a new TCA_FLOWER_KEY_MPLS_OPTS nested attribute
that contains the list of LSEs to match. Each LSE is represented by
another attribute, TCA_FLOWER_KEY_MPLS_OPTS_LSE, which then contains
the attributes representing the depth and the MPLS fields to match at
this depth (label, TTL, etc.).

For each MPLS field, the mask is always set to all-ones, as this is
what the original API did. We could allow user configurable masks in
the future if there is demand for more flexibility.

The new API also allows specifying only an LSE depth. In that case,
Flower only verifies that the MPLS label stack depth is greater than or
equal to the provided depth (that is, an LSE exists at this depth).

Filters that only match on one (or more) fields of the first LSE are
dumped using the old netlink attributes, to avoid confusing user space
programs that don't understand the new API.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/uapi/linux/pkt_cls.h
net/sched/cls_flower.c

index fc672b232437dcc4578aa90be14ae51caa69e099..7576209d96f9f6a8e8977d69a6a0a72a4040b58b 100644 (file)
@@ -576,6 +576,8 @@ enum {
        TCA_FLOWER_KEY_CT_LABELS,       /* u128 */
        TCA_FLOWER_KEY_CT_LABELS_MASK,  /* u128 */
 
+       TCA_FLOWER_KEY_MPLS_OPTS,
+
        __TCA_FLOWER_MAX,
 };
 
@@ -640,6 +642,27 @@ enum {
 #define TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX \
                (__TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX - 1)
 
+/* Attributes nested inside TCA_FLOWER_KEY_MPLS_OPTS: the list of MPLS Label
+ * Stack Entries (LSEs) to match.
+ */
+enum {
+       TCA_FLOWER_KEY_MPLS_OPTS_UNSPEC,
+       TCA_FLOWER_KEY_MPLS_OPTS_LSE,   /* nested, one per LSE to match */
+       __TCA_FLOWER_KEY_MPLS_OPTS_MAX,
+};
+
+#define TCA_FLOWER_KEY_MPLS_OPTS_MAX (__TCA_FLOWER_KEY_MPLS_OPTS_MAX - 1)
+
+/* Attributes nested inside TCA_FLOWER_KEY_MPLS_OPTS_LSE: the depth of the LSE
+ * in the label stack and the MPLS fields to match at that depth.
+ */
+enum {
+       TCA_FLOWER_KEY_MPLS_OPT_LSE_UNSPEC,
+       TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH,      /* u8, starts at 1 */
+       TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL,        /* u8 */
+       TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS,        /* u8, 0 or 1 */
+       TCA_FLOWER_KEY_MPLS_OPT_LSE_TC,         /* u8, 0 to 7 */
+       TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL,      /* u32, 0 to 1048575 */
+       __TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX,
+};
+
+#define TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX \
+               (__TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX - 1)
+
 enum {
        TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
        TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
index f524afe0b7f52c1c59933f505b780d645baacc5f..96f5999281e0ea01fec44fe0057cec183f2f5835 100644 (file)
@@ -668,6 +668,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
        [TCA_FLOWER_KEY_MPLS_BOS]       = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_MPLS_TC]        = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_MPLS_LABEL]     = { .type = NLA_U32 },
+       [TCA_FLOWER_KEY_MPLS_OPTS]      = { .type = NLA_NESTED },
        [TCA_FLOWER_KEY_TCP_FLAGS]      = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_TCP_FLAGS_MASK] = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_IP_TOS]         = { .type = NLA_U8 },
@@ -726,6 +727,20 @@ erspan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1] = {
        [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]       = { .type = NLA_U8 },
 };
 
+/* Policy for the attributes nested in TCA_FLOWER_KEY_MPLS_OPTS.
+ *
+ * NOTE(review): nothing in the hunks shown here references this table;
+ * fl_set_key_mpls_opts() checks the attribute type by hand instead. Confirm
+ * whether it is still needed.
+ */
+static const struct nla_policy
+mpls_opts_policy[TCA_FLOWER_KEY_MPLS_OPTS_MAX + 1] = {
+       [TCA_FLOWER_KEY_MPLS_OPTS_LSE]    = { .type = NLA_NESTED },
+};
+
+/* Policy for the attributes nested in TCA_FLOWER_KEY_MPLS_OPTS_LSE; used by
+ * fl_set_key_mpls_lse() to validate a single Label Stack Entry.
+ */
+static const struct nla_policy
+mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
+       [TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]    = { .type = NLA_U8 },
+       [TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]      = { .type = NLA_U8 },
+       [TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]      = { .type = NLA_U8 },
+       [TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]       = { .type = NLA_U8 },
+       [TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]    = { .type = NLA_U32 },
+};
+
 static void fl_set_key_val(struct nlattr **tb,
                           void *val, int val_type,
                           void *mask, int mask_type, int len)
@@ -776,6 +791,126 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
        return 0;
 }
 
+/* Parse one TCA_FLOWER_KEY_MPLS_OPTS_LSE attribute into the flower MPLS key.
+ *
+ * @nla_lse:  nested attribute describing a single Label Stack Entry
+ * @key_val:  MPLS key values to update
+ * @key_mask: MPLS key masks to update
+ * @extack:   netlink extended ack used to report errors
+ *
+ * The "depth" attribute is mandatory and selects which LSE is described
+ * (1-based, at most FLOW_DIS_MPLS_MAX). The entry is flagged in both
+ * @key_val and @key_mask with dissector_set_mpls_lse(); each optional field
+ * that is present (TTL, BOS, TC, label) is then copied to @key_val and its
+ * mask is set to the full-field MPLS_*_MASK value. When no optional field is
+ * given, the filter only requires that an LSE exists at this depth.
+ *
+ * Returns 0 on success, the nla_parse_nested() error if parsing fails, or
+ * -EINVAL if "depth" is missing or a value is out of range.
+ */
+static int fl_set_key_mpls_lse(const struct nlattr *nla_lse,
+                              struct flow_dissector_key_mpls *key_val,
+                              struct flow_dissector_key_mpls *key_mask,
+                              struct netlink_ext_ack *extack)
+{
+       struct nlattr *tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1];
+       struct flow_dissector_mpls_lse *lse_mask;
+       struct flow_dissector_mpls_lse *lse_val;
+       u8 lse_index;
+       u8 depth;
+       int err;
+
+       err = nla_parse_nested(tb, TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX, nla_lse,
+                              mpls_stack_entry_policy, extack);
+       if (err < 0)
+               return err;
+
+       if (!tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]) {
+               NL_SET_ERR_MSG(extack, "Missing MPLS option \"depth\"");
+               return -EINVAL;
+       }
+
+       depth = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]);
+
+       /* LSE depth starts at 1, for consistency with terminology used by
+        * RFC 3031 (section 3.9), where depth 0 refers to unlabeled packets.
+        */
+       if (depth < 1 || depth > FLOW_DIS_MPLS_MAX) {
+               NL_SET_ERR_MSG_ATTR(extack,
+                                   tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH],
+                                   "Invalid MPLS depth");
+               return -EINVAL;
+       }
+       /* The ls[] array is 0-based while the user-visible depth is 1-based. */
+       lse_index = depth - 1;
+
+       /* The entry is flagged before the optional fields are validated, so
+        * @key_val and @key_mask may already be partially updated when one of
+        * the checks below returns -EINVAL.
+        */
+       dissector_set_mpls_lse(key_val, lse_index);
+       dissector_set_mpls_lse(key_mask, lse_index);
+
+       lse_val = &key_val->ls[lse_index];
+       lse_mask = &key_mask->ls[lse_index];
+
+       /* TTL uses the whole u8 range, hence no range check. */
+       if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]) {
+               lse_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]);
+               lse_mask->mpls_ttl = MPLS_TTL_MASK;
+       }
+       if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]) {
+               u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]);
+
+               if (bos & ~MPLS_BOS_MASK) {
+                       NL_SET_ERR_MSG_ATTR(extack,
+                                           tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS],
+                                           "Bottom Of Stack (BOS) must be 0 or 1");
+                       return -EINVAL;
+               }
+               lse_val->mpls_bos = bos;
+               lse_mask->mpls_bos = MPLS_BOS_MASK;
+       }
+       if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]) {
+               u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]);
+
+               if (tc & ~MPLS_TC_MASK) {
+                       NL_SET_ERR_MSG_ATTR(extack,
+                                           tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC],
+                                           "Traffic Class (TC) must be between 0 and 7");
+                       return -EINVAL;
+               }
+               lse_val->mpls_tc = tc;
+               lse_mask->mpls_tc = MPLS_TC_MASK;
+       }
+       if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]) {
+               u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]);
+
+               if (label & ~MPLS_LABEL_MASK) {
+                       NL_SET_ERR_MSG_ATTR(extack,
+                                           tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL],
+                                           "Label must be between 0 and 1048575");
+                       return -EINVAL;
+               }
+               lse_val->mpls_label = label;
+               lse_mask->mpls_label = MPLS_LABEL_MASK;
+       }
+
+       return 0;
+}
+
+/* Walk the TCA_FLOWER_KEY_MPLS_OPTS nest and load every Label Stack Entry it
+ * contains into the flower MPLS key.
+ *
+ * @nla_mpls_opts must carry NLA_F_NESTED and may only contain
+ * TCA_FLOWER_KEY_MPLS_OPTS_LSE attributes; each of them is handed to
+ * fl_set_key_mpls_lse().
+ *
+ * Returns 0 on success, -EINVAL on a malformed nest, or the error reported
+ * by fl_set_key_mpls_lse().
+ */
+static int fl_set_key_mpls_opts(const struct nlattr *nla_mpls_opts,
+                               struct flow_dissector_key_mpls *key_val,
+                               struct flow_dissector_key_mpls *key_mask,
+                               struct netlink_ext_ack *extack)
+{
+       struct nlattr *entry;
+       int remaining;
+       int ret;
+
+       if ((nla_mpls_opts->nla_type & NLA_F_NESTED) == 0) {
+               NL_SET_ERR_MSG_ATTR(extack, nla_mpls_opts,
+                                   "NLA_F_NESTED is missing");
+               return -EINVAL;
+       }
+
+       nla_for_each_nested(entry, nla_mpls_opts, remaining) {
+               /* Only LSE attributes are accepted inside this nest. */
+               if (nla_type(entry) == TCA_FLOWER_KEY_MPLS_OPTS_LSE) {
+                       ret = fl_set_key_mpls_lse(entry, key_val, key_mask,
+                                                 extack);
+                       if (ret < 0)
+                               return ret;
+                       continue;
+               }
+
+               NL_SET_ERR_MSG_ATTR(extack, entry, "Invalid MPLS option type");
+               return -EINVAL;
+       }
+
+       /* Bytes are left over once the walk over the nest has stopped. */
+       if (remaining != 0) {
+               NL_SET_ERR_MSG(extack,
+                              "Bytes leftover after parsing MPLS options");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int fl_set_key_mpls(struct nlattr **tb,
                           struct flow_dissector_key_mpls *key_val,
                           struct flow_dissector_key_mpls *key_mask,
@@ -784,6 +919,21 @@ static int fl_set_key_mpls(struct nlattr **tb,
        struct flow_dissector_mpls_lse *lse_mask;
        struct flow_dissector_mpls_lse *lse_val;
 
+       if (tb[TCA_FLOWER_KEY_MPLS_OPTS]) {
+               if (tb[TCA_FLOWER_KEY_MPLS_TTL] ||
+                   tb[TCA_FLOWER_KEY_MPLS_BOS] ||
+                   tb[TCA_FLOWER_KEY_MPLS_TC] ||
+                   tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
+                       NL_SET_ERR_MSG_ATTR(extack,
+                                           tb[TCA_FLOWER_KEY_MPLS_OPTS],
+                                           "MPLS label, Traffic Class, Bottom Of Stack and Time To Live must be encapsulated in the MPLS options attribute");
+                       return -EBADMSG;
+               }
+
+               return fl_set_key_mpls_opts(tb[TCA_FLOWER_KEY_MPLS_OPTS],
+                                           key_val, key_mask, extack);
+       }
+
        lse_val = &key_val->ls[0];
        lse_mask = &key_mask->ls[0];
 
@@ -2232,6 +2382,89 @@ static int fl_dump_key_port_range(struct sk_buff *skb, struct fl_flow_key *key,
        return 0;
 }
 
+/* Dump the contents of one TCA_FLOWER_KEY_MPLS_OPTS_LSE nest.
+ *
+ * @skb:       message being built
+ * @mpls_key:  MPLS key values of the filter
+ * @mpls_mask: MPLS key masks of the filter
+ * @lse_index: 0-based index of the Label Stack Entry to dump
+ *
+ * Emits the 1-based depth of the entry, followed by every MPLS field whose
+ * mask is non-zero for that entry. The enclosing nest is opened and closed
+ * by the caller.
+ *
+ * Returns 0 on success or the error reported by nla_put_u8()/nla_put_u32().
+ */
+static int fl_dump_key_mpls_opt_lse(struct sk_buff *skb,
+                                   struct flow_dissector_key_mpls *mpls_key,
+                                   struct flow_dissector_key_mpls *mpls_mask,
+                                   u8 lse_index)
+{
+       struct flow_dissector_mpls_lse *lse_mask = &mpls_mask->ls[lse_index];
+       struct flow_dissector_mpls_lse *lse_key = &mpls_key->ls[lse_index];
+       int err;
+
+       /* User space sees depths starting at 1, ls[] is indexed from 0. */
+       err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH,
+                        lse_index + 1);
+       if (err)
+               return err;
+
+       if (lse_mask->mpls_ttl) {
+               err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL,
+                                lse_key->mpls_ttl);
+               if (err)
+                       return err;
+       }
+       if (lse_mask->mpls_bos) {
+               err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS,
+                                lse_key->mpls_bos);
+               if (err)
+                       return err;
+       }
+       if (lse_mask->mpls_tc) {
+               err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_TC,
+                                lse_key->mpls_tc);
+               if (err)
+                       return err;
+       }
+       if (lse_mask->mpls_label) {
+               /* The label goes up to 1048575 and is parsed as NLA_U32 (see
+                * mpls_stack_entry_policy): dump it as a u32, a u8 would
+                * truncate the value and produce an attribute of the wrong
+                * size.
+                */
+               err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL,
+                                 lse_key->mpls_label);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/* Dump the MPLS part of a filter as a TCA_FLOWER_KEY_MPLS_OPTS nest.
+ *
+ * One TCA_FLOWER_KEY_MPLS_OPTS_LSE nest is emitted for every Label Stack
+ * Entry whose bit is set in @mpls_mask->used_lses. On failure everything
+ * added by this function is cancelled before returning.
+ *
+ * Returns 0 on success, -EMSGSIZE if a nest cannot be started, or the error
+ * reported by fl_dump_key_mpls_opt_lse().
+ */
+static int fl_dump_key_mpls_opts(struct sk_buff *skb,
+                                struct flow_dissector_key_mpls *mpls_key,
+                                struct flow_dissector_key_mpls *mpls_mask)
+{
+       struct nlattr *opts_nest;
+       struct nlattr *lse_nest;
+       int ret;
+       u8 i;
+
+       opts_nest = nla_nest_start(skb, TCA_FLOWER_KEY_MPLS_OPTS);
+       if (!opts_nest)
+               return -EMSGSIZE;
+
+       for (i = 0; i < FLOW_DIS_MPLS_MAX; i++) {
+               /* Skip entries the filter does not use. */
+               if (((mpls_mask->used_lses >> i) & 1) == 0)
+                       continue;
+
+               lse_nest = nla_nest_start(skb, TCA_FLOWER_KEY_MPLS_OPTS_LSE);
+               if (!lse_nest) {
+                       ret = -EMSGSIZE;
+                       goto cancel_opts;
+               }
+
+               ret = fl_dump_key_mpls_opt_lse(skb, mpls_key, mpls_mask, i);
+               if (ret) {
+                       /* Drop the half-written LSE before the outer nest. */
+                       nla_nest_cancel(skb, lse_nest);
+                       goto cancel_opts;
+               }
+
+               nla_nest_end(skb, lse_nest);
+       }
+
+       nla_nest_end(skb, opts_nest);
+
+       return 0;
+
+cancel_opts:
+       nla_nest_cancel(skb, opts_nest);
+
+       return ret;
+}
+
 static int fl_dump_key_mpls(struct sk_buff *skb,
                            struct flow_dissector_key_mpls *mpls_key,
                            struct flow_dissector_key_mpls *mpls_mask)
@@ -2240,12 +2473,20 @@ static int fl_dump_key_mpls(struct sk_buff *skb,
        struct flow_dissector_mpls_lse *lse_key;
        int err;
 
-       if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask)))
+       if (!mpls_mask->used_lses)
                return 0;
 
        lse_mask = &mpls_mask->ls[0];
        lse_key = &mpls_key->ls[0];
 
+       /* For backward compatibility, don't use the MPLS nested attributes if
+        * the rule can be expressed using the old attributes.
+        */
+       if (mpls_mask->used_lses & ~1 ||
+           (!lse_mask->mpls_ttl && !lse_mask->mpls_bos &&
+            !lse_mask->mpls_tc && !lse_mask->mpls_label))
+               return fl_dump_key_mpls_opts(skb, mpls_key, mpls_mask);
+
        if (lse_mask->mpls_ttl) {
                err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL,
                                 lse_key->mpls_ttl);