bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()
author: Ziyang Xuan <william.xuanziyang@huawei.com>
Fri, 13 Jan 2023 09:24:51 +0000 (17:24 +0800)
committer: Martin KaFai Lau <martin.lau@kernel.org>
Sun, 15 Jan 2023 20:56:17 +0000 (12:56 -0800)
Add ipip6 and ip6ip decap support for bpf_skb_adjust_room().
Main use case is for using cls_bpf on ingress hook to decapsulate
IPv4 over IPv6 and IPv6 over IPv4 tunnel packets.

Add two new flags BPF_F_ADJ_ROOM_DECAP_L3_IPV{4,6} to indicate the
new IP header version after decapsulating the outer IP header.

Suggested-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/r/b268ec7f0ff9431f4f43b1b40ab856ebb28cb4e1.1673574419.git.william.xuanziyang@huawei.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
include/uapi/linux/bpf.h
net/core/filter.c
tools/include/uapi/linux/bpf.h

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bc1a3d2..adae5b1 100644
@@ -2647,6 +2647,11 @@ union bpf_attr {
  *               Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
  *               L2 type as Ethernet.
  *
+ *             * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
+ *               **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
+ *               Indicate the new IP header version after decapsulating the outer
+ *               IP header. Used when the inner and outer IP versions are different.
+ *
  *             A call to this helper is susceptible to change the underlying
  *             packet buffer. Therefore, at load time, all checks on pointers
  *             previously done by the verifier are invalidated and must be
@@ -5807,6 +5812,8 @@ enum {
        BPF_F_ADJ_ROOM_ENCAP_L4_UDP     = (1ULL << 4),
        BPF_F_ADJ_ROOM_NO_CSUM_RESET    = (1ULL << 5),
        BPF_F_ADJ_ROOM_ENCAP_L2_ETH     = (1ULL << 6),
+       BPF_F_ADJ_ROOM_DECAP_L3_IPV4    = (1ULL << 7),
+       BPF_F_ADJ_ROOM_DECAP_L3_IPV6    = (1ULL << 8),
 };
 
 enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index d9befa6..b4547a2 100644
@@ -3381,13 +3381,17 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
 #define BPF_F_ADJ_ROOM_ENCAP_L3_MASK   (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
                                         BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
 
+#define BPF_F_ADJ_ROOM_DECAP_L3_MASK   (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
+                                        BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
+
 #define BPF_F_ADJ_ROOM_MASK            (BPF_F_ADJ_ROOM_FIXED_GSO | \
                                         BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
                                         BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
                                         BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
                                         BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
                                         BPF_F_ADJ_ROOM_ENCAP_L2( \
-                                         BPF_ADJ_ROOM_ENCAP_L2_MASK))
+                                         BPF_ADJ_ROOM_ENCAP_L2_MASK) | \
+                                        BPF_F_ADJ_ROOM_DECAP_L3_MASK)
 
 static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
                            u64 flags)
@@ -3501,6 +3505,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
        int ret;
 
        if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
+                              BPF_F_ADJ_ROOM_DECAP_L3_MASK |
                               BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
                return -EINVAL;
 
@@ -3519,6 +3524,14 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
        if (unlikely(ret < 0))
                return ret;
 
+       /* Match skb->protocol to new outer l3 protocol */
+       if (skb->protocol == htons(ETH_P_IP) &&
+           flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
+               skb->protocol = htons(ETH_P_IPV6);
+       else if (skb->protocol == htons(ETH_P_IPV6) &&
+                flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
+               skb->protocol = htons(ETH_P_IP);
+
        if (skb_is_gso(skb)) {
                struct skb_shared_info *shinfo = skb_shinfo(skb);
 
@@ -3608,6 +3621,22 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
                return -ENOTSUPP;
        }
 
+       if (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
+               if (!shrink)
+                       return -EINVAL;
+
+               switch (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
+               case BPF_F_ADJ_ROOM_DECAP_L3_IPV4:
+                       len_min = sizeof(struct iphdr);
+                       break;
+               case BPF_F_ADJ_ROOM_DECAP_L3_IPV6:
+                       len_min = sizeof(struct ipv6hdr);
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       }
+
        len_cur = skb->len - skb_network_offset(skb);
        if ((shrink && (len_diff_abs >= len_cur ||
                        len_cur - len_diff_abs < len_min)) ||
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index bc1a3d2..142b81b 100644
@@ -2647,6 +2647,11 @@ union bpf_attr {
  *               Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
  *               L2 type as Ethernet.
  *
+ *             * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
+ *               **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
+ *               Indicate the new IP header version after decapsulating the outer
+ *               IP header. Used when the inner and outer IP versions are different.
+ *
  *             A call to this helper is susceptible to change the underlying
  *             packet buffer. Therefore, at load time, all checks on pointers
  *             previously done by the verifier are invalidated and must be
@@ -5807,6 +5812,8 @@ enum {
        BPF_F_ADJ_ROOM_ENCAP_L4_UDP     = (1ULL << 4),
        BPF_F_ADJ_ROOM_NO_CSUM_RESET    = (1ULL << 5),
        BPF_F_ADJ_ROOM_ENCAP_L2_ETH     = (1ULL << 6),
+       BPF_F_ADJ_ROOM_DECAP_L3_IPV4    = (1ULL << 7),
+       BPF_F_ADJ_ROOM_DECAP_L3_IPV6    = (1ULL << 8),
 };
 
 enum {