net: add support for ipv4 big tcp

author Xin Long <lucien.xin@gmail.com>

Sat, 28 Jan 2023 15:58:39 +0000 (10:58 -0500)

committer Jakub Kicinski <kuba@kernel.org>

Thu, 2 Feb 2023 04:54:27 +0000 (20:54 -0800)
author Xin Long <lucien.xin@gmail.com>
Sat, 28 Jan 2023 15:58:39 +0000 (10:58 -0500)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 2 Feb 2023 04:54:27 +0000 (20:54 -0800)
diff --git a/net/core/gro.c b/net/core/gro.c

index 506f83d..b15f855 100644 (file)
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -162,16 +162,18 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
         struct sk_buff *lp;
         int segs;
  
-       /* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
-       gro_max_size = READ_ONCE(p->dev->gro_max_size);
+       /* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */
+       gro_max_size = p->protocol == htons(ETH_P_IPV6) ?
+                       READ_ONCE(p->dev->gro_max_size) :
+                               READ_ONCE(p->dev->gro_ipv4_max_size);
  
         if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush))
                 return -E2BIG;
  
         if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) {
-               if (p->protocol != htons(ETH_P_IPV6) ||
-                   skb_headroom(p) < sizeof(struct hop_jumbo_hdr) ||
-                   ipv6_hdr(p)->nexthdr != IPPROTO_TCP ||
+               if (NAPI_GRO_CB(skb)->proto != IPPROTO_TCP ||
+                   (p->protocol == htons(ETH_P_IPV6) &&
+                    skb_headroom(p) < sizeof(struct hop_jumbo_hdr)) ||
                     p->encapsulation)
                         return -E2BIG;
         }
diff --git a/net/core/sock.c b/net/core/sock.c

index 7ba4891..f08b76a 100644 (file)
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2373,17 +2373,22 @@ void sk_free_unlock_clone(struct sock *sk)
  }
  EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
  
-static void sk_trim_gso_size(struct sock *sk)
+static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
  {
-       if (sk->sk_gso_max_size <= GSO_LEGACY_MAX_SIZE)
-               return;
+       bool is_ipv6 = false;
+       u32 max_size;
+
  #if IS_ENABLED(CONFIG_IPV6)
-       if (sk->sk_family == AF_INET6 &&
-           sk_is_tcp(sk) &&
-           !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
-               return;
+       is_ipv6 = (sk->sk_family == AF_INET6 &&
+                  !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
  #endif
-       sk->sk_gso_max_size = GSO_LEGACY_MAX_SIZE;
+       /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
+       max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) :
+                       READ_ONCE(dst->dev->gso_ipv4_max_size);
+       if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
+               max_size = GSO_LEGACY_MAX_SIZE;
+
+       return max_size - (MAX_TCP_HEADER + 1);
  }
  
  void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
@@ -2403,10 +2408,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
                         sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
                 } else {
                         sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
-                       /* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
-                       sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
-                       sk_trim_gso_size(sk);
-                       sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
+                       sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
                         /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
                         max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
                 }
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c

index 6c0ec27..2f992a3 100644 (file)
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1485,6 +1485,7 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
         if (unlikely(ip_fast_csum((u8 *)iph, 5)))
                 goto out;
  
+       NAPI_GRO_CB(skb)->proto = proto;
         id = ntohl(*(__be32 *)&iph->id);
         flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
         id >>= 16;
@@ -1618,9 +1619,9 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
  
  int inet_gro_complete(struct sk_buff *skb, int nhoff)
  {
-       __be16 newlen = htons(skb->len - nhoff);
         struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
         const struct net_offload *ops;
+       __be16 totlen = iph->tot_len;
         int proto = iph->protocol;
         int err = -ENOSYS;
  
@@ -1629,8 +1630,8 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
                 skb_set_inner_network_header(skb, nhoff);
         }
  
-       csum_replace2(&iph->check, iph->tot_len, newlen);
-       iph->tot_len = newlen;
+       iph_set_totlen(iph, skb->len - nhoff);
+       csum_replace2(&iph->check, totlen, iph->tot_len);
  
         ops = rcu_dereference(inet_offloads[proto]);
         if (WARN_ON(!ops || !ops->callbacks.gro_complete))
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c

index e880ce7..fe9ead9 100644 (file)
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -511,7 +511,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
         if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
                 goto csum_error;
  
-       len = ntohs(iph->tot_len);
+       len = iph_totlen(skb, iph);
         if (skb->len < len) {
                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
                 __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c

index 922c87e..4e4e308 100644 (file)
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -100,7 +100,7 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
         struct iphdr *iph = ip_hdr(skb);
  
-       iph->tot_len = htons(skb->len);
+       iph_set_totlen(iph, skb->len);
         ip_send_check(iph);
  
         /* if egress device is enslaved to an L3 master device pass the
author	Xin Long <lucien.xin@gmail.com>
	Sat, 28 Jan 2023 15:58:39 +0000 (10:58 -0500)
committer	Jakub Kicinski <kuba@kernel.org>
	Thu, 2 Feb 2023 04:54:27 +0000 (20:54 -0800)
net/core/gro.c		patch | blob | history
net/core/sock.c		patch | blob | history
net/ipv4/af_inet.c		patch | blob | history
net/ipv4/ip_input.c		patch | blob | history
net/ipv4/ip_output.c		patch | blob | history