tcp: add TTL to SCM_TIMESTAMPING_OPT_STATS
author Yousuk Seung <ysseung@google.com>
Wed, 20 Jan 2021 20:41:55 +0000 (12:41 -0800)
committer Jakub Kicinski <kuba@kernel.org>
Sat, 23 Jan 2021 02:20:52 +0000 (18:20 -0800)
This patch adds TCP_NLA_TTL to SCM_TIMESTAMPING_OPT_STATS. When an
SCM_TSTAMP_ACK timestamp is generated, it exports the time-to-live (IPv4)
or hop limit (IPv6) of the latest incoming packet. When incoming packets
are aggregated, the exported value may not come from the packet that
actually acked the sequence. Exporting the time-to-live or hop limit of
incoming packets helps to estimate the hop count of the flow's path,
which may change over time.

Signed-off-by: Yousuk Seung <ysseung@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Link: https://lore.kernel.org/r/20210120204155.552275-1-ysseung@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/linux/skbuff.h
include/linux/tcp.h
include/uapi/linux/tcp.h
net/core/dev.c
net/core/skbuff.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c

index 186dad2..9313b5a 100644 (file)
@@ -3859,7 +3859,7 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb)
 void skb_complete_tx_timestamp(struct sk_buff *skb,
                               struct skb_shared_hwtstamps *hwtstamps);
 
-void __skb_tstamp_tx(struct sk_buff *orig_skb,
+void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb,
                     struct skb_shared_hwtstamps *hwtstamps,
                     struct sock *sk, int tstype);
 
index 2f87377..48d8a36 100644 (file)
@@ -496,7 +496,8 @@ static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn)
 }
 
 struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
-                                              const struct sk_buff *orig_skb);
+                                              const struct sk_buff *orig_skb,
+                                              const struct sk_buff *ack_skb);
 
 static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
 {
index 768e93b..16dfa40 100644 (file)
@@ -314,6 +314,7 @@ enum {
        TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */
        TCP_NLA_BYTES_NOTSENT,  /* Bytes in write queue not yet sent */
        TCP_NLA_EDT,            /* Earliest departure time (CLOCK_MONOTONIC) */
+       TCP_NLA_TTL,            /* TTL or hop limit of a packet received */
 };
 
 /* for TCP_MD5SIG socket option */
index d9ce02e..6df3f1b 100644 (file)
@@ -4084,7 +4084,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
        skb_reset_mac_header(skb);
 
        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
-               __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
+               __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
 
        /* Disable soft irqs for various locks below. Also
         * stops preemption for RCU.
index 145503d..2af12f7 100644 (file)
@@ -4721,6 +4721,7 @@ err:
 EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
 
 void __skb_tstamp_tx(struct sk_buff *orig_skb,
+                    const struct sk_buff *ack_skb,
                     struct skb_shared_hwtstamps *hwtstamps,
                     struct sock *sk, int tstype)
 {
@@ -4743,7 +4744,8 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
                if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
                    sk->sk_protocol == IPPROTO_TCP &&
                    sk->sk_type == SOCK_STREAM) {
-                       skb = tcp_get_timestamping_opt_stats(sk, orig_skb);
+                       skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
+                                                            ack_skb);
                        opt_stats = true;
                } else
 #endif
@@ -4772,7 +4774,7 @@ EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
 void skb_tstamp_tx(struct sk_buff *orig_skb,
                   struct skb_shared_hwtstamps *hwtstamps)
 {
-       return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk,
+       return __skb_tstamp_tx(orig_skb, NULL, hwtstamps, orig_skb->sk,
                               SCM_TSTAMP_SND);
 }
 EXPORT_SYMBOL_GPL(skb_tstamp_tx);
index 856ae51..a1a17b6 100644 (file)
@@ -3767,11 +3767,24 @@ static size_t tcp_opt_stats_get_size(void)
                nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */
                nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */
                nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */
+               nla_total_size(sizeof(u8)) + /* TCP_NLA_TTL */
                0;
 }
 
+/* Returns TTL or hop limit of an incoming packet from skb. */
+static u8 tcp_skb_ttl_or_hop_limit(const struct sk_buff *skb)
+{
+       if (skb->protocol == htons(ETH_P_IP))
+               return ip_hdr(skb)->ttl;
+       else if (skb->protocol == htons(ETH_P_IPV6))
+               return ipv6_hdr(skb)->hop_limit;
+       else
+               return 0;
+}
+
 struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
-                                              const struct sk_buff *orig_skb)
+                                              const struct sk_buff *orig_skb,
+                                              const struct sk_buff *ack_skb)
 {
        const struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *stats;
@@ -3827,6 +3840,9 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
                    max_t(int, 0, tp->write_seq - tp->snd_nxt));
        nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns,
                          TCP_NLA_PAD);
+       if (ack_skb)
+               nla_put_u8(stats, TCP_NLA_TTL,
+                          tcp_skb_ttl_or_hop_limit(ack_skb));
 
        return stats;
 }
index a7dfca0..d4f66ab 100644 (file)
@@ -3145,7 +3145,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
 }
 
 static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
-                          u32 prior_snd_una)
+                          const struct sk_buff *ack_skb, u32 prior_snd_una)
 {
        const struct skb_shared_info *shinfo;
 
@@ -3157,7 +3157,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
        if (!before(shinfo->tskey, prior_snd_una) &&
            before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
                tcp_skb_tsorted_save(skb) {
-                       __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+                       __skb_tstamp_tx(skb, ack_skb, NULL, sk, SCM_TSTAMP_ACK);
                } tcp_skb_tsorted_restore(skb);
        }
 }
@@ -3166,8 +3166,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
  * is before the ack sequence we can discard it as it's confirmed to have
  * arrived at the other end.
  */
-static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
-                              u32 prior_snd_una,
+static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
+                              u32 prior_fack, u32 prior_snd_una,
                               struct tcp_sacktag_state *sack, bool ece_ack)
 {
        const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3256,7 +3256,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
                if (!fully_acked)
                        break;
 
-               tcp_ack_tstamp(sk, skb, prior_snd_una);
+               tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
 
                next = skb_rb_next(skb);
                if (unlikely(skb == tp->retransmit_skb_hint))
@@ -3274,7 +3274,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
                tp->snd_up = tp->snd_una;
 
        if (skb) {
-               tcp_ack_tstamp(sk, skb, prior_snd_una);
+               tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
                if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
                        flag |= FLAG_SACK_RENEGING;
        }
@@ -3809,8 +3809,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
                goto no_queue;
 
        /* See if we can take anything off of the retransmit queue. */
-       flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state,
-                                   flag & FLAG_ECE);
+       flag |= tcp_clean_rtx_queue(sk, skb, prior_fack, prior_snd_una,
+                                   &sack_state, flag & FLAG_ECE);
 
        tcp_rack_update_reo_wnd(sk, &rs);