From 27942a15209f564ed8ee2a9e126cb7b105181355 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 2 Mar 2022 11:55:38 -0800 Subject: [PATCH] net: Handle delivery_time in skb->tstamp during network tapping with af_packet A latter patch will set the skb->mono_delivery_time to flag the skb->tstamp is used as the mono delivery_time (EDT) instead of the (rcv) timestamp. skb_clear_tstamp() will then keep this delivery_time during forwarding. This patch is to make the network tapping (with af_packet) to handle the delivery_time stored in skb->tstamp. Regardless of tapping at the ingress or egress, the tapped skb is received by the af_packet socket, so it is ingress to the af_packet socket and it expects the (rcv) timestamp. When tapping at egress, dev_queue_xmit_nit() is used. It has already expected skb->tstamp may have delivery_time, so it does skb_clone()+net_timestamp_set() to ensure the cloned skb has the (rcv) timestamp before passing to the af_packet sk. This patch only adds to clear the skb->mono_delivery_time bit in net_timestamp_set(). When tapping at ingress, it currently expects the skb->tstamp is either 0 or the (rcv) timestamp. Meaning, the tapping at ingress path has already expected the skb->tstamp could be 0 and it will get the (rcv) timestamp by ktime_get_real() when needed. There are two cases for tapping at ingress: One case is af_packet queues the skb to its sk_receive_queue. The skb is either not shared or new clone created. The newly added skb_clear_delivery_time() is called to clear the delivery_time (if any) and set the (rcv) timestamp if needed before the skb is queued to the sk_receive_queue. Another case, the ingress skb is directly copied to the rx_ring and tpacket_get_timestamp() is used to get the (rcv) timestamp. The newly added skb_tstamp() is used in tpacket_get_timestamp() to check the skb->mono_delivery_time bit before returning skb->tstamp. As mentioned earlier, the tapping@ingress has already expected the skb may not have the (rcv) timestamp (because no sk has asked for it) and has handled this case by directly calling ktime_get_real(). Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/skbuff.h | 24 ++++++++++++++++++++++++ net/core/dev.c | 4 +++- net/packet/af_packet.c | 4 +++- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 27a2892..7e2d796 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3996,6 +3996,22 @@ static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, skb->mono_delivery_time = 0; } +DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); + +/* It is used in the ingress path to clear the delivery_time. + * If needed, set the skb->tstamp to the (rcv) timestamp. + */ +static inline void skb_clear_delivery_time(struct sk_buff *skb) +{ + if (skb->mono_delivery_time) { + skb->mono_delivery_time = 0; + if (static_branch_unlikely(&netstamp_needed_key)) + skb->tstamp = ktime_get_real(); + else + skb->tstamp = 0; + } +} + static inline void skb_clear_tstamp(struct sk_buff *skb) { if (skb->mono_delivery_time) @@ -4004,6 +4020,14 @@ static inline void skb_clear_tstamp(struct sk_buff *skb) skb->tstamp = 0; } +static inline ktime_t skb_tstamp(const struct sk_buff *skb) +{ + if (skb->mono_delivery_time) + return 0; + + return skb->tstamp; +} + static inline u8 skb_metadata_len(const struct sk_buff *skb) { return skb_shinfo(skb)->meta_len; diff --git a/net/core/dev.c b/net/core/dev.c index c9e54e5..e128f26 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2047,7 +2047,8 @@ void net_dec_egress_queue(void) EXPORT_SYMBOL_GPL(net_dec_egress_queue); #endif -static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key); +DEFINE_STATIC_KEY_FALSE(netstamp_needed_key); +EXPORT_SYMBOL(netstamp_needed_key); #ifdef CONFIG_JUMP_LABEL static atomic_t netstamp_needed_deferred; static atomic_t netstamp_wanted; @@ -2108,6 +2109,7 @@ EXPORT_SYMBOL(net_disable_timestamp); static inline void net_timestamp_set(struct sk_buff *skb) { skb->tstamp = 0; + skb->mono_delivery_time = 0; if (static_branch_unlikely(&netstamp_needed_key)) __net_timestamp(skb); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ab87f22..1b93ce1 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -460,7 +460,7 @@ static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts, return TP_STATUS_TS_RAW_HARDWARE; if ((flags & SOF_TIMESTAMPING_SOFTWARE) && - ktime_to_timespec64_cond(skb->tstamp, ts)) + ktime_to_timespec64_cond(skb_tstamp(skb), ts)) return TP_STATUS_TS_SOFTWARE; return 0; @@ -2199,6 +2199,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, spin_lock(&sk->sk_receive_queue.lock); po->stats.stats1.tp_packets++; sock_skb_set_dropcount(sk, skb); + skb_clear_delivery_time(skb); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); @@ -2377,6 +2378,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; + skb_clear_delivery_time(copy_skb); __skb_queue_tail(&sk->sk_receive_queue, copy_skb); } spin_unlock(&sk->sk_receive_queue.lock); -- 2.7.4