net: sk_buff rbnode reorg
author: Eric Dumazet <edumazet@google.com>
Tue, 19 Sep 2017 12:14:24 +0000 (05:14 -0700)
committer: David S. Miller <davem@davemloft.net>
Tue, 19 Sep 2017 22:20:22 +0000 (15:20 -0700)
skb->rbnode shares space with skb->next, skb->prev and skb->tstamp

Current uses (TCP receive ofo queue and netem) need to save/restore
tstamp, while skb->dev is either NULL (TCP) or a constant for a given
queue (netem).

Since we plan to use an RB tree for the TCP retransmit queue to speed up
SACK processing with large BDP, this patch exchanges skb->dev and
skb->tstamp.

This saves some overhead in both TCP and netem.

v2: removes the swtstamp field from struct tcp_skb_cb

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Wei Wang <weiwan@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/skbuff.h
include/net/tcp.h
net/ipv4/tcp_input.c
net/sched/sch_netem.c

index 72299ef..4928288 100644 (file)
@@ -661,8 +661,12 @@ struct sk_buff {
                        struct sk_buff          *prev;
 
                        union {
-                               ktime_t         tstamp;
-                               u64             skb_mstamp;
+                               struct net_device       *dev;
+                               /* Some protocols might use this space to store information,
+                                * while device pointer would be NULL.
+                                * UDP receive path is one user.
+                                */
+                               unsigned long           dev_scratch;
                        };
                };
                struct rb_node  rbnode; /* used in netem & tcp stack */
@@ -670,12 +674,8 @@ struct sk_buff {
        struct sock             *sk;
 
        union {
-               struct net_device       *dev;
-               /* Some protocols might use this space to store information,
-                * while device pointer would be NULL.
-                * UDP receive path is one user.
-                */
-               unsigned long           dev_scratch;
+               ktime_t         tstamp;
+               u64             skb_mstamp;
        };
        /*
         * This is the control buffer. It is free to use for every
index b510f28..49a8a46 100644 (file)
@@ -797,12 +797,6 @@ struct tcp_skb_cb {
                        u16     tcp_gso_segs;
                        u16     tcp_gso_size;
                };
-
-               /* Used to stash the receive timestamp while this skb is in the
-                * out of order queue, as skb->tstamp is overwritten by the
-                * rbnode.
-                */
-               ktime_t         swtstamp;
        };
        __u8            tcp_flags;      /* TCP header flags. (tcp[13])  */
 
index bddf724..db9bb46 100644 (file)
@@ -4266,11 +4266,6 @@ static void tcp_sack_remove(struct tcp_sock *tp)
        tp->rx_opt.num_sacks = num_sacks;
 }
 
-enum tcp_queue {
-       OOO_QUEUE,
-       RCV_QUEUE,
-};
-
 /**
  * tcp_try_coalesce - try to merge skb to prior one
  * @sk: socket
@@ -4286,7 +4281,6 @@ enum tcp_queue {
  * Returns true if caller should free @from instead of queueing it
  */
 static bool tcp_try_coalesce(struct sock *sk,
-                            enum tcp_queue dest,
                             struct sk_buff *to,
                             struct sk_buff *from,
                             bool *fragstolen)
@@ -4311,10 +4305,7 @@ static bool tcp_try_coalesce(struct sock *sk,
 
        if (TCP_SKB_CB(from)->has_rxtstamp) {
                TCP_SKB_CB(to)->has_rxtstamp = true;
-               if (dest == OOO_QUEUE)
-                       TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
-               else
-                       to->tstamp = from->tstamp;
+               to->tstamp = from->tstamp;
        }
 
        return true;
@@ -4351,9 +4342,6 @@ static void tcp_ofo_queue(struct sock *sk)
                }
                p = rb_next(p);
                rb_erase(&skb->rbnode, &tp->out_of_order_queue);
-               /* Replace tstamp which was stomped by rbnode */
-               if (TCP_SKB_CB(skb)->has_rxtstamp)
-                       skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
 
                if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
                        SOCK_DEBUG(sk, "ofo packet was already received\n");
@@ -4365,8 +4353,7 @@ static void tcp_ofo_queue(struct sock *sk)
                           TCP_SKB_CB(skb)->end_seq);
 
                tail = skb_peek_tail(&sk->sk_receive_queue);
-               eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
-                                                tail, skb, &fragstolen);
+               eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
                tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
                fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
                if (!eaten)
@@ -4420,10 +4407,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
                return;
        }
 
-       /* Stash tstamp to avoid being stomped on by rbnode */
-       if (TCP_SKB_CB(skb)->has_rxtstamp)
-               TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
-
        /* Disable header prediction. */
        tp->pred_flags = 0;
        inet_csk_schedule_ack(sk);
@@ -4451,7 +4434,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
        /* In the typical case, we are adding an skb to the end of the list.
         * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
         */
-       if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
+       if (tcp_try_coalesce(sk, tp->ooo_last_skb,
                             skb, &fragstolen)) {
 coalesce_done:
                tcp_grow_window(sk, skb);
@@ -4502,7 +4485,7 @@ coalesce_done:
                                __kfree_skb(skb1);
                                goto merge_right;
                        }
-               } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
+               } else if (tcp_try_coalesce(sk, skb1,
                                            skb, &fragstolen)) {
                        goto coalesce_done;
                }
@@ -4554,7 +4537,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 
        __skb_pull(skb, hdrlen);
        eaten = (tail &&
-                tcp_try_coalesce(sk, RCV_QUEUE, tail,
+                tcp_try_coalesce(sk, tail,
                                  skb, fragstolen)) ? 1 : 0;
        tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
        if (!eaten) {
index b1266e7..063a4bd 100644 (file)
@@ -146,7 +146,6 @@ struct netem_sched_data {
  */
 struct netem_skb_cb {
        psched_time_t   time_to_send;
-       ktime_t         tstamp_save;
 };
 
 
@@ -561,7 +560,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                }
 
                cb->time_to_send = now + delay;
-               cb->tstamp_save = skb->tstamp;
                ++q->counter;
                tfifo_enqueue(skb, sch);
        } else {
@@ -629,7 +627,10 @@ deliver:
                        qdisc_qstats_backlog_dec(sch, skb);
                        skb->next = NULL;
                        skb->prev = NULL;
-                       skb->tstamp = netem_skb_cb(skb)->tstamp_save;
+                       /* skb->dev shares skb->rbnode area,
+                        * we need to restore its value.
+                        */
+                       skb->dev = qdisc_dev(sch);
 
 #ifdef CONFIG_NET_CLS_ACT
                        /*