net: sched: implement TCQ_F_CAN_BYPASS for lockless qdisc

author Yunsheng Lin <linyunsheng@huawei.com>
Tue, 22 Jun 2021 06:49:56 +0000 (14:49 +0800)
committer David S. Miller <davem@davemloft.net>
Wed, 23 Jun 2021 19:17:35 +0000 (12:17 -0700)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 3ed6bcc..177f240 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -37,8 +37,15 @@ enum qdisc_state_t {
         __QDISC_STATE_SCHED,
         __QDISC_STATE_DEACTIVATED,
         __QDISC_STATE_MISSED,
+       __QDISC_STATE_DRAINING,
  };
  
+#define QDISC_STATE_MISSED     BIT(__QDISC_STATE_MISSED)
+#define QDISC_STATE_DRAINING   BIT(__QDISC_STATE_DRAINING)
+
+#define QDISC_STATE_NON_EMPTY  (QDISC_STATE_MISSED | \
+                                       QDISC_STATE_DRAINING)
+
  struct qdisc_size_table {
         struct rcu_head         rcu;
         struct list_head        list;
@@ -145,6 +152,11 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc)
         return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
  }
  
+static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
+{
+       return !(READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY);
+}
+
  static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
  {
         return q->flags & TCQ_F_CPUSTATS;
@@ -206,10 +218,8 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
                 spin_unlock(&qdisc->seqlock);
  
                 if (unlikely(test_bit(__QDISC_STATE_MISSED,
-                                     &qdisc->state))) {
-                       clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+                                     &qdisc->state)))
                         __netif_schedule(qdisc);
-               }
         } else {
                 write_seqcount_end(&qdisc->running);
         }
diff --git a/net/core/dev.c b/net/core/dev.c
index 50531a2..991d09b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3852,10 +3852,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
         qdisc_calculate_pkt_len(skb, q);
  
         if (q->flags & TCQ_F_NOLOCK) {
+               if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
+                   qdisc_run_begin(q)) {
+                       /* Retest nolock_qdisc_is_empty() within the protection
+                        * of q->seqlock to protect from racing with requeuing.
+                        */
+                       if (unlikely(!nolock_qdisc_is_empty(q))) {
+                               rc = q->enqueue(skb, q, &to_free) &
+                                       NET_XMIT_MASK;
+                               __qdisc_run(q);
+                               qdisc_run_end(q);
+
+                               goto no_lock_out;
+                       }
+
+                       qdisc_bstats_cpu_update(q, skb);
+                       if (sch_direct_xmit(skb, q, dev, txq, NULL, true) &&
+                           !nolock_qdisc_is_empty(q))
+                               __qdisc_run(q);
+
+                       qdisc_run_end(q);
+                       return NET_XMIT_SUCCESS;
+               }
+
                 rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
-               if (likely(!netif_xmit_frozen_or_stopped(txq)))
-                       qdisc_run(q);
+               qdisc_run(q);
  
+no_lock_out:
                 if (unlikely(to_free))
                         kfree_skb_list(to_free);
                 return rc;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e9c0afc..9984ccc 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -52,6 +52,8 @@ static void qdisc_maybe_clear_missed(struct Qdisc *q,
          */
         if (!netif_xmit_frozen_or_stopped(txq))
                 set_bit(__QDISC_STATE_MISSED, &q->state);
+       else
+               set_bit(__QDISC_STATE_DRAINING, &q->state);
  }
  
  /* Main transmission queue. */
@@ -164,9 +166,13 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
  
                 skb = next;
         }
-       if (lock)
+
+       if (lock) {
                 spin_unlock(lock);
-       __netif_schedule(q);
+               set_bit(__QDISC_STATE_MISSED, &q->state);
+       } else {
+               __netif_schedule(q);
+       }
  }
  
  static void try_bulk_dequeue_skb(struct Qdisc *q,
@@ -409,7 +415,11 @@ void __qdisc_run(struct Qdisc *q)
         while (qdisc_restart(q, &packets)) {
                 quota -= packets;
                 if (quota <= 0) {
-                       __netif_schedule(q);
+                       if (q->flags & TCQ_F_NOLOCK)
+                               set_bit(__QDISC_STATE_MISSED, &q->state);
+                       else
+                               __netif_schedule(q);
+
                         break;
                 }
         }
@@ -698,13 +708,14 @@ retry:
         if (likely(skb)) {
                 qdisc_update_stats_at_dequeue(qdisc, skb);
         } else if (need_retry &&
-                  test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
+                  READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) {
                 /* Delay clearing the STATE_MISSED here to reduce
                  * the overhead of the second spin_trylock() in
                  * qdisc_run_begin() and __netif_schedule() calling
                  * in qdisc_run_end().
                  */
                 clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+               clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
  
                 /* Make sure dequeuing happens after clearing
                  * STATE_MISSED.
@@ -1222,6 +1233,7 @@ static void dev_reset_queue(struct net_device *dev,
         spin_unlock_bh(qdisc_lock(qdisc));
         if (nolock) {
                 clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+               clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
                 spin_unlock_bh(&qdisc->seqlock);
         }
  }
author	Yunsheng Lin <linyunsheng@huawei.com>
	Tue, 22 Jun 2021 06:49:56 +0000 (14:49 +0800)
committer	David S. Miller <davem@davemloft.net>
	Wed, 23 Jun 2021 19:17:35 +0000 (12:17 -0700)
include/net/sch_generic.h		patch | blob | history
net/core/dev.c		patch | blob | history
net/sched/sch_generic.c		patch | blob | history