net: sched: fix packet stuck problem for lockless qdisc

author Yunsheng Lin <linyunsheng@huawei.com>

Fri, 14 May 2021 03:16:59 +0000 (11:16 +0800)

committer David S. Miller <davem@davemloft.net>

Fri, 14 May 2021 22:05:46 +0000 (15:05 -0700)
author Yunsheng Lin <linyunsheng@huawei.com>
Fri, 14 May 2021 03:16:59 +0000 (11:16 +0800)
committer David S. Miller <davem@davemloft.net>
Fri, 14 May 2021 22:05:46 +0000 (15:05 -0700)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h

index f7a6e14..1e62551 100644 (file)
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -36,6 +36,7 @@ struct qdisc_rate_table {
  enum qdisc_state_t {
         __QDISC_STATE_SCHED,
         __QDISC_STATE_DEACTIVATED,
+       __QDISC_STATE_MISSED,
  };
  
  struct qdisc_size_table {
@@ -159,8 +160,33 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
  static inline bool qdisc_run_begin(struct Qdisc *qdisc)
  {
         if (qdisc->flags & TCQ_F_NOLOCK) {
+               if (spin_trylock(&qdisc->seqlock))
+                       goto nolock_empty;
+
+               /* If the MISSED flag is set, it means another thread has
+                * set the MISSED flag before the second spin_trylock(), so
+                * we can return false here to avoid multiple CPUs doing
+                * the set_bit() and second spin_trylock() concurrently.
+                */
+               if (test_bit(__QDISC_STATE_MISSED, &qdisc->state))
+                       return false;
+
+               /* Set the MISSED flag before the second spin_trylock().
+                * If the second spin_trylock() returns false, it means
+                * the other CPU holding the lock will do the dequeuing
+                * for us, or it will see the MISSED flag set after
+                * releasing the lock and reschedule net_tx_action() to
+                * do the dequeuing.
+                */
+               set_bit(__QDISC_STATE_MISSED, &qdisc->state);
+
+               /* Retry in case the other CPU does not see the new flag
+                * after it releases the lock at the end of qdisc_run_end().
+                */
                 if (!spin_trylock(&qdisc->seqlock))
                         return false;
+
+nolock_empty:
                 WRITE_ONCE(qdisc->empty, false);
         } else if (qdisc_is_running(qdisc)) {
                 return false;
@@ -176,8 +202,15 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
  static inline void qdisc_run_end(struct Qdisc *qdisc)
  {
         write_seqcount_end(&qdisc->running);
-       if (qdisc->flags & TCQ_F_NOLOCK)
+       if (qdisc->flags & TCQ_F_NOLOCK) {
                 spin_unlock(&qdisc->seqlock);
+
+               if (unlikely(test_bit(__QDISC_STATE_MISSED,
+                                     &qdisc->state))) {
+                       clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+                       __netif_schedule(qdisc);
+               }
+       }
  }
  
  static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c

index 44991ea..795d986 100644 (file)
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -640,8 +640,10 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
  {
         struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
         struct sk_buff *skb = NULL;
+       bool need_retry = true;
         int band;
  
+retry:
         for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
                 struct skb_array *q = band2list(priv, band);
  
@@ -652,6 +654,23 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
         }
         if (likely(skb)) {
                 qdisc_update_stats_at_dequeue(qdisc, skb);
+       } else if (need_retry &&
+                  test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
+               /* Delay clearing STATE_MISSED until here to reduce
+                * the overhead of the second spin_trylock() in
+                * qdisc_run_begin() and of the __netif_schedule()
+                * call in qdisc_run_end().
+                */
+               clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+
+               /* Make sure dequeuing happens after clearing
+                * STATE_MISSED.
+                */
+               smp_mb__after_atomic();
+
+               need_retry = false;
+
+               goto retry;
         } else {
                 WRITE_ONCE(qdisc->empty, true);
         }
author	Yunsheng Lin <linyunsheng@huawei.com>
	Fri, 14 May 2021 03:16:59 +0000 (11:16 +0800)
committer	David S. Miller <davem@davemloft.net>
	Fri, 14 May 2021 22:05:46 +0000 (15:05 -0700)
include/net/sch_generic.h		patch \| blob \| history
net/sched/sch_generic.c		patch \| blob \| history