[ Upstream commit
f52ed89971adbe79b6438c459814034707b8ab91 ]
For performance reasons, sch_fq tried hard to not setup timers for every
sent packet, using a quantum based heuristic : A delay is setup only if
the flow exhausted its credit.
Problem is that application limited flows can refill their credit
for every queued packet, and they can evade pacing.
This problem can also be triggered when TCP flows use small MSS values,
as TSO auto sizing builds packets that are smaller than the default fq
quantum (3028 bytes)
This patch adds a 40 ms delay to guard flow credit refill.
Fixes:
afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Maciej Żenczykowski <maze@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */
TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */
TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */
TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */
+
+ TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */
+
struct fq_flow internal; /* for non classified or high prio packets */
u32 quantum;
u32 initial_quantum;
struct fq_flow internal; /* for non classified or high prio packets */
u32 quantum;
u32 initial_quantum;
u32 flow_max_rate; /* optional max rate per flow */
u32 flow_plimit; /* max packets per flow */
struct rb_root *fq_root;
u32 flow_max_rate; /* optional max rate per flow */
u32 flow_plimit; /* max packets per flow */
struct rb_root *fq_root;
static void fq_flow_set_detached(struct fq_flow *f)
{
f->next = &detached;
static void fq_flow_set_detached(struct fq_flow *f)
{
f->next = &detached;
}
static bool fq_flow_is_detached(const struct fq_flow *f)
}
static bool fq_flow_is_detached(const struct fq_flow *f)
- flow_queue_add(f, skb);
if (skb_is_retransmit(skb))
q->stat_tcp_retrans++;
sch->qstats.backlog += qdisc_pkt_len(skb);
if (fq_flow_is_detached(f)) {
fq_flow_add_tail(&q->new_flows, f);
if (skb_is_retransmit(skb))
q->stat_tcp_retrans++;
sch->qstats.backlog += qdisc_pkt_len(skb);
if (fq_flow_is_detached(f)) {
fq_flow_add_tail(&q->new_flows, f);
- if (q->quantum > f->credit)
- f->credit = q->quantum;
+ if (time_after(jiffies, f->age + q->flow_refill_delay))
+ f->credit = max_t(u32, f->credit, q->quantum);
q->inactive_flows--;
qdisc_unthrottled(sch);
}
q->inactive_flows--;
qdisc_unthrottled(sch);
}
+
+ /* Note: this overwrites f->age */
+ flow_queue_add(f, skb);
+
if (unlikely(f == &q->internal)) {
q->stat_internal_packets++;
qdisc_unthrottled(sch);
if (unlikely(f == &q->internal)) {
q->stat_internal_packets++;
qdisc_unthrottled(sch);
fq_flow_add_tail(&q->old_flows, f);
} else {
fq_flow_set_detached(f);
fq_flow_add_tail(&q->old_flows, f);
} else {
fq_flow_set_detached(f);
q->inactive_flows++;
}
goto begin;
q->inactive_flows++;
}
goto begin;
[TCA_FQ_FLOW_DEFAULT_RATE] = { .type = NLA_U32 },
[TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
[TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
[TCA_FQ_FLOW_DEFAULT_RATE] = { .type = NLA_U32 },
[TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
[TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
+ [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 },
};
static int fq_change(struct Qdisc *sch, struct nlattr *opt)
};
static int fq_change(struct Qdisc *sch, struct nlattr *opt)
+ if (tb[TCA_FQ_FLOW_REFILL_DELAY]) {
+ u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ;
+
+ q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
+ }
+
if (!err)
err = fq_resize(q, fq_log);
if (!err)
err = fq_resize(q, fq_log);
q->flow_plimit = 100;
q->quantum = 2 * psched_mtu(qdisc_dev(sch));
q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch));
q->flow_plimit = 100;
q->quantum = 2 * psched_mtu(qdisc_dev(sch));
q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch));
+ q->flow_refill_delay = msecs_to_jiffies(40);
q->flow_max_rate = ~0U;
q->rate_enable = 1;
q->new_flows.first = NULL;
q->flow_max_rate = ~0U;
q->rate_enable = 1;
q->new_flows.first = NULL;
nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
+ nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
+ jiffies_to_usecs(q->flow_refill_delay)) ||
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
goto nla_put_failure;
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
goto nla_put_failure;