// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
 *
 * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
 *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
 */
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/string.h>
13 #include <linux/errno.h>
14 #include <linux/errqueue.h>
15 #include <linux/rbtree.h>
16 #include <linux/skbuff.h>
17 #include <linux/posix-timers.h>
18 #include <net/netlink.h>
19 #include <net/sch_generic.h>
20 #include <net/pkt_sched.h>
/* Flag tests on a pointer to a structure with a 'flags' member (used in
 * this file on struct tc_etf_qopt). Each expands to a non-zero value when
 * the corresponding TC_ETF_* bit is set and to zero otherwise.
 */
#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
#define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK)
27 struct etf_sched_data {
33 s32 delta; /* in ns */
34 ktime_t last; /* The txtime of the last skb sent to the netdevice. */
35 struct rb_root_cached head;
36 struct qdisc_watchdog watchdog;
37 ktime_t (*get_time)(void);
40 static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
41 [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },
44 static inline int validate_input_params(struct tc_etf_qopt *qopt,
45 struct netlink_ext_ack *extack)
47 /* Check if params comply to the following rules:
48 * * Clockid and delta must be valid.
50 * * Dynamic clockids are not supported.
52 * * Delta must be a positive integer.
54 * Also note that for the HW offload case, we must
55 * expect that system clocks have been synchronized to PHC.
57 if (qopt->clockid < 0) {
58 NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
62 if (qopt->clockid != CLOCK_TAI) {
63 NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
67 if (qopt->delta < 0) {
68 NL_SET_ERR_MSG(extack, "Delta must be positive");
75 static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
77 struct etf_sched_data *q = qdisc_priv(sch);
78 ktime_t txtime = nskb->tstamp;
79 struct sock *sk = nskb->sk;
82 if (q->skip_sock_check)
85 if (!sk || !sk_fullsock(sk))
88 if (!sock_flag(sk, SOCK_TXTIME))
91 /* We don't perform crosstimestamping.
92 * Drop if packet's clockid differs from qdisc's.
94 if (sk->sk_clockid != q->clockid)
97 if (sk->sk_txtime_deadline_mode != q->deadline_mode)
102 if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
108 static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
110 struct etf_sched_data *q = qdisc_priv(sch);
113 p = rb_first_cached(&q->head);
120 static void reset_watchdog(struct Qdisc *sch)
122 struct etf_sched_data *q = qdisc_priv(sch);
123 struct sk_buff *skb = etf_peek_timesortedlist(sch);
127 qdisc_watchdog_cancel(&q->watchdog);
131 next = ktime_sub_ns(skb->tstamp, q->delta);
132 qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
135 static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
137 struct sock_exterr_skb *serr;
138 struct sk_buff *clone;
139 ktime_t txtime = skb->tstamp;
140 struct sock *sk = skb->sk;
142 if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors))
145 clone = skb_clone(skb, GFP_ATOMIC);
149 serr = SKB_EXT_ERR(clone);
150 serr->ee.ee_errno = err;
151 serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
152 serr->ee.ee_type = 0;
153 serr->ee.ee_code = code;
155 serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
156 serr->ee.ee_info = txtime; /* low part of tstamp */
158 if (sock_queue_err_skb(sk, clone))
162 static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
163 struct sk_buff **to_free)
165 struct etf_sched_data *q = qdisc_priv(sch);
166 struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
167 ktime_t txtime = nskb->tstamp;
168 bool leftmost = true;
170 if (!is_packet_valid(sch, nskb)) {
171 report_sock_error(nskb, EINVAL,
172 SO_EE_CODE_TXTIME_INVALID_PARAM);
173 return qdisc_drop(nskb, sch, to_free);
180 skb = rb_to_skb(parent);
181 if (ktime_compare(txtime, skb->tstamp) >= 0) {
182 p = &parent->rb_right;
185 p = &parent->rb_left;
188 rb_link_node(&nskb->rbnode, parent, p);
189 rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);
191 qdisc_qstats_backlog_inc(sch, nskb);
194 /* Now we may need to re-arm the qdisc watchdog for the next packet. */
197 return NET_XMIT_SUCCESS;
200 static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
203 struct etf_sched_data *q = qdisc_priv(sch);
204 struct sk_buff *to_free = NULL;
205 struct sk_buff *tmp = NULL;
207 skb_rbtree_walk_from_safe(skb, tmp) {
208 if (ktime_after(skb->tstamp, now))
211 rb_erase_cached(&skb->rbnode, &q->head);
213 /* The rbnode field in the skb re-uses these fields, now that
214 * we are done with the rbnode, reset them.
218 skb->dev = qdisc_dev(sch);
220 report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
222 qdisc_qstats_backlog_dec(sch, skb);
223 qdisc_drop(skb, sch, &to_free);
224 qdisc_qstats_overlimit(sch);
228 kfree_skb_list(to_free);
231 static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
233 struct etf_sched_data *q = qdisc_priv(sch);
235 rb_erase_cached(&skb->rbnode, &q->head);
237 /* The rbnode field in the skb re-uses these fields, now that
238 * we are done with the rbnode, reset them.
242 skb->dev = qdisc_dev(sch);
244 qdisc_qstats_backlog_dec(sch, skb);
246 qdisc_bstats_update(sch, skb);
248 q->last = skb->tstamp;
253 static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
255 struct etf_sched_data *q = qdisc_priv(sch);
259 skb = etf_peek_timesortedlist(sch);
265 /* Drop if packet has expired while in queue. */
266 if (ktime_before(skb->tstamp, now)) {
267 timesortedlist_drop(sch, skb, now);
272 /* When in deadline mode, dequeue as soon as possible and change the
273 * txtime from deadline to (now + delta).
275 if (q->deadline_mode) {
276 timesortedlist_remove(sch, skb);
281 next = ktime_sub_ns(skb->tstamp, q->delta);
283 /* Dequeue only if now is within the [txtime - delta, txtime] range. */
284 if (ktime_after(now, next))
285 timesortedlist_remove(sch, skb);
290 /* Now we may need to re-arm the qdisc watchdog for the next packet. */
296 static void etf_disable_offload(struct net_device *dev,
297 struct etf_sched_data *q)
299 struct tc_etf_qopt_offload etf = { };
300 const struct net_device_ops *ops;
306 ops = dev->netdev_ops;
307 if (!ops->ndo_setup_tc)
310 etf.queue = q->queue;
313 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
315 pr_warn("Couldn't disable ETF offload for queue %d\n",
319 static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
320 struct netlink_ext_ack *extack)
322 const struct net_device_ops *ops = dev->netdev_ops;
323 struct tc_etf_qopt_offload etf = { };
326 if (!ops->ndo_setup_tc) {
327 NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
331 etf.queue = q->queue;
334 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
336 NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
343 static int etf_init(struct Qdisc *sch, struct nlattr *opt,
344 struct netlink_ext_ack *extack)
346 struct etf_sched_data *q = qdisc_priv(sch);
347 struct net_device *dev = qdisc_dev(sch);
348 struct nlattr *tb[TCA_ETF_MAX + 1];
349 struct tc_etf_qopt *qopt;
353 NL_SET_ERR_MSG(extack,
354 "Missing ETF qdisc options which are mandatory");
358 err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy,
363 if (!tb[TCA_ETF_PARMS]) {
364 NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
368 qopt = nla_data(tb[TCA_ETF_PARMS]);
370 pr_debug("delta %d clockid %d offload %s deadline %s\n",
371 qopt->delta, qopt->clockid,
372 OFFLOAD_IS_ON(qopt) ? "on" : "off",
373 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
375 err = validate_input_params(qopt, extack);
379 q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
381 if (OFFLOAD_IS_ON(qopt)) {
382 err = etf_enable_offload(dev, q, extack);
387 /* Everything went OK, save the parameters used. */
388 q->delta = qopt->delta;
389 q->clockid = qopt->clockid;
390 q->offload = OFFLOAD_IS_ON(qopt);
391 q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
392 q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt);
394 switch (q->clockid) {
396 q->get_time = ktime_get_real;
398 case CLOCK_MONOTONIC:
399 q->get_time = ktime_get;
402 q->get_time = ktime_get_boottime;
405 q->get_time = ktime_get_clocktai;
408 NL_SET_ERR_MSG(extack, "Clockid is not supported");
412 qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
417 static void timesortedlist_clear(struct Qdisc *sch)
419 struct etf_sched_data *q = qdisc_priv(sch);
420 struct rb_node *p = rb_first_cached(&q->head);
423 struct sk_buff *skb = rb_to_skb(p);
427 rb_erase_cached(&skb->rbnode, &q->head);
428 rtnl_kfree_skbs(skb, skb);
433 static void etf_reset(struct Qdisc *sch)
435 struct etf_sched_data *q = qdisc_priv(sch);
437 /* Only cancel watchdog if it's been initialized. */
438 if (q->watchdog.qdisc == sch)
439 qdisc_watchdog_cancel(&q->watchdog);
441 /* No matter which mode we are on, it's safe to clear both lists. */
442 timesortedlist_clear(sch);
443 __qdisc_reset_queue(&sch->q);
448 static void etf_destroy(struct Qdisc *sch)
450 struct etf_sched_data *q = qdisc_priv(sch);
451 struct net_device *dev = qdisc_dev(sch);
453 /* Only cancel watchdog if it's been initialized. */
454 if (q->watchdog.qdisc == sch)
455 qdisc_watchdog_cancel(&q->watchdog);
457 etf_disable_offload(dev, q);
460 static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
462 struct etf_sched_data *q = qdisc_priv(sch);
463 struct tc_etf_qopt opt = { };
466 nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
468 goto nla_put_failure;
470 opt.delta = q->delta;
471 opt.clockid = q->clockid;
473 opt.flags |= TC_ETF_OFFLOAD_ON;
475 if (q->deadline_mode)
476 opt.flags |= TC_ETF_DEADLINE_MODE_ON;
478 if (q->skip_sock_check)
479 opt.flags |= TC_ETF_SKIP_SOCK_CHECK;
481 if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
482 goto nla_put_failure;
484 return nla_nest_end(skb, nest);
487 nla_nest_cancel(skb, nest);
491 static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
493 .priv_size = sizeof(struct etf_sched_data),
494 .enqueue = etf_enqueue_timesortedlist,
495 .dequeue = etf_dequeue_timesortedlist,
496 .peek = etf_peek_timesortedlist,
499 .destroy = etf_destroy,
501 .owner = THIS_MODULE,
504 static int __init etf_module_init(void)
506 return register_qdisc(&etf_qdisc_ops);
509 static void __exit etf_module_exit(void)
511 unregister_qdisc(&etf_qdisc_ops);
513 module_init(etf_module_init)
514 module_exit(etf_module_exit)
515 MODULE_LICENSE("GPL");