net: sched: Add TCA_HW_OFFLOAD
[platform/kernel/linux-rpi.git] / net / sched / sch_api.c
1 /*
2  * net/sched/sch_api.c  Packet scheduler API.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Fixes:
12  *
13  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16  */
17
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/lockdep.h>
31 #include <linux/slab.h>
32 #include <linux/hashtable.h>
33
34 #include <net/net_namespace.h>
35 #include <net/sock.h>
36 #include <net/netlink.h>
37 #include <net/pkt_sched.h>
38 #include <net/pkt_cls.h>
39
40 /*
41
42    Short review.
43    -------------
44
45    This file consists of two interrelated parts:
46
47    1. queueing disciplines manager frontend.
48    2. traffic classes manager frontend.
49
50    Generally, queueing discipline ("qdisc") is a black box,
51    which is able to enqueue packets and to dequeue them (when
52    device is ready to send something) in order and at times
53    determined by algorithm hidden in it.
54
55    qdisc's are divided to two categories:
56    - "queues", which have no internal structure visible from outside.
57    - "schedulers", which split all the packets to "traffic classes",
58      using "packet classifiers" (look at cls_api.c)
59
60    In turn, classes may have child qdiscs (as rule, queues)
61    attached to them etc. etc. etc.
62
63    The goal of the routines in this file is to translate
64    information supplied by user in the form of handles
65    to more intelligible for kernel form, to make some sanity
66    checks and part of work, which is common to all qdiscs
67    and to provide rtnetlink notifications.
68
69    All real intelligent work is done inside qdisc modules.
70
71
72
73    Every discipline has two major routines: enqueue and dequeue.
74
75    ---dequeue
76
77    dequeue usually returns a skb to send. It is allowed to return NULL,
78    but it does not mean that queue is empty, it just means that
79    discipline does not want to send anything this time.
80    Queue is really empty if q->q.qlen == 0.
81    For complicated disciplines with multiple queues q->q is not
82    real packet queue, but however q->q.qlen must be valid.
83
84    ---enqueue
85
   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
89    NET_XMIT_DROP        - this packet dropped
90      Expected action: do not backoff, but wait until queue will clear.
91    NET_XMIT_CN          - probably this packet enqueued, but another one dropped.
92      Expected action: backoff or ignore
93
94    Auxiliary routines:
95
96    ---peek
97
98    like dequeue but without removing a packet from the queue
99
100    ---reset
101
102    returns qdisc to initial state: purge all buffers, clear all
103    timers, counters (except for statistics) etc.
104
105    ---init
106
107    initializes newly created qdisc.
108
109    ---destroy
110
111    destroys resources allocated by init and during lifetime of qdisc.
112
113    ---change
114
115    changes qdisc parameters.
116  */
117
/* Protects the list of registered TC modules (qdisc_base) and the
 * default_qdisc_ops pointer. It is pure SMP lock: list walkers and
 * qdisc_get_default() take it for reading; register/unregister and
 * qdisc_set_default() take it for writing.
 */
static DEFINE_RWLOCK(qdisc_mod_lock);
120
121
122 /************************************************
123  *      Queueing disciplines manipulation.      *
124  ************************************************/
125
126
/* The list of all installed queueing disciplines.
 * Singly linked through Qdisc_ops->next; new entries are appended at
 * the tail by register_qdisc(). Guarded by qdisc_mod_lock.
 */

static struct Qdisc_ops *qdisc_base;
130
131 /* Register/unregister queueing discipline */
132
133 int register_qdisc(struct Qdisc_ops *qops)
134 {
135         struct Qdisc_ops *q, **qp;
136         int rc = -EEXIST;
137
138         write_lock(&qdisc_mod_lock);
139         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
140                 if (!strcmp(qops->id, q->id))
141                         goto out;
142
143         if (qops->enqueue == NULL)
144                 qops->enqueue = noop_qdisc_ops.enqueue;
145         if (qops->peek == NULL) {
146                 if (qops->dequeue == NULL)
147                         qops->peek = noop_qdisc_ops.peek;
148                 else
149                         goto out_einval;
150         }
151         if (qops->dequeue == NULL)
152                 qops->dequeue = noop_qdisc_ops.dequeue;
153
154         if (qops->cl_ops) {
155                 const struct Qdisc_class_ops *cops = qops->cl_ops;
156
157                 if (!(cops->find && cops->walk && cops->leaf))
158                         goto out_einval;
159
160                 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
161                         goto out_einval;
162         }
163
164         qops->next = NULL;
165         *qp = qops;
166         rc = 0;
167 out:
168         write_unlock(&qdisc_mod_lock);
169         return rc;
170
171 out_einval:
172         rc = -EINVAL;
173         goto out;
174 }
175 EXPORT_SYMBOL(register_qdisc);
176
177 int unregister_qdisc(struct Qdisc_ops *qops)
178 {
179         struct Qdisc_ops *q, **qp;
180         int err = -ENOENT;
181
182         write_lock(&qdisc_mod_lock);
183         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
184                 if (q == qops)
185                         break;
186         if (q) {
187                 *qp = q->next;
188                 q->next = NULL;
189                 err = 0;
190         }
191         write_unlock(&qdisc_mod_lock);
192         return err;
193 }
194 EXPORT_SYMBOL(unregister_qdisc);
195
196 /* Get default qdisc if not otherwise specified */
197 void qdisc_get_default(char *name, size_t len)
198 {
199         read_lock(&qdisc_mod_lock);
200         strlcpy(name, default_qdisc_ops->id, len);
201         read_unlock(&qdisc_mod_lock);
202 }
203
204 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
205 {
206         struct Qdisc_ops *q = NULL;
207
208         for (q = qdisc_base; q; q = q->next) {
209                 if (!strcmp(name, q->id)) {
210                         if (!try_module_get(q->owner))
211                                 q = NULL;
212                         break;
213                 }
214         }
215
216         return q;
217 }
218
219 /* Set new default qdisc to use */
220 int qdisc_set_default(const char *name)
221 {
222         const struct Qdisc_ops *ops;
223
224         if (!capable(CAP_NET_ADMIN))
225                 return -EPERM;
226
227         write_lock(&qdisc_mod_lock);
228         ops = qdisc_lookup_default(name);
229         if (!ops) {
230                 /* Not found, drop lock and try to load module */
231                 write_unlock(&qdisc_mod_lock);
232                 request_module("sch_%s", name);
233                 write_lock(&qdisc_mod_lock);
234
235                 ops = qdisc_lookup_default(name);
236         }
237
238         if (ops) {
239                 /* Set new default */
240                 module_put(default_qdisc_ops->owner);
241                 default_qdisc_ops = ops;
242         }
243         write_unlock(&qdisc_mod_lock);
244
245         return ops ? 0 : -ENOENT;
246 }
247
#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config.
 *
 * Registered as a late initcall; hands CONFIG_DEFAULT_NET_SCH to
 * qdisc_set_default() and returns its result.
 */
static int __init sch_default_qdisc(void)
{
        return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
256
257 /* We know handle. Find qdisc among all qdisc's attached to device
258  * (root qdisc, all its children, children of children etc.)
259  * Note: caller either uses rtnl or rcu_read_lock()
260  */
261
262 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
263 {
264         struct Qdisc *q;
265
266         if (!qdisc_dev(root))
267                 return (root->handle == handle ? root : NULL);
268
269         if (!(root->flags & TCQ_F_BUILTIN) &&
270             root->handle == handle)
271                 return root;
272
273         hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
274                 if (q->handle == handle)
275                         return q;
276         }
277         return NULL;
278 }
279
280 void qdisc_hash_add(struct Qdisc *q, bool invisible)
281 {
282         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
283                 ASSERT_RTNL();
284                 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
285                 if (invisible)
286                         q->flags |= TCQ_F_INVISIBLE;
287         }
288 }
289 EXPORT_SYMBOL(qdisc_hash_add);
290
291 void qdisc_hash_del(struct Qdisc *q)
292 {
293         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
294                 ASSERT_RTNL();
295                 hash_del_rcu(&q->hash);
296         }
297 }
298 EXPORT_SYMBOL(qdisc_hash_del);
299
300 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
301 {
302         struct Qdisc *q;
303
304         if (!handle)
305                 return NULL;
306         q = qdisc_match_from_root(dev->qdisc, handle);
307         if (q)
308                 goto out;
309
310         if (dev_ingress_queue(dev))
311                 q = qdisc_match_from_root(
312                         dev_ingress_queue(dev)->qdisc_sleeping,
313                         handle);
314 out:
315         return q;
316 }
317
318 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
319 {
320         unsigned long cl;
321         struct Qdisc *leaf;
322         const struct Qdisc_class_ops *cops = p->ops->cl_ops;
323
324         if (cops == NULL)
325                 return NULL;
326         cl = cops->find(p, classid);
327
328         if (cl == 0)
329                 return NULL;
330         leaf = cops->leaf(p, cl);
331         return leaf;
332 }
333
334 /* Find queueing discipline by name */
335
336 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
337 {
338         struct Qdisc_ops *q = NULL;
339
340         if (kind) {
341                 read_lock(&qdisc_mod_lock);
342                 for (q = qdisc_base; q; q = q->next) {
343                         if (nla_strcmp(kind, q->id) == 0) {
344                                 if (!try_module_get(q->owner))
345                                         q = NULL;
346                                 break;
347                         }
348                 }
349                 read_unlock(&qdisc_mod_lock);
350         }
351         return q;
352 }
353
354 /* The linklayer setting were not transferred from iproute2, in older
355  * versions, and the rate tables lookup systems have been dropped in
356  * the kernel. To keep backward compatible with older iproute2 tc
357  * utils, we detect the linklayer setting by detecting if the rate
358  * table were modified.
359  *
360  * For linklayer ATM table entries, the rate table will be aligned to
361  * 48 bytes, thus some table entries will contain the same value.  The
362  * mpu (min packet unit) is also encoded into the old rate table, thus
363  * starting from the mpu, we find low and high table entries for
364  * mapping this cell.  If these entries contain the same value, when
365  * the rate tables have been modified for linklayer ATM.
366  *
367  * This is done by rounding mpu to the nearest 48 bytes cell/entry,
368  * and then roundup to the next cell, calc the table entry one below,
369  * and compare.
370  */
371 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
372 {
373         int low       = roundup(r->mpu, 48);
374         int high      = roundup(low+1, 48);
375         int cell_low  = low >> r->cell_log;
376         int cell_high = (high >> r->cell_log) - 1;
377
378         /* rtab is too inaccurate at rates > 100Mbit/s */
379         if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
380                 pr_debug("TC linklayer: Giving up ATM detection\n");
381                 return TC_LINKLAYER_ETHERNET;
382         }
383
384         if ((cell_high > cell_low) && (cell_high < 256)
385             && (rtab[cell_low] == rtab[cell_high])) {
386                 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
387                          cell_low, cell_high, rtab[cell_high]);
388                 return TC_LINKLAYER_ATM;
389         }
390         return TC_LINKLAYER_ETHERNET;
391 }
392
/* Singly linked list (via ->next) of all reference-counted rate tables;
 * entries are added by qdisc_get_rtab() and removed by qdisc_put_rtab().
 */
static struct qdisc_rate_table *qdisc_rtab_list;
394
395 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
396                                         struct nlattr *tab)
397 {
398         struct qdisc_rate_table *rtab;
399
400         if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
401             nla_len(tab) != TC_RTAB_SIZE)
402                 return NULL;
403
404         for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
405                 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
406                     !memcmp(&rtab->data, nla_data(tab), 1024)) {
407                         rtab->refcnt++;
408                         return rtab;
409                 }
410         }
411
412         rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
413         if (rtab) {
414                 rtab->rate = *r;
415                 rtab->refcnt = 1;
416                 memcpy(rtab->data, nla_data(tab), 1024);
417                 if (r->linklayer == TC_LINKLAYER_UNAWARE)
418                         r->linklayer = __detect_linklayer(r, rtab->data);
419                 rtab->next = qdisc_rtab_list;
420                 qdisc_rtab_list = rtab;
421         }
422         return rtab;
423 }
424 EXPORT_SYMBOL(qdisc_get_rtab);
425
426 void qdisc_put_rtab(struct qdisc_rate_table *tab)
427 {
428         struct qdisc_rate_table *rtab, **rtabp;
429
430         if (!tab || --tab->refcnt)
431                 return;
432
433         for (rtabp = &qdisc_rtab_list;
434              (rtab = *rtabp) != NULL;
435              rtabp = &rtab->next) {
436                 if (rtab == tab) {
437                         *rtabp = rtab->next;
438                         kfree(rtab);
439                         return;
440                 }
441         }
442 }
443 EXPORT_SYMBOL(qdisc_put_rtab);
444
/* All reference-counted size tables currently in use; entries are added
 * by qdisc_get_stab() and removed by qdisc_put_stab().
 */
static LIST_HEAD(qdisc_stab_list);

/* Netlink policy for the attributes nested inside TCA_STAB:
 * TCA_STAB_BASE carries a struct tc_sizespec, TCA_STAB_DATA the binary
 * table contents.
 */
static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
        [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
        [TCA_STAB_DATA] = { .type = NLA_BINARY },
};
451
452 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
453 {
454         struct nlattr *tb[TCA_STAB_MAX + 1];
455         struct qdisc_size_table *stab;
456         struct tc_sizespec *s;
457         unsigned int tsize = 0;
458         u16 *tab = NULL;
459         int err;
460
461         err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL);
462         if (err < 0)
463                 return ERR_PTR(err);
464         if (!tb[TCA_STAB_BASE])
465                 return ERR_PTR(-EINVAL);
466
467         s = nla_data(tb[TCA_STAB_BASE]);
468
469         if (s->tsize > 0) {
470                 if (!tb[TCA_STAB_DATA])
471                         return ERR_PTR(-EINVAL);
472                 tab = nla_data(tb[TCA_STAB_DATA]);
473                 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
474         }
475
476         if (tsize != s->tsize || (!tab && tsize > 0))
477                 return ERR_PTR(-EINVAL);
478
479         list_for_each_entry(stab, &qdisc_stab_list, list) {
480                 if (memcmp(&stab->szopts, s, sizeof(*s)))
481                         continue;
482                 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
483                         continue;
484                 stab->refcnt++;
485                 return stab;
486         }
487
488         stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
489         if (!stab)
490                 return ERR_PTR(-ENOMEM);
491
492         stab->refcnt = 1;
493         stab->szopts = *s;
494         if (tsize > 0)
495                 memcpy(stab->data, tab, tsize * sizeof(u16));
496
497         list_add_tail(&stab->list, &qdisc_stab_list);
498
499         return stab;
500 }
501
502 static void stab_kfree_rcu(struct rcu_head *head)
503 {
504         kfree(container_of(head, struct qdisc_size_table, rcu));
505 }
506
507 void qdisc_put_stab(struct qdisc_size_table *tab)
508 {
509         if (!tab)
510                 return;
511
512         if (--tab->refcnt == 0) {
513                 list_del(&tab->list);
514                 call_rcu_bh(&tab->rcu, stab_kfree_rcu);
515         }
516 }
517 EXPORT_SYMBOL(qdisc_put_stab);
518
519 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
520 {
521         struct nlattr *nest;
522
523         nest = nla_nest_start(skb, TCA_STAB);
524         if (nest == NULL)
525                 goto nla_put_failure;
526         if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
527                 goto nla_put_failure;
528         nla_nest_end(skb, nest);
529
530         return skb->len;
531
532 nla_put_failure:
533         return -1;
534 }
535
536 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
537                                const struct qdisc_size_table *stab)
538 {
539         int pkt_len, slot;
540
541         pkt_len = skb->len + stab->szopts.overhead;
542         if (unlikely(!stab->szopts.tsize))
543                 goto out;
544
545         slot = pkt_len + stab->szopts.cell_align;
546         if (unlikely(slot < 0))
547                 slot = 0;
548
549         slot >>= stab->szopts.cell_log;
550         if (likely(slot < stab->szopts.tsize))
551                 pkt_len = stab->data[slot];
552         else
553                 pkt_len = stab->data[stab->szopts.tsize - 1] *
554                                 (slot / stab->szopts.tsize) +
555                                 stab->data[slot % stab->szopts.tsize];
556
557         pkt_len <<= stab->szopts.size_log;
558 out:
559         if (unlikely(pkt_len < 1))
560                 pkt_len = 1;
561         qdisc_skb_cb(skb)->pkt_len = pkt_len;
562 }
563 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
564
565 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
566 {
567         if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
568                 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
569                         txt, qdisc->ops->id, qdisc->handle >> 16);
570                 qdisc->flags |= TCQ_F_WARN_NONWC;
571         }
572 }
573 EXPORT_SYMBOL(qdisc_warn_nonwc);
574
575 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
576 {
577         struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
578                                                  timer);
579
580         rcu_read_lock();
581         __netif_schedule(qdisc_root(wd->qdisc));
582         rcu_read_unlock();
583
584         return HRTIMER_NORESTART;
585 }
586
587 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
588 {
589         hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
590         wd->timer.function = qdisc_watchdog;
591         wd->qdisc = qdisc;
592 }
593 EXPORT_SYMBOL(qdisc_watchdog_init);
594
595 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
596 {
597         if (test_bit(__QDISC_STATE_DEACTIVATED,
598                      &qdisc_root_sleeping(wd->qdisc)->state))
599                 return;
600
601         if (wd->last_expires == expires)
602                 return;
603
604         wd->last_expires = expires;
605         hrtimer_start(&wd->timer,
606                       ns_to_ktime(expires),
607                       HRTIMER_MODE_ABS_PINNED);
608 }
609 EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
610
/* Cancel the hrtimer of watchdog @wd via hrtimer_cancel(). */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
        hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
616
617 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
618 {
619         struct hlist_head *h;
620         unsigned int i;
621
622         h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
623
624         if (h != NULL) {
625                 for (i = 0; i < n; i++)
626                         INIT_HLIST_HEAD(&h[i]);
627         }
628         return h;
629 }
630
631 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
632 {
633         struct Qdisc_class_common *cl;
634         struct hlist_node *next;
635         struct hlist_head *nhash, *ohash;
636         unsigned int nsize, nmask, osize;
637         unsigned int i, h;
638
639         /* Rehash when load factor exceeds 0.75 */
640         if (clhash->hashelems * 4 <= clhash->hashsize * 3)
641                 return;
642         nsize = clhash->hashsize * 2;
643         nmask = nsize - 1;
644         nhash = qdisc_class_hash_alloc(nsize);
645         if (nhash == NULL)
646                 return;
647
648         ohash = clhash->hash;
649         osize = clhash->hashsize;
650
651         sch_tree_lock(sch);
652         for (i = 0; i < osize; i++) {
653                 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
654                         h = qdisc_class_hash(cl->classid, nmask);
655                         hlist_add_head(&cl->hnode, &nhash[h]);
656                 }
657         }
658         clhash->hash     = nhash;
659         clhash->hashsize = nsize;
660         clhash->hashmask = nmask;
661         sch_tree_unlock(sch);
662
663         kvfree(ohash);
664 }
665 EXPORT_SYMBOL(qdisc_class_hash_grow);
666
667 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
668 {
669         unsigned int size = 4;
670
671         clhash->hash = qdisc_class_hash_alloc(size);
672         if (clhash->hash == NULL)
673                 return -ENOMEM;
674         clhash->hashsize  = size;
675         clhash->hashmask  = size - 1;
676         clhash->hashelems = 0;
677         return 0;
678 }
679 EXPORT_SYMBOL(qdisc_class_hash_init);
680
/* Free the bucket array of @clhash.  Only the array is released here;
 * nothing is done to entries that may still be linked into it.
 */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
        kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
686
687 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
688                              struct Qdisc_class_common *cl)
689 {
690         unsigned int h;
691
692         INIT_HLIST_NODE(&cl->hnode);
693         h = qdisc_class_hash(cl->classid, clhash->hashmask);
694         hlist_add_head(&cl->hnode, &clhash->hash[h]);
695         clhash->hashelems++;
696 }
697 EXPORT_SYMBOL(qdisc_class_hash_insert);
698
699 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
700                              struct Qdisc_class_common *cl)
701 {
702         hlist_del(&cl->hnode);
703         clhash->hashelems--;
704 }
705 EXPORT_SYMBOL(qdisc_class_hash_remove);
706
707 /* Allocate an unique handle from space managed by kernel
708  * Possible range is [8000-FFFF]:0000 (0x8000 values)
709  */
710 static u32 qdisc_alloc_handle(struct net_device *dev)
711 {
712         int i = 0x8000;
713         static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
714
715         do {
716                 autohandle += TC_H_MAKE(0x10000U, 0);
717                 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
718                         autohandle = TC_H_MAKE(0x80000000U, 0);
719                 if (!qdisc_lookup(dev, autohandle))
720                         return autohandle;
721                 cond_resched();
722         } while (--i > 0);
723
724         return 0;
725 }
726
/* Propagate a reduction of @n packets / @len bytes from @sch up through
 * all of its ancestor qdiscs.
 *
 * For every ancestor found via the parent handle, q.qlen is reduced by
 * @n, qstats.backlog by @len, and the drop counter is increased.  When
 * the child has become empty, the parent's qlen_notify class op (if
 * any) is invoked for the class the child hangs off.
 *
 * The walk stops at the root, at an ingress parent, at a qdisc flagged
 * TCQ_F_NOPARENT, or when a parent handle cannot be resolved.
 */
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
                               unsigned int len)
{
        const struct Qdisc_class_ops *cops;
        unsigned long cl;
        u32 parentid;
        bool notify;
        int drops;

        if (n == 0 && len == 0)
                return;
        /* @n is reinterpreted as int: values that come out negative
         * count as zero drops.
         */
        drops = max_t(int, n, 0);
        rcu_read_lock();
        while ((parentid = sch->parent)) {
                if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
                        break;

                if (sch->flags & TCQ_F_NOPARENT)
                        break;
                /* Notify parent qdisc only if child qdisc becomes empty.
                 *
                 * If child was empty even before update then backlog
                 * counter is screwed and we skip notification because
                 * parent class is already passive.
                 */
                notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
                /* TODO: perform the search on a per txq basis */
                sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
                if (sch == NULL) {
                        /* Only a root parent may legitimately be missing */
                        WARN_ON_ONCE(parentid != TC_H_ROOT);
                        break;
                }
                cops = sch->ops->cl_ops;
                if (notify && cops->qlen_notify) {
                        cl = cops->find(sch, parentid);
                        cops->qlen_notify(sch, cl);
                }
                /* Apply the reduction to the ancestor itself */
                sch->q.qlen -= n;
                sch->qstats.backlog -= len;
                __qdisc_qstats_drop(sch, drops);
        }
        rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
771
/* Append one netlink message describing qdisc @q to @skb.
 *
 * @clid is reported as tcm_parent; @portid, @seq, @flags and @event go
 * into the netlink header.  After the tcmsg header the message carries
 * TCA_KIND, TCA_HW_OFFLOAD (1 if TCQ_F_OFFLOADED is set, else 0), the
 * qdisc's own dump output, its size table if any, and its statistics.
 *
 * Returns skb->len on success.  On failure everything added by this
 * call is trimmed from @skb again and -1 is returned.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
                         u32 portid, u32 seq, u16 flags, int event)
{
        struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
        struct gnet_stats_queue __percpu *cpu_qstats = NULL;
        struct tcmsg *tcm;
        struct nlmsghdr  *nlh;
        /* Remember the current tail so a partial message can be undone */
        unsigned char *b = skb_tail_pointer(skb);
        struct gnet_dump d;
        struct qdisc_size_table *stab;
        __u32 qlen;

        cond_resched();
        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
        if (!nlh)
                goto out_nlmsg_trim;
        tcm = nlmsg_data(nlh);
        tcm->tcm_family = AF_UNSPEC;
        tcm->tcm__pad1 = 0;
        tcm->tcm__pad2 = 0;
        tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
        tcm->tcm_parent = clid;
        tcm->tcm_handle = q->handle;
        /* tcm_info carries the qdisc's current reference count */
        tcm->tcm_info = refcount_read(&q->refcnt);
        if (nla_put_string(skb, TCA_KIND, q->ops->id))
                goto nla_put_failure;
        if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
                goto nla_put_failure;
        if (q->ops->dump && q->ops->dump(q, skb) < 0)
                goto nla_put_failure;
        /* Queue length is sampled only after the qdisc's dump hook ran */
        qlen = q->q.qlen;

        stab = rtnl_dereference(q->stab);
        if (stab && qdisc_dump_stab(skb, stab) < 0)
                goto nla_put_failure;

        if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
                                         NULL, &d, TCA_PAD) < 0)
                goto nla_put_failure;

        if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
                goto nla_put_failure;

        /* Per-cpu counters are passed along only if the qdisc uses them */
        if (qdisc_is_percpu_stats(q)) {
                cpu_bstats = q->cpu_bstats;
                cpu_qstats = q->cpu_qstats;
        }

        if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
                                  &d, cpu_bstats, &q->bstats) < 0 ||
            gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
            gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
                goto nla_put_failure;

        if (gnet_stats_finish_copy(&d) < 0)
                goto nla_put_failure;

        /* Fix up the message length now that all attributes are in */
        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
        return skb->len;

out_nlmsg_trim:
nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}
837
838 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
839 {
840         if (q->flags & TCQ_F_BUILTIN)
841                 return true;
842         if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
843                 return true;
844
845         return false;
846 }
847
848 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
849                         struct nlmsghdr *n, u32 clid,
850                         struct Qdisc *old, struct Qdisc *new)
851 {
852         struct sk_buff *skb;
853         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
854
855         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
856         if (!skb)
857                 return -ENOBUFS;
858
859         if (old && !tc_qdisc_dump_ignore(old, false)) {
860                 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
861                                   0, RTM_DELQDISC) < 0)
862                         goto err_out;
863         }
864         if (new && !tc_qdisc_dump_ignore(new, false)) {
865                 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
866                                   old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
867                         goto err_out;
868         }
869
870         if (skb->len)
871                 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
872                                       n->nlmsg_flags & NLM_F_ECHO);
873
874 err_out:
875         kfree_skb(skb);
876         return -EINVAL;
877 }
878
879 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
880                                struct nlmsghdr *n, u32 clid,
881                                struct Qdisc *old, struct Qdisc *new)
882 {
883         if (new || old)
884                 qdisc_notify(net, skb, n, clid, old, new);
885
886         if (old)
887                 qdisc_destroy(old);
888 }
889
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 *
 * With a NULL "parent" the graft targets the device itself: either its
 * single ingress queue (when "old" or "new" carries TCQ_F_INGRESS) or
 * all of its tx queues.  Otherwise the parent's class ops do the graft.
 *
 * Returns 0 on success, -ENOENT if the device has no ingress queue or
 * the class does not exist, -EOPNOTSUPP if the parent cannot graft, or
 * the error returned by the parent's graft op.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
                       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
                       struct Qdisc *new, struct Qdisc *old)
{
        struct Qdisc *q = old;
        struct net *net = dev_net(dev);
        int err = 0;

        if (parent == NULL) {
                unsigned int i, num_q, ingress;

                ingress = 0;
                num_q = dev->num_tx_queues;
                /* An ingress graft touches only the one ingress queue */
                if ((q && q->flags & TCQ_F_INGRESS) ||
                    (new && new->flags & TCQ_F_INGRESS)) {
                        num_q = 1;
                        ingress = 1;
                        if (!dev_ingress_queue(dev))
                                return -ENOENT;
                }

                /* Quiesce a running device while its qdiscs are swapped */
                if (dev->flags & IFF_UP)
                        dev_deactivate(dev);

                /* Qdiscs with an attach op wire up the queues themselves */
                if (new && new->ops->attach)
                        goto skip;

                for (i = 0; i < num_q; i++) {
                        struct netdev_queue *dev_queue = dev_ingress_queue(dev);

                        if (!ingress)
                                dev_queue = netdev_get_tx_queue(dev, i);

                        old = dev_graft_qdisc(dev_queue, new);
                        /* One extra reference per queue beyond the first */
                        if (new && i > 0)
                                qdisc_refcount_inc(new);

                        if (!ingress)
                                qdisc_destroy(old);
                }

skip:
                if (!ingress) {
                        /* Egress: the outgoing qdisc is dev->qdisc */
                        notify_and_destroy(net, skb, n, classid,
                                           dev->qdisc, new);
                        if (new && !new->ops->attach)
                                qdisc_refcount_inc(new);
                        /* Fall back to noop_qdisc when nothing replaces it */
                        dev->qdisc = new ? : &noop_qdisc;

                        if (new && new->ops->attach)
                                new->ops->attach(new);
                } else {
                        notify_and_destroy(net, skb, n, classid, old, new);
                }

                if (dev->flags & IFF_UP)
                        dev_activate(dev);
        } else {
                const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

                /* Default for parents that are classless or cannot graft */
                err = -EOPNOTSUPP;
                if (cops && cops->graft) {
                        unsigned long cl = cops->find(parent, classid);

                        if (cl)
                                err = cops->graft(parent, cl, new, &old);
                        else
                                err = -ENOENT;
                }
                if (!err)
                        notify_and_destroy(net, skb, n, classid, old, new);
        }
        return err;
}
973
974 /* lockdep annotation is needed for ingress; egress gets it only for name */
975 static struct lock_class_key qdisc_tx_lock;
976 static struct lock_class_key qdisc_rx_lock;
977
978 /*
979    Allocate and initialize new qdisc.
980
981    Parameters are passed via opt.
982  */
983
984 static struct Qdisc *qdisc_create(struct net_device *dev,
985                                   struct netdev_queue *dev_queue,
986                                   struct Qdisc *p, u32 parent, u32 handle,
987                                   struct nlattr **tca, int *errp)
988 {
989         int err;
990         struct nlattr *kind = tca[TCA_KIND];
991         struct Qdisc *sch;
992         struct Qdisc_ops *ops;
993         struct qdisc_size_table *stab;
994
995         ops = qdisc_lookup_ops(kind);
996 #ifdef CONFIG_MODULES
997         if (ops == NULL && kind != NULL) {
998                 char name[IFNAMSIZ];
999                 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
1000                         /* We dropped the RTNL semaphore in order to
1001                          * perform the module load.  So, even if we
1002                          * succeeded in loading the module we have to
1003                          * tell the caller to replay the request.  We
1004                          * indicate this using -EAGAIN.
1005                          * We replay the request because the device may
1006                          * go away in the mean time.
1007                          */
1008                         rtnl_unlock();
1009                         request_module("sch_%s", name);
1010                         rtnl_lock();
1011                         ops = qdisc_lookup_ops(kind);
1012                         if (ops != NULL) {
1013                                 /* We will try again qdisc_lookup_ops,
1014                                  * so don't keep a reference.
1015                                  */
1016                                 module_put(ops->owner);
1017                                 err = -EAGAIN;
1018                                 goto err_out;
1019                         }
1020                 }
1021         }
1022 #endif
1023
1024         err = -ENOENT;
1025         if (ops == NULL)
1026                 goto err_out;
1027
1028         sch = qdisc_alloc(dev_queue, ops);
1029         if (IS_ERR(sch)) {
1030                 err = PTR_ERR(sch);
1031                 goto err_out2;
1032         }
1033
1034         sch->parent = parent;
1035
1036         if (handle == TC_H_INGRESS) {
1037                 sch->flags |= TCQ_F_INGRESS;
1038                 handle = TC_H_MAKE(TC_H_INGRESS, 0);
1039                 lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
1040         } else {
1041                 if (handle == 0) {
1042                         handle = qdisc_alloc_handle(dev);
1043                         err = -ENOMEM;
1044                         if (handle == 0)
1045                                 goto err_out3;
1046                 }
1047                 lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
1048                 if (!netif_is_multiqueue(dev))
1049                         sch->flags |= TCQ_F_ONETXQUEUE;
1050         }
1051
1052         sch->handle = handle;
1053
1054         /* This exist to keep backward compatible with a userspace
1055          * loophole, what allowed userspace to get IFF_NO_QUEUE
1056          * facility on older kernels by setting tx_queue_len=0 (prior
1057          * to qdisc init), and then forgot to reinit tx_queue_len
1058          * before again attaching a qdisc.
1059          */
1060         if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1061                 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1062                 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1063         }
1064
1065         if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
1066                 if (qdisc_is_percpu_stats(sch)) {
1067                         sch->cpu_bstats =
1068                                 netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
1069                         if (!sch->cpu_bstats)
1070                                 goto err_out4;
1071
1072                         sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
1073                         if (!sch->cpu_qstats)
1074                                 goto err_out4;
1075                 }
1076
1077                 if (tca[TCA_STAB]) {
1078                         stab = qdisc_get_stab(tca[TCA_STAB]);
1079                         if (IS_ERR(stab)) {
1080                                 err = PTR_ERR(stab);
1081                                 goto err_out4;
1082                         }
1083                         rcu_assign_pointer(sch->stab, stab);
1084                 }
1085                 if (tca[TCA_RATE]) {
1086                         seqcount_t *running;
1087
1088                         err = -EOPNOTSUPP;
1089                         if (sch->flags & TCQ_F_MQROOT)
1090                                 goto err_out4;
1091
1092                         if ((sch->parent != TC_H_ROOT) &&
1093                             !(sch->flags & TCQ_F_INGRESS) &&
1094                             (!p || !(p->flags & TCQ_F_MQROOT)))
1095                                 running = qdisc_root_sleeping_running(sch);
1096                         else
1097                                 running = &sch->running;
1098
1099                         err = gen_new_estimator(&sch->bstats,
1100                                                 sch->cpu_bstats,
1101                                                 &sch->rate_est,
1102                                                 NULL,
1103                                                 running,
1104                                                 tca[TCA_RATE]);
1105                         if (err)
1106                                 goto err_out4;
1107                 }
1108
1109                 qdisc_hash_add(sch, false);
1110
1111                 return sch;
1112         }
1113         /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1114         if (ops->destroy)
1115                 ops->destroy(sch);
1116 err_out3:
1117         dev_put(dev);
1118         kfree((char *) sch - sch->padded);
1119 err_out2:
1120         module_put(ops->owner);
1121 err_out:
1122         *errp = err;
1123         return NULL;
1124
1125 err_out4:
1126         free_percpu(sch->cpu_bstats);
1127         free_percpu(sch->cpu_qstats);
1128         /*
1129          * Any broken qdiscs that would require a ops->reset() here?
1130          * The qdisc was never in action so it shouldn't be necessary.
1131          */
1132         qdisc_put_stab(rtnl_dereference(sch->stab));
1133         if (ops->destroy)
1134                 ops->destroy(sch);
1135         goto err_out3;
1136 }
1137
1138 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
1139 {
1140         struct qdisc_size_table *ostab, *stab = NULL;
1141         int err = 0;
1142
1143         if (tca[TCA_OPTIONS]) {
1144                 if (sch->ops->change == NULL)
1145                         return -EINVAL;
1146                 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
1147                 if (err)
1148                         return err;
1149         }
1150
1151         if (tca[TCA_STAB]) {
1152                 stab = qdisc_get_stab(tca[TCA_STAB]);
1153                 if (IS_ERR(stab))
1154                         return PTR_ERR(stab);
1155         }
1156
1157         ostab = rtnl_dereference(sch->stab);
1158         rcu_assign_pointer(sch->stab, stab);
1159         qdisc_put_stab(ostab);
1160
1161         if (tca[TCA_RATE]) {
1162                 /* NB: ignores errors from replace_estimator
1163                    because change can't be undone. */
1164                 if (sch->flags & TCQ_F_MQROOT)
1165                         goto out;
1166                 gen_replace_estimator(&sch->bstats,
1167                                       sch->cpu_bstats,
1168                                       &sch->rate_est,
1169                                       NULL,
1170                                       qdisc_root_sleeping_running(sch),
1171                                       tca[TCA_RATE]);
1172         }
1173 out:
1174         return 0;
1175 }
1176
1177 struct check_loop_arg {
1178         struct qdisc_walker     w;
1179         struct Qdisc            *p;
1180         int                     depth;
1181 };
1182
1183 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1184                          struct qdisc_walker *w);
1185
1186 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1187 {
1188         struct check_loop_arg   arg;
1189
1190         if (q->ops->cl_ops == NULL)
1191                 return 0;
1192
1193         arg.w.stop = arg.w.skip = arg.w.count = 0;
1194         arg.w.fn = check_loop_fn;
1195         arg.depth = depth;
1196         arg.p = p;
1197         q->ops->cl_ops->walk(q, &arg.w);
1198         return arg.w.stop ? -ELOOP : 0;
1199 }
1200
1201 static int
1202 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1203 {
1204         struct Qdisc *leaf;
1205         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1206         struct check_loop_arg *arg = (struct check_loop_arg *)w;
1207
1208         leaf = cops->leaf(q, cl);
1209         if (leaf) {
1210                 if (leaf == arg->p || arg->depth > 7)
1211                         return -ELOOP;
1212                 return check_loop(leaf, arg->p, arg->depth + 1);
1213         }
1214         return 0;
1215 }
1216
1217 /*
1218  * Delete/get qdisc.
1219  */
1220
1221 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1222                         struct netlink_ext_ack *extack)
1223 {
1224         struct net *net = sock_net(skb->sk);
1225         struct tcmsg *tcm = nlmsg_data(n);
1226         struct nlattr *tca[TCA_MAX + 1];
1227         struct net_device *dev;
1228         u32 clid;
1229         struct Qdisc *q = NULL;
1230         struct Qdisc *p = NULL;
1231         int err;
1232
1233         if ((n->nlmsg_type != RTM_GETQDISC) &&
1234             !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1235                 return -EPERM;
1236
1237         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1238         if (err < 0)
1239                 return err;
1240
1241         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1242         if (!dev)
1243                 return -ENODEV;
1244
1245         clid = tcm->tcm_parent;
1246         if (clid) {
1247                 if (clid != TC_H_ROOT) {
1248                         if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1249                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1250                                 if (!p)
1251                                         return -ENOENT;
1252                                 q = qdisc_leaf(p, clid);
1253                         } else if (dev_ingress_queue(dev)) {
1254                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1255                         }
1256                 } else {
1257                         q = dev->qdisc;
1258                 }
1259                 if (!q)
1260                         return -ENOENT;
1261
1262                 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
1263                         return -EINVAL;
1264         } else {
1265                 q = qdisc_lookup(dev, tcm->tcm_handle);
1266                 if (!q)
1267                         return -ENOENT;
1268         }
1269
1270         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1271                 return -EINVAL;
1272
1273         if (n->nlmsg_type == RTM_DELQDISC) {
1274                 if (!clid)
1275                         return -EINVAL;
1276                 if (q->handle == 0)
1277                         return -ENOENT;
1278                 err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
1279                 if (err != 0)
1280                         return err;
1281         } else {
1282                 qdisc_notify(net, skb, n, clid, NULL, q);
1283         }
1284         return 0;
1285 }
1286
1287 /*
1288  * Create/change qdisc.
1289  */
1290
1291 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1292                            struct netlink_ext_ack *extack)
1293 {
1294         struct net *net = sock_net(skb->sk);
1295         struct tcmsg *tcm;
1296         struct nlattr *tca[TCA_MAX + 1];
1297         struct net_device *dev;
1298         u32 clid;
1299         struct Qdisc *q, *p;
1300         int err;
1301
1302         if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1303                 return -EPERM;
1304
1305 replay:
1306         /* Reinit, just in case something touches this. */
1307         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1308         if (err < 0)
1309                 return err;
1310
1311         tcm = nlmsg_data(n);
1312         clid = tcm->tcm_parent;
1313         q = p = NULL;
1314
1315         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1316         if (!dev)
1317                 return -ENODEV;
1318
1319
1320         if (clid) {
1321                 if (clid != TC_H_ROOT) {
1322                         if (clid != TC_H_INGRESS) {
1323                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1324                                 if (!p)
1325                                         return -ENOENT;
1326                                 q = qdisc_leaf(p, clid);
1327                         } else if (dev_ingress_queue_create(dev)) {
1328                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1329                         }
1330                 } else {
1331                         q = dev->qdisc;
1332                 }
1333
1334                 /* It may be default qdisc, ignore it */
1335                 if (q && q->handle == 0)
1336                         q = NULL;
1337
1338                 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1339                         if (tcm->tcm_handle) {
1340                                 if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
1341                                         return -EEXIST;
1342                                 if (TC_H_MIN(tcm->tcm_handle))
1343                                         return -EINVAL;
1344                                 q = qdisc_lookup(dev, tcm->tcm_handle);
1345                                 if (!q)
1346                                         goto create_n_graft;
1347                                 if (n->nlmsg_flags & NLM_F_EXCL)
1348                                         return -EEXIST;
1349                                 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1350                                         return -EINVAL;
1351                                 if (q == p ||
1352                                     (p && check_loop(q, p, 0)))
1353                                         return -ELOOP;
1354                                 qdisc_refcount_inc(q);
1355                                 goto graft;
1356                         } else {
1357                                 if (!q)
1358                                         goto create_n_graft;
1359
1360                                 /* This magic test requires explanation.
1361                                  *
1362                                  *   We know, that some child q is already
1363                                  *   attached to this parent and have choice:
1364                                  *   either to change it or to create/graft new one.
1365                                  *
1366                                  *   1. We are allowed to create/graft only
1367                                  *   if CREATE and REPLACE flags are set.
1368                                  *
1369                                  *   2. If EXCL is set, requestor wanted to say,
1370                                  *   that qdisc tcm_handle is not expected
1371                                  *   to exist, so that we choose create/graft too.
1372                                  *
1373                                  *   3. The last case is when no flags are set.
1374                                  *   Alas, it is sort of hole in API, we
1375                                  *   cannot decide what to do unambiguously.
1376                                  *   For now we select create/graft, if
1377                                  *   user gave KIND, which does not match existing.
1378                                  */
1379                                 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1380                                     (n->nlmsg_flags & NLM_F_REPLACE) &&
1381                                     ((n->nlmsg_flags & NLM_F_EXCL) ||
1382                                      (tca[TCA_KIND] &&
1383                                       nla_strcmp(tca[TCA_KIND], q->ops->id))))
1384                                         goto create_n_graft;
1385                         }
1386                 }
1387         } else {
1388                 if (!tcm->tcm_handle)
1389                         return -EINVAL;
1390                 q = qdisc_lookup(dev, tcm->tcm_handle);
1391         }
1392
1393         /* Change qdisc parameters */
1394         if (q == NULL)
1395                 return -ENOENT;
1396         if (n->nlmsg_flags & NLM_F_EXCL)
1397                 return -EEXIST;
1398         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1399                 return -EINVAL;
1400         err = qdisc_change(q, tca);
1401         if (err == 0)
1402                 qdisc_notify(net, skb, n, clid, NULL, q);
1403         return err;
1404
1405 create_n_graft:
1406         if (!(n->nlmsg_flags & NLM_F_CREATE))
1407                 return -ENOENT;
1408         if (clid == TC_H_INGRESS) {
1409                 if (dev_ingress_queue(dev))
1410                         q = qdisc_create(dev, dev_ingress_queue(dev), p,
1411                                          tcm->tcm_parent, tcm->tcm_parent,
1412                                          tca, &err);
1413                 else
1414                         err = -ENOENT;
1415         } else {
1416                 struct netdev_queue *dev_queue;
1417
1418                 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1419                         dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1420                 else if (p)
1421                         dev_queue = p->dev_queue;
1422                 else
1423                         dev_queue = netdev_get_tx_queue(dev, 0);
1424
1425                 q = qdisc_create(dev, dev_queue, p,
1426                                  tcm->tcm_parent, tcm->tcm_handle,
1427                                  tca, &err);
1428         }
1429         if (q == NULL) {
1430                 if (err == -EAGAIN)
1431                         goto replay;
1432                 return err;
1433         }
1434
1435 graft:
1436         err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1437         if (err) {
1438                 if (q)
1439                         qdisc_destroy(q);
1440                 return err;
1441         }
1442
1443         return 0;
1444 }
1445
1446 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1447                               struct netlink_callback *cb,
1448                               int *q_idx_p, int s_q_idx, bool recur,
1449                               bool dump_invisible)
1450 {
1451         int ret = 0, q_idx = *q_idx_p;
1452         struct Qdisc *q;
1453         int b;
1454
1455         if (!root)
1456                 return 0;
1457
1458         q = root;
1459         if (q_idx < s_q_idx) {
1460                 q_idx++;
1461         } else {
1462                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1463                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1464                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1465                                   RTM_NEWQDISC) <= 0)
1466                         goto done;
1467                 q_idx++;
1468         }
1469
1470         /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1471          * itself has already been dumped.
1472          *
1473          * If we've already dumped the top-level (ingress) qdisc above and the global
1474          * qdisc hashtable, we don't want to hit it again
1475          */
1476         if (!qdisc_dev(root) || !recur)
1477                 goto out;
1478
1479         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1480                 if (q_idx < s_q_idx) {
1481                         q_idx++;
1482                         continue;
1483                 }
1484                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1485                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1486                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1487                                   RTM_NEWQDISC) <= 0)
1488                         goto done;
1489                 q_idx++;
1490         }
1491
1492 out:
1493         *q_idx_p = q_idx;
1494         return ret;
1495 done:
1496         ret = -1;
1497         goto out;
1498 }
1499
1500 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1501 {
1502         struct net *net = sock_net(skb->sk);
1503         int idx, q_idx;
1504         int s_idx, s_q_idx;
1505         struct net_device *dev;
1506         const struct nlmsghdr *nlh = cb->nlh;
1507         struct nlattr *tca[TCA_MAX + 1];
1508         int err;
1509
1510         s_idx = cb->args[0];
1511         s_q_idx = q_idx = cb->args[1];
1512
1513         idx = 0;
1514         ASSERT_RTNL();
1515
1516         err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
1517         if (err < 0)
1518                 return err;
1519
1520         for_each_netdev(net, dev) {
1521                 struct netdev_queue *dev_queue;
1522
1523                 if (idx < s_idx)
1524                         goto cont;
1525                 if (idx > s_idx)
1526                         s_q_idx = 0;
1527                 q_idx = 0;
1528
1529                 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1530                                        true, tca[TCA_DUMP_INVISIBLE]) < 0)
1531                         goto done;
1532
1533                 dev_queue = dev_ingress_queue(dev);
1534                 if (dev_queue &&
1535                     tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1536                                        &q_idx, s_q_idx, false,
1537                                        tca[TCA_DUMP_INVISIBLE]) < 0)
1538                         goto done;
1539
1540 cont:
1541                 idx++;
1542         }
1543
1544 done:
1545         cb->args[0] = idx;
1546         cb->args[1] = q_idx;
1547
1548         return skb->len;
1549 }
1550
1551
1552
1553 /************************************************
1554  *      Traffic classes manipulation.           *
1555  ************************************************/
1556
1557 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1558                           unsigned long cl,
1559                           u32 portid, u32 seq, u16 flags, int event)
1560 {
1561         struct tcmsg *tcm;
1562         struct nlmsghdr  *nlh;
1563         unsigned char *b = skb_tail_pointer(skb);
1564         struct gnet_dump d;
1565         const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1566
1567         cond_resched();
1568         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1569         if (!nlh)
1570                 goto out_nlmsg_trim;
1571         tcm = nlmsg_data(nlh);
1572         tcm->tcm_family = AF_UNSPEC;
1573         tcm->tcm__pad1 = 0;
1574         tcm->tcm__pad2 = 0;
1575         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1576         tcm->tcm_parent = q->handle;
1577         tcm->tcm_handle = q->handle;
1578         tcm->tcm_info = 0;
1579         if (nla_put_string(skb, TCA_KIND, q->ops->id))
1580                 goto nla_put_failure;
1581         if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1582                 goto nla_put_failure;
1583
1584         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1585                                          NULL, &d, TCA_PAD) < 0)
1586                 goto nla_put_failure;
1587
1588         if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1589                 goto nla_put_failure;
1590
1591         if (gnet_stats_finish_copy(&d) < 0)
1592                 goto nla_put_failure;
1593
1594         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1595         return skb->len;
1596
1597 out_nlmsg_trim:
1598 nla_put_failure:
1599         nlmsg_trim(skb, b);
1600         return -1;
1601 }
1602
1603 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1604                          struct nlmsghdr *n, struct Qdisc *q,
1605                          unsigned long cl, int event)
1606 {
1607         struct sk_buff *skb;
1608         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1609
1610         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1611         if (!skb)
1612                 return -ENOBUFS;
1613
1614         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1615                 kfree_skb(skb);
1616                 return -EINVAL;
1617         }
1618
1619         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1620                               n->nlmsg_flags & NLM_F_ECHO);
1621 }
1622
1623 static int tclass_del_notify(struct net *net,
1624                              const struct Qdisc_class_ops *cops,
1625                              struct sk_buff *oskb, struct nlmsghdr *n,
1626                              struct Qdisc *q, unsigned long cl)
1627 {
1628         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1629         struct sk_buff *skb;
1630         int err = 0;
1631
1632         if (!cops->delete)
1633                 return -EOPNOTSUPP;
1634
1635         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1636         if (!skb)
1637                 return -ENOBUFS;
1638
1639         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1640                            RTM_DELTCLASS) < 0) {
1641                 kfree_skb(skb);
1642                 return -EINVAL;
1643         }
1644
1645         err = cops->delete(q, cl);
1646         if (err) {
1647                 kfree_skb(skb);
1648                 return err;
1649         }
1650
1651         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1652                               n->nlmsg_flags & NLM_F_ECHO);
1653 }
1654
1655 #ifdef CONFIG_NET_CLS
1656
1657 struct tcf_bind_args {
1658         struct tcf_walker w;
1659         u32 classid;
1660         unsigned long cl;
1661 };
1662
1663 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1664 {
1665         struct tcf_bind_args *a = (void *)arg;
1666
1667         if (tp->ops->bind_class) {
1668                 struct Qdisc *q = tcf_block_q(tp->chain->block);
1669
1670                 sch_tree_lock(q);
1671                 tp->ops->bind_class(n, a->classid, a->cl);
1672                 sch_tree_unlock(q);
1673         }
1674         return 0;
1675 }
1676
1677 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1678                            unsigned long new_cl)
1679 {
1680         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1681         struct tcf_block *block;
1682         struct tcf_chain *chain;
1683         unsigned long cl;
1684
1685         cl = cops->find(q, portid);
1686         if (!cl)
1687                 return;
1688         block = cops->tcf_block(q, cl);
1689         if (!block)
1690                 return;
1691         list_for_each_entry(chain, &block->chain_list, list) {
1692                 struct tcf_proto *tp;
1693
1694                 for (tp = rtnl_dereference(chain->filter_chain);
1695                      tp; tp = rtnl_dereference(tp->next)) {
1696                         struct tcf_bind_args arg = {};
1697
1698                         arg.w.fn = tcf_node_bind;
1699                         arg.classid = clid;
1700                         arg.cl = new_cl;
1701                         tp->ops->walk(tp, &arg.w);
1702                 }
1703         }
1704 }
1705
1706 #else
1707
1708 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1709                            unsigned long new_cl)
1710 {
1711 }
1712
1713 #endif
1714
1715 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1716                          struct netlink_ext_ack *extack)
1717 {
1718         struct net *net = sock_net(skb->sk);
1719         struct tcmsg *tcm = nlmsg_data(n);
1720         struct nlattr *tca[TCA_MAX + 1];
1721         struct net_device *dev;
1722         struct Qdisc *q = NULL;
1723         const struct Qdisc_class_ops *cops;
1724         unsigned long cl = 0;
1725         unsigned long new_cl;
1726         u32 portid;
1727         u32 clid;
1728         u32 qid;
1729         int err;
1730
1731         if ((n->nlmsg_type != RTM_GETTCLASS) &&
1732             !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1733                 return -EPERM;
1734
1735         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1736         if (err < 0)
1737                 return err;
1738
1739         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1740         if (!dev)
1741                 return -ENODEV;
1742
1743         /*
1744            parent == TC_H_UNSPEC - unspecified parent.
1745            parent == TC_H_ROOT   - class is root, which has no parent.
1746            parent == X:0         - parent is root class.
1747            parent == X:Y         - parent is a node in hierarchy.
1748            parent == 0:Y         - parent is X:Y, where X:0 is qdisc.
1749
1750            handle == 0:0         - generate handle from kernel pool.
1751            handle == 0:Y         - class is X:Y, where X:0 is qdisc.
1752            handle == X:Y         - clear.
1753            handle == X:0         - root class.
1754          */
1755
1756         /* Step 1. Determine qdisc handle X:0 */
1757
1758         portid = tcm->tcm_parent;
1759         clid = tcm->tcm_handle;
1760         qid = TC_H_MAJ(clid);
1761
1762         if (portid != TC_H_ROOT) {
1763                 u32 qid1 = TC_H_MAJ(portid);
1764
1765                 if (qid && qid1) {
1766                         /* If both majors are known, they must be identical. */
1767                         if (qid != qid1)
1768                                 return -EINVAL;
1769                 } else if (qid1) {
1770                         qid = qid1;
1771                 } else if (qid == 0)
1772                         qid = dev->qdisc->handle;
1773
1774                 /* Now qid is genuine qdisc handle consistent
1775                  * both with parent and child.
1776                  *
1777                  * TC_H_MAJ(portid) still may be unspecified, complete it now.
1778                  */
1779                 if (portid)
1780                         portid = TC_H_MAKE(qid, portid);
1781         } else {
1782                 if (qid == 0)
1783                         qid = dev->qdisc->handle;
1784         }
1785
1786         /* OK. Locate qdisc */
1787         q = qdisc_lookup(dev, qid);
1788         if (!q)
1789                 return -ENOENT;
1790
1791         /* An check that it supports classes */
1792         cops = q->ops->cl_ops;
1793         if (cops == NULL)
1794                 return -EINVAL;
1795
1796         /* Now try to get class */
1797         if (clid == 0) {
1798                 if (portid == TC_H_ROOT)
1799                         clid = qid;
1800         } else
1801                 clid = TC_H_MAKE(qid, clid);
1802
1803         if (clid)
1804                 cl = cops->find(q, clid);
1805
1806         if (cl == 0) {
1807                 err = -ENOENT;
1808                 if (n->nlmsg_type != RTM_NEWTCLASS ||
1809                     !(n->nlmsg_flags & NLM_F_CREATE))
1810                         goto out;
1811         } else {
1812                 switch (n->nlmsg_type) {
1813                 case RTM_NEWTCLASS:
1814                         err = -EEXIST;
1815                         if (n->nlmsg_flags & NLM_F_EXCL)
1816                                 goto out;
1817                         break;
1818                 case RTM_DELTCLASS:
1819                         err = tclass_del_notify(net, cops, skb, n, q, cl);
1820                         /* Unbind the class with flilters with 0 */
1821                         tc_bind_tclass(q, portid, clid, 0);
1822                         goto out;
1823                 case RTM_GETTCLASS:
1824                         err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1825                         goto out;
1826                 default:
1827                         err = -EINVAL;
1828                         goto out;
1829                 }
1830         }
1831
1832         new_cl = cl;
1833         err = -EOPNOTSUPP;
1834         if (cops->change)
1835                 err = cops->change(q, clid, portid, tca, &new_cl);
1836         if (err == 0) {
1837                 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1838                 /* We just create a new class, need to do reverse binding. */
1839                 if (cl != new_cl)
1840                         tc_bind_tclass(q, portid, clid, new_cl);
1841         }
1842 out:
1843         return err;
1844 }
1845
1846 struct qdisc_dump_args {
1847         struct qdisc_walker     w;
1848         struct sk_buff          *skb;
1849         struct netlink_callback *cb;
1850 };
1851
1852 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
1853                             struct qdisc_walker *arg)
1854 {
1855         struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1856
1857         return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
1858                               a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1859                               RTM_NEWTCLASS);
1860 }
1861
1862 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1863                                 struct tcmsg *tcm, struct netlink_callback *cb,
1864                                 int *t_p, int s_t)
1865 {
1866         struct qdisc_dump_args arg;
1867
1868         if (tc_qdisc_dump_ignore(q, false) ||
1869             *t_p < s_t || !q->ops->cl_ops ||
1870             (tcm->tcm_parent &&
1871              TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1872                 (*t_p)++;
1873                 return 0;
1874         }
1875         if (*t_p > s_t)
1876                 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1877         arg.w.fn = qdisc_class_dump;
1878         arg.skb = skb;
1879         arg.cb = cb;
1880         arg.w.stop  = 0;
1881         arg.w.skip = cb->args[1];
1882         arg.w.count = 0;
1883         q->ops->cl_ops->walk(q, &arg.w);
1884         cb->args[1] = arg.w.count;
1885         if (arg.w.stop)
1886                 return -1;
1887         (*t_p)++;
1888         return 0;
1889 }
1890
1891 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1892                                struct tcmsg *tcm, struct netlink_callback *cb,
1893                                int *t_p, int s_t)
1894 {
1895         struct Qdisc *q;
1896         int b;
1897
1898         if (!root)
1899                 return 0;
1900
1901         if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1902                 return -1;
1903
1904         if (!qdisc_dev(root))
1905                 return 0;
1906
1907         if (tcm->tcm_parent) {
1908                 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
1909                 if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1910                         return -1;
1911                 return 0;
1912         }
1913         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1914                 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1915                         return -1;
1916         }
1917
1918         return 0;
1919 }
1920
1921 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1922 {
1923         struct tcmsg *tcm = nlmsg_data(cb->nlh);
1924         struct net *net = sock_net(skb->sk);
1925         struct netdev_queue *dev_queue;
1926         struct net_device *dev;
1927         int t, s_t;
1928
1929         if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1930                 return 0;
1931         dev = dev_get_by_index(net, tcm->tcm_ifindex);
1932         if (!dev)
1933                 return 0;
1934
1935         s_t = cb->args[0];
1936         t = 0;
1937
1938         if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
1939                 goto done;
1940
1941         dev_queue = dev_ingress_queue(dev);
1942         if (dev_queue &&
1943             tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1944                                 &t, s_t) < 0)
1945                 goto done;
1946
1947 done:
1948         cb->args[0] = t;
1949
1950         dev_put(dev);
1951         return skb->len;
1952 }
1953
1954 #ifdef CONFIG_PROC_FS
1955 static int psched_show(struct seq_file *seq, void *v)
1956 {
1957         seq_printf(seq, "%08x %08x %08x %08x\n",
1958                    (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
1959                    1000000,
1960                    (u32)NSEC_PER_SEC / hrtimer_resolution);
1961
1962         return 0;
1963 }
1964
1965 static int psched_open(struct inode *inode, struct file *file)
1966 {
1967         return single_open(file, psched_show, NULL);
1968 }
1969
1970 static const struct file_operations psched_fops = {
1971         .owner = THIS_MODULE,
1972         .open = psched_open,
1973         .read  = seq_read,
1974         .llseek = seq_lseek,
1975         .release = single_release,
1976 };
1977
1978 static int __net_init psched_net_init(struct net *net)
1979 {
1980         struct proc_dir_entry *e;
1981
1982         e = proc_create("psched", 0, net->proc_net, &psched_fops);
1983         if (e == NULL)
1984                 return -ENOMEM;
1985
1986         return 0;
1987 }
1988
1989 static void __net_exit psched_net_exit(struct net *net)
1990 {
1991         remove_proc_entry("psched", net->proc_net);
1992 }
1993 #else
1994 static int __net_init psched_net_init(struct net *net)
1995 {
1996         return 0;
1997 }
1998
1999 static void __net_exit psched_net_exit(struct net *net)
2000 {
2001 }
2002 #endif
2003
2004 static struct pernet_operations psched_net_ops = {
2005         .init = psched_net_init,
2006         .exit = psched_net_exit,
2007 };
2008
2009 static int __init pktsched_init(void)
2010 {
2011         int err;
2012
2013         err = register_pernet_subsys(&psched_net_ops);
2014         if (err) {
2015                 pr_err("pktsched_init: "
2016                        "cannot initialize per netns operations\n");
2017                 return err;
2018         }
2019
2020         register_qdisc(&pfifo_fast_ops);
2021         register_qdisc(&pfifo_qdisc_ops);
2022         register_qdisc(&bfifo_qdisc_ops);
2023         register_qdisc(&pfifo_head_drop_qdisc_ops);
2024         register_qdisc(&mq_qdisc_ops);
2025         register_qdisc(&noqueue_qdisc_ops);
2026
2027         rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2028         rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2029         rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2030                       0);
2031         rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2032         rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2033         rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2034                       0);
2035
2036         return 0;
2037 }
2038
2039 subsys_initcall(pktsched_init);