f6a7b876d595458ef790f92dbec489f518b987c8
[platform/kernel/linux-starfive.git] / net / sched / sch_api.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_api.c  Packet scheduler API.
4  *
5  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  * Fixes:
8  *
9  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
10  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
11  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
12  */
13
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/string.h>
18 #include <linux/errno.h>
19 #include <linux/skbuff.h>
20 #include <linux/init.h>
21 #include <linux/proc_fs.h>
22 #include <linux/seq_file.h>
23 #include <linux/kmod.h>
24 #include <linux/list.h>
25 #include <linux/hrtimer.h>
26 #include <linux/slab.h>
27 #include <linux/hashtable.h>
28
29 #include <net/net_namespace.h>
30 #include <net/sock.h>
31 #include <net/netlink.h>
32 #include <net/pkt_sched.h>
33 #include <net/pkt_cls.h>
34
35 #include <trace/events/qdisc.h>
36
37 /*
38
39    Short review.
40    -------------
41
42    This file consists of two interrelated parts:
43
44    1. queueing disciplines manager frontend.
45    2. traffic classes manager frontend.
46
47    Generally, queueing discipline ("qdisc") is a black box,
48    which is able to enqueue packets and to dequeue them (when
49    device is ready to send something) in order and at times
50    determined by algorithm hidden in it.
51
52    qdiscs are divided into two categories:
53    - "queues", which have no internal structure visible from outside.
54    - "schedulers", which split all the packets to "traffic classes",
55      using "packet classifiers" (look at cls_api.c)
56
57    In turn, classes may have child qdiscs (as rule, queues)
58    attached to them etc. etc. etc.
59
60    The goal of the routines in this file is to translate
61    information supplied by the user in the form of handles
62    into a form more intelligible to the kernel, to perform sanity
63    checks and the part of the work that is common to all qdiscs,
64    and to provide rtnetlink notifications.
65
66    All real intelligent work is done inside qdisc modules.
67
68
69
70    Every discipline has two major routines: enqueue and dequeue.
71
72    ---dequeue
73
74    dequeue usually returns a skb to send. It is allowed to return NULL,
75    but it does not mean that queue is empty, it just means that
76    discipline does not want to send anything this time.
77    Queue is really empty if q->q.qlen == 0.
78    For complicated disciplines with multiple queues q->q is not
79    real packet queue, but however q->q.qlen must be valid.
80
81    ---enqueue
82
83    enqueue returns 0, if packet was enqueued successfully.
84    If packet (this one or another one) was dropped, it returns
85    not zero error code.
86    NET_XMIT_DROP        - this packet dropped
87      Expected action: do not backoff, but wait until queue will clear.
88    NET_XMIT_CN          - probably this packet enqueued, but another one dropped.
89      Expected action: backoff or ignore
90
91    Auxiliary routines:
92
93    ---peek
94
95    like dequeue but without removing a packet from the queue
96
97    ---reset
98
99    returns qdisc to initial state: purge all buffers, clear all
100    timers, counters (except for statistics) etc.
101
102    ---init
103
104    initializes newly created qdisc.
105
106    ---destroy
107
108    destroys resources allocated by init and during lifetime of qdisc.
109
110    ---change
111
112    changes qdisc parameters.
113  */
114
115 /* Protects list of registered TC modules. It is pure SMP lock. */
116 static DEFINE_RWLOCK(qdisc_mod_lock);
117
118
119 /************************************************
120  *      Queueing disciplines manipulation.      *
121  ************************************************/
122
123
124 /* The list of all installed queueing disciplines. */
125
126 static struct Qdisc_ops *qdisc_base;
127
128 /* Register/unregister queueing discipline */
129
130 int register_qdisc(struct Qdisc_ops *qops)
131 {
132         struct Qdisc_ops *q, **qp;
133         int rc = -EEXIST;
134
135         write_lock(&qdisc_mod_lock);
136         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
137                 if (!strcmp(qops->id, q->id))
138                         goto out;
139
140         if (qops->enqueue == NULL)
141                 qops->enqueue = noop_qdisc_ops.enqueue;
142         if (qops->peek == NULL) {
143                 if (qops->dequeue == NULL)
144                         qops->peek = noop_qdisc_ops.peek;
145                 else
146                         goto out_einval;
147         }
148         if (qops->dequeue == NULL)
149                 qops->dequeue = noop_qdisc_ops.dequeue;
150
151         if (qops->cl_ops) {
152                 const struct Qdisc_class_ops *cops = qops->cl_ops;
153
154                 if (!(cops->find && cops->walk && cops->leaf))
155                         goto out_einval;
156
157                 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
158                         goto out_einval;
159         }
160
161         qops->next = NULL;
162         *qp = qops;
163         rc = 0;
164 out:
165         write_unlock(&qdisc_mod_lock);
166         return rc;
167
168 out_einval:
169         rc = -EINVAL;
170         goto out;
171 }
172 EXPORT_SYMBOL(register_qdisc);
173
174 void unregister_qdisc(struct Qdisc_ops *qops)
175 {
176         struct Qdisc_ops *q, **qp;
177         int err = -ENOENT;
178
179         write_lock(&qdisc_mod_lock);
180         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
181                 if (q == qops)
182                         break;
183         if (q) {
184                 *qp = q->next;
185                 q->next = NULL;
186                 err = 0;
187         }
188         write_unlock(&qdisc_mod_lock);
189
190         WARN(err, "unregister qdisc(%s) failed\n", qops->id);
191 }
192 EXPORT_SYMBOL(unregister_qdisc);
193
194 /* Get default qdisc if not otherwise specified */
195 void qdisc_get_default(char *name, size_t len)
196 {
197         read_lock(&qdisc_mod_lock);
198         strscpy(name, default_qdisc_ops->id, len);
199         read_unlock(&qdisc_mod_lock);
200 }
201
202 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
203 {
204         struct Qdisc_ops *q = NULL;
205
206         for (q = qdisc_base; q; q = q->next) {
207                 if (!strcmp(name, q->id)) {
208                         if (!try_module_get(q->owner))
209                                 q = NULL;
210                         break;
211                 }
212         }
213
214         return q;
215 }
216
217 /* Set new default qdisc to use */
218 int qdisc_set_default(const char *name)
219 {
220         const struct Qdisc_ops *ops;
221
222         if (!capable(CAP_NET_ADMIN))
223                 return -EPERM;
224
225         write_lock(&qdisc_mod_lock);
226         ops = qdisc_lookup_default(name);
227         if (!ops) {
228                 /* Not found, drop lock and try to load module */
229                 write_unlock(&qdisc_mod_lock);
230                 request_module("sch_%s", name);
231                 write_lock(&qdisc_mod_lock);
232
233                 ops = qdisc_lookup_default(name);
234         }
235
236         if (ops) {
237                 /* Set new default */
238                 module_put(default_qdisc_ops->owner);
239                 default_qdisc_ops = ops;
240         }
241         write_unlock(&qdisc_mod_lock);
242
243         return ops ? 0 : -ENOENT;
244 }
245
246 #ifdef CONFIG_NET_SCH_DEFAULT
247 /* Set default value from kernel config */
248 static int __init sch_default_qdisc(void)
249 {
250         return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
251 }
252 late_initcall(sch_default_qdisc);
253 #endif
254
255 /* We know handle. Find qdisc among all qdisc's attached to device
256  * (root qdisc, all its children, children of children etc.)
257  * Note: caller either uses rtnl or rcu_read_lock()
258  */
259
260 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
261 {
262         struct Qdisc *q;
263
264         if (!qdisc_dev(root))
265                 return (root->handle == handle ? root : NULL);
266
267         if (!(root->flags & TCQ_F_BUILTIN) &&
268             root->handle == handle)
269                 return root;
270
271         hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
272                                    lockdep_rtnl_is_held()) {
273                 if (q->handle == handle)
274                         return q;
275         }
276         return NULL;
277 }
278
279 void qdisc_hash_add(struct Qdisc *q, bool invisible)
280 {
281         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
282                 ASSERT_RTNL();
283                 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
284                 if (invisible)
285                         q->flags |= TCQ_F_INVISIBLE;
286         }
287 }
288 EXPORT_SYMBOL(qdisc_hash_add);
289
290 void qdisc_hash_del(struct Qdisc *q)
291 {
292         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
293                 ASSERT_RTNL();
294                 hash_del_rcu(&q->hash);
295         }
296 }
297 EXPORT_SYMBOL(qdisc_hash_del);
298
299 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
300 {
301         struct Qdisc *q;
302
303         if (!handle)
304                 return NULL;
305         q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
306         if (q)
307                 goto out;
308
309         if (dev_ingress_queue(dev))
310                 q = qdisc_match_from_root(
311                         dev_ingress_queue(dev)->qdisc_sleeping,
312                         handle);
313 out:
314         return q;
315 }
316
317 struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
318 {
319         struct netdev_queue *nq;
320         struct Qdisc *q;
321
322         if (!handle)
323                 return NULL;
324         q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
325         if (q)
326                 goto out;
327
328         nq = dev_ingress_queue_rcu(dev);
329         if (nq)
330                 q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
331 out:
332         return q;
333 }
334
335 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
336 {
337         unsigned long cl;
338         const struct Qdisc_class_ops *cops = p->ops->cl_ops;
339
340         if (cops == NULL)
341                 return NULL;
342         cl = cops->find(p, classid);
343
344         if (cl == 0)
345                 return NULL;
346         return cops->leaf(p, cl);
347 }
348
349 /* Find queueing discipline by name */
350
351 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
352 {
353         struct Qdisc_ops *q = NULL;
354
355         if (kind) {
356                 read_lock(&qdisc_mod_lock);
357                 for (q = qdisc_base; q; q = q->next) {
358                         if (nla_strcmp(kind, q->id) == 0) {
359                                 if (!try_module_get(q->owner))
360                                         q = NULL;
361                                 break;
362                         }
363                 }
364                 read_unlock(&qdisc_mod_lock);
365         }
366         return q;
367 }
368
369 /* The linklayer setting was not transferred from iproute2 in older
370  * versions, and the rate table lookup systems have been dropped in
371  * the kernel. To stay backward compatible with older iproute2 tc
372  * utils, we detect the linklayer setting by detecting whether the rate
373  * table was modified.
374  *
375  * For linklayer ATM table entries, the rate table will be aligned to
376  * 48 bytes, thus some table entries will contain the same value.  The
377  * mpu (min packet unit) is also encoded into the old rate table, thus
378  * starting from the mpu, we find low and high table entries for
379  * mapping this cell.  If these entries contain the same value, then
380  * the rate tables have been modified for linklayer ATM.
381  *
382  * This is done by rounding mpu to the nearest 48 bytes cell/entry,
383  * and then roundup to the next cell, calc the table entry one below,
384  * and compare.
385  */
386 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
387 {
388         int low       = roundup(r->mpu, 48);
389         int high      = roundup(low+1, 48);
390         int cell_low  = low >> r->cell_log;
391         int cell_high = (high >> r->cell_log) - 1;
392
393         /* rtab is too inaccurate at rates > 100Mbit/s */
394         if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
395                 pr_debug("TC linklayer: Giving up ATM detection\n");
396                 return TC_LINKLAYER_ETHERNET;
397         }
398
399         if ((cell_high > cell_low) && (cell_high < 256)
400             && (rtab[cell_low] == rtab[cell_high])) {
401                 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
402                          cell_low, cell_high, rtab[cell_high]);
403                 return TC_LINKLAYER_ATM;
404         }
405         return TC_LINKLAYER_ETHERNET;
406 }
407
408 static struct qdisc_rate_table *qdisc_rtab_list;
409
410 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
411                                         struct nlattr *tab,
412                                         struct netlink_ext_ack *extack)
413 {
414         struct qdisc_rate_table *rtab;
415
416         if (tab == NULL || r->rate == 0 ||
417             r->cell_log == 0 || r->cell_log >= 32 ||
418             nla_len(tab) != TC_RTAB_SIZE) {
419                 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
420                 return NULL;
421         }
422
423         for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
424                 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
425                     !memcmp(&rtab->data, nla_data(tab), 1024)) {
426                         rtab->refcnt++;
427                         return rtab;
428                 }
429         }
430
431         rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
432         if (rtab) {
433                 rtab->rate = *r;
434                 rtab->refcnt = 1;
435                 memcpy(rtab->data, nla_data(tab), 1024);
436                 if (r->linklayer == TC_LINKLAYER_UNAWARE)
437                         r->linklayer = __detect_linklayer(r, rtab->data);
438                 rtab->next = qdisc_rtab_list;
439                 qdisc_rtab_list = rtab;
440         } else {
441                 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
442         }
443         return rtab;
444 }
445 EXPORT_SYMBOL(qdisc_get_rtab);
446
447 void qdisc_put_rtab(struct qdisc_rate_table *tab)
448 {
449         struct qdisc_rate_table *rtab, **rtabp;
450
451         if (!tab || --tab->refcnt)
452                 return;
453
454         for (rtabp = &qdisc_rtab_list;
455              (rtab = *rtabp) != NULL;
456              rtabp = &rtab->next) {
457                 if (rtab == tab) {
458                         *rtabp = rtab->next;
459                         kfree(rtab);
460                         return;
461                 }
462         }
463 }
464 EXPORT_SYMBOL(qdisc_put_rtab);
465
466 static LIST_HEAD(qdisc_stab_list);
467
468 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
469         [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
470         [TCA_STAB_DATA] = { .type = NLA_BINARY },
471 };
472
473 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
474                                                struct netlink_ext_ack *extack)
475 {
476         struct nlattr *tb[TCA_STAB_MAX + 1];
477         struct qdisc_size_table *stab;
478         struct tc_sizespec *s;
479         unsigned int tsize = 0;
480         u16 *tab = NULL;
481         int err;
482
483         err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
484                                           extack);
485         if (err < 0)
486                 return ERR_PTR(err);
487         if (!tb[TCA_STAB_BASE]) {
488                 NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
489                 return ERR_PTR(-EINVAL);
490         }
491
492         s = nla_data(tb[TCA_STAB_BASE]);
493
494         if (s->tsize > 0) {
495                 if (!tb[TCA_STAB_DATA]) {
496                         NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
497                         return ERR_PTR(-EINVAL);
498                 }
499                 tab = nla_data(tb[TCA_STAB_DATA]);
500                 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
501         }
502
503         if (tsize != s->tsize || (!tab && tsize > 0)) {
504                 NL_SET_ERR_MSG(extack, "Invalid size of size table");
505                 return ERR_PTR(-EINVAL);
506         }
507
508         list_for_each_entry(stab, &qdisc_stab_list, list) {
509                 if (memcmp(&stab->szopts, s, sizeof(*s)))
510                         continue;
511                 if (tsize > 0 &&
512                     memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
513                         continue;
514                 stab->refcnt++;
515                 return stab;
516         }
517
518         if (s->size_log > STAB_SIZE_LOG_MAX ||
519             s->cell_log > STAB_SIZE_LOG_MAX) {
520                 NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
521                 return ERR_PTR(-EINVAL);
522         }
523
524         stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
525         if (!stab)
526                 return ERR_PTR(-ENOMEM);
527
528         stab->refcnt = 1;
529         stab->szopts = *s;
530         if (tsize > 0)
531                 memcpy(stab->data, tab, flex_array_size(stab, data, tsize));
532
533         list_add_tail(&stab->list, &qdisc_stab_list);
534
535         return stab;
536 }
537
538 void qdisc_put_stab(struct qdisc_size_table *tab)
539 {
540         if (!tab)
541                 return;
542
543         if (--tab->refcnt == 0) {
544                 list_del(&tab->list);
545                 kfree_rcu(tab, rcu);
546         }
547 }
548 EXPORT_SYMBOL(qdisc_put_stab);
549
550 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
551 {
552         struct nlattr *nest;
553
554         nest = nla_nest_start_noflag(skb, TCA_STAB);
555         if (nest == NULL)
556                 goto nla_put_failure;
557         if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
558                 goto nla_put_failure;
559         nla_nest_end(skb, nest);
560
561         return skb->len;
562
563 nla_put_failure:
564         return -1;
565 }
566
567 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
568                                const struct qdisc_size_table *stab)
569 {
570         int pkt_len, slot;
571
572         pkt_len = skb->len + stab->szopts.overhead;
573         if (unlikely(!stab->szopts.tsize))
574                 goto out;
575
576         slot = pkt_len + stab->szopts.cell_align;
577         if (unlikely(slot < 0))
578                 slot = 0;
579
580         slot >>= stab->szopts.cell_log;
581         if (likely(slot < stab->szopts.tsize))
582                 pkt_len = stab->data[slot];
583         else
584                 pkt_len = stab->data[stab->szopts.tsize - 1] *
585                                 (slot / stab->szopts.tsize) +
586                                 stab->data[slot % stab->szopts.tsize];
587
588         pkt_len <<= stab->szopts.size_log;
589 out:
590         if (unlikely(pkt_len < 1))
591                 pkt_len = 1;
592         qdisc_skb_cb(skb)->pkt_len = pkt_len;
593 }
594 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
595
596 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
597 {
598         if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
599                 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
600                         txt, qdisc->ops->id, qdisc->handle >> 16);
601                 qdisc->flags |= TCQ_F_WARN_NONWC;
602         }
603 }
604 EXPORT_SYMBOL(qdisc_warn_nonwc);
605
606 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
607 {
608         struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
609                                                  timer);
610
611         rcu_read_lock();
612         __netif_schedule(qdisc_root(wd->qdisc));
613         rcu_read_unlock();
614
615         return HRTIMER_NORESTART;
616 }
617
618 void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
619                                  clockid_t clockid)
620 {
621         hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
622         wd->timer.function = qdisc_watchdog;
623         wd->qdisc = qdisc;
624 }
625 EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
626
627 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
628 {
629         qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
630 }
631 EXPORT_SYMBOL(qdisc_watchdog_init);
632
633 void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
634                                       u64 delta_ns)
635 {
636         if (test_bit(__QDISC_STATE_DEACTIVATED,
637                      &qdisc_root_sleeping(wd->qdisc)->state))
638                 return;
639
640         if (hrtimer_is_queued(&wd->timer)) {
641                 /* If timer is already set in [expires, expires + delta_ns],
642                  * do not reprogram it.
643                  */
644                 if (wd->last_expires - expires <= delta_ns)
645                         return;
646         }
647
648         wd->last_expires = expires;
649         hrtimer_start_range_ns(&wd->timer,
650                                ns_to_ktime(expires),
651                                delta_ns,
652                                HRTIMER_MODE_ABS_PINNED);
653 }
654 EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
655
656 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
657 {
658         hrtimer_cancel(&wd->timer);
659 }
660 EXPORT_SYMBOL(qdisc_watchdog_cancel);
661
662 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
663 {
664         struct hlist_head *h;
665         unsigned int i;
666
667         h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
668
669         if (h != NULL) {
670                 for (i = 0; i < n; i++)
671                         INIT_HLIST_HEAD(&h[i]);
672         }
673         return h;
674 }
675
676 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
677 {
678         struct Qdisc_class_common *cl;
679         struct hlist_node *next;
680         struct hlist_head *nhash, *ohash;
681         unsigned int nsize, nmask, osize;
682         unsigned int i, h;
683
684         /* Rehash when load factor exceeds 0.75 */
685         if (clhash->hashelems * 4 <= clhash->hashsize * 3)
686                 return;
687         nsize = clhash->hashsize * 2;
688         nmask = nsize - 1;
689         nhash = qdisc_class_hash_alloc(nsize);
690         if (nhash == NULL)
691                 return;
692
693         ohash = clhash->hash;
694         osize = clhash->hashsize;
695
696         sch_tree_lock(sch);
697         for (i = 0; i < osize; i++) {
698                 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
699                         h = qdisc_class_hash(cl->classid, nmask);
700                         hlist_add_head(&cl->hnode, &nhash[h]);
701                 }
702         }
703         clhash->hash     = nhash;
704         clhash->hashsize = nsize;
705         clhash->hashmask = nmask;
706         sch_tree_unlock(sch);
707
708         kvfree(ohash);
709 }
710 EXPORT_SYMBOL(qdisc_class_hash_grow);
711
712 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
713 {
714         unsigned int size = 4;
715
716         clhash->hash = qdisc_class_hash_alloc(size);
717         if (!clhash->hash)
718                 return -ENOMEM;
719         clhash->hashsize  = size;
720         clhash->hashmask  = size - 1;
721         clhash->hashelems = 0;
722         return 0;
723 }
724 EXPORT_SYMBOL(qdisc_class_hash_init);
725
726 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
727 {
728         kvfree(clhash->hash);
729 }
730 EXPORT_SYMBOL(qdisc_class_hash_destroy);
731
732 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
733                              struct Qdisc_class_common *cl)
734 {
735         unsigned int h;
736
737         INIT_HLIST_NODE(&cl->hnode);
738         h = qdisc_class_hash(cl->classid, clhash->hashmask);
739         hlist_add_head(&cl->hnode, &clhash->hash[h]);
740         clhash->hashelems++;
741 }
742 EXPORT_SYMBOL(qdisc_class_hash_insert);
743
744 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
745                              struct Qdisc_class_common *cl)
746 {
747         hlist_del(&cl->hnode);
748         clhash->hashelems--;
749 }
750 EXPORT_SYMBOL(qdisc_class_hash_remove);
751
752 /* Allocate an unique handle from space managed by kernel
753  * Possible range is [8000-FFFF]:0000 (0x8000 values)
754  */
755 static u32 qdisc_alloc_handle(struct net_device *dev)
756 {
757         int i = 0x8000;
758         static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
759
760         do {
761                 autohandle += TC_H_MAKE(0x10000U, 0);
762                 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
763                         autohandle = TC_H_MAKE(0x80000000U, 0);
764                 if (!qdisc_lookup(dev, autohandle))
765                         return autohandle;
766                 cond_resched();
767         } while (--i > 0);
768
769         return 0;
770 }
771
772 void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
773 {
774         bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
775         const struct Qdisc_class_ops *cops;
776         unsigned long cl;
777         u32 parentid;
778         bool notify;
779         int drops;
780
781         if (n == 0 && len == 0)
782                 return;
783         drops = max_t(int, n, 0);
784         rcu_read_lock();
785         while ((parentid = sch->parent)) {
786                 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
787                         break;
788
789                 if (sch->flags & TCQ_F_NOPARENT)
790                         break;
791                 /* Notify parent qdisc only if child qdisc becomes empty.
792                  *
793                  * If child was empty even before update then backlog
794                  * counter is screwed and we skip notification because
795                  * parent class is already passive.
796                  *
797                  * If the original child was offloaded then it is allowed
798                  * to be seem as empty, so the parent is notified anyway.
799                  */
800                 notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
801                                                        !qdisc_is_offloaded);
802                 /* TODO: perform the search on a per txq basis */
803                 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
804                 if (sch == NULL) {
805                         WARN_ON_ONCE(parentid != TC_H_ROOT);
806                         break;
807                 }
808                 cops = sch->ops->cl_ops;
809                 if (notify && cops->qlen_notify) {
810                         cl = cops->find(sch, parentid);
811                         cops->qlen_notify(sch, cl);
812                 }
813                 sch->q.qlen -= n;
814                 sch->qstats.backlog -= len;
815                 __qdisc_qstats_drop(sch, drops);
816         }
817         rcu_read_unlock();
818 }
819 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
820
821 int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
822                               void *type_data)
823 {
824         struct net_device *dev = qdisc_dev(sch);
825         int err;
826
827         sch->flags &= ~TCQ_F_OFFLOADED;
828         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
829                 return 0;
830
831         err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
832         if (err == -EOPNOTSUPP)
833                 return 0;
834
835         if (!err)
836                 sch->flags |= TCQ_F_OFFLOADED;
837
838         return err;
839 }
840 EXPORT_SYMBOL(qdisc_offload_dump_helper);
841
842 void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
843                                 struct Qdisc *new, struct Qdisc *old,
844                                 enum tc_setup_type type, void *type_data,
845                                 struct netlink_ext_ack *extack)
846 {
847         bool any_qdisc_is_offloaded;
848         int err;
849
850         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
851                 return;
852
853         err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
854
855         /* Don't report error if the graft is part of destroy operation. */
856         if (!err || !new || new == &noop_qdisc)
857                 return;
858
859         /* Don't report error if the parent, the old child and the new
860          * one are not offloaded.
861          */
862         any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
863         any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
864         any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
865
866         if (any_qdisc_is_offloaded)
867                 NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
868 }
869 EXPORT_SYMBOL(qdisc_offload_graft_helper);
870
871 void qdisc_offload_query_caps(struct net_device *dev,
872                               enum tc_setup_type type,
873                               void *caps, size_t caps_len)
874 {
875         const struct net_device_ops *ops = dev->netdev_ops;
876         struct tc_query_caps_base base = {
877                 .type = type,
878                 .caps = caps,
879         };
880
881         memset(caps, 0, caps_len);
882
883         if (ops->ndo_setup_tc)
884                 ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
885 }
886 EXPORT_SYMBOL(qdisc_offload_query_caps);
887
888 static void qdisc_offload_graft_root(struct net_device *dev,
889                                      struct Qdisc *new, struct Qdisc *old,
890                                      struct netlink_ext_ack *extack)
891 {
892         struct tc_root_qopt_offload graft_offload = {
893                 .command        = TC_ROOT_GRAFT,
894                 .handle         = new ? new->handle : 0,
895                 .ingress        = (new && new->flags & TCQ_F_INGRESS) ||
896                                   (old && old->flags & TCQ_F_INGRESS),
897         };
898
899         qdisc_offload_graft_helper(dev, NULL, new, old,
900                                    TC_SETUP_ROOT_QDISC, &graft_offload, extack);
901 }
902
903 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
904                          u32 portid, u32 seq, u16 flags, int event)
905 {
906         struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
907         struct gnet_stats_queue __percpu *cpu_qstats = NULL;
908         struct tcmsg *tcm;
909         struct nlmsghdr  *nlh;
910         unsigned char *b = skb_tail_pointer(skb);
911         struct gnet_dump d;
912         struct qdisc_size_table *stab;
913         u32 block_index;
914         __u32 qlen;
915
916         cond_resched();
917         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
918         if (!nlh)
919                 goto out_nlmsg_trim;
920         tcm = nlmsg_data(nlh);
921         tcm->tcm_family = AF_UNSPEC;
922         tcm->tcm__pad1 = 0;
923         tcm->tcm__pad2 = 0;
924         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
925         tcm->tcm_parent = clid;
926         tcm->tcm_handle = q->handle;
927         tcm->tcm_info = refcount_read(&q->refcnt);
928         if (nla_put_string(skb, TCA_KIND, q->ops->id))
929                 goto nla_put_failure;
930         if (q->ops->ingress_block_get) {
931                 block_index = q->ops->ingress_block_get(q);
932                 if (block_index &&
933                     nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
934                         goto nla_put_failure;
935         }
936         if (q->ops->egress_block_get) {
937                 block_index = q->ops->egress_block_get(q);
938                 if (block_index &&
939                     nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
940                         goto nla_put_failure;
941         }
942         if (q->ops->dump && q->ops->dump(q, skb) < 0)
943                 goto nla_put_failure;
944         if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
945                 goto nla_put_failure;
946         qlen = qdisc_qlen_sum(q);
947
948         stab = rtnl_dereference(q->stab);
949         if (stab && qdisc_dump_stab(skb, stab) < 0)
950                 goto nla_put_failure;
951
952         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
953                                          NULL, &d, TCA_PAD) < 0)
954                 goto nla_put_failure;
955
956         if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
957                 goto nla_put_failure;
958
959         if (qdisc_is_percpu_stats(q)) {
960                 cpu_bstats = q->cpu_bstats;
961                 cpu_qstats = q->cpu_qstats;
962         }
963
964         if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
965             gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
966             gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
967                 goto nla_put_failure;
968
969         if (gnet_stats_finish_copy(&d) < 0)
970                 goto nla_put_failure;
971
972         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
973         return skb->len;
974
975 out_nlmsg_trim:
976 nla_put_failure:
977         nlmsg_trim(skb, b);
978         return -1;
979 }
980
981 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
982 {
983         if (q->flags & TCQ_F_BUILTIN)
984                 return true;
985         if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
986                 return true;
987
988         return false;
989 }
990
991 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
992                         struct nlmsghdr *n, u32 clid,
993                         struct Qdisc *old, struct Qdisc *new)
994 {
995         struct sk_buff *skb;
996         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
997
998         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
999         if (!skb)
1000                 return -ENOBUFS;
1001
1002         if (old && !tc_qdisc_dump_ignore(old, false)) {
1003                 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
1004                                   0, RTM_DELQDISC) < 0)
1005                         goto err_out;
1006         }
1007         if (new && !tc_qdisc_dump_ignore(new, false)) {
1008                 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
1009                                   old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1010                         goto err_out;
1011         }
1012
1013         if (skb->len)
1014                 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1015                                       n->nlmsg_flags & NLM_F_ECHO);
1016
1017 err_out:
1018         kfree_skb(skb);
1019         return -EINVAL;
1020 }
1021
1022 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
1023                                struct nlmsghdr *n, u32 clid,
1024                                struct Qdisc *old, struct Qdisc *new)
1025 {
1026         if (new || old)
1027                 qdisc_notify(net, skb, n, clid, old, new);
1028
1029         if (old)
1030                 qdisc_put(old);
1031 }
1032
1033 static void qdisc_clear_nolock(struct Qdisc *sch)
1034 {
1035         sch->flags &= ~TCQ_F_NOLOCK;
1036         if (!(sch->flags & TCQ_F_CPUSTATS))
1037                 return;
1038
1039         free_percpu(sch->cpu_bstats);
1040         free_percpu(sch->cpu_qstats);
1041         sch->cpu_bstats = NULL;
1042         sch->cpu_qstats = NULL;
1043         sch->flags &= ~TCQ_F_CPUSTATS;
1044 }
1045
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * With parent == NULL the graft happens at device level: either on the
 * single ingress queue (when "old" or "new" carries TCQ_F_INGRESS) or
 * on the device's TX queues / dev->qdisc.  Otherwise the graft is
 * delegated to the parent's class operations.
 *
 * "new" may be NULL, which detaches the current qdisc (the device root
 * then becomes &noop_qdisc).
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, the reference on the old qdisc is dropped (through
 * notify_and_destroy() and, for TX queues, qdisc_put()).
 *
 * Returns 0 on success; -ENOENT if the device has no ingress queue or
 * the class does not exist, -EOPNOTSUPP if the parent cannot graft,
 * -EINVAL for noqueue on a class, or the error from ->graft().
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
                       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
                       struct Qdisc *new, struct Qdisc *old,
                       struct netlink_ext_ack *extack)
{
        struct Qdisc *q = old;
        struct net *net = dev_net(dev);

        if (parent == NULL) {
                unsigned int i, num_q, ingress;

                ingress = 0;
                num_q = dev->num_tx_queues;
                /* An ingress graft touches exactly one queue. */
                if ((q && q->flags & TCQ_F_INGRESS) ||
                    (new && new->flags & TCQ_F_INGRESS)) {
                        num_q = 1;
                        ingress = 1;
                        if (!dev_ingress_queue(dev)) {
                                NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
                                return -ENOENT;
                        }
                }

                /* The device is deactivated for the duration of the graft
                 * and re-activated at the end of this branch.
                 */
                if (dev->flags & IFF_UP)
                        dev_deactivate(dev);

                qdisc_offload_graft_root(dev, new, old, extack);

                /* A non-ingress qdisc with ->attach() is not installed on
                 * the queues here; its ->attach() is called below instead.
                 */
                if (new && new->ops->attach && !ingress)
                        goto skip;

                for (i = 0; i < num_q; i++) {
                        struct netdev_queue *dev_queue = dev_ingress_queue(dev);

                        if (!ingress)
                                dev_queue = netdev_get_tx_queue(dev, i);

                        /* The return value is treated as the qdisc that
                         * was on this queue before.
                         */
                        old = dev_graft_qdisc(dev_queue, new);
                        /* One extra reference on "new" per additional queue. */
                        if (new && i > 0)
                                qdisc_refcount_inc(new);

                        /* For ingress, "old" is kept for the notification
                         * below rather than released here.
                         */
                        if (!ingress)
                                qdisc_put(old);
                }

skip:
                if (!ingress) {
                        /* Swap the device root pointer; the previous root is
                         * what gets notified about and released.
                         */
                        old = rtnl_dereference(dev->qdisc);
                        /* dev->qdisc holds its own reference unless the
                         * qdisc attaches itself via ->attach().
                         */
                        if (new && !new->ops->attach)
                                qdisc_refcount_inc(new);
                        rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);

                        notify_and_destroy(net, skb, n, classid, old, new);

                        if (new && new->ops->attach)
                                new->ops->attach(new);
                } else {
                        notify_and_destroy(net, skb, n, classid, old, new);
                }

                if (dev->flags & IFF_UP)
                        dev_activate(dev);
        } else {
                const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
                unsigned long cl;
                int err;

                /* Only support running class lockless if parent is lockless */
                /* NOTE(review): this runs before the checks below, so "new"
                 * is modified even when the graft is then rejected.
                 */
                if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
                        qdisc_clear_nolock(new);

                if (!cops || !cops->graft)
                        return -EOPNOTSUPP;

                cl = cops->find(parent, classid);
                if (!cl) {
                        NL_SET_ERR_MSG(extack, "Specified class not found");
                        return -ENOENT;
                }

                if (new && new->ops == &noqueue_qdisc_ops) {
                        NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
                        return -EINVAL;
                }

                /* ->graft() reports the replaced qdisc through &old. */
                err = cops->graft(parent, cl, new, &old, extack);
                if (err)
                        return err;
                notify_and_destroy(net, skb, n, classid, old, new);
        }
        return 0;
}
1147
1148 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1149                                    struct netlink_ext_ack *extack)
1150 {
1151         u32 block_index;
1152
1153         if (tca[TCA_INGRESS_BLOCK]) {
1154                 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1155
1156                 if (!block_index) {
1157                         NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1158                         return -EINVAL;
1159                 }
1160                 if (!sch->ops->ingress_block_set) {
1161                         NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1162                         return -EOPNOTSUPP;
1163                 }
1164                 sch->ops->ingress_block_set(sch, block_index);
1165         }
1166         if (tca[TCA_EGRESS_BLOCK]) {
1167                 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1168
1169                 if (!block_index) {
1170                         NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1171                         return -EINVAL;
1172                 }
1173                 if (!sch->ops->egress_block_set) {
1174                         NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1175                         return -EOPNOTSUPP;
1176                 }
1177                 sch->ops->egress_block_set(sch, block_index);
1178         }
1179         return 0;
1180 }
1181
1182 /*
1183    Allocate and initialize new qdisc.
1184
1185    Parameters are passed via opt.
1186  */
1187
1188 static struct Qdisc *qdisc_create(struct net_device *dev,
1189                                   struct netdev_queue *dev_queue,
1190                                   u32 parent, u32 handle,
1191                                   struct nlattr **tca, int *errp,
1192                                   struct netlink_ext_ack *extack)
1193 {
1194         int err;
1195         struct nlattr *kind = tca[TCA_KIND];
1196         struct Qdisc *sch;
1197         struct Qdisc_ops *ops;
1198         struct qdisc_size_table *stab;
1199
1200         ops = qdisc_lookup_ops(kind);
1201 #ifdef CONFIG_MODULES
1202         if (ops == NULL && kind != NULL) {
1203                 char name[IFNAMSIZ];
1204                 if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
1205                         /* We dropped the RTNL semaphore in order to
1206                          * perform the module load.  So, even if we
1207                          * succeeded in loading the module we have to
1208                          * tell the caller to replay the request.  We
1209                          * indicate this using -EAGAIN.
1210                          * We replay the request because the device may
1211                          * go away in the mean time.
1212                          */
1213                         rtnl_unlock();
1214                         request_module("sch_%s", name);
1215                         rtnl_lock();
1216                         ops = qdisc_lookup_ops(kind);
1217                         if (ops != NULL) {
1218                                 /* We will try again qdisc_lookup_ops,
1219                                  * so don't keep a reference.
1220                                  */
1221                                 module_put(ops->owner);
1222                                 err = -EAGAIN;
1223                                 goto err_out;
1224                         }
1225                 }
1226         }
1227 #endif
1228
1229         err = -ENOENT;
1230         if (!ops) {
1231                 NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
1232                 goto err_out;
1233         }
1234
1235         sch = qdisc_alloc(dev_queue, ops, extack);
1236         if (IS_ERR(sch)) {
1237                 err = PTR_ERR(sch);
1238                 goto err_out2;
1239         }
1240
1241         sch->parent = parent;
1242
1243         if (handle == TC_H_INGRESS) {
1244                 if (!(sch->flags & TCQ_F_INGRESS)) {
1245                         NL_SET_ERR_MSG(extack,
1246                                        "Specified parent ID is reserved for ingress and clsact Qdiscs");
1247                         err = -EINVAL;
1248                         goto err_out3;
1249                 }
1250                 handle = TC_H_MAKE(TC_H_INGRESS, 0);
1251         } else {
1252                 if (handle == 0) {
1253                         handle = qdisc_alloc_handle(dev);
1254                         if (handle == 0) {
1255                                 NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
1256                                 err = -ENOSPC;
1257                                 goto err_out3;
1258                         }
1259                 }
1260                 if (!netif_is_multiqueue(dev))
1261                         sch->flags |= TCQ_F_ONETXQUEUE;
1262         }
1263
1264         sch->handle = handle;
1265
1266         /* This exist to keep backward compatible with a userspace
1267          * loophole, what allowed userspace to get IFF_NO_QUEUE
1268          * facility on older kernels by setting tx_queue_len=0 (prior
1269          * to qdisc init), and then forgot to reinit tx_queue_len
1270          * before again attaching a qdisc.
1271          */
1272         if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1273                 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1274                 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1275         }
1276
1277         err = qdisc_block_indexes_set(sch, tca, extack);
1278         if (err)
1279                 goto err_out3;
1280
1281         if (ops->init) {
1282                 err = ops->init(sch, tca[TCA_OPTIONS], extack);
1283                 if (err != 0)
1284                         goto err_out5;
1285         }
1286
1287         if (tca[TCA_STAB]) {
1288                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1289                 if (IS_ERR(stab)) {
1290                         err = PTR_ERR(stab);
1291                         goto err_out4;
1292                 }
1293                 rcu_assign_pointer(sch->stab, stab);
1294         }
1295         if (tca[TCA_RATE]) {
1296                 err = -EOPNOTSUPP;
1297                 if (sch->flags & TCQ_F_MQROOT) {
1298                         NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
1299                         goto err_out4;
1300                 }
1301
1302                 err = gen_new_estimator(&sch->bstats,
1303                                         sch->cpu_bstats,
1304                                         &sch->rate_est,
1305                                         NULL,
1306                                         true,
1307                                         tca[TCA_RATE]);
1308                 if (err) {
1309                         NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
1310                         goto err_out4;
1311                 }
1312         }
1313
1314         qdisc_hash_add(sch, false);
1315         trace_qdisc_create(ops, dev, parent);
1316
1317         return sch;
1318
1319 err_out5:
1320         /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1321         if (ops->destroy)
1322                 ops->destroy(sch);
1323 err_out3:
1324         netdev_put(dev, &sch->dev_tracker);
1325         qdisc_free(sch);
1326 err_out2:
1327         module_put(ops->owner);
1328 err_out:
1329         *errp = err;
1330         return NULL;
1331
1332 err_out4:
1333         /*
1334          * Any broken qdiscs that would require a ops->reset() here?
1335          * The qdisc was never in action so it shouldn't be necessary.
1336          */
1337         qdisc_put_stab(rtnl_dereference(sch->stab));
1338         if (ops->destroy)
1339                 ops->destroy(sch);
1340         goto err_out3;
1341 }
1342
1343 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1344                         struct netlink_ext_ack *extack)
1345 {
1346         struct qdisc_size_table *ostab, *stab = NULL;
1347         int err = 0;
1348
1349         if (tca[TCA_OPTIONS]) {
1350                 if (!sch->ops->change) {
1351                         NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1352                         return -EINVAL;
1353                 }
1354                 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1355                         NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1356                         return -EOPNOTSUPP;
1357                 }
1358                 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1359                 if (err)
1360                         return err;
1361         }
1362
1363         if (tca[TCA_STAB]) {
1364                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1365                 if (IS_ERR(stab))
1366                         return PTR_ERR(stab);
1367         }
1368
1369         ostab = rtnl_dereference(sch->stab);
1370         rcu_assign_pointer(sch->stab, stab);
1371         qdisc_put_stab(ostab);
1372
1373         if (tca[TCA_RATE]) {
1374                 /* NB: ignores errors from replace_estimator
1375                    because change can't be undone. */
1376                 if (sch->flags & TCQ_F_MQROOT)
1377                         goto out;
1378                 gen_replace_estimator(&sch->bstats,
1379                                       sch->cpu_bstats,
1380                                       &sch->rate_est,
1381                                       NULL,
1382                                       true,
1383                                       tca[TCA_RATE]);
1384         }
1385 out:
1386         return 0;
1387 }
1388
/* State carried through a class walk by check_loop()/check_loop_fn(). */
struct check_loop_arg {
        /* Must stay the first member: check_loop_fn() casts the walker
         * pointer it receives back to struct check_loop_arg.
         */
        struct qdisc_walker     w;
        /* Qdisc that must not be found among the leaves. */
        struct Qdisc            *p;
        /* Current nesting depth of the search. */
        int                     depth;
};
1394
1395 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1396                          struct qdisc_walker *w);
1397
1398 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1399 {
1400         struct check_loop_arg   arg;
1401
1402         if (q->ops->cl_ops == NULL)
1403                 return 0;
1404
1405         arg.w.stop = arg.w.skip = arg.w.count = 0;
1406         arg.w.fn = check_loop_fn;
1407         arg.depth = depth;
1408         arg.p = p;
1409         q->ops->cl_ops->walk(q, &arg.w);
1410         return arg.w.stop ? -ELOOP : 0;
1411 }
1412
1413 static int
1414 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1415 {
1416         struct Qdisc *leaf;
1417         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1418         struct check_loop_arg *arg = (struct check_loop_arg *)w;
1419
1420         leaf = cops->leaf(q, cl);
1421         if (leaf) {
1422                 if (leaf == arg->p || arg->depth > 7)
1423                         return -ELOOP;
1424                 return check_loop(leaf, arg->p, arg->depth + 1);
1425         }
1426         return 0;
1427 }
1428
/* Netlink attribute policy for TCA_* attributes.  In this file it is
 * passed to nlmsg_parse_deprecated() by tc_get_qdisc() and
 * tc_modify_qdisc().  Attributes without an entry here (e.g.
 * TCA_OPTIONS) get a zero-initialized policy slot.
 */
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
        [TCA_KIND]              = { .type = NLA_STRING },
        /* Per the nla_policy documentation, .len for NLA_BINARY is the
         * maximum payload length.
         */
        [TCA_RATE]              = { .type = NLA_BINARY,
                                    .len = sizeof(struct tc_estimator) },
        [TCA_STAB]              = { .type = NLA_NESTED },
        [TCA_DUMP_INVISIBLE]    = { .type = NLA_FLAG },
        [TCA_CHAIN]             = { .type = NLA_U32 },
        [TCA_INGRESS_BLOCK]     = { .type = NLA_U32 },
        [TCA_EGRESS_BLOCK]      = { .type = NLA_U32 },
};
1439
1440 /*
1441  * Delete/get qdisc.
1442  */
1443
1444 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1445                         struct netlink_ext_ack *extack)
1446 {
1447         struct net *net = sock_net(skb->sk);
1448         struct tcmsg *tcm = nlmsg_data(n);
1449         struct nlattr *tca[TCA_MAX + 1];
1450         struct net_device *dev;
1451         u32 clid;
1452         struct Qdisc *q = NULL;
1453         struct Qdisc *p = NULL;
1454         int err;
1455
1456         err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1457                                      rtm_tca_policy, extack);
1458         if (err < 0)
1459                 return err;
1460
1461         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1462         if (!dev)
1463                 return -ENODEV;
1464
1465         clid = tcm->tcm_parent;
1466         if (clid) {
1467                 if (clid != TC_H_ROOT) {
1468                         if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1469                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1470                                 if (!p) {
1471                                         NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1472                                         return -ENOENT;
1473                                 }
1474                                 q = qdisc_leaf(p, clid);
1475                         } else if (dev_ingress_queue(dev)) {
1476                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1477                         }
1478                 } else {
1479                         q = rtnl_dereference(dev->qdisc);
1480                 }
1481                 if (!q) {
1482                         NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1483                         return -ENOENT;
1484                 }
1485
1486                 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1487                         NL_SET_ERR_MSG(extack, "Invalid handle");
1488                         return -EINVAL;
1489                 }
1490         } else {
1491                 q = qdisc_lookup(dev, tcm->tcm_handle);
1492                 if (!q) {
1493                         NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1494                         return -ENOENT;
1495                 }
1496         }
1497
1498         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1499                 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1500                 return -EINVAL;
1501         }
1502
1503         if (n->nlmsg_type == RTM_DELQDISC) {
1504                 if (!clid) {
1505                         NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1506                         return -EINVAL;
1507                 }
1508                 if (q->handle == 0) {
1509                         NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1510                         return -ENOENT;
1511                 }
1512                 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1513                 if (err != 0)
1514                         return err;
1515         } else {
1516                 qdisc_notify(net, skb, n, clid, NULL, q);
1517         }
1518         return 0;
1519 }
1520
/*
 * Create/change qdisc.
 *
 * Depending on the request this either changes the parameters of an
 * existing qdisc in place, grafts an existing qdisc at the location
 * named by tcm_parent, or creates a new qdisc and grafts it there.
 * tcm_parent == 0 means the qdisc is addressed by tcm_handle only and
 * can only be changed.  The NLM_F_CREATE, NLM_F_REPLACE and NLM_F_EXCL
 * flags select between "change" and "create/graft" where both would be
 * possible.
 *
 * Returns 0 on success or a negative errno.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
                           struct netlink_ext_ack *extack)
{
        struct net *net = sock_net(skb->sk);
        struct tcmsg *tcm;
        struct nlattr *tca[TCA_MAX + 1];
        struct net_device *dev;
        u32 clid;
        struct Qdisc *q, *p;
        int err;

        /* Re-entered from below when qdisc_create() reports -EAGAIN;
         * everything, including the device lookup, is redone.
         */
replay:
        /* Reinit, just in case something touches this. */
        err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
                                     rtm_tca_policy, extack);
        if (err < 0)
                return err;

        tcm = nlmsg_data(n);
        clid = tcm->tcm_parent;
        q = p = NULL;

        dev = __dev_get_by_index(net, tcm->tcm_ifindex);
        if (!dev)
                return -ENODEV;


        if (clid) {
                /* Find the qdisc (q) currently sitting at the requested
                 * location, and for a class location also its parent (p).
                 */
                if (clid != TC_H_ROOT) {
                        if (clid != TC_H_INGRESS) {
                                p = qdisc_lookup(dev, TC_H_MAJ(clid));
                                if (!p) {
                                        NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
                                        return -ENOENT;
                                }
                                q = qdisc_leaf(p, clid);
                        } else if (dev_ingress_queue_create(dev)) {
                                q = dev_ingress_queue(dev)->qdisc_sleeping;
                        }
                } else {
                        q = rtnl_dereference(dev->qdisc);
                }

                /* It may be default qdisc, ignore it */
                if (q && q->handle == 0)
                        q = NULL;

                /* The location is empty, no handle was given, or the handle
                 * names a different qdisc than the one found there: decide
                 * between graft, create and change.
                 */
                if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
                        if (tcm->tcm_handle) {
                                if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
                                        NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
                                        return -EEXIST;
                                }
                                if (TC_H_MIN(tcm->tcm_handle)) {
                                        NL_SET_ERR_MSG(extack, "Invalid minor handle");
                                        return -EINVAL;
                                }
                                /* From here on q is the qdisc owning the
                                 * requested handle, wherever it is attached.
                                 */
                                q = qdisc_lookup(dev, tcm->tcm_handle);
                                if (!q)
                                        goto create_n_graft;
                                if (n->nlmsg_flags & NLM_F_EXCL) {
                                        NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
                                        return -EEXIST;
                                }
                                if (tca[TCA_KIND] &&
                                    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
                                        NL_SET_ERR_MSG(extack, "Invalid qdisc name");
                                        return -EINVAL;
                                }
                                if (q->flags & TCQ_F_INGRESS) {
                                        NL_SET_ERR_MSG(extack,
                                                       "Cannot regraft ingress or clsact Qdiscs");
                                        return -EINVAL;
                                }
                                /* Refuse to graft a qdisc below itself. */
                                if (q == p ||
                                    (p && check_loop(q, p, 0))) {
                                        NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
                                        return -ELOOP;
                                }
                                if (clid == TC_H_INGRESS) {
                                        NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
                                        return -EINVAL;
                                }
                                /* Reference for the new attachment point;
                                 * dropped again below if the graft fails.
                                 */
                                qdisc_refcount_inc(q);
                                goto graft;
                        } else {
                                if (!q)
                                        goto create_n_graft;

                                /* This magic test requires explanation.
                                 *
                                 *   We know, that some child q is already
                                 *   attached to this parent and have choice:
                                 *   either to change it or to create/graft new one.
                                 *
                                 *   1. We are allowed to create/graft only
                                 *   if CREATE and REPLACE flags are set.
                                 *
                                 *   2. If EXCL is set, requestor wanted to say,
                                 *   that qdisc tcm_handle is not expected
                                 *   to exist, so that we choose create/graft too.
                                 *
                                 *   3. The last case is when no flags are set.
                                 *   Alas, it is sort of hole in API, we
                                 *   cannot decide what to do unambiguously.
                                 *   For now we select create/graft, if
                                 *   user gave KIND, which does not match existing.
                                 */
                                if ((n->nlmsg_flags & NLM_F_CREATE) &&
                                    (n->nlmsg_flags & NLM_F_REPLACE) &&
                                    ((n->nlmsg_flags & NLM_F_EXCL) ||
                                     (tca[TCA_KIND] &&
                                      nla_strcmp(tca[TCA_KIND], q->ops->id))))
                                        goto create_n_graft;
                        }
                }
        } else {
                /* No parent given: the qdisc can only be addressed by handle. */
                if (!tcm->tcm_handle) {
                        NL_SET_ERR_MSG(extack, "Handle cannot be zero");
                        return -EINVAL;
                }
                q = qdisc_lookup(dev, tcm->tcm_handle);
        }

        /* Change qdisc parameters */
        if (!q) {
                NL_SET_ERR_MSG(extack, "Specified qdisc not found");
                return -ENOENT;
        }
        if (n->nlmsg_flags & NLM_F_EXCL) {
                NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
                return -EEXIST;
        }
        if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
                NL_SET_ERR_MSG(extack, "Invalid qdisc name");
                return -EINVAL;
        }
        err = qdisc_change(q, tca, extack);
        /* A notification is only sent for a successful change. */
        if (err == 0)
                qdisc_notify(net, skb, n, clid, NULL, q);
        return err;

create_n_graft:
        if (!(n->nlmsg_flags & NLM_F_CREATE)) {
                NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
                return -ENOENT;
        }
        if (clid == TC_H_INGRESS) {
                if (dev_ingress_queue(dev)) {
                        /* tcm_parent is passed as the handle too. */
                        q = qdisc_create(dev, dev_ingress_queue(dev),
                                         tcm->tcm_parent, tcm->tcm_parent,
                                         tca, &err, extack);
                } else {
                        NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
                        err = -ENOENT;
                }
        } else {
                struct netdev_queue *dev_queue;

                /* Pick the queue for the new qdisc: let the parent choose
                 * if it can, else use the parent's queue, else TX queue 0.
                 */
                if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
                        dev_queue = p->ops->cl_ops->select_queue(p, tcm);
                else if (p)
                        dev_queue = p->dev_queue;
                else
                        dev_queue = netdev_get_tx_queue(dev, 0);

                q = qdisc_create(dev, dev_queue,
                                 tcm->tcm_parent, tcm->tcm_handle,
                                 tca, &err, extack);
        }
        if (q == NULL) {
                /* -EAGAIN: a module was loaded; start over. */
                if (err == -EAGAIN)
                        goto replay;
                return err;
        }

graft:
        err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
        if (err) {
                /* Drop the reference taken above or held since creation. */
                if (q)
                        qdisc_put(q);
                return err;
        }

        return 0;
}
1711
1712 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1713                               struct netlink_callback *cb,
1714                               int *q_idx_p, int s_q_idx, bool recur,
1715                               bool dump_invisible)
1716 {
1717         int ret = 0, q_idx = *q_idx_p;
1718         struct Qdisc *q;
1719         int b;
1720
1721         if (!root)
1722                 return 0;
1723
1724         q = root;
1725         if (q_idx < s_q_idx) {
1726                 q_idx++;
1727         } else {
1728                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1729                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1730                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1731                                   RTM_NEWQDISC) <= 0)
1732                         goto done;
1733                 q_idx++;
1734         }
1735
1736         /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1737          * itself has already been dumped.
1738          *
1739          * If we've already dumped the top-level (ingress) qdisc above and the global
1740          * qdisc hashtable, we don't want to hit it again
1741          */
1742         if (!qdisc_dev(root) || !recur)
1743                 goto out;
1744
1745         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1746                 if (q_idx < s_q_idx) {
1747                         q_idx++;
1748                         continue;
1749                 }
1750                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1751                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1752                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1753                                   RTM_NEWQDISC) <= 0)
1754                         goto done;
1755                 q_idx++;
1756         }
1757
1758 out:
1759         *q_idx_p = q_idx;
1760         return ret;
1761 done:
1762         ret = -1;
1763         goto out;
1764 }
1765
1766 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1767 {
1768         struct net *net = sock_net(skb->sk);
1769         int idx, q_idx;
1770         int s_idx, s_q_idx;
1771         struct net_device *dev;
1772         const struct nlmsghdr *nlh = cb->nlh;
1773         struct nlattr *tca[TCA_MAX + 1];
1774         int err;
1775
1776         s_idx = cb->args[0];
1777         s_q_idx = q_idx = cb->args[1];
1778
1779         idx = 0;
1780         ASSERT_RTNL();
1781
1782         err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1783                                      rtm_tca_policy, cb->extack);
1784         if (err < 0)
1785                 return err;
1786
1787         for_each_netdev(net, dev) {
1788                 struct netdev_queue *dev_queue;
1789
1790                 if (idx < s_idx)
1791                         goto cont;
1792                 if (idx > s_idx)
1793                         s_q_idx = 0;
1794                 q_idx = 0;
1795
1796                 if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
1797                                        skb, cb, &q_idx, s_q_idx,
1798                                        true, tca[TCA_DUMP_INVISIBLE]) < 0)
1799                         goto done;
1800
1801                 dev_queue = dev_ingress_queue(dev);
1802                 if (dev_queue &&
1803                     tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1804                                        &q_idx, s_q_idx, false,
1805                                        tca[TCA_DUMP_INVISIBLE]) < 0)
1806                         goto done;
1807
1808 cont:
1809                 idx++;
1810         }
1811
1812 done:
1813         cb->args[0] = idx;
1814         cb->args[1] = q_idx;
1815
1816         return skb->len;
1817 }
1818
1819
1820
1821 /************************************************
1822  *      Traffic classes manipulation.           *
1823  ************************************************/
1824
1825 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1826                           unsigned long cl,
1827                           u32 portid, u32 seq, u16 flags, int event)
1828 {
1829         struct tcmsg *tcm;
1830         struct nlmsghdr  *nlh;
1831         unsigned char *b = skb_tail_pointer(skb);
1832         struct gnet_dump d;
1833         const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1834
1835         cond_resched();
1836         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1837         if (!nlh)
1838                 goto out_nlmsg_trim;
1839         tcm = nlmsg_data(nlh);
1840         tcm->tcm_family = AF_UNSPEC;
1841         tcm->tcm__pad1 = 0;
1842         tcm->tcm__pad2 = 0;
1843         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1844         tcm->tcm_parent = q->handle;
1845         tcm->tcm_handle = q->handle;
1846         tcm->tcm_info = 0;
1847         if (nla_put_string(skb, TCA_KIND, q->ops->id))
1848                 goto nla_put_failure;
1849         if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1850                 goto nla_put_failure;
1851
1852         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1853                                          NULL, &d, TCA_PAD) < 0)
1854                 goto nla_put_failure;
1855
1856         if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1857                 goto nla_put_failure;
1858
1859         if (gnet_stats_finish_copy(&d) < 0)
1860                 goto nla_put_failure;
1861
1862         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1863         return skb->len;
1864
1865 out_nlmsg_trim:
1866 nla_put_failure:
1867         nlmsg_trim(skb, b);
1868         return -1;
1869 }
1870
1871 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1872                          struct nlmsghdr *n, struct Qdisc *q,
1873                          unsigned long cl, int event)
1874 {
1875         struct sk_buff *skb;
1876         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1877
1878         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1879         if (!skb)
1880                 return -ENOBUFS;
1881
1882         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1883                 kfree_skb(skb);
1884                 return -EINVAL;
1885         }
1886
1887         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1888                               n->nlmsg_flags & NLM_F_ECHO);
1889 }
1890
1891 static int tclass_del_notify(struct net *net,
1892                              const struct Qdisc_class_ops *cops,
1893                              struct sk_buff *oskb, struct nlmsghdr *n,
1894                              struct Qdisc *q, unsigned long cl,
1895                              struct netlink_ext_ack *extack)
1896 {
1897         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1898         struct sk_buff *skb;
1899         int err = 0;
1900
1901         if (!cops->delete)
1902                 return -EOPNOTSUPP;
1903
1904         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1905         if (!skb)
1906                 return -ENOBUFS;
1907
1908         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1909                            RTM_DELTCLASS) < 0) {
1910                 kfree_skb(skb);
1911                 return -EINVAL;
1912         }
1913
1914         err = cops->delete(q, cl, extack);
1915         if (err) {
1916                 kfree_skb(skb);
1917                 return err;
1918         }
1919
1920         err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1921                              n->nlmsg_flags & NLM_F_ECHO);
1922         return err;
1923 }
1924
1925 #ifdef CONFIG_NET_CLS
1926
1927 struct tcf_bind_args {
1928         struct tcf_walker w;
1929         unsigned long base;
1930         unsigned long cl;
1931         u32 classid;
1932 };
1933
1934 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1935 {
1936         struct tcf_bind_args *a = (void *)arg;
1937
1938         if (n && tp->ops->bind_class) {
1939                 struct Qdisc *q = tcf_block_q(tp->chain->block);
1940
1941                 sch_tree_lock(q);
1942                 tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
1943                 sch_tree_unlock(q);
1944         }
1945         return 0;
1946 }
1947
1948 struct tc_bind_class_args {
1949         struct qdisc_walker w;
1950         unsigned long new_cl;
1951         u32 portid;
1952         u32 clid;
1953 };
1954
1955 static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
1956                                 struct qdisc_walker *w)
1957 {
1958         struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
1959         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1960         struct tcf_block *block;
1961         struct tcf_chain *chain;
1962
1963         block = cops->tcf_block(q, cl, NULL);
1964         if (!block)
1965                 return 0;
1966         for (chain = tcf_get_next_chain(block, NULL);
1967              chain;
1968              chain = tcf_get_next_chain(block, chain)) {
1969                 struct tcf_proto *tp;
1970
1971                 for (tp = tcf_get_next_proto(chain, NULL);
1972                      tp; tp = tcf_get_next_proto(chain, tp)) {
1973                         struct tcf_bind_args arg = {};
1974
1975                         arg.w.fn = tcf_node_bind;
1976                         arg.classid = a->clid;
1977                         arg.base = cl;
1978                         arg.cl = a->new_cl;
1979                         tp->ops->walk(tp, &arg.w, true);
1980                 }
1981         }
1982
1983         return 0;
1984 }
1985
1986 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1987                            unsigned long new_cl)
1988 {
1989         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1990         struct tc_bind_class_args args = {};
1991
1992         if (!cops->tcf_block)
1993                 return;
1994         args.portid = portid;
1995         args.clid = clid;
1996         args.new_cl = new_cl;
1997         args.w.fn = tc_bind_class_walker;
1998         q->ops->cl_ops->walk(q, &args.w);
1999 }
2000
2001 #else
2002
2003 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
2004                            unsigned long new_cl)
2005 {
2006 }
2007
2008 #endif
2009
2010 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
2011                          struct netlink_ext_ack *extack)
2012 {
2013         struct net *net = sock_net(skb->sk);
2014         struct tcmsg *tcm = nlmsg_data(n);
2015         struct nlattr *tca[TCA_MAX + 1];
2016         struct net_device *dev;
2017         struct Qdisc *q = NULL;
2018         const struct Qdisc_class_ops *cops;
2019         unsigned long cl = 0;
2020         unsigned long new_cl;
2021         u32 portid;
2022         u32 clid;
2023         u32 qid;
2024         int err;
2025
2026         err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
2027                                      rtm_tca_policy, extack);
2028         if (err < 0)
2029                 return err;
2030
2031         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2032         if (!dev)
2033                 return -ENODEV;
2034
2035         /*
2036            parent == TC_H_UNSPEC - unspecified parent.
2037            parent == TC_H_ROOT   - class is root, which has no parent.
2038            parent == X:0         - parent is root class.
2039            parent == X:Y         - parent is a node in hierarchy.
2040            parent == 0:Y         - parent is X:Y, where X:0 is qdisc.
2041
2042            handle == 0:0         - generate handle from kernel pool.
2043            handle == 0:Y         - class is X:Y, where X:0 is qdisc.
2044            handle == X:Y         - clear.
2045            handle == X:0         - root class.
2046          */
2047
2048         /* Step 1. Determine qdisc handle X:0 */
2049
2050         portid = tcm->tcm_parent;
2051         clid = tcm->tcm_handle;
2052         qid = TC_H_MAJ(clid);
2053
2054         if (portid != TC_H_ROOT) {
2055                 u32 qid1 = TC_H_MAJ(portid);
2056
2057                 if (qid && qid1) {
2058                         /* If both majors are known, they must be identical. */
2059                         if (qid != qid1)
2060                                 return -EINVAL;
2061                 } else if (qid1) {
2062                         qid = qid1;
2063                 } else if (qid == 0)
2064                         qid = rtnl_dereference(dev->qdisc)->handle;
2065
2066                 /* Now qid is genuine qdisc handle consistent
2067                  * both with parent and child.
2068                  *
2069                  * TC_H_MAJ(portid) still may be unspecified, complete it now.
2070                  */
2071                 if (portid)
2072                         portid = TC_H_MAKE(qid, portid);
2073         } else {
2074                 if (qid == 0)
2075                         qid = rtnl_dereference(dev->qdisc)->handle;
2076         }
2077
2078         /* OK. Locate qdisc */
2079         q = qdisc_lookup(dev, qid);
2080         if (!q)
2081                 return -ENOENT;
2082
2083         /* An check that it supports classes */
2084         cops = q->ops->cl_ops;
2085         if (cops == NULL)
2086                 return -EINVAL;
2087
2088         /* Now try to get class */
2089         if (clid == 0) {
2090                 if (portid == TC_H_ROOT)
2091                         clid = qid;
2092         } else
2093                 clid = TC_H_MAKE(qid, clid);
2094
2095         if (clid)
2096                 cl = cops->find(q, clid);
2097
2098         if (cl == 0) {
2099                 err = -ENOENT;
2100                 if (n->nlmsg_type != RTM_NEWTCLASS ||
2101                     !(n->nlmsg_flags & NLM_F_CREATE))
2102                         goto out;
2103         } else {
2104                 switch (n->nlmsg_type) {
2105                 case RTM_NEWTCLASS:
2106                         err = -EEXIST;
2107                         if (n->nlmsg_flags & NLM_F_EXCL)
2108                                 goto out;
2109                         break;
2110                 case RTM_DELTCLASS:
2111                         err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
2112                         /* Unbind the class with flilters with 0 */
2113                         tc_bind_tclass(q, portid, clid, 0);
2114                         goto out;
2115                 case RTM_GETTCLASS:
2116                         err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
2117                         goto out;
2118                 default:
2119                         err = -EINVAL;
2120                         goto out;
2121                 }
2122         }
2123
2124         if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2125                 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2126                 return -EOPNOTSUPP;
2127         }
2128
2129         new_cl = cl;
2130         err = -EOPNOTSUPP;
2131         if (cops->change)
2132                 err = cops->change(q, clid, portid, tca, &new_cl, extack);
2133         if (err == 0) {
2134                 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
2135                 /* We just create a new class, need to do reverse binding. */
2136                 if (cl != new_cl)
2137                         tc_bind_tclass(q, portid, clid, new_cl);
2138         }
2139 out:
2140         return err;
2141 }
2142
2143 struct qdisc_dump_args {
2144         struct qdisc_walker     w;
2145         struct sk_buff          *skb;
2146         struct netlink_callback *cb;
2147 };
2148
2149 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2150                             struct qdisc_walker *arg)
2151 {
2152         struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2153
2154         return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2155                               a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2156                               RTM_NEWTCLASS);
2157 }
2158
2159 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2160                                 struct tcmsg *tcm, struct netlink_callback *cb,
2161                                 int *t_p, int s_t)
2162 {
2163         struct qdisc_dump_args arg;
2164
2165         if (tc_qdisc_dump_ignore(q, false) ||
2166             *t_p < s_t || !q->ops->cl_ops ||
2167             (tcm->tcm_parent &&
2168              TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2169                 (*t_p)++;
2170                 return 0;
2171         }
2172         if (*t_p > s_t)
2173                 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2174         arg.w.fn = qdisc_class_dump;
2175         arg.skb = skb;
2176         arg.cb = cb;
2177         arg.w.stop  = 0;
2178         arg.w.skip = cb->args[1];
2179         arg.w.count = 0;
2180         q->ops->cl_ops->walk(q, &arg.w);
2181         cb->args[1] = arg.w.count;
2182         if (arg.w.stop)
2183                 return -1;
2184         (*t_p)++;
2185         return 0;
2186 }
2187
2188 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2189                                struct tcmsg *tcm, struct netlink_callback *cb,
2190                                int *t_p, int s_t, bool recur)
2191 {
2192         struct Qdisc *q;
2193         int b;
2194
2195         if (!root)
2196                 return 0;
2197
2198         if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2199                 return -1;
2200
2201         if (!qdisc_dev(root) || !recur)
2202                 return 0;
2203
2204         if (tcm->tcm_parent) {
2205                 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2206                 if (q && q != root &&
2207                     tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2208                         return -1;
2209                 return 0;
2210         }
2211         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2212                 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2213                         return -1;
2214         }
2215
2216         return 0;
2217 }
2218
2219 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2220 {
2221         struct tcmsg *tcm = nlmsg_data(cb->nlh);
2222         struct net *net = sock_net(skb->sk);
2223         struct netdev_queue *dev_queue;
2224         struct net_device *dev;
2225         int t, s_t;
2226
2227         if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2228                 return 0;
2229         dev = dev_get_by_index(net, tcm->tcm_ifindex);
2230         if (!dev)
2231                 return 0;
2232
2233         s_t = cb->args[0];
2234         t = 0;
2235
2236         if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
2237                                 skb, tcm, cb, &t, s_t, true) < 0)
2238                 goto done;
2239
2240         dev_queue = dev_ingress_queue(dev);
2241         if (dev_queue &&
2242             tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
2243                                 &t, s_t, false) < 0)
2244                 goto done;
2245
2246 done:
2247         cb->args[0] = t;
2248
2249         dev_put(dev);
2250         return skb->len;
2251 }
2252
2253 #ifdef CONFIG_PROC_FS
2254 static int psched_show(struct seq_file *seq, void *v)
2255 {
2256         seq_printf(seq, "%08x %08x %08x %08x\n",
2257                    (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2258                    1000000,
2259                    (u32)NSEC_PER_SEC / hrtimer_resolution);
2260
2261         return 0;
2262 }
2263
2264 static int __net_init psched_net_init(struct net *net)
2265 {
2266         struct proc_dir_entry *e;
2267
2268         e = proc_create_single("psched", 0, net->proc_net, psched_show);
2269         if (e == NULL)
2270                 return -ENOMEM;
2271
2272         return 0;
2273 }
2274
2275 static void __net_exit psched_net_exit(struct net *net)
2276 {
2277         remove_proc_entry("psched", net->proc_net);
2278 }
2279 #else
2280 static int __net_init psched_net_init(struct net *net)
2281 {
2282         return 0;
2283 }
2284
2285 static void __net_exit psched_net_exit(struct net *net)
2286 {
2287 }
2288 #endif
2289
2290 static struct pernet_operations psched_net_ops = {
2291         .init = psched_net_init,
2292         .exit = psched_net_exit,
2293 };
2294
2295 static int __init pktsched_init(void)
2296 {
2297         int err;
2298
2299         err = register_pernet_subsys(&psched_net_ops);
2300         if (err) {
2301                 pr_err("pktsched_init: "
2302                        "cannot initialize per netns operations\n");
2303                 return err;
2304         }
2305
2306         register_qdisc(&pfifo_fast_ops);
2307         register_qdisc(&pfifo_qdisc_ops);
2308         register_qdisc(&bfifo_qdisc_ops);
2309         register_qdisc(&pfifo_head_drop_qdisc_ops);
2310         register_qdisc(&mq_qdisc_ops);
2311         register_qdisc(&noqueue_qdisc_ops);
2312
2313         rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2314         rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2315         rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2316                       0);
2317         rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2318         rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2319         rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2320                       0);
2321
2322         return 0;
2323 }
2324
2325 subsys_initcall(pktsched_init);