2244e00ea9a1055ba4d15fbbb6fa5e683ece61ac
[platform/kernel/linux-starfive.git] / net / sched / sch_api.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_api.c  Packet scheduler API.
4  *
5  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  * Fixes:
8  *
9  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
10  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
11  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
12  */
13
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/string.h>
18 #include <linux/errno.h>
19 #include <linux/skbuff.h>
20 #include <linux/init.h>
21 #include <linux/proc_fs.h>
22 #include <linux/seq_file.h>
23 #include <linux/kmod.h>
24 #include <linux/list.h>
25 #include <linux/hrtimer.h>
26 #include <linux/slab.h>
27 #include <linux/hashtable.h>
28
29 #include <net/net_namespace.h>
30 #include <net/sock.h>
31 #include <net/netlink.h>
32 #include <net/pkt_sched.h>
33 #include <net/pkt_cls.h>
34
35 #include <trace/events/qdisc.h>
36
37 /*
38
39    Short review.
40    -------------
41
42    This file consists of two interrelated parts:
43
44    1. queueing disciplines manager frontend.
45    2. traffic classes manager frontend.
46
   Generally, a queueing discipline ("qdisc") is a black box
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in an order and at times
   determined by the algorithm hidden in it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (look at cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them etc. etc. etc.
59
   The goal of the routines in this file is to translate
   information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to do some sanity
   checks and the part of the work which is common to all qdiscs,
   and to provide rtnetlink notifications.

   All real intelligent work is done inside qdisc modules.
67
68
69
70    Every discipline has two major routines: enqueue and dequeue.
71
72    ---dequeue
73
   dequeue usually returns a skb to send. It is allowed to return NULL,
   but that does not mean that the queue is empty, it just means that
   the discipline does not want to send anything this time.
   The queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not
   a real packet queue; q->q.qlen must nevertheless be valid.
80
81    ---enqueue
82
   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP        - this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN          - this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore
90
91    Auxiliary routines:
92
93    ---peek
94
95    like dequeue but without removing a packet from the queue
96
97    ---reset
98
99    returns qdisc to initial state: purge all buffers, clear all
100    timers, counters (except for statistics) etc.
101
102    ---init
103
104    initializes newly created qdisc.
105
106    ---destroy
107
108    destroys resources allocated by init and during lifetime of qdisc.
109
110    ---change
111
112    changes qdisc parameters.
113  */
114
/* Protects the list of registered TC modules (qdisc_base) and changes of
 * the default qdisc ops. It is pure SMP lock.
 */
static DEFINE_RWLOCK(qdisc_mod_lock);
117
118
119 /************************************************
120  *      Queueing disciplines manipulation.      *
121  ************************************************/
122
123
/* The list of all installed queueing disciplines: a singly linked list
 * chained through Qdisc_ops::next. The code in this file accesses it with
 * qdisc_mod_lock held.
 */

static struct Qdisc_ops *qdisc_base;
127
128 /* Register/unregister queueing discipline */
129
130 int register_qdisc(struct Qdisc_ops *qops)
131 {
132         struct Qdisc_ops *q, **qp;
133         int rc = -EEXIST;
134
135         write_lock(&qdisc_mod_lock);
136         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
137                 if (!strcmp(qops->id, q->id))
138                         goto out;
139
140         if (qops->enqueue == NULL)
141                 qops->enqueue = noop_qdisc_ops.enqueue;
142         if (qops->peek == NULL) {
143                 if (qops->dequeue == NULL)
144                         qops->peek = noop_qdisc_ops.peek;
145                 else
146                         goto out_einval;
147         }
148         if (qops->dequeue == NULL)
149                 qops->dequeue = noop_qdisc_ops.dequeue;
150
151         if (qops->cl_ops) {
152                 const struct Qdisc_class_ops *cops = qops->cl_ops;
153
154                 if (!(cops->find && cops->walk && cops->leaf))
155                         goto out_einval;
156
157                 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
158                         goto out_einval;
159         }
160
161         qops->next = NULL;
162         *qp = qops;
163         rc = 0;
164 out:
165         write_unlock(&qdisc_mod_lock);
166         return rc;
167
168 out_einval:
169         rc = -EINVAL;
170         goto out;
171 }
172 EXPORT_SYMBOL(register_qdisc);
173
174 void unregister_qdisc(struct Qdisc_ops *qops)
175 {
176         struct Qdisc_ops *q, **qp;
177         int err = -ENOENT;
178
179         write_lock(&qdisc_mod_lock);
180         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
181                 if (q == qops)
182                         break;
183         if (q) {
184                 *qp = q->next;
185                 q->next = NULL;
186                 err = 0;
187         }
188         write_unlock(&qdisc_mod_lock);
189
190         WARN(err, "unregister qdisc(%s) failed\n", qops->id);
191 }
192 EXPORT_SYMBOL(unregister_qdisc);
193
/* Get default qdisc if not otherwise specified.
 *
 * Copies the id of the current default qdisc ops into @name, a buffer of
 * @len bytes. The id is read with qdisc_mod_lock held for reading.
 */
void qdisc_get_default(char *name, size_t len)
{
        read_lock(&qdisc_mod_lock);
        strscpy(name, default_qdisc_ops->id, len);
        read_unlock(&qdisc_mod_lock);
}
201
202 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
203 {
204         struct Qdisc_ops *q = NULL;
205
206         for (q = qdisc_base; q; q = q->next) {
207                 if (!strcmp(name, q->id)) {
208                         if (!try_module_get(q->owner))
209                                 q = NULL;
210                         break;
211                 }
212         }
213
214         return q;
215 }
216
217 /* Set new default qdisc to use */
218 int qdisc_set_default(const char *name)
219 {
220         const struct Qdisc_ops *ops;
221
222         if (!capable(CAP_NET_ADMIN))
223                 return -EPERM;
224
225         write_lock(&qdisc_mod_lock);
226         ops = qdisc_lookup_default(name);
227         if (!ops) {
228                 /* Not found, drop lock and try to load module */
229                 write_unlock(&qdisc_mod_lock);
230                 request_module("sch_%s", name);
231                 write_lock(&qdisc_mod_lock);
232
233                 ops = qdisc_lookup_default(name);
234         }
235
236         if (ops) {
237                 /* Set new default */
238                 module_put(default_qdisc_ops->owner);
239                 default_qdisc_ops = ops;
240         }
241         write_unlock(&qdisc_mod_lock);
242
243         return ops ? 0 : -ENOENT;
244 }
245
#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config: registered as a late initcall, it
 * passes CONFIG_DEFAULT_NET_SCH to qdisc_set_default() and returns that
 * function's result.
 */
static int __init sch_default_qdisc(void)
{
        return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
254
255 /* We know handle. Find qdisc among all qdisc's attached to device
256  * (root qdisc, all its children, children of children etc.)
257  * Note: caller either uses rtnl or rcu_read_lock()
258  */
259
260 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
261 {
262         struct Qdisc *q;
263
264         if (!qdisc_dev(root))
265                 return (root->handle == handle ? root : NULL);
266
267         if (!(root->flags & TCQ_F_BUILTIN) &&
268             root->handle == handle)
269                 return root;
270
271         hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
272                                    lockdep_rtnl_is_held()) {
273                 if (q->handle == handle)
274                         return q;
275         }
276         return NULL;
277 }
278
279 void qdisc_hash_add(struct Qdisc *q, bool invisible)
280 {
281         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
282                 ASSERT_RTNL();
283                 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
284                 if (invisible)
285                         q->flags |= TCQ_F_INVISIBLE;
286         }
287 }
288 EXPORT_SYMBOL(qdisc_hash_add);
289
290 void qdisc_hash_del(struct Qdisc *q)
291 {
292         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
293                 ASSERT_RTNL();
294                 hash_del_rcu(&q->hash);
295         }
296 }
297 EXPORT_SYMBOL(qdisc_hash_del);
298
299 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
300 {
301         struct Qdisc *q;
302
303         if (!handle)
304                 return NULL;
305         q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
306         if (q)
307                 goto out;
308
309         if (dev_ingress_queue(dev))
310                 q = qdisc_match_from_root(
311                         dev_ingress_queue(dev)->qdisc_sleeping,
312                         handle);
313 out:
314         return q;
315 }
316
317 struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
318 {
319         struct netdev_queue *nq;
320         struct Qdisc *q;
321
322         if (!handle)
323                 return NULL;
324         q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
325         if (q)
326                 goto out;
327
328         nq = dev_ingress_queue_rcu(dev);
329         if (nq)
330                 q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
331 out:
332         return q;
333 }
334
335 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
336 {
337         unsigned long cl;
338         const struct Qdisc_class_ops *cops = p->ops->cl_ops;
339
340         if (cops == NULL)
341                 return NULL;
342         cl = cops->find(p, classid);
343
344         if (cl == 0)
345                 return NULL;
346         return cops->leaf(p, cl);
347 }
348
349 /* Find queueing discipline by name */
350
351 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
352 {
353         struct Qdisc_ops *q = NULL;
354
355         if (kind) {
356                 read_lock(&qdisc_mod_lock);
357                 for (q = qdisc_base; q; q = q->next) {
358                         if (nla_strcmp(kind, q->id) == 0) {
359                                 if (!try_module_get(q->owner))
360                                         q = NULL;
361                                 break;
362                         }
363                 }
364                 read_unlock(&qdisc_mod_lock);
365         }
366         return q;
367 }
368
369 /* The linklayer setting were not transferred from iproute2, in older
370  * versions, and the rate tables lookup systems have been dropped in
371  * the kernel. To keep backward compatible with older iproute2 tc
372  * utils, we detect the linklayer setting by detecting if the rate
373  * table were modified.
374  *
375  * For linklayer ATM table entries, the rate table will be aligned to
376  * 48 bytes, thus some table entries will contain the same value.  The
377  * mpu (min packet unit) is also encoded into the old rate table, thus
378  * starting from the mpu, we find low and high table entries for
379  * mapping this cell.  If these entries contain the same value, when
380  * the rate tables have been modified for linklayer ATM.
381  *
382  * This is done by rounding mpu to the nearest 48 bytes cell/entry,
383  * and then roundup to the next cell, calc the table entry one below,
384  * and compare.
385  */
386 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
387 {
388         int low       = roundup(r->mpu, 48);
389         int high      = roundup(low+1, 48);
390         int cell_low  = low >> r->cell_log;
391         int cell_high = (high >> r->cell_log) - 1;
392
393         /* rtab is too inaccurate at rates > 100Mbit/s */
394         if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
395                 pr_debug("TC linklayer: Giving up ATM detection\n");
396                 return TC_LINKLAYER_ETHERNET;
397         }
398
399         if ((cell_high > cell_low) && (cell_high < 256)
400             && (rtab[cell_low] == rtab[cell_high])) {
401                 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
402                          cell_low, cell_high, rtab[cell_high]);
403                 return TC_LINKLAYER_ATM;
404         }
405         return TC_LINKLAYER_ETHERNET;
406 }
407
/* Singly linked list (via ->next) of reference-counted rate tables handed
 * out by qdisc_get_rtab() and released by qdisc_put_rtab().
 */
static struct qdisc_rate_table *qdisc_rtab_list;
409
410 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
411                                         struct nlattr *tab,
412                                         struct netlink_ext_ack *extack)
413 {
414         struct qdisc_rate_table *rtab;
415
416         if (tab == NULL || r->rate == 0 ||
417             r->cell_log == 0 || r->cell_log >= 32 ||
418             nla_len(tab) != TC_RTAB_SIZE) {
419                 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
420                 return NULL;
421         }
422
423         for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
424                 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
425                     !memcmp(&rtab->data, nla_data(tab), 1024)) {
426                         rtab->refcnt++;
427                         return rtab;
428                 }
429         }
430
431         rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
432         if (rtab) {
433                 rtab->rate = *r;
434                 rtab->refcnt = 1;
435                 memcpy(rtab->data, nla_data(tab), 1024);
436                 if (r->linklayer == TC_LINKLAYER_UNAWARE)
437                         r->linklayer = __detect_linklayer(r, rtab->data);
438                 rtab->next = qdisc_rtab_list;
439                 qdisc_rtab_list = rtab;
440         } else {
441                 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
442         }
443         return rtab;
444 }
445 EXPORT_SYMBOL(qdisc_get_rtab);
446
447 void qdisc_put_rtab(struct qdisc_rate_table *tab)
448 {
449         struct qdisc_rate_table *rtab, **rtabp;
450
451         if (!tab || --tab->refcnt)
452                 return;
453
454         for (rtabp = &qdisc_rtab_list;
455              (rtab = *rtabp) != NULL;
456              rtabp = &rtab->next) {
457                 if (rtab == tab) {
458                         *rtabp = rtab->next;
459                         kfree(rtab);
460                         return;
461                 }
462         }
463 }
464 EXPORT_SYMBOL(qdisc_put_rtab);
465
/* All size tables currently in use. qdisc_get_stab() shares an entry (by
 * bumping its refcount) when identical parameters are requested again;
 * qdisc_put_stab() removes it once the refcount drops to zero.
 */
static LIST_HEAD(qdisc_stab_list);

/* Netlink policy used by qdisc_get_stab() to parse the nested size table
 * attributes.
 */
static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
        [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
        [TCA_STAB_DATA] = { .type = NLA_BINARY },
};
472
/* Parse the nested size table attribute @opt and return a matching
 * reference-counted size table.
 *
 * An already listed table with identical tc_sizespec and data is shared
 * (refcount bumped); otherwise a new one is allocated and appended to
 * qdisc_stab_list.
 *
 * Returns the table or an ERR_PTR(): the parse error, -EINVAL for
 * missing/inconsistent attributes or out-of-range log values, -ENOMEM on
 * allocation failure. Failures detected here after parsing set a message in
 * @extack, except -ENOMEM.
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
                                               struct netlink_ext_ack *extack)
{
        struct nlattr *tb[TCA_STAB_MAX + 1];
        struct qdisc_size_table *stab;
        struct tc_sizespec *s;
        unsigned int tsize = 0;
        u16 *tab = NULL;
        int err;

        err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
                                          extack);
        if (err < 0)
                return ERR_PTR(err);
        if (!tb[TCA_STAB_BASE]) {
                NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
                return ERR_PTR(-EINVAL);
        }

        s = nla_data(tb[TCA_STAB_BASE]);

        /* Table data is only required when the spec announces entries. */
        if (s->tsize > 0) {
                if (!tb[TCA_STAB_DATA]) {
                        NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
                        return ERR_PTR(-EINVAL);
                }
                tab = nla_data(tb[TCA_STAB_DATA]);
                tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
        }

        /* The number of u16 entries supplied must match the announced one. */
        if (tsize != s->tsize || (!tab && tsize > 0)) {
                NL_SET_ERR_MSG(extack, "Invalid size of size table");
                return ERR_PTR(-EINVAL);
        }

        /* Reuse an existing table with the same spec and contents. */
        list_for_each_entry(stab, &qdisc_stab_list, list) {
                if (memcmp(&stab->szopts, s, sizeof(*s)))
                        continue;
                if (tsize > 0 &&
                    memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
                        continue;
                stab->refcnt++;
                return stab;
        }

        /* Only checked for new tables: entries already on the list passed
         * this check when they were created.
         */
        if (s->size_log > STAB_SIZE_LOG_MAX ||
            s->cell_log > STAB_SIZE_LOG_MAX) {
                NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
                return ERR_PTR(-EINVAL);
        }

        stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
        if (!stab)
                return ERR_PTR(-ENOMEM);

        stab->refcnt = 1;
        stab->szopts = *s;
        if (tsize > 0)
                memcpy(stab->data, tab, flex_array_size(stab, data, tsize));

        list_add_tail(&stab->list, &qdisc_stab_list);

        return stab;
}
537
538 void qdisc_put_stab(struct qdisc_size_table *tab)
539 {
540         if (!tab)
541                 return;
542
543         if (--tab->refcnt == 0) {
544                 list_del(&tab->list);
545                 kfree_rcu(tab, rcu);
546         }
547 }
548 EXPORT_SYMBOL(qdisc_put_stab);
549
550 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
551 {
552         struct nlattr *nest;
553
554         nest = nla_nest_start_noflag(skb, TCA_STAB);
555         if (nest == NULL)
556                 goto nla_put_failure;
557         if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
558                 goto nla_put_failure;
559         nla_nest_end(skb, nest);
560
561         return skb->len;
562
563 nla_put_failure:
564         return -1;
565 }
566
/* Compute the packet length of @skb as seen through size table @stab and
 * store it in qdisc_skb_cb(skb)->pkt_len.
 *
 * The length starts as skb->len plus the configured overhead. If the table
 * has entries, it is replaced by a table lookup scaled by size_log. The
 * stored value is never less than 1.
 */
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
                               const struct qdisc_size_table *stab)
{
        int pkt_len, slot;

        pkt_len = skb->len + stab->szopts.overhead;
        /* No table entries: use the overhead-adjusted length as is. */
        if (unlikely(!stab->szopts.tsize))
                goto out;

        /* Map the length to a table slot; negative values clamp to slot 0. */
        slot = pkt_len + stab->szopts.cell_align;
        if (unlikely(slot < 0))
                slot = 0;

        slot >>= stab->szopts.cell_log;
        if (likely(slot < stab->szopts.tsize))
                pkt_len = stab->data[slot];
        else
                /* Past the end of the table: the last entry times the
                 * number of whole table lengths, plus the entry for the
                 * remainder.
                 */
                pkt_len = stab->data[stab->szopts.tsize - 1] *
                                (slot / stab->szopts.tsize) +
                                stab->data[slot % stab->szopts.tsize];

        pkt_len <<= stab->szopts.size_log;
out:
        if (unlikely(pkt_len < 1))
                pkt_len = 1;
        qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
595
596 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
597 {
598         if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
599                 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
600                         txt, qdisc->ops->id, qdisc->handle >> 16);
601                 qdisc->flags |= TCQ_F_WARN_NONWC;
602         }
603 }
604 EXPORT_SYMBOL(qdisc_warn_nonwc);
605
606 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
607 {
608         struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
609                                                  timer);
610
611         rcu_read_lock();
612         __netif_schedule(qdisc_root(wd->qdisc));
613         rcu_read_unlock();
614
615         return HRTIMER_NORESTART;
616 }
617
618 void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
619                                  clockid_t clockid)
620 {
621         hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
622         wd->timer.function = qdisc_watchdog;
623         wd->qdisc = qdisc;
624 }
625 EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
626
/* Initialise watchdog @wd for @qdisc using CLOCK_MONOTONIC; see
 * qdisc_watchdog_init_clockid().
 */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
        qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);
632
633 void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
634                                       u64 delta_ns)
635 {
636         if (test_bit(__QDISC_STATE_DEACTIVATED,
637                      &qdisc_root_sleeping(wd->qdisc)->state))
638                 return;
639
640         if (hrtimer_is_queued(&wd->timer)) {
641                 /* If timer is already set in [expires, expires + delta_ns],
642                  * do not reprogram it.
643                  */
644                 if (wd->last_expires - expires <= delta_ns)
645                         return;
646         }
647
648         wd->last_expires = expires;
649         hrtimer_start_range_ns(&wd->timer,
650                                ns_to_ktime(expires),
651                                delta_ns,
652                                HRTIMER_MODE_ABS_PINNED);
653 }
654 EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
655
/* Cancel the timer of watchdog @wd via hrtimer_cancel(). */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
        hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
661
662 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
663 {
664         struct hlist_head *h;
665         unsigned int i;
666
667         h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
668
669         if (h != NULL) {
670                 for (i = 0; i < n; i++)
671                         INIT_HLIST_HEAD(&h[i]);
672         }
673         return h;
674 }
675
676 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
677 {
678         struct Qdisc_class_common *cl;
679         struct hlist_node *next;
680         struct hlist_head *nhash, *ohash;
681         unsigned int nsize, nmask, osize;
682         unsigned int i, h;
683
684         /* Rehash when load factor exceeds 0.75 */
685         if (clhash->hashelems * 4 <= clhash->hashsize * 3)
686                 return;
687         nsize = clhash->hashsize * 2;
688         nmask = nsize - 1;
689         nhash = qdisc_class_hash_alloc(nsize);
690         if (nhash == NULL)
691                 return;
692
693         ohash = clhash->hash;
694         osize = clhash->hashsize;
695
696         sch_tree_lock(sch);
697         for (i = 0; i < osize; i++) {
698                 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
699                         h = qdisc_class_hash(cl->classid, nmask);
700                         hlist_add_head(&cl->hnode, &nhash[h]);
701                 }
702         }
703         clhash->hash     = nhash;
704         clhash->hashsize = nsize;
705         clhash->hashmask = nmask;
706         sch_tree_unlock(sch);
707
708         kvfree(ohash);
709 }
710 EXPORT_SYMBOL(qdisc_class_hash_grow);
711
712 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
713 {
714         unsigned int size = 4;
715
716         clhash->hash = qdisc_class_hash_alloc(size);
717         if (!clhash->hash)
718                 return -ENOMEM;
719         clhash->hashsize  = size;
720         clhash->hashmask  = size - 1;
721         clhash->hashelems = 0;
722         return 0;
723 }
724 EXPORT_SYMBOL(qdisc_class_hash_init);
725
/* Free the bucket array of @clhash. Only the array itself is released;
 * nothing is done to any classes still linked in it.
 */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
        kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
731
732 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
733                              struct Qdisc_class_common *cl)
734 {
735         unsigned int h;
736
737         INIT_HLIST_NODE(&cl->hnode);
738         h = qdisc_class_hash(cl->classid, clhash->hashmask);
739         hlist_add_head(&cl->hnode, &clhash->hash[h]);
740         clhash->hashelems++;
741 }
742 EXPORT_SYMBOL(qdisc_class_hash_insert);
743
/* Unlink class @cl from its hash bucket and decrement the element count of
 * @clhash.
 */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
                             struct Qdisc_class_common *cl)
{
        hlist_del(&cl->hnode);
        clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
751
752 /* Allocate an unique handle from space managed by kernel
753  * Possible range is [8000-FFFF]:0000 (0x8000 values)
754  */
755 static u32 qdisc_alloc_handle(struct net_device *dev)
756 {
757         int i = 0x8000;
758         static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
759
760         do {
761                 autohandle += TC_H_MAKE(0x10000U, 0);
762                 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
763                         autohandle = TC_H_MAKE(0x80000000U, 0);
764                 if (!qdisc_lookup(dev, autohandle))
765                         return autohandle;
766                 cond_resched();
767         } while (--i > 0);
768
769         return 0;
770 }
771
/* Propagate a backlog reduction from @sch up to its ancestors.
 *
 * @sch: qdisc whose queue shrank
 * @n:   number of packets removed
 * @len: number of bytes removed
 *
 * Walks up the parent chain. For each ancestor qdisc, q.qlen is reduced by
 * @n, qstats.backlog by @len, and max(@n, 0) is added to its drop counter.
 * @sch's own counters are not touched here. The walk stops at an ingress
 * parent, at a qdisc flagged TCQ_F_NOPARENT, or when the parent qdisc
 * cannot be found. Does nothing if both @n and @len are zero.
 */
void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
        bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
        const struct Qdisc_class_ops *cops;
        unsigned long cl;
        u32 parentid;
        bool notify;
        int drops;

        if (n == 0 && len == 0)
                return;
        drops = max_t(int, n, 0);
        rcu_read_lock();
        while ((parentid = sch->parent)) {
                if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
                        break;

                if (sch->flags & TCQ_F_NOPARENT)
                        break;
                /* Notify parent qdisc only if child qdisc becomes empty.
                 *
                 * If child was empty even before update then backlog
                 * counter is screwed and we skip notification because
                 * parent class is already passive.
                 *
                 * If the original child was offloaded then it is allowed
                 * to be seen as empty, so the parent is notified anyway.
                 */
                notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
                                                       !qdisc_is_offloaded);
                /* TODO: perform the search on a per txq basis */
                /* From here on, sch refers to the parent qdisc. */
                sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
                if (sch == NULL) {
                        WARN_ON_ONCE(parentid != TC_H_ROOT);
                        break;
                }
                cops = sch->ops->cl_ops;
                if (notify && cops->qlen_notify) {
                        cl = cops->find(sch, parentid);
                        cops->qlen_notify(sch, cl);
                }
                sch->q.qlen -= n;
                sch->qstats.backlog -= len;
                __qdisc_qstats_drop(sch, drops);
        }
        rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
820
821 int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
822                               void *type_data)
823 {
824         struct net_device *dev = qdisc_dev(sch);
825         int err;
826
827         sch->flags &= ~TCQ_F_OFFLOADED;
828         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
829                 return 0;
830
831         err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
832         if (err == -EOPNOTSUPP)
833                 return 0;
834
835         if (!err)
836                 sch->flags |= TCQ_F_OFFLOADED;
837
838         return err;
839 }
840 EXPORT_SYMBOL(qdisc_offload_dump_helper);
841
842 void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
843                                 struct Qdisc *new, struct Qdisc *old,
844                                 enum tc_setup_type type, void *type_data,
845                                 struct netlink_ext_ack *extack)
846 {
847         bool any_qdisc_is_offloaded;
848         int err;
849
850         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
851                 return;
852
853         err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
854
855         /* Don't report error if the graft is part of destroy operation. */
856         if (!err || !new || new == &noop_qdisc)
857                 return;
858
859         /* Don't report error if the parent, the old child and the new
860          * one are not offloaded.
861          */
862         any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
863         any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
864         any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
865
866         if (any_qdisc_is_offloaded)
867                 NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
868 }
869 EXPORT_SYMBOL(qdisc_offload_graft_helper);
870
871 void qdisc_offload_query_caps(struct net_device *dev,
872                               enum tc_setup_type type,
873                               void *caps, size_t caps_len)
874 {
875         const struct net_device_ops *ops = dev->netdev_ops;
876         struct tc_query_caps_base base = {
877                 .type = type,
878                 .caps = caps,
879         };
880
881         memset(caps, 0, caps_len);
882
883         if (ops->ndo_setup_tc)
884                 ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
885 }
886 EXPORT_SYMBOL(qdisc_offload_query_caps);
887
888 static void qdisc_offload_graft_root(struct net_device *dev,
889                                      struct Qdisc *new, struct Qdisc *old,
890                                      struct netlink_ext_ack *extack)
891 {
892         struct tc_root_qopt_offload graft_offload = {
893                 .command        = TC_ROOT_GRAFT,
894                 .handle         = new ? new->handle : 0,
895                 .ingress        = (new && new->flags & TCQ_F_INGRESS) ||
896                                   (old && old->flags & TCQ_F_INGRESS),
897         };
898
899         qdisc_offload_graft_helper(dev, NULL, new, old,
900                                    TC_SETUP_ROOT_QDISC, &graft_offload, extack);
901 }
902
903 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
904                          u32 portid, u32 seq, u16 flags, int event)
905 {
906         struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
907         struct gnet_stats_queue __percpu *cpu_qstats = NULL;
908         struct tcmsg *tcm;
909         struct nlmsghdr  *nlh;
910         unsigned char *b = skb_tail_pointer(skb);
911         struct gnet_dump d;
912         struct qdisc_size_table *stab;
913         u32 block_index;
914         __u32 qlen;
915
916         cond_resched();
917         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
918         if (!nlh)
919                 goto out_nlmsg_trim;
920         tcm = nlmsg_data(nlh);
921         tcm->tcm_family = AF_UNSPEC;
922         tcm->tcm__pad1 = 0;
923         tcm->tcm__pad2 = 0;
924         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
925         tcm->tcm_parent = clid;
926         tcm->tcm_handle = q->handle;
927         tcm->tcm_info = refcount_read(&q->refcnt);
928         if (nla_put_string(skb, TCA_KIND, q->ops->id))
929                 goto nla_put_failure;
930         if (q->ops->ingress_block_get) {
931                 block_index = q->ops->ingress_block_get(q);
932                 if (block_index &&
933                     nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
934                         goto nla_put_failure;
935         }
936         if (q->ops->egress_block_get) {
937                 block_index = q->ops->egress_block_get(q);
938                 if (block_index &&
939                     nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
940                         goto nla_put_failure;
941         }
942         if (q->ops->dump && q->ops->dump(q, skb) < 0)
943                 goto nla_put_failure;
944         if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
945                 goto nla_put_failure;
946         qlen = qdisc_qlen_sum(q);
947
948         stab = rtnl_dereference(q->stab);
949         if (stab && qdisc_dump_stab(skb, stab) < 0)
950                 goto nla_put_failure;
951
952         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
953                                          NULL, &d, TCA_PAD) < 0)
954                 goto nla_put_failure;
955
956         if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
957                 goto nla_put_failure;
958
959         if (qdisc_is_percpu_stats(q)) {
960                 cpu_bstats = q->cpu_bstats;
961                 cpu_qstats = q->cpu_qstats;
962         }
963
964         if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
965             gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
966             gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
967                 goto nla_put_failure;
968
969         if (gnet_stats_finish_copy(&d) < 0)
970                 goto nla_put_failure;
971
972         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
973         return skb->len;
974
975 out_nlmsg_trim:
976 nla_put_failure:
977         nlmsg_trim(skb, b);
978         return -1;
979 }
980
981 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
982 {
983         if (q->flags & TCQ_F_BUILTIN)
984                 return true;
985         if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
986                 return true;
987
988         return false;
989 }
990
991 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
992                         struct nlmsghdr *n, u32 clid,
993                         struct Qdisc *old, struct Qdisc *new)
994 {
995         struct sk_buff *skb;
996         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
997
998         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
999         if (!skb)
1000                 return -ENOBUFS;
1001
1002         if (old && !tc_qdisc_dump_ignore(old, false)) {
1003                 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
1004                                   0, RTM_DELQDISC) < 0)
1005                         goto err_out;
1006         }
1007         if (new && !tc_qdisc_dump_ignore(new, false)) {
1008                 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
1009                                   old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1010                         goto err_out;
1011         }
1012
1013         if (skb->len)
1014                 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1015                                       n->nlmsg_flags & NLM_F_ECHO);
1016
1017 err_out:
1018         kfree_skb(skb);
1019         return -EINVAL;
1020 }
1021
1022 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
1023                                struct nlmsghdr *n, u32 clid,
1024                                struct Qdisc *old, struct Qdisc *new)
1025 {
1026         if (new || old)
1027                 qdisc_notify(net, skb, n, clid, old, new);
1028
1029         if (old)
1030                 qdisc_put(old);
1031 }
1032
1033 static void qdisc_clear_nolock(struct Qdisc *sch)
1034 {
1035         sch->flags &= ~TCQ_F_NOLOCK;
1036         if (!(sch->flags & TCQ_F_CPUSTATS))
1037                 return;
1038
1039         free_percpu(sch->cpu_bstats);
1040         free_percpu(sch->cpu_qstats);
1041         sch->cpu_bstats = NULL;
1042         sch->cpu_qstats = NULL;
1043         sch->flags &= ~TCQ_F_CPUSTATS;
1044 }
1045
1046 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
1047  * to device "dev".
1048  *
1049  * When appropriate send a netlink notification using 'skb'
1050  * and "n".
1051  *
1052  * On success, destroy old qdisc.
1053  */
1054
1055 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
1056                        struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
1057                        struct Qdisc *new, struct Qdisc *old,
1058                        struct netlink_ext_ack *extack)
1059 {
1060         struct Qdisc *q = old;
1061         struct net *net = dev_net(dev);
1062
1063         if (parent == NULL) {
1064                 unsigned int i, num_q, ingress;
1065
1066                 ingress = 0;
1067                 num_q = dev->num_tx_queues;
1068                 if ((q && q->flags & TCQ_F_INGRESS) ||
1069                     (new && new->flags & TCQ_F_INGRESS)) {
1070                         num_q = 1;
1071                         ingress = 1;
1072                         if (!dev_ingress_queue(dev)) {
1073                                 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
1074                                 return -ENOENT;
1075                         }
1076                 }
1077
1078                 if (dev->flags & IFF_UP)
1079                         dev_deactivate(dev);
1080
1081                 qdisc_offload_graft_root(dev, new, old, extack);
1082
1083                 if (new && new->ops->attach && !ingress)
1084                         goto skip;
1085
1086                 for (i = 0; i < num_q; i++) {
1087                         struct netdev_queue *dev_queue = dev_ingress_queue(dev);
1088
1089                         if (!ingress)
1090                                 dev_queue = netdev_get_tx_queue(dev, i);
1091
1092                         old = dev_graft_qdisc(dev_queue, new);
1093                         if (new && i > 0)
1094                                 qdisc_refcount_inc(new);
1095
1096                         if (!ingress)
1097                                 qdisc_put(old);
1098                 }
1099
1100 skip:
1101                 if (!ingress) {
1102                         old = rtnl_dereference(dev->qdisc);
1103                         if (new && !new->ops->attach)
1104                                 qdisc_refcount_inc(new);
1105                         rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
1106
1107                         notify_and_destroy(net, skb, n, classid, old, new);
1108
1109                         if (new && new->ops->attach)
1110                                 new->ops->attach(new);
1111                 } else {
1112                         notify_and_destroy(net, skb, n, classid, old, new);
1113                 }
1114
1115                 if (dev->flags & IFF_UP)
1116                         dev_activate(dev);
1117         } else {
1118                 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1119                 unsigned long cl;
1120                 int err;
1121
1122                 /* Only support running class lockless if parent is lockless */
1123                 if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
1124                         qdisc_clear_nolock(new);
1125
1126                 if (!cops || !cops->graft)
1127                         return -EOPNOTSUPP;
1128
1129                 cl = cops->find(parent, classid);
1130                 if (!cl) {
1131                         NL_SET_ERR_MSG(extack, "Specified class not found");
1132                         return -ENOENT;
1133                 }
1134
1135                 if (new && new->ops == &noqueue_qdisc_ops) {
1136                         NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
1137                         return -EINVAL;
1138                 }
1139
1140                 err = cops->graft(parent, cl, new, &old, extack);
1141                 if (err)
1142                         return err;
1143                 notify_and_destroy(net, skb, n, classid, old, new);
1144         }
1145         return 0;
1146 }
1147
1148 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1149                                    struct netlink_ext_ack *extack)
1150 {
1151         u32 block_index;
1152
1153         if (tca[TCA_INGRESS_BLOCK]) {
1154                 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1155
1156                 if (!block_index) {
1157                         NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1158                         return -EINVAL;
1159                 }
1160                 if (!sch->ops->ingress_block_set) {
1161                         NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1162                         return -EOPNOTSUPP;
1163                 }
1164                 sch->ops->ingress_block_set(sch, block_index);
1165         }
1166         if (tca[TCA_EGRESS_BLOCK]) {
1167                 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1168
1169                 if (!block_index) {
1170                         NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1171                         return -EINVAL;
1172                 }
1173                 if (!sch->ops->egress_block_set) {
1174                         NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1175                         return -EOPNOTSUPP;
1176                 }
1177                 sch->ops->egress_block_set(sch, block_index);
1178         }
1179         return 0;
1180 }
1181
1182 /*
1183    Allocate and initialize new qdisc.
1184
1185    Parameters are passed via opt.
1186  */
1187
1188 static struct Qdisc *qdisc_create(struct net_device *dev,
1189                                   struct netdev_queue *dev_queue,
1190                                   u32 parent, u32 handle,
1191                                   struct nlattr **tca, int *errp,
1192                                   struct netlink_ext_ack *extack)
1193 {
1194         int err;
1195         struct nlattr *kind = tca[TCA_KIND];
1196         struct Qdisc *sch;
1197         struct Qdisc_ops *ops;
1198         struct qdisc_size_table *stab;
1199
1200         ops = qdisc_lookup_ops(kind);
1201 #ifdef CONFIG_MODULES
1202         if (ops == NULL && kind != NULL) {
1203                 char name[IFNAMSIZ];
1204                 if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
1205                         /* We dropped the RTNL semaphore in order to
1206                          * perform the module load.  So, even if we
1207                          * succeeded in loading the module we have to
1208                          * tell the caller to replay the request.  We
1209                          * indicate this using -EAGAIN.
1210                          * We replay the request because the device may
1211                          * go away in the mean time.
1212                          */
1213                         rtnl_unlock();
1214                         request_module("sch_%s", name);
1215                         rtnl_lock();
1216                         ops = qdisc_lookup_ops(kind);
1217                         if (ops != NULL) {
1218                                 /* We will try again qdisc_lookup_ops,
1219                                  * so don't keep a reference.
1220                                  */
1221                                 module_put(ops->owner);
1222                                 err = -EAGAIN;
1223                                 goto err_out;
1224                         }
1225                 }
1226         }
1227 #endif
1228
1229         err = -ENOENT;
1230         if (!ops) {
1231                 NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
1232                 goto err_out;
1233         }
1234
1235         sch = qdisc_alloc(dev_queue, ops, extack);
1236         if (IS_ERR(sch)) {
1237                 err = PTR_ERR(sch);
1238                 goto err_out2;
1239         }
1240
1241         sch->parent = parent;
1242
1243         if (handle == TC_H_INGRESS) {
1244                 if (!(sch->flags & TCQ_F_INGRESS)) {
1245                         NL_SET_ERR_MSG(extack,
1246                                        "Specified parent ID is reserved for ingress and clsact Qdiscs");
1247                         err = -EINVAL;
1248                         goto err_out3;
1249                 }
1250                 handle = TC_H_MAKE(TC_H_INGRESS, 0);
1251         } else {
1252                 if (handle == 0) {
1253                         handle = qdisc_alloc_handle(dev);
1254                         if (handle == 0) {
1255                                 NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
1256                                 err = -ENOSPC;
1257                                 goto err_out3;
1258                         }
1259                 }
1260                 if (!netif_is_multiqueue(dev))
1261                         sch->flags |= TCQ_F_ONETXQUEUE;
1262         }
1263
1264         sch->handle = handle;
1265
1266         /* This exist to keep backward compatible with a userspace
1267          * loophole, what allowed userspace to get IFF_NO_QUEUE
1268          * facility on older kernels by setting tx_queue_len=0 (prior
1269          * to qdisc init), and then forgot to reinit tx_queue_len
1270          * before again attaching a qdisc.
1271          */
1272         if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1273                 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1274                 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1275         }
1276
1277         err = qdisc_block_indexes_set(sch, tca, extack);
1278         if (err)
1279                 goto err_out3;
1280
1281         if (ops->init) {
1282                 err = ops->init(sch, tca[TCA_OPTIONS], extack);
1283                 if (err != 0)
1284                         goto err_out5;
1285         }
1286
1287         if (tca[TCA_STAB]) {
1288                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1289                 if (IS_ERR(stab)) {
1290                         err = PTR_ERR(stab);
1291                         goto err_out4;
1292                 }
1293                 rcu_assign_pointer(sch->stab, stab);
1294         }
1295         if (tca[TCA_RATE]) {
1296                 err = -EOPNOTSUPP;
1297                 if (sch->flags & TCQ_F_MQROOT) {
1298                         NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
1299                         goto err_out4;
1300                 }
1301
1302                 err = gen_new_estimator(&sch->bstats,
1303                                         sch->cpu_bstats,
1304                                         &sch->rate_est,
1305                                         NULL,
1306                                         true,
1307                                         tca[TCA_RATE]);
1308                 if (err) {
1309                         NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
1310                         goto err_out4;
1311                 }
1312         }
1313
1314         qdisc_hash_add(sch, false);
1315         trace_qdisc_create(ops, dev, parent);
1316
1317         return sch;
1318
1319 err_out5:
1320         /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1321         if (ops->destroy)
1322                 ops->destroy(sch);
1323 err_out3:
1324         netdev_put(dev, &sch->dev_tracker);
1325         qdisc_free(sch);
1326 err_out2:
1327         module_put(ops->owner);
1328 err_out:
1329         *errp = err;
1330         return NULL;
1331
1332 err_out4:
1333         /*
1334          * Any broken qdiscs that would require a ops->reset() here?
1335          * The qdisc was never in action so it shouldn't be necessary.
1336          */
1337         qdisc_put_stab(rtnl_dereference(sch->stab));
1338         if (ops->destroy)
1339                 ops->destroy(sch);
1340         goto err_out3;
1341 }
1342
1343 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1344                         struct netlink_ext_ack *extack)
1345 {
1346         struct qdisc_size_table *ostab, *stab = NULL;
1347         int err = 0;
1348
1349         if (tca[TCA_OPTIONS]) {
1350                 if (!sch->ops->change) {
1351                         NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1352                         return -EINVAL;
1353                 }
1354                 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1355                         NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1356                         return -EOPNOTSUPP;
1357                 }
1358                 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1359                 if (err)
1360                         return err;
1361         }
1362
1363         if (tca[TCA_STAB]) {
1364                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1365                 if (IS_ERR(stab))
1366                         return PTR_ERR(stab);
1367         }
1368
1369         ostab = rtnl_dereference(sch->stab);
1370         rcu_assign_pointer(sch->stab, stab);
1371         qdisc_put_stab(ostab);
1372
1373         if (tca[TCA_RATE]) {
1374                 /* NB: ignores errors from replace_estimator
1375                    because change can't be undone. */
1376                 if (sch->flags & TCQ_F_MQROOT)
1377                         goto out;
1378                 gen_replace_estimator(&sch->bstats,
1379                                       sch->cpu_bstats,
1380                                       &sch->rate_est,
1381                                       NULL,
1382                                       true,
1383                                       tca[TCA_RATE]);
1384         }
1385 out:
1386         return 0;
1387 }
1388
1389 struct check_loop_arg {
1390         struct qdisc_walker     w;
1391         struct Qdisc            *p;
1392         int                     depth;
1393 };
1394
1395 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1396                          struct qdisc_walker *w);
1397
1398 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1399 {
1400         struct check_loop_arg   arg;
1401
1402         if (q->ops->cl_ops == NULL)
1403                 return 0;
1404
1405         arg.w.stop = arg.w.skip = arg.w.count = 0;
1406         arg.w.fn = check_loop_fn;
1407         arg.depth = depth;
1408         arg.p = p;
1409         q->ops->cl_ops->walk(q, &arg.w);
1410         return arg.w.stop ? -ELOOP : 0;
1411 }
1412
1413 static int
1414 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1415 {
1416         struct Qdisc *leaf;
1417         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1418         struct check_loop_arg *arg = (struct check_loop_arg *)w;
1419
1420         leaf = cops->leaf(q, cl);
1421         if (leaf) {
1422                 if (leaf == arg->p || arg->depth > 7)
1423                         return -ELOOP;
1424                 return check_loop(leaf, arg->p, arg->depth + 1);
1425         }
1426         return 0;
1427 }
1428
1429 const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
1430         [TCA_KIND]              = { .type = NLA_STRING },
1431         [TCA_RATE]              = { .type = NLA_BINARY,
1432                                     .len = sizeof(struct tc_estimator) },
1433         [TCA_STAB]              = { .type = NLA_NESTED },
1434         [TCA_DUMP_INVISIBLE]    = { .type = NLA_FLAG },
1435         [TCA_CHAIN]             = { .type = NLA_U32 },
1436         [TCA_INGRESS_BLOCK]     = { .type = NLA_U32 },
1437         [TCA_EGRESS_BLOCK]      = { .type = NLA_U32 },
1438 };
1439
1440 /*
1441  * Delete/get qdisc.
1442  */
1443
1444 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1445                         struct netlink_ext_ack *extack)
1446 {
1447         struct net *net = sock_net(skb->sk);
1448         struct tcmsg *tcm = nlmsg_data(n);
1449         struct nlattr *tca[TCA_MAX + 1];
1450         struct net_device *dev;
1451         u32 clid;
1452         struct Qdisc *q = NULL;
1453         struct Qdisc *p = NULL;
1454         int err;
1455
1456         err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1457                                      rtm_tca_policy, extack);
1458         if (err < 0)
1459                 return err;
1460
1461         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1462         if (!dev)
1463                 return -ENODEV;
1464
1465         clid = tcm->tcm_parent;
1466         if (clid) {
1467                 if (clid != TC_H_ROOT) {
1468                         if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1469                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1470                                 if (!p) {
1471                                         NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1472                                         return -ENOENT;
1473                                 }
1474                                 q = qdisc_leaf(p, clid);
1475                         } else if (dev_ingress_queue(dev)) {
1476                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1477                         }
1478                 } else {
1479                         q = rtnl_dereference(dev->qdisc);
1480                 }
1481                 if (!q) {
1482                         NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1483                         return -ENOENT;
1484                 }
1485
1486                 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1487                         NL_SET_ERR_MSG(extack, "Invalid handle");
1488                         return -EINVAL;
1489                 }
1490         } else {
1491                 q = qdisc_lookup(dev, tcm->tcm_handle);
1492                 if (!q) {
1493                         NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1494                         return -ENOENT;
1495                 }
1496         }
1497
1498         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1499                 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1500                 return -EINVAL;
1501         }
1502
1503         if (n->nlmsg_type == RTM_DELQDISC) {
1504                 if (!clid) {
1505                         NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1506                         return -EINVAL;
1507                 }
1508                 if (q->handle == 0) {
1509                         NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1510                         return -ENOENT;
1511                 }
1512                 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1513                 if (err != 0)
1514                         return err;
1515         } else {
1516                 qdisc_notify(net, skb, n, clid, NULL, q);
1517         }
1518         return 0;
1519 }
1520
1521 /*
1522  * Create/change qdisc.
1523  */
1524
1525 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1526                            struct netlink_ext_ack *extack)
1527 {
1528         struct net *net = sock_net(skb->sk);
1529         struct tcmsg *tcm;
1530         struct nlattr *tca[TCA_MAX + 1];
1531         struct net_device *dev;
1532         u32 clid;
1533         struct Qdisc *q, *p;
1534         int err;
1535
1536 replay:
1537         /* Reinit, just in case something touches this. */
1538         err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1539                                      rtm_tca_policy, extack);
1540         if (err < 0)
1541                 return err;
1542
1543         tcm = nlmsg_data(n);
1544         clid = tcm->tcm_parent;
1545         q = p = NULL;
1546
1547         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1548         if (!dev)
1549                 return -ENODEV;
1550
1551
1552         if (clid) {
1553                 if (clid != TC_H_ROOT) {
1554                         if (clid != TC_H_INGRESS) {
1555                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1556                                 if (!p) {
1557                                         NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1558                                         return -ENOENT;
1559                                 }
1560                                 q = qdisc_leaf(p, clid);
1561                         } else if (dev_ingress_queue_create(dev)) {
1562                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1563                         }
1564                 } else {
1565                         q = rtnl_dereference(dev->qdisc);
1566                 }
1567
1568                 /* It may be default qdisc, ignore it */
1569                 if (q && q->handle == 0)
1570                         q = NULL;
1571
1572                 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1573                         if (tcm->tcm_handle) {
1574                                 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1575                                         NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1576                                         return -EEXIST;
1577                                 }
1578                                 if (TC_H_MIN(tcm->tcm_handle)) {
1579                                         NL_SET_ERR_MSG(extack, "Invalid minor handle");
1580                                         return -EINVAL;
1581                                 }
1582                                 q = qdisc_lookup(dev, tcm->tcm_handle);
1583                                 if (!q)
1584                                         goto create_n_graft;
1585                                 if (n->nlmsg_flags & NLM_F_EXCL) {
1586                                         NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1587                                         return -EEXIST;
1588                                 }
1589                                 if (tca[TCA_KIND] &&
1590                                     nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1591                                         NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1592                                         return -EINVAL;
1593                                 }
1594                                 if (q == p ||
1595                                     (p && check_loop(q, p, 0))) {
1596                                         NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1597                                         return -ELOOP;
1598                                 }
1599                                 qdisc_refcount_inc(q);
1600                                 goto graft;
1601                         } else {
1602                                 if (!q)
1603                                         goto create_n_graft;
1604
1605                                 /* This magic test requires explanation.
1606                                  *
1607                                  *   We know, that some child q is already
1608                                  *   attached to this parent and have choice:
1609                                  *   either to change it or to create/graft new one.
1610                                  *
1611                                  *   1. We are allowed to create/graft only
1612                                  *   if CREATE and REPLACE flags are set.
1613                                  *
1614                                  *   2. If EXCL is set, requestor wanted to say,
1615                                  *   that qdisc tcm_handle is not expected
1616                                  *   to exist, so that we choose create/graft too.
1617                                  *
1618                                  *   3. The last case is when no flags are set.
1619                                  *   Alas, it is sort of hole in API, we
1620                                  *   cannot decide what to do unambiguously.
1621                                  *   For now we select create/graft, if
1622                                  *   user gave KIND, which does not match existing.
1623                                  */
1624                                 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1625                                     (n->nlmsg_flags & NLM_F_REPLACE) &&
1626                                     ((n->nlmsg_flags & NLM_F_EXCL) ||
1627                                      (tca[TCA_KIND] &&
1628                                       nla_strcmp(tca[TCA_KIND], q->ops->id))))
1629                                         goto create_n_graft;
1630                         }
1631                 }
1632         } else {
1633                 if (!tcm->tcm_handle) {
1634                         NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1635                         return -EINVAL;
1636                 }
1637                 q = qdisc_lookup(dev, tcm->tcm_handle);
1638         }
1639
1640         /* Change qdisc parameters */
1641         if (!q) {
1642                 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1643                 return -ENOENT;
1644         }
1645         if (n->nlmsg_flags & NLM_F_EXCL) {
1646                 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1647                 return -EEXIST;
1648         }
1649         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1650                 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1651                 return -EINVAL;
1652         }
1653         err = qdisc_change(q, tca, extack);
1654         if (err == 0)
1655                 qdisc_notify(net, skb, n, clid, NULL, q);
1656         return err;
1657
1658 create_n_graft:
1659         if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1660                 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1661                 return -ENOENT;
1662         }
1663         if (clid == TC_H_INGRESS) {
1664                 if (dev_ingress_queue(dev)) {
1665                         q = qdisc_create(dev, dev_ingress_queue(dev),
1666                                          tcm->tcm_parent, tcm->tcm_parent,
1667                                          tca, &err, extack);
1668                 } else {
1669                         NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1670                         err = -ENOENT;
1671                 }
1672         } else {
1673                 struct netdev_queue *dev_queue;
1674
1675                 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1676                         dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1677                 else if (p)
1678                         dev_queue = p->dev_queue;
1679                 else
1680                         dev_queue = netdev_get_tx_queue(dev, 0);
1681
1682                 q = qdisc_create(dev, dev_queue,
1683                                  tcm->tcm_parent, tcm->tcm_handle,
1684                                  tca, &err, extack);
1685         }
1686         if (q == NULL) {
1687                 if (err == -EAGAIN)
1688                         goto replay;
1689                 return err;
1690         }
1691
1692 graft:
1693         err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1694         if (err) {
1695                 if (q)
1696                         qdisc_put(q);
1697                 return err;
1698         }
1699
1700         return 0;
1701 }
1702
1703 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1704                               struct netlink_callback *cb,
1705                               int *q_idx_p, int s_q_idx, bool recur,
1706                               bool dump_invisible)
1707 {
1708         int ret = 0, q_idx = *q_idx_p;
1709         struct Qdisc *q;
1710         int b;
1711
1712         if (!root)
1713                 return 0;
1714
1715         q = root;
1716         if (q_idx < s_q_idx) {
1717                 q_idx++;
1718         } else {
1719                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1720                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1721                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1722                                   RTM_NEWQDISC) <= 0)
1723                         goto done;
1724                 q_idx++;
1725         }
1726
1727         /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1728          * itself has already been dumped.
1729          *
1730          * If we've already dumped the top-level (ingress) qdisc above and the global
1731          * qdisc hashtable, we don't want to hit it again
1732          */
1733         if (!qdisc_dev(root) || !recur)
1734                 goto out;
1735
1736         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1737                 if (q_idx < s_q_idx) {
1738                         q_idx++;
1739                         continue;
1740                 }
1741                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1742                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1743                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1744                                   RTM_NEWQDISC) <= 0)
1745                         goto done;
1746                 q_idx++;
1747         }
1748
1749 out:
1750         *q_idx_p = q_idx;
1751         return ret;
1752 done:
1753         ret = -1;
1754         goto out;
1755 }
1756
1757 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1758 {
1759         struct net *net = sock_net(skb->sk);
1760         int idx, q_idx;
1761         int s_idx, s_q_idx;
1762         struct net_device *dev;
1763         const struct nlmsghdr *nlh = cb->nlh;
1764         struct nlattr *tca[TCA_MAX + 1];
1765         int err;
1766
1767         s_idx = cb->args[0];
1768         s_q_idx = q_idx = cb->args[1];
1769
1770         idx = 0;
1771         ASSERT_RTNL();
1772
1773         err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1774                                      rtm_tca_policy, cb->extack);
1775         if (err < 0)
1776                 return err;
1777
1778         for_each_netdev(net, dev) {
1779                 struct netdev_queue *dev_queue;
1780
1781                 if (idx < s_idx)
1782                         goto cont;
1783                 if (idx > s_idx)
1784                         s_q_idx = 0;
1785                 q_idx = 0;
1786
1787                 if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
1788                                        skb, cb, &q_idx, s_q_idx,
1789                                        true, tca[TCA_DUMP_INVISIBLE]) < 0)
1790                         goto done;
1791
1792                 dev_queue = dev_ingress_queue(dev);
1793                 if (dev_queue &&
1794                     tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1795                                        &q_idx, s_q_idx, false,
1796                                        tca[TCA_DUMP_INVISIBLE]) < 0)
1797                         goto done;
1798
1799 cont:
1800                 idx++;
1801         }
1802
1803 done:
1804         cb->args[0] = idx;
1805         cb->args[1] = q_idx;
1806
1807         return skb->len;
1808 }
1809
1810
1811
1812 /************************************************
1813  *      Traffic classes manipulation.           *
1814  ************************************************/
1815
1816 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1817                           unsigned long cl,
1818                           u32 portid, u32 seq, u16 flags, int event)
1819 {
1820         struct tcmsg *tcm;
1821         struct nlmsghdr  *nlh;
1822         unsigned char *b = skb_tail_pointer(skb);
1823         struct gnet_dump d;
1824         const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1825
1826         cond_resched();
1827         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1828         if (!nlh)
1829                 goto out_nlmsg_trim;
1830         tcm = nlmsg_data(nlh);
1831         tcm->tcm_family = AF_UNSPEC;
1832         tcm->tcm__pad1 = 0;
1833         tcm->tcm__pad2 = 0;
1834         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1835         tcm->tcm_parent = q->handle;
1836         tcm->tcm_handle = q->handle;
1837         tcm->tcm_info = 0;
1838         if (nla_put_string(skb, TCA_KIND, q->ops->id))
1839                 goto nla_put_failure;
1840         if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1841                 goto nla_put_failure;
1842
1843         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1844                                          NULL, &d, TCA_PAD) < 0)
1845                 goto nla_put_failure;
1846
1847         if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1848                 goto nla_put_failure;
1849
1850         if (gnet_stats_finish_copy(&d) < 0)
1851                 goto nla_put_failure;
1852
1853         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1854         return skb->len;
1855
1856 out_nlmsg_trim:
1857 nla_put_failure:
1858         nlmsg_trim(skb, b);
1859         return -1;
1860 }
1861
1862 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1863                          struct nlmsghdr *n, struct Qdisc *q,
1864                          unsigned long cl, int event)
1865 {
1866         struct sk_buff *skb;
1867         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1868
1869         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1870         if (!skb)
1871                 return -ENOBUFS;
1872
1873         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1874                 kfree_skb(skb);
1875                 return -EINVAL;
1876         }
1877
1878         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1879                               n->nlmsg_flags & NLM_F_ECHO);
1880 }
1881
1882 static int tclass_del_notify(struct net *net,
1883                              const struct Qdisc_class_ops *cops,
1884                              struct sk_buff *oskb, struct nlmsghdr *n,
1885                              struct Qdisc *q, unsigned long cl,
1886                              struct netlink_ext_ack *extack)
1887 {
1888         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1889         struct sk_buff *skb;
1890         int err = 0;
1891
1892         if (!cops->delete)
1893                 return -EOPNOTSUPP;
1894
1895         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1896         if (!skb)
1897                 return -ENOBUFS;
1898
1899         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1900                            RTM_DELTCLASS) < 0) {
1901                 kfree_skb(skb);
1902                 return -EINVAL;
1903         }
1904
1905         err = cops->delete(q, cl, extack);
1906         if (err) {
1907                 kfree_skb(skb);
1908                 return err;
1909         }
1910
1911         err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1912                              n->nlmsg_flags & NLM_F_ECHO);
1913         return err;
1914 }
1915
1916 #ifdef CONFIG_NET_CLS
1917
1918 struct tcf_bind_args {
1919         struct tcf_walker w;
1920         unsigned long base;
1921         unsigned long cl;
1922         u32 classid;
1923 };
1924
1925 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1926 {
1927         struct tcf_bind_args *a = (void *)arg;
1928
1929         if (n && tp->ops->bind_class) {
1930                 struct Qdisc *q = tcf_block_q(tp->chain->block);
1931
1932                 sch_tree_lock(q);
1933                 tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
1934                 sch_tree_unlock(q);
1935         }
1936         return 0;
1937 }
1938
1939 struct tc_bind_class_args {
1940         struct qdisc_walker w;
1941         unsigned long new_cl;
1942         u32 portid;
1943         u32 clid;
1944 };
1945
1946 static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
1947                                 struct qdisc_walker *w)
1948 {
1949         struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
1950         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1951         struct tcf_block *block;
1952         struct tcf_chain *chain;
1953
1954         block = cops->tcf_block(q, cl, NULL);
1955         if (!block)
1956                 return 0;
1957         for (chain = tcf_get_next_chain(block, NULL);
1958              chain;
1959              chain = tcf_get_next_chain(block, chain)) {
1960                 struct tcf_proto *tp;
1961
1962                 for (tp = tcf_get_next_proto(chain, NULL);
1963                      tp; tp = tcf_get_next_proto(chain, tp)) {
1964                         struct tcf_bind_args arg = {};
1965
1966                         arg.w.fn = tcf_node_bind;
1967                         arg.classid = a->clid;
1968                         arg.base = cl;
1969                         arg.cl = a->new_cl;
1970                         tp->ops->walk(tp, &arg.w, true);
1971                 }
1972         }
1973
1974         return 0;
1975 }
1976
1977 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1978                            unsigned long new_cl)
1979 {
1980         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1981         struct tc_bind_class_args args = {};
1982
1983         if (!cops->tcf_block)
1984                 return;
1985         args.portid = portid;
1986         args.clid = clid;
1987         args.new_cl = new_cl;
1988         args.w.fn = tc_bind_class_walker;
1989         q->ops->cl_ops->walk(q, &args.w);
1990 }
1991
1992 #else
1993
/*
 * Variant built when CONFIG_NET_CLS is not defined: same signature as the
 * real implementation so callers need no #ifdef, but it does nothing.
 */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
                           unsigned long new_cl)
{
}
1998
1999 #endif
2000
2001 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
2002                          struct netlink_ext_ack *extack)
2003 {
2004         struct net *net = sock_net(skb->sk);
2005         struct tcmsg *tcm = nlmsg_data(n);
2006         struct nlattr *tca[TCA_MAX + 1];
2007         struct net_device *dev;
2008         struct Qdisc *q = NULL;
2009         const struct Qdisc_class_ops *cops;
2010         unsigned long cl = 0;
2011         unsigned long new_cl;
2012         u32 portid;
2013         u32 clid;
2014         u32 qid;
2015         int err;
2016
2017         err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
2018                                      rtm_tca_policy, extack);
2019         if (err < 0)
2020                 return err;
2021
2022         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2023         if (!dev)
2024                 return -ENODEV;
2025
2026         /*
2027            parent == TC_H_UNSPEC - unspecified parent.
2028            parent == TC_H_ROOT   - class is root, which has no parent.
2029            parent == X:0         - parent is root class.
2030            parent == X:Y         - parent is a node in hierarchy.
2031            parent == 0:Y         - parent is X:Y, where X:0 is qdisc.
2032
2033            handle == 0:0         - generate handle from kernel pool.
2034            handle == 0:Y         - class is X:Y, where X:0 is qdisc.
2035            handle == X:Y         - clear.
2036            handle == X:0         - root class.
2037          */
2038
2039         /* Step 1. Determine qdisc handle X:0 */
2040
2041         portid = tcm->tcm_parent;
2042         clid = tcm->tcm_handle;
2043         qid = TC_H_MAJ(clid);
2044
2045         if (portid != TC_H_ROOT) {
2046                 u32 qid1 = TC_H_MAJ(portid);
2047
2048                 if (qid && qid1) {
2049                         /* If both majors are known, they must be identical. */
2050                         if (qid != qid1)
2051                                 return -EINVAL;
2052                 } else if (qid1) {
2053                         qid = qid1;
2054                 } else if (qid == 0)
2055                         qid = rtnl_dereference(dev->qdisc)->handle;
2056
2057                 /* Now qid is genuine qdisc handle consistent
2058                  * both with parent and child.
2059                  *
2060                  * TC_H_MAJ(portid) still may be unspecified, complete it now.
2061                  */
2062                 if (portid)
2063                         portid = TC_H_MAKE(qid, portid);
2064         } else {
2065                 if (qid == 0)
2066                         qid = rtnl_dereference(dev->qdisc)->handle;
2067         }
2068
2069         /* OK. Locate qdisc */
2070         q = qdisc_lookup(dev, qid);
2071         if (!q)
2072                 return -ENOENT;
2073
2074         /* An check that it supports classes */
2075         cops = q->ops->cl_ops;
2076         if (cops == NULL)
2077                 return -EINVAL;
2078
2079         /* Now try to get class */
2080         if (clid == 0) {
2081                 if (portid == TC_H_ROOT)
2082                         clid = qid;
2083         } else
2084                 clid = TC_H_MAKE(qid, clid);
2085
2086         if (clid)
2087                 cl = cops->find(q, clid);
2088
2089         if (cl == 0) {
2090                 err = -ENOENT;
2091                 if (n->nlmsg_type != RTM_NEWTCLASS ||
2092                     !(n->nlmsg_flags & NLM_F_CREATE))
2093                         goto out;
2094         } else {
2095                 switch (n->nlmsg_type) {
2096                 case RTM_NEWTCLASS:
2097                         err = -EEXIST;
2098                         if (n->nlmsg_flags & NLM_F_EXCL)
2099                                 goto out;
2100                         break;
2101                 case RTM_DELTCLASS:
2102                         err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
2103                         /* Unbind the class with flilters with 0 */
2104                         tc_bind_tclass(q, portid, clid, 0);
2105                         goto out;
2106                 case RTM_GETTCLASS:
2107                         err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
2108                         goto out;
2109                 default:
2110                         err = -EINVAL;
2111                         goto out;
2112                 }
2113         }
2114
2115         if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2116                 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2117                 return -EOPNOTSUPP;
2118         }
2119
2120         new_cl = cl;
2121         err = -EOPNOTSUPP;
2122         if (cops->change)
2123                 err = cops->change(q, clid, portid, tca, &new_cl, extack);
2124         if (err == 0) {
2125                 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
2126                 /* We just create a new class, need to do reverse binding. */
2127                 if (cl != new_cl)
2128                         tc_bind_tclass(q, portid, clid, new_cl);
2129         }
2130 out:
2131         return err;
2132 }
2133
2134 struct qdisc_dump_args {
2135         struct qdisc_walker     w;
2136         struct sk_buff          *skb;
2137         struct netlink_callback *cb;
2138 };
2139
2140 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2141                             struct qdisc_walker *arg)
2142 {
2143         struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2144
2145         return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2146                               a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2147                               RTM_NEWTCLASS);
2148 }
2149
2150 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2151                                 struct tcmsg *tcm, struct netlink_callback *cb,
2152                                 int *t_p, int s_t)
2153 {
2154         struct qdisc_dump_args arg;
2155
2156         if (tc_qdisc_dump_ignore(q, false) ||
2157             *t_p < s_t || !q->ops->cl_ops ||
2158             (tcm->tcm_parent &&
2159              TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2160                 (*t_p)++;
2161                 return 0;
2162         }
2163         if (*t_p > s_t)
2164                 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2165         arg.w.fn = qdisc_class_dump;
2166         arg.skb = skb;
2167         arg.cb = cb;
2168         arg.w.stop  = 0;
2169         arg.w.skip = cb->args[1];
2170         arg.w.count = 0;
2171         q->ops->cl_ops->walk(q, &arg.w);
2172         cb->args[1] = arg.w.count;
2173         if (arg.w.stop)
2174                 return -1;
2175         (*t_p)++;
2176         return 0;
2177 }
2178
2179 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2180                                struct tcmsg *tcm, struct netlink_callback *cb,
2181                                int *t_p, int s_t, bool recur)
2182 {
2183         struct Qdisc *q;
2184         int b;
2185
2186         if (!root)
2187                 return 0;
2188
2189         if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2190                 return -1;
2191
2192         if (!qdisc_dev(root) || !recur)
2193                 return 0;
2194
2195         if (tcm->tcm_parent) {
2196                 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2197                 if (q && q != root &&
2198                     tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2199                         return -1;
2200                 return 0;
2201         }
2202         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2203                 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2204                         return -1;
2205         }
2206
2207         return 0;
2208 }
2209
2210 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2211 {
2212         struct tcmsg *tcm = nlmsg_data(cb->nlh);
2213         struct net *net = sock_net(skb->sk);
2214         struct netdev_queue *dev_queue;
2215         struct net_device *dev;
2216         int t, s_t;
2217
2218         if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2219                 return 0;
2220         dev = dev_get_by_index(net, tcm->tcm_ifindex);
2221         if (!dev)
2222                 return 0;
2223
2224         s_t = cb->args[0];
2225         t = 0;
2226
2227         if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
2228                                 skb, tcm, cb, &t, s_t, true) < 0)
2229                 goto done;
2230
2231         dev_queue = dev_ingress_queue(dev);
2232         if (dev_queue &&
2233             tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
2234                                 &t, s_t, false) < 0)
2235                 goto done;
2236
2237 done:
2238         cb->args[0] = t;
2239
2240         dev_put(dev);
2241         return skb->len;
2242 }
2243
2244 #ifdef CONFIG_PROC_FS
2245 static int psched_show(struct seq_file *seq, void *v)
2246 {
2247         seq_printf(seq, "%08x %08x %08x %08x\n",
2248                    (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2249                    1000000,
2250                    (u32)NSEC_PER_SEC / hrtimer_resolution);
2251
2252         return 0;
2253 }
2254
2255 static int __net_init psched_net_init(struct net *net)
2256 {
2257         struct proc_dir_entry *e;
2258
2259         e = proc_create_single("psched", 0, net->proc_net, psched_show);
2260         if (e == NULL)
2261                 return -ENOMEM;
2262
2263         return 0;
2264 }
2265
/* Per-namespace teardown: remove the "psched" entry from net->proc_net. */
static void __net_exit psched_net_exit(struct net *net)
{
        remove_proc_entry("psched", net->proc_net);
}
2270 #else
/*
 * Variant built when CONFIG_PROC_FS is not defined: there is no proc entry
 * to create, so setup trivially succeeds.
 */
static int __net_init psched_net_init(struct net *net)
{
        return 0;
}
2275
/* Variant built when CONFIG_PROC_FS is not defined: nothing to remove. */
static void __net_exit psched_net_exit(struct net *net)
{
}
2279 #endif
2280
/*
 * Per-network-namespace hooks for the packet scheduler; registered from
 * pktsched_init() via register_pernet_subsys().
 */
static struct pernet_operations psched_net_ops = {
        .init = psched_net_init,
        .exit = psched_net_exit,
};
2285
2286 static int __init pktsched_init(void)
2287 {
2288         int err;
2289
2290         err = register_pernet_subsys(&psched_net_ops);
2291         if (err) {
2292                 pr_err("pktsched_init: "
2293                        "cannot initialize per netns operations\n");
2294                 return err;
2295         }
2296
2297         register_qdisc(&pfifo_fast_ops);
2298         register_qdisc(&pfifo_qdisc_ops);
2299         register_qdisc(&bfifo_qdisc_ops);
2300         register_qdisc(&pfifo_head_drop_qdisc_ops);
2301         register_qdisc(&mq_qdisc_ops);
2302         register_qdisc(&noqueue_qdisc_ops);
2303
2304         rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2305         rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2306         rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2307                       0);
2308         rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2309         rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2310         rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2311                       0);
2312
2313         return 0;
2314 }
2315
2316 subsys_initcall(pktsched_init);