net: fix NULL pointer in skb_segment_list
[platform/kernel/linux-rpi.git] / net / sched / sch_api.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_api.c  Packet scheduler API.
4  *
5  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  * Fixes:
8  *
9  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
10  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
11  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
12  */
13
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/string.h>
18 #include <linux/errno.h>
19 #include <linux/skbuff.h>
20 #include <linux/init.h>
21 #include <linux/proc_fs.h>
22 #include <linux/seq_file.h>
23 #include <linux/kmod.h>
24 #include <linux/list.h>
25 #include <linux/hrtimer.h>
26 #include <linux/slab.h>
27 #include <linux/hashtable.h>
28
29 #include <net/net_namespace.h>
30 #include <net/sock.h>
31 #include <net/netlink.h>
32 #include <net/pkt_sched.h>
33 #include <net/pkt_cls.h>
34
35 #include <trace/events/qdisc.h>
36
37 /*
38
39    Short review.
40    -------------
41
42    This file consists of two interrelated parts:
43
44    1. queueing disciplines manager frontend.
45    2. traffic classes manager frontend.
46
47    Generally, queueing discipline ("qdisc") is a black box,
48    which is able to enqueue packets and to dequeue them (when
49    device is ready to send something) in order and at times
50    determined by algorithm hidden in it.
51
52    qdiscs are divided into two categories:
53    - "queues", which have no internal structure visible from outside.
54    - "schedulers", which split all the packets to "traffic classes",
55      using "packet classifiers" (look at cls_api.c)
56
57    In turn, classes may have child qdiscs (as rule, queues)
58    attached to them etc. etc. etc.
59
60    The goal of the routines in this file is to translate
61    information supplied by user in the form of handles
62    to more intelligible for kernel form, to make some sanity
63    checks and part of work, which is common to all qdiscs
64    and to provide rtnetlink notifications.
65
66    All real intelligent work is done inside qdisc modules.
67
68
69
70    Every discipline has two major routines: enqueue and dequeue.
71
72    ---dequeue
73
74    dequeue usually returns a skb to send. It is allowed to return NULL,
75    but it does not mean that queue is empty, it just means that
76    discipline does not want to send anything this time.
77    Queue is really empty if q->q.qlen == 0.
78    For complicated disciplines with multiple queues q->q is not
79    real packet queue, but however q->q.qlen must be valid.
80
81    ---enqueue
82
83    enqueue returns 0 if the packet was enqueued successfully.
84    If a packet (this one or another one) was dropped, it returns
85    a non-zero error code.
86    NET_XMIT_DROP        - this packet dropped
87      Expected action: do not backoff, but wait until queue will clear.
88    NET_XMIT_CN          - probably this packet enqueued, but another one dropped.
89      Expected action: backoff or ignore
90
91    Auxiliary routines:
92
93    ---peek
94
95    like dequeue but without removing a packet from the queue
96
97    ---reset
98
99    returns qdisc to initial state: purge all buffers, clear all
100    timers, counters (except for statistics) etc.
101
102    ---init
103
104    initializes newly created qdisc.
105
106    ---destroy
107
108    destroys resources allocated by init and during lifetime of qdisc.
109
110    ---change
111
112    changes qdisc parameters.
113  */
114
/*
 * Reader/writer lock guarding the list of registered qdisc modules
 * (qdisc_base). Registration, unregistration and changing the default
 * qdisc take it for writing; lookups by name take it for reading.
 */
static DEFINE_RWLOCK(qdisc_mod_lock);
117
118
119 /************************************************
120  *      Queueing disciplines manipulation.      *
121  ************************************************/
122
123
124 /* The list of all installed queueing disciplines. */
125
126 static struct Qdisc_ops *qdisc_base;
127
128 /* Register/unregister queueing discipline */
129
130 int register_qdisc(struct Qdisc_ops *qops)
131 {
132         struct Qdisc_ops *q, **qp;
133         int rc = -EEXIST;
134
135         write_lock(&qdisc_mod_lock);
136         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
137                 if (!strcmp(qops->id, q->id))
138                         goto out;
139
140         if (qops->enqueue == NULL)
141                 qops->enqueue = noop_qdisc_ops.enqueue;
142         if (qops->peek == NULL) {
143                 if (qops->dequeue == NULL)
144                         qops->peek = noop_qdisc_ops.peek;
145                 else
146                         goto out_einval;
147         }
148         if (qops->dequeue == NULL)
149                 qops->dequeue = noop_qdisc_ops.dequeue;
150
151         if (qops->cl_ops) {
152                 const struct Qdisc_class_ops *cops = qops->cl_ops;
153
154                 if (!(cops->find && cops->walk && cops->leaf))
155                         goto out_einval;
156
157                 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
158                         goto out_einval;
159         }
160
161         qops->next = NULL;
162         *qp = qops;
163         rc = 0;
164 out:
165         write_unlock(&qdisc_mod_lock);
166         return rc;
167
168 out_einval:
169         rc = -EINVAL;
170         goto out;
171 }
172 EXPORT_SYMBOL(register_qdisc);
173
174 int unregister_qdisc(struct Qdisc_ops *qops)
175 {
176         struct Qdisc_ops *q, **qp;
177         int err = -ENOENT;
178
179         write_lock(&qdisc_mod_lock);
180         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
181                 if (q == qops)
182                         break;
183         if (q) {
184                 *qp = q->next;
185                 q->next = NULL;
186                 err = 0;
187         }
188         write_unlock(&qdisc_mod_lock);
189         return err;
190 }
191 EXPORT_SYMBOL(unregister_qdisc);
192
193 /* Get default qdisc if not otherwise specified */
194 void qdisc_get_default(char *name, size_t len)
195 {
196         read_lock(&qdisc_mod_lock);
197         strlcpy(name, default_qdisc_ops->id, len);
198         read_unlock(&qdisc_mod_lock);
199 }
200
201 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
202 {
203         struct Qdisc_ops *q = NULL;
204
205         for (q = qdisc_base; q; q = q->next) {
206                 if (!strcmp(name, q->id)) {
207                         if (!try_module_get(q->owner))
208                                 q = NULL;
209                         break;
210                 }
211         }
212
213         return q;
214 }
215
216 /* Set new default qdisc to use */
217 int qdisc_set_default(const char *name)
218 {
219         const struct Qdisc_ops *ops;
220
221         if (!capable(CAP_NET_ADMIN))
222                 return -EPERM;
223
224         write_lock(&qdisc_mod_lock);
225         ops = qdisc_lookup_default(name);
226         if (!ops) {
227                 /* Not found, drop lock and try to load module */
228                 write_unlock(&qdisc_mod_lock);
229                 request_module("sch_%s", name);
230                 write_lock(&qdisc_mod_lock);
231
232                 ops = qdisc_lookup_default(name);
233         }
234
235         if (ops) {
236                 /* Set new default */
237                 module_put(default_qdisc_ops->owner);
238                 default_qdisc_ops = ops;
239         }
240         write_unlock(&qdisc_mod_lock);
241
242         return ops ? 0 : -ENOENT;
243 }
244
#ifdef CONFIG_NET_SCH_DEFAULT
/*
 * Late initcall that installs the qdisc named by CONFIG_DEFAULT_NET_SCH
 * as the default; the result of qdisc_set_default() is passed through.
 */
static int __init sch_default_qdisc(void)
{
	int err = qdisc_set_default(CONFIG_DEFAULT_NET_SCH);

	return err;
}
late_initcall(sch_default_qdisc);
#endif
253
254 /* We know handle. Find qdisc among all qdisc's attached to device
255  * (root qdisc, all its children, children of children etc.)
256  * Note: caller either uses rtnl or rcu_read_lock()
257  */
258
259 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
260 {
261         struct Qdisc *q;
262
263         if (!qdisc_dev(root))
264                 return (root->handle == handle ? root : NULL);
265
266         if (!(root->flags & TCQ_F_BUILTIN) &&
267             root->handle == handle)
268                 return root;
269
270         hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
271                                    lockdep_rtnl_is_held()) {
272                 if (q->handle == handle)
273                         return q;
274         }
275         return NULL;
276 }
277
278 void qdisc_hash_add(struct Qdisc *q, bool invisible)
279 {
280         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
281                 ASSERT_RTNL();
282                 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
283                 if (invisible)
284                         q->flags |= TCQ_F_INVISIBLE;
285         }
286 }
287 EXPORT_SYMBOL(qdisc_hash_add);
288
289 void qdisc_hash_del(struct Qdisc *q)
290 {
291         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
292                 ASSERT_RTNL();
293                 hash_del_rcu(&q->hash);
294         }
295 }
296 EXPORT_SYMBOL(qdisc_hash_del);
297
298 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
299 {
300         struct Qdisc *q;
301
302         if (!handle)
303                 return NULL;
304         q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
305         if (q)
306                 goto out;
307
308         if (dev_ingress_queue(dev))
309                 q = qdisc_match_from_root(
310                         dev_ingress_queue(dev)->qdisc_sleeping,
311                         handle);
312 out:
313         return q;
314 }
315
316 struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
317 {
318         struct netdev_queue *nq;
319         struct Qdisc *q;
320
321         if (!handle)
322                 return NULL;
323         q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
324         if (q)
325                 goto out;
326
327         nq = dev_ingress_queue_rcu(dev);
328         if (nq)
329                 q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
330 out:
331         return q;
332 }
333
334 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
335 {
336         unsigned long cl;
337         const struct Qdisc_class_ops *cops = p->ops->cl_ops;
338
339         if (cops == NULL)
340                 return NULL;
341         cl = cops->find(p, classid);
342
343         if (cl == 0)
344                 return NULL;
345         return cops->leaf(p, cl);
346 }
347
348 /* Find queueing discipline by name */
349
350 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
351 {
352         struct Qdisc_ops *q = NULL;
353
354         if (kind) {
355                 read_lock(&qdisc_mod_lock);
356                 for (q = qdisc_base; q; q = q->next) {
357                         if (nla_strcmp(kind, q->id) == 0) {
358                                 if (!try_module_get(q->owner))
359                                         q = NULL;
360                                 break;
361                         }
362                 }
363                 read_unlock(&qdisc_mod_lock);
364         }
365         return q;
366 }
367
368 /* The linklayer setting were not transferred from iproute2, in older
369  * versions, and the rate tables lookup systems have been dropped in
370  * the kernel. To keep backward compatible with older iproute2 tc
371  * utils, we detect the linklayer setting by detecting if the rate
372  * table were modified.
373  *
374  * For linklayer ATM table entries, the rate table will be aligned to
375  * 48 bytes, thus some table entries will contain the same value.  The
376  * mpu (min packet unit) is also encoded into the old rate table, thus
377  * starting from the mpu, we find low and high table entries for
378  * mapping this cell.  If these entries contain the same value, when
379  * the rate tables have been modified for linklayer ATM.
380  *
381  * This is done by rounding mpu to the nearest 48 bytes cell/entry,
382  * and then roundup to the next cell, calc the table entry one below,
383  * and compare.
384  */
385 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
386 {
387         int low       = roundup(r->mpu, 48);
388         int high      = roundup(low+1, 48);
389         int cell_low  = low >> r->cell_log;
390         int cell_high = (high >> r->cell_log) - 1;
391
392         /* rtab is too inaccurate at rates > 100Mbit/s */
393         if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
394                 pr_debug("TC linklayer: Giving up ATM detection\n");
395                 return TC_LINKLAYER_ETHERNET;
396         }
397
398         if ((cell_high > cell_low) && (cell_high < 256)
399             && (rtab[cell_low] == rtab[cell_high])) {
400                 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
401                          cell_low, cell_high, rtab[cell_high]);
402                 return TC_LINKLAYER_ATM;
403         }
404         return TC_LINKLAYER_ETHERNET;
405 }
406
407 static struct qdisc_rate_table *qdisc_rtab_list;
408
409 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
410                                         struct nlattr *tab,
411                                         struct netlink_ext_ack *extack)
412 {
413         struct qdisc_rate_table *rtab;
414
415         if (tab == NULL || r->rate == 0 ||
416             r->cell_log == 0 || r->cell_log >= 32 ||
417             nla_len(tab) != TC_RTAB_SIZE) {
418                 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
419                 return NULL;
420         }
421
422         for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
423                 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
424                     !memcmp(&rtab->data, nla_data(tab), 1024)) {
425                         rtab->refcnt++;
426                         return rtab;
427                 }
428         }
429
430         rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
431         if (rtab) {
432                 rtab->rate = *r;
433                 rtab->refcnt = 1;
434                 memcpy(rtab->data, nla_data(tab), 1024);
435                 if (r->linklayer == TC_LINKLAYER_UNAWARE)
436                         r->linklayer = __detect_linklayer(r, rtab->data);
437                 rtab->next = qdisc_rtab_list;
438                 qdisc_rtab_list = rtab;
439         } else {
440                 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
441         }
442         return rtab;
443 }
444 EXPORT_SYMBOL(qdisc_get_rtab);
445
446 void qdisc_put_rtab(struct qdisc_rate_table *tab)
447 {
448         struct qdisc_rate_table *rtab, **rtabp;
449
450         if (!tab || --tab->refcnt)
451                 return;
452
453         for (rtabp = &qdisc_rtab_list;
454              (rtab = *rtabp) != NULL;
455              rtabp = &rtab->next) {
456                 if (rtab == tab) {
457                         *rtabp = rtab->next;
458                         kfree(rtab);
459                         return;
460                 }
461         }
462 }
463 EXPORT_SYMBOL(qdisc_put_rtab);
464
465 static LIST_HEAD(qdisc_stab_list);
466
467 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
468         [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
469         [TCA_STAB_DATA] = { .type = NLA_BINARY },
470 };
471
472 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
473                                                struct netlink_ext_ack *extack)
474 {
475         struct nlattr *tb[TCA_STAB_MAX + 1];
476         struct qdisc_size_table *stab;
477         struct tc_sizespec *s;
478         unsigned int tsize = 0;
479         u16 *tab = NULL;
480         int err;
481
482         err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
483                                           extack);
484         if (err < 0)
485                 return ERR_PTR(err);
486         if (!tb[TCA_STAB_BASE]) {
487                 NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
488                 return ERR_PTR(-EINVAL);
489         }
490
491         s = nla_data(tb[TCA_STAB_BASE]);
492
493         if (s->tsize > 0) {
494                 if (!tb[TCA_STAB_DATA]) {
495                         NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
496                         return ERR_PTR(-EINVAL);
497                 }
498                 tab = nla_data(tb[TCA_STAB_DATA]);
499                 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
500         }
501
502         if (tsize != s->tsize || (!tab && tsize > 0)) {
503                 NL_SET_ERR_MSG(extack, "Invalid size of size table");
504                 return ERR_PTR(-EINVAL);
505         }
506
507         list_for_each_entry(stab, &qdisc_stab_list, list) {
508                 if (memcmp(&stab->szopts, s, sizeof(*s)))
509                         continue;
510                 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
511                         continue;
512                 stab->refcnt++;
513                 return stab;
514         }
515
516         if (s->size_log > STAB_SIZE_LOG_MAX ||
517             s->cell_log > STAB_SIZE_LOG_MAX) {
518                 NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
519                 return ERR_PTR(-EINVAL);
520         }
521
522         stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
523         if (!stab)
524                 return ERR_PTR(-ENOMEM);
525
526         stab->refcnt = 1;
527         stab->szopts = *s;
528         if (tsize > 0)
529                 memcpy(stab->data, tab, tsize * sizeof(u16));
530
531         list_add_tail(&stab->list, &qdisc_stab_list);
532
533         return stab;
534 }
535
536 void qdisc_put_stab(struct qdisc_size_table *tab)
537 {
538         if (!tab)
539                 return;
540
541         if (--tab->refcnt == 0) {
542                 list_del(&tab->list);
543                 kfree_rcu(tab, rcu);
544         }
545 }
546 EXPORT_SYMBOL(qdisc_put_stab);
547
548 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
549 {
550         struct nlattr *nest;
551
552         nest = nla_nest_start_noflag(skb, TCA_STAB);
553         if (nest == NULL)
554                 goto nla_put_failure;
555         if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
556                 goto nla_put_failure;
557         nla_nest_end(skb, nest);
558
559         return skb->len;
560
561 nla_put_failure:
562         return -1;
563 }
564
565 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
566                                const struct qdisc_size_table *stab)
567 {
568         int pkt_len, slot;
569
570         pkt_len = skb->len + stab->szopts.overhead;
571         if (unlikely(!stab->szopts.tsize))
572                 goto out;
573
574         slot = pkt_len + stab->szopts.cell_align;
575         if (unlikely(slot < 0))
576                 slot = 0;
577
578         slot >>= stab->szopts.cell_log;
579         if (likely(slot < stab->szopts.tsize))
580                 pkt_len = stab->data[slot];
581         else
582                 pkt_len = stab->data[stab->szopts.tsize - 1] *
583                                 (slot / stab->szopts.tsize) +
584                                 stab->data[slot % stab->szopts.tsize];
585
586         pkt_len <<= stab->szopts.size_log;
587 out:
588         if (unlikely(pkt_len < 1))
589                 pkt_len = 1;
590         qdisc_skb_cb(skb)->pkt_len = pkt_len;
591 }
592 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
593
594 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
595 {
596         if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
597                 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
598                         txt, qdisc->ops->id, qdisc->handle >> 16);
599                 qdisc->flags |= TCQ_F_WARN_NONWC;
600         }
601 }
602 EXPORT_SYMBOL(qdisc_warn_nonwc);
603
604 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
605 {
606         struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
607                                                  timer);
608
609         rcu_read_lock();
610         __netif_schedule(qdisc_root(wd->qdisc));
611         rcu_read_unlock();
612
613         return HRTIMER_NORESTART;
614 }
615
616 void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
617                                  clockid_t clockid)
618 {
619         hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
620         wd->timer.function = qdisc_watchdog;
621         wd->qdisc = qdisc;
622 }
623 EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
624
625 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
626 {
627         qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
628 }
629 EXPORT_SYMBOL(qdisc_watchdog_init);
630
631 void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
632                                       u64 delta_ns)
633 {
634         if (test_bit(__QDISC_STATE_DEACTIVATED,
635                      &qdisc_root_sleeping(wd->qdisc)->state))
636                 return;
637
638         if (hrtimer_is_queued(&wd->timer)) {
639                 /* If timer is already set in [expires, expires + delta_ns],
640                  * do not reprogram it.
641                  */
642                 if (wd->last_expires - expires <= delta_ns)
643                         return;
644         }
645
646         wd->last_expires = expires;
647         hrtimer_start_range_ns(&wd->timer,
648                                ns_to_ktime(expires),
649                                delta_ns,
650                                HRTIMER_MODE_ABS_PINNED);
651 }
652 EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
653
654 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
655 {
656         hrtimer_cancel(&wd->timer);
657 }
658 EXPORT_SYMBOL(qdisc_watchdog_cancel);
659
660 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
661 {
662         struct hlist_head *h;
663         unsigned int i;
664
665         h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
666
667         if (h != NULL) {
668                 for (i = 0; i < n; i++)
669                         INIT_HLIST_HEAD(&h[i]);
670         }
671         return h;
672 }
673
674 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
675 {
676         struct Qdisc_class_common *cl;
677         struct hlist_node *next;
678         struct hlist_head *nhash, *ohash;
679         unsigned int nsize, nmask, osize;
680         unsigned int i, h;
681
682         /* Rehash when load factor exceeds 0.75 */
683         if (clhash->hashelems * 4 <= clhash->hashsize * 3)
684                 return;
685         nsize = clhash->hashsize * 2;
686         nmask = nsize - 1;
687         nhash = qdisc_class_hash_alloc(nsize);
688         if (nhash == NULL)
689                 return;
690
691         ohash = clhash->hash;
692         osize = clhash->hashsize;
693
694         sch_tree_lock(sch);
695         for (i = 0; i < osize; i++) {
696                 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
697                         h = qdisc_class_hash(cl->classid, nmask);
698                         hlist_add_head(&cl->hnode, &nhash[h]);
699                 }
700         }
701         clhash->hash     = nhash;
702         clhash->hashsize = nsize;
703         clhash->hashmask = nmask;
704         sch_tree_unlock(sch);
705
706         kvfree(ohash);
707 }
708 EXPORT_SYMBOL(qdisc_class_hash_grow);
709
710 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
711 {
712         unsigned int size = 4;
713
714         clhash->hash = qdisc_class_hash_alloc(size);
715         if (!clhash->hash)
716                 return -ENOMEM;
717         clhash->hashsize  = size;
718         clhash->hashmask  = size - 1;
719         clhash->hashelems = 0;
720         return 0;
721 }
722 EXPORT_SYMBOL(qdisc_class_hash_init);
723
724 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
725 {
726         kvfree(clhash->hash);
727 }
728 EXPORT_SYMBOL(qdisc_class_hash_destroy);
729
730 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
731                              struct Qdisc_class_common *cl)
732 {
733         unsigned int h;
734
735         INIT_HLIST_NODE(&cl->hnode);
736         h = qdisc_class_hash(cl->classid, clhash->hashmask);
737         hlist_add_head(&cl->hnode, &clhash->hash[h]);
738         clhash->hashelems++;
739 }
740 EXPORT_SYMBOL(qdisc_class_hash_insert);
741
742 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
743                              struct Qdisc_class_common *cl)
744 {
745         hlist_del(&cl->hnode);
746         clhash->hashelems--;
747 }
748 EXPORT_SYMBOL(qdisc_class_hash_remove);
749
750 /* Allocate an unique handle from space managed by kernel
751  * Possible range is [8000-FFFF]:0000 (0x8000 values)
752  */
753 static u32 qdisc_alloc_handle(struct net_device *dev)
754 {
755         int i = 0x8000;
756         static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
757
758         do {
759                 autohandle += TC_H_MAKE(0x10000U, 0);
760                 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
761                         autohandle = TC_H_MAKE(0x80000000U, 0);
762                 if (!qdisc_lookup(dev, autohandle))
763                         return autohandle;
764                 cond_resched();
765         } while (--i > 0);
766
767         return 0;
768 }
769
770 void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
771 {
772         bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
773         const struct Qdisc_class_ops *cops;
774         unsigned long cl;
775         u32 parentid;
776         bool notify;
777         int drops;
778
779         if (n == 0 && len == 0)
780                 return;
781         drops = max_t(int, n, 0);
782         rcu_read_lock();
783         while ((parentid = sch->parent)) {
784                 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
785                         break;
786
787                 if (sch->flags & TCQ_F_NOPARENT)
788                         break;
789                 /* Notify parent qdisc only if child qdisc becomes empty.
790                  *
791                  * If child was empty even before update then backlog
792                  * counter is screwed and we skip notification because
793                  * parent class is already passive.
794                  *
795                  * If the original child was offloaded then it is allowed
796                  * to be seem as empty, so the parent is notified anyway.
797                  */
798                 notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
799                                                        !qdisc_is_offloaded);
800                 /* TODO: perform the search on a per txq basis */
801                 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
802                 if (sch == NULL) {
803                         WARN_ON_ONCE(parentid != TC_H_ROOT);
804                         break;
805                 }
806                 cops = sch->ops->cl_ops;
807                 if (notify && cops->qlen_notify) {
808                         cl = cops->find(sch, parentid);
809                         cops->qlen_notify(sch, cl);
810                 }
811                 sch->q.qlen -= n;
812                 sch->qstats.backlog -= len;
813                 __qdisc_qstats_drop(sch, drops);
814         }
815         rcu_read_unlock();
816 }
817 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
818
819 int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
820                               void *type_data)
821 {
822         struct net_device *dev = qdisc_dev(sch);
823         int err;
824
825         sch->flags &= ~TCQ_F_OFFLOADED;
826         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
827                 return 0;
828
829         err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
830         if (err == -EOPNOTSUPP)
831                 return 0;
832
833         if (!err)
834                 sch->flags |= TCQ_F_OFFLOADED;
835
836         return err;
837 }
838 EXPORT_SYMBOL(qdisc_offload_dump_helper);
839
840 void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
841                                 struct Qdisc *new, struct Qdisc *old,
842                                 enum tc_setup_type type, void *type_data,
843                                 struct netlink_ext_ack *extack)
844 {
845         bool any_qdisc_is_offloaded;
846         int err;
847
848         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
849                 return;
850
851         err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
852
853         /* Don't report error if the graft is part of destroy operation. */
854         if (!err || !new || new == &noop_qdisc)
855                 return;
856
857         /* Don't report error if the parent, the old child and the new
858          * one are not offloaded.
859          */
860         any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
861         any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
862         any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
863
864         if (any_qdisc_is_offloaded)
865                 NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
866 }
867 EXPORT_SYMBOL(qdisc_offload_graft_helper);
868
869 static void qdisc_offload_graft_root(struct net_device *dev,
870                                      struct Qdisc *new, struct Qdisc *old,
871                                      struct netlink_ext_ack *extack)
872 {
873         struct tc_root_qopt_offload graft_offload = {
874                 .command        = TC_ROOT_GRAFT,
875                 .handle         = new ? new->handle : 0,
876                 .ingress        = (new && new->flags & TCQ_F_INGRESS) ||
877                                   (old && old->flags & TCQ_F_INGRESS),
878         };
879
880         qdisc_offload_graft_helper(dev, NULL, new, old,
881                                    TC_SETUP_ROOT_QDISC, &graft_offload, extack);
882 }
883
884 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
885                          u32 portid, u32 seq, u16 flags, int event)
886 {
887         struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
888         struct gnet_stats_queue __percpu *cpu_qstats = NULL;
889         struct tcmsg *tcm;
890         struct nlmsghdr  *nlh;
891         unsigned char *b = skb_tail_pointer(skb);
892         struct gnet_dump d;
893         struct qdisc_size_table *stab;
894         u32 block_index;
895         __u32 qlen;
896
897         cond_resched();
898         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
899         if (!nlh)
900                 goto out_nlmsg_trim;
901         tcm = nlmsg_data(nlh);
902         tcm->tcm_family = AF_UNSPEC;
903         tcm->tcm__pad1 = 0;
904         tcm->tcm__pad2 = 0;
905         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
906         tcm->tcm_parent = clid;
907         tcm->tcm_handle = q->handle;
908         tcm->tcm_info = refcount_read(&q->refcnt);
909         if (nla_put_string(skb, TCA_KIND, q->ops->id))
910                 goto nla_put_failure;
911         if (q->ops->ingress_block_get) {
912                 block_index = q->ops->ingress_block_get(q);
913                 if (block_index &&
914                     nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
915                         goto nla_put_failure;
916         }
917         if (q->ops->egress_block_get) {
918                 block_index = q->ops->egress_block_get(q);
919                 if (block_index &&
920                     nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
921                         goto nla_put_failure;
922         }
923         if (q->ops->dump && q->ops->dump(q, skb) < 0)
924                 goto nla_put_failure;
925         if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
926                 goto nla_put_failure;
927         qlen = qdisc_qlen_sum(q);
928
929         stab = rtnl_dereference(q->stab);
930         if (stab && qdisc_dump_stab(skb, stab) < 0)
931                 goto nla_put_failure;
932
933         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
934                                          NULL, &d, TCA_PAD) < 0)
935                 goto nla_put_failure;
936
937         if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
938                 goto nla_put_failure;
939
940         if (qdisc_is_percpu_stats(q)) {
941                 cpu_bstats = q->cpu_bstats;
942                 cpu_qstats = q->cpu_qstats;
943         }
944
945         if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
946                                   &d, cpu_bstats, &q->bstats) < 0 ||
947             gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
948             gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
949                 goto nla_put_failure;
950
951         if (gnet_stats_finish_copy(&d) < 0)
952                 goto nla_put_failure;
953
954         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
955         return skb->len;
956
957 out_nlmsg_trim:
958 nla_put_failure:
959         nlmsg_trim(skb, b);
960         return -1;
961 }
962
963 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
964 {
965         if (q->flags & TCQ_F_BUILTIN)
966                 return true;
967         if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
968                 return true;
969
970         return false;
971 }
972
973 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
974                         struct nlmsghdr *n, u32 clid,
975                         struct Qdisc *old, struct Qdisc *new)
976 {
977         struct sk_buff *skb;
978         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
979
980         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
981         if (!skb)
982                 return -ENOBUFS;
983
984         if (old && !tc_qdisc_dump_ignore(old, false)) {
985                 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
986                                   0, RTM_DELQDISC) < 0)
987                         goto err_out;
988         }
989         if (new && !tc_qdisc_dump_ignore(new, false)) {
990                 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
991                                   old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
992                         goto err_out;
993         }
994
995         if (skb->len)
996                 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
997                                       n->nlmsg_flags & NLM_F_ECHO);
998
999 err_out:
1000         kfree_skb(skb);
1001         return -EINVAL;
1002 }
1003
1004 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
1005                                struct nlmsghdr *n, u32 clid,
1006                                struct Qdisc *old, struct Qdisc *new)
1007 {
1008         if (new || old)
1009                 qdisc_notify(net, skb, n, clid, old, new);
1010
1011         if (old)
1012                 qdisc_put(old);
1013 }
1014
1015 static void qdisc_clear_nolock(struct Qdisc *sch)
1016 {
1017         sch->flags &= ~TCQ_F_NOLOCK;
1018         if (!(sch->flags & TCQ_F_CPUSTATS))
1019                 return;
1020
1021         free_percpu(sch->cpu_bstats);
1022         free_percpu(sch->cpu_qstats);
1023         sch->cpu_bstats = NULL;
1024         sch->cpu_qstats = NULL;
1025         sch->flags &= ~TCQ_F_CPUSTATS;
1026 }
1027
1028 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
1029  * to device "dev".
1030  *
1031  * When appropriate send a netlink notification using 'skb'
1032  * and "n".
1033  *
1034  * On success, destroy old qdisc.
1035  */
1036
1037 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
1038                        struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
1039                        struct Qdisc *new, struct Qdisc *old,
1040                        struct netlink_ext_ack *extack)
1041 {
1042         struct Qdisc *q = old;
1043         struct net *net = dev_net(dev);
1044
1045         if (parent == NULL) {
1046                 unsigned int i, num_q, ingress;
1047
1048                 ingress = 0;
1049                 num_q = dev->num_tx_queues;
1050                 if ((q && q->flags & TCQ_F_INGRESS) ||
1051                     (new && new->flags & TCQ_F_INGRESS)) {
1052                         num_q = 1;
1053                         ingress = 1;
1054                         if (!dev_ingress_queue(dev)) {
1055                                 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
1056                                 return -ENOENT;
1057                         }
1058                 }
1059
1060                 if (dev->flags & IFF_UP)
1061                         dev_deactivate(dev);
1062
1063                 qdisc_offload_graft_root(dev, new, old, extack);
1064
1065                 if (new && new->ops->attach && !ingress)
1066                         goto skip;
1067
1068                 for (i = 0; i < num_q; i++) {
1069                         struct netdev_queue *dev_queue = dev_ingress_queue(dev);
1070
1071                         if (!ingress)
1072                                 dev_queue = netdev_get_tx_queue(dev, i);
1073
1074                         old = dev_graft_qdisc(dev_queue, new);
1075                         if (new && i > 0)
1076                                 qdisc_refcount_inc(new);
1077
1078                         if (!ingress)
1079                                 qdisc_put(old);
1080                 }
1081
1082 skip:
1083                 if (!ingress) {
1084                         old = rtnl_dereference(dev->qdisc);
1085                         if (new && !new->ops->attach)
1086                                 qdisc_refcount_inc(new);
1087                         rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
1088
1089                         notify_and_destroy(net, skb, n, classid, old, new);
1090
1091                         if (new && new->ops->attach)
1092                                 new->ops->attach(new);
1093                 } else {
1094                         notify_and_destroy(net, skb, n, classid, old, new);
1095                 }
1096
1097                 if (dev->flags & IFF_UP)
1098                         dev_activate(dev);
1099         } else {
1100                 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1101                 unsigned long cl;
1102                 int err;
1103
1104                 /* Only support running class lockless if parent is lockless */
1105                 if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
1106                         qdisc_clear_nolock(new);
1107
1108                 if (!cops || !cops->graft)
1109                         return -EOPNOTSUPP;
1110
1111                 cl = cops->find(parent, classid);
1112                 if (!cl) {
1113                         NL_SET_ERR_MSG(extack, "Specified class not found");
1114                         return -ENOENT;
1115                 }
1116
1117                 if (new && new->ops == &noqueue_qdisc_ops) {
1118                         NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
1119                         return -EINVAL;
1120                 }
1121
1122                 err = cops->graft(parent, cl, new, &old, extack);
1123                 if (err)
1124                         return err;
1125                 notify_and_destroy(net, skb, n, classid, old, new);
1126         }
1127         return 0;
1128 }
1129
1130 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1131                                    struct netlink_ext_ack *extack)
1132 {
1133         u32 block_index;
1134
1135         if (tca[TCA_INGRESS_BLOCK]) {
1136                 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1137
1138                 if (!block_index) {
1139                         NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1140                         return -EINVAL;
1141                 }
1142                 if (!sch->ops->ingress_block_set) {
1143                         NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1144                         return -EOPNOTSUPP;
1145                 }
1146                 sch->ops->ingress_block_set(sch, block_index);
1147         }
1148         if (tca[TCA_EGRESS_BLOCK]) {
1149                 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1150
1151                 if (!block_index) {
1152                         NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1153                         return -EINVAL;
1154                 }
1155                 if (!sch->ops->egress_block_set) {
1156                         NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1157                         return -EOPNOTSUPP;
1158                 }
1159                 sch->ops->egress_block_set(sch, block_index);
1160         }
1161         return 0;
1162 }
1163
1164 /*
1165    Allocate and initialize new qdisc.
1166
1167    Parameters are passed via opt.
1168  */
1169
1170 static struct Qdisc *qdisc_create(struct net_device *dev,
1171                                   struct netdev_queue *dev_queue,
1172                                   struct Qdisc *p, u32 parent, u32 handle,
1173                                   struct nlattr **tca, int *errp,
1174                                   struct netlink_ext_ack *extack)
1175 {
1176         int err;
1177         struct nlattr *kind = tca[TCA_KIND];
1178         struct Qdisc *sch;
1179         struct Qdisc_ops *ops;
1180         struct qdisc_size_table *stab;
1181
1182         ops = qdisc_lookup_ops(kind);
1183 #ifdef CONFIG_MODULES
1184         if (ops == NULL && kind != NULL) {
1185                 char name[IFNAMSIZ];
1186                 if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
1187                         /* We dropped the RTNL semaphore in order to
1188                          * perform the module load.  So, even if we
1189                          * succeeded in loading the module we have to
1190                          * tell the caller to replay the request.  We
1191                          * indicate this using -EAGAIN.
1192                          * We replay the request because the device may
1193                          * go away in the mean time.
1194                          */
1195                         rtnl_unlock();
1196                         request_module("sch_%s", name);
1197                         rtnl_lock();
1198                         ops = qdisc_lookup_ops(kind);
1199                         if (ops != NULL) {
1200                                 /* We will try again qdisc_lookup_ops,
1201                                  * so don't keep a reference.
1202                                  */
1203                                 module_put(ops->owner);
1204                                 err = -EAGAIN;
1205                                 goto err_out;
1206                         }
1207                 }
1208         }
1209 #endif
1210
1211         err = -ENOENT;
1212         if (!ops) {
1213                 NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
1214                 goto err_out;
1215         }
1216
1217         sch = qdisc_alloc(dev_queue, ops, extack);
1218         if (IS_ERR(sch)) {
1219                 err = PTR_ERR(sch);
1220                 goto err_out2;
1221         }
1222
1223         sch->parent = parent;
1224
1225         if (handle == TC_H_INGRESS) {
1226                 sch->flags |= TCQ_F_INGRESS;
1227                 handle = TC_H_MAKE(TC_H_INGRESS, 0);
1228         } else {
1229                 if (handle == 0) {
1230                         handle = qdisc_alloc_handle(dev);
1231                         if (handle == 0) {
1232                                 NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
1233                                 err = -ENOSPC;
1234                                 goto err_out3;
1235                         }
1236                 }
1237                 if (!netif_is_multiqueue(dev))
1238                         sch->flags |= TCQ_F_ONETXQUEUE;
1239         }
1240
1241         sch->handle = handle;
1242
1243         /* This exist to keep backward compatible with a userspace
1244          * loophole, what allowed userspace to get IFF_NO_QUEUE
1245          * facility on older kernels by setting tx_queue_len=0 (prior
1246          * to qdisc init), and then forgot to reinit tx_queue_len
1247          * before again attaching a qdisc.
1248          */
1249         if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1250                 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1251                 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1252         }
1253
1254         err = qdisc_block_indexes_set(sch, tca, extack);
1255         if (err)
1256                 goto err_out3;
1257
1258         if (ops->init) {
1259                 err = ops->init(sch, tca[TCA_OPTIONS], extack);
1260                 if (err != 0)
1261                         goto err_out5;
1262         }
1263
1264         if (tca[TCA_STAB]) {
1265                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1266                 if (IS_ERR(stab)) {
1267                         err = PTR_ERR(stab);
1268                         goto err_out4;
1269                 }
1270                 rcu_assign_pointer(sch->stab, stab);
1271         }
1272         if (tca[TCA_RATE]) {
1273                 seqcount_t *running;
1274
1275                 err = -EOPNOTSUPP;
1276                 if (sch->flags & TCQ_F_MQROOT) {
1277                         NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
1278                         goto err_out4;
1279                 }
1280
1281                 if (sch->parent != TC_H_ROOT &&
1282                     !(sch->flags & TCQ_F_INGRESS) &&
1283                     (!p || !(p->flags & TCQ_F_MQROOT)))
1284                         running = qdisc_root_sleeping_running(sch);
1285                 else
1286                         running = &sch->running;
1287
1288                 err = gen_new_estimator(&sch->bstats,
1289                                         sch->cpu_bstats,
1290                                         &sch->rate_est,
1291                                         NULL,
1292                                         running,
1293                                         tca[TCA_RATE]);
1294                 if (err) {
1295                         NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
1296                         goto err_out4;
1297                 }
1298         }
1299
1300         qdisc_hash_add(sch, false);
1301         trace_qdisc_create(ops, dev, parent);
1302
1303         return sch;
1304
1305 err_out5:
1306         /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1307         if (ops->destroy)
1308                 ops->destroy(sch);
1309 err_out3:
1310         dev_put(dev);
1311         qdisc_free(sch);
1312 err_out2:
1313         module_put(ops->owner);
1314 err_out:
1315         *errp = err;
1316         return NULL;
1317
1318 err_out4:
1319         /*
1320          * Any broken qdiscs that would require a ops->reset() here?
1321          * The qdisc was never in action so it shouldn't be necessary.
1322          */
1323         qdisc_put_stab(rtnl_dereference(sch->stab));
1324         if (ops->destroy)
1325                 ops->destroy(sch);
1326         goto err_out3;
1327 }
1328
1329 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1330                         struct netlink_ext_ack *extack)
1331 {
1332         struct qdisc_size_table *ostab, *stab = NULL;
1333         int err = 0;
1334
1335         if (tca[TCA_OPTIONS]) {
1336                 if (!sch->ops->change) {
1337                         NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1338                         return -EINVAL;
1339                 }
1340                 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1341                         NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1342                         return -EOPNOTSUPP;
1343                 }
1344                 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1345                 if (err)
1346                         return err;
1347         }
1348
1349         if (tca[TCA_STAB]) {
1350                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1351                 if (IS_ERR(stab))
1352                         return PTR_ERR(stab);
1353         }
1354
1355         ostab = rtnl_dereference(sch->stab);
1356         rcu_assign_pointer(sch->stab, stab);
1357         qdisc_put_stab(ostab);
1358
1359         if (tca[TCA_RATE]) {
1360                 /* NB: ignores errors from replace_estimator
1361                    because change can't be undone. */
1362                 if (sch->flags & TCQ_F_MQROOT)
1363                         goto out;
1364                 gen_replace_estimator(&sch->bstats,
1365                                       sch->cpu_bstats,
1366                                       &sch->rate_est,
1367                                       NULL,
1368                                       qdisc_root_sleeping_running(sch),
1369                                       tca[TCA_RATE]);
1370         }
1371 out:
1372         return 0;
1373 }
1374
1375 struct check_loop_arg {
1376         struct qdisc_walker     w;
1377         struct Qdisc            *p;
1378         int                     depth;
1379 };
1380
1381 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1382                          struct qdisc_walker *w);
1383
1384 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1385 {
1386         struct check_loop_arg   arg;
1387
1388         if (q->ops->cl_ops == NULL)
1389                 return 0;
1390
1391         arg.w.stop = arg.w.skip = arg.w.count = 0;
1392         arg.w.fn = check_loop_fn;
1393         arg.depth = depth;
1394         arg.p = p;
1395         q->ops->cl_ops->walk(q, &arg.w);
1396         return arg.w.stop ? -ELOOP : 0;
1397 }
1398
1399 static int
1400 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1401 {
1402         struct Qdisc *leaf;
1403         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1404         struct check_loop_arg *arg = (struct check_loop_arg *)w;
1405
1406         leaf = cops->leaf(q, cl);
1407         if (leaf) {
1408                 if (leaf == arg->p || arg->depth > 7)
1409                         return -ELOOP;
1410                 return check_loop(leaf, arg->p, arg->depth + 1);
1411         }
1412         return 0;
1413 }
1414
1415 const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
1416         [TCA_KIND]              = { .type = NLA_STRING },
1417         [TCA_RATE]              = { .type = NLA_BINARY,
1418                                     .len = sizeof(struct tc_estimator) },
1419         [TCA_STAB]              = { .type = NLA_NESTED },
1420         [TCA_DUMP_INVISIBLE]    = { .type = NLA_FLAG },
1421         [TCA_CHAIN]             = { .type = NLA_U32 },
1422         [TCA_INGRESS_BLOCK]     = { .type = NLA_U32 },
1423         [TCA_EGRESS_BLOCK]      = { .type = NLA_U32 },
1424 };
1425
1426 /*
1427  * Delete/get qdisc.
1428  */
1429
1430 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1431                         struct netlink_ext_ack *extack)
1432 {
1433         struct net *net = sock_net(skb->sk);
1434         struct tcmsg *tcm = nlmsg_data(n);
1435         struct nlattr *tca[TCA_MAX + 1];
1436         struct net_device *dev;
1437         u32 clid;
1438         struct Qdisc *q = NULL;
1439         struct Qdisc *p = NULL;
1440         int err;
1441
1442         if ((n->nlmsg_type != RTM_GETQDISC) &&
1443             !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1444                 return -EPERM;
1445
1446         err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1447                                      rtm_tca_policy, extack);
1448         if (err < 0)
1449                 return err;
1450
1451         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1452         if (!dev)
1453                 return -ENODEV;
1454
1455         clid = tcm->tcm_parent;
1456         if (clid) {
1457                 if (clid != TC_H_ROOT) {
1458                         if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1459                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1460                                 if (!p) {
1461                                         NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1462                                         return -ENOENT;
1463                                 }
1464                                 q = qdisc_leaf(p, clid);
1465                         } else if (dev_ingress_queue(dev)) {
1466                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1467                         }
1468                 } else {
1469                         q = rtnl_dereference(dev->qdisc);
1470                 }
1471                 if (!q) {
1472                         NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1473                         return -ENOENT;
1474                 }
1475
1476                 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1477                         NL_SET_ERR_MSG(extack, "Invalid handle");
1478                         return -EINVAL;
1479                 }
1480         } else {
1481                 q = qdisc_lookup(dev, tcm->tcm_handle);
1482                 if (!q) {
1483                         NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1484                         return -ENOENT;
1485                 }
1486         }
1487
1488         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1489                 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1490                 return -EINVAL;
1491         }
1492
1493         if (n->nlmsg_type == RTM_DELQDISC) {
1494                 if (!clid) {
1495                         NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1496                         return -EINVAL;
1497                 }
1498                 if (q->handle == 0) {
1499                         NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1500                         return -ENOENT;
1501                 }
1502                 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1503                 if (err != 0)
1504                         return err;
1505         } else {
1506                 qdisc_notify(net, skb, n, clid, NULL, q);
1507         }
1508         return 0;
1509 }
1510
1511 /*
1512  * Create/change qdisc.
1513  */
1514
1515 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1516                            struct netlink_ext_ack *extack)
1517 {
1518         struct net *net = sock_net(skb->sk);
1519         struct tcmsg *tcm;
1520         struct nlattr *tca[TCA_MAX + 1];
1521         struct net_device *dev;
1522         u32 clid;
1523         struct Qdisc *q, *p;
1524         int err;
1525
1526         if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1527                 return -EPERM;
1528
1529 replay:
1530         /* Reinit, just in case something touches this. */
1531         err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1532                                      rtm_tca_policy, extack);
1533         if (err < 0)
1534                 return err;
1535
1536         tcm = nlmsg_data(n);
1537         clid = tcm->tcm_parent;
1538         q = p = NULL;
1539
1540         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1541         if (!dev)
1542                 return -ENODEV;
1543
1544
1545         if (clid) {
1546                 if (clid != TC_H_ROOT) {
1547                         if (clid != TC_H_INGRESS) {
1548                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1549                                 if (!p) {
1550                                         NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1551                                         return -ENOENT;
1552                                 }
1553                                 q = qdisc_leaf(p, clid);
1554                         } else if (dev_ingress_queue_create(dev)) {
1555                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1556                         }
1557                 } else {
1558                         q = rtnl_dereference(dev->qdisc);
1559                 }
1560
1561                 /* It may be default qdisc, ignore it */
1562                 if (q && q->handle == 0)
1563                         q = NULL;
1564
1565                 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1566                         if (tcm->tcm_handle) {
1567                                 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1568                                         NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1569                                         return -EEXIST;
1570                                 }
1571                                 if (TC_H_MIN(tcm->tcm_handle)) {
1572                                         NL_SET_ERR_MSG(extack, "Invalid minor handle");
1573                                         return -EINVAL;
1574                                 }
1575                                 q = qdisc_lookup(dev, tcm->tcm_handle);
1576                                 if (!q)
1577                                         goto create_n_graft;
1578                                 if (n->nlmsg_flags & NLM_F_EXCL) {
1579                                         NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1580                                         return -EEXIST;
1581                                 }
1582                                 if (tca[TCA_KIND] &&
1583                                     nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1584                                         NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1585                                         return -EINVAL;
1586                                 }
1587                                 if (q == p ||
1588                                     (p && check_loop(q, p, 0))) {
1589                                         NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1590                                         return -ELOOP;
1591                                 }
1592                                 qdisc_refcount_inc(q);
1593                                 goto graft;
1594                         } else {
1595                                 if (!q)
1596                                         goto create_n_graft;
1597
1598                                 /* This magic test requires explanation.
1599                                  *
1600                                  *   We know, that some child q is already
1601                                  *   attached to this parent and have choice:
1602                                  *   either to change it or to create/graft new one.
1603                                  *
1604                                  *   1. We are allowed to create/graft only
1605                                  *   if CREATE and REPLACE flags are set.
1606                                  *
1607                                  *   2. If EXCL is set, requestor wanted to say,
1608                                  *   that qdisc tcm_handle is not expected
1609                                  *   to exist, so that we choose create/graft too.
1610                                  *
1611                                  *   3. The last case is when no flags are set.
1612                                  *   Alas, it is sort of hole in API, we
1613                                  *   cannot decide what to do unambiguously.
1614                                  *   For now we select create/graft, if
1615                                  *   user gave KIND, which does not match existing.
1616                                  */
1617                                 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1618                                     (n->nlmsg_flags & NLM_F_REPLACE) &&
1619                                     ((n->nlmsg_flags & NLM_F_EXCL) ||
1620                                      (tca[TCA_KIND] &&
1621                                       nla_strcmp(tca[TCA_KIND], q->ops->id))))
1622                                         goto create_n_graft;
1623                         }
1624                 }
1625         } else {
1626                 if (!tcm->tcm_handle) {
1627                         NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1628                         return -EINVAL;
1629                 }
1630                 q = qdisc_lookup(dev, tcm->tcm_handle);
1631         }
1632
1633         /* Change qdisc parameters */
1634         if (!q) {
1635                 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1636                 return -ENOENT;
1637         }
1638         if (n->nlmsg_flags & NLM_F_EXCL) {
1639                 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1640                 return -EEXIST;
1641         }
1642         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1643                 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1644                 return -EINVAL;
1645         }
1646         err = qdisc_change(q, tca, extack);
1647         if (err == 0)
1648                 qdisc_notify(net, skb, n, clid, NULL, q);
1649         return err;
1650
1651 create_n_graft:
1652         if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1653                 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1654                 return -ENOENT;
1655         }
1656         if (clid == TC_H_INGRESS) {
1657                 if (dev_ingress_queue(dev)) {
1658                         q = qdisc_create(dev, dev_ingress_queue(dev), p,
1659                                          tcm->tcm_parent, tcm->tcm_parent,
1660                                          tca, &err, extack);
1661                 } else {
1662                         NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1663                         err = -ENOENT;
1664                 }
1665         } else {
1666                 struct netdev_queue *dev_queue;
1667
1668                 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1669                         dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1670                 else if (p)
1671                         dev_queue = p->dev_queue;
1672                 else
1673                         dev_queue = netdev_get_tx_queue(dev, 0);
1674
1675                 q = qdisc_create(dev, dev_queue, p,
1676                                  tcm->tcm_parent, tcm->tcm_handle,
1677                                  tca, &err, extack);
1678         }
1679         if (q == NULL) {
1680                 if (err == -EAGAIN)
1681                         goto replay;
1682                 return err;
1683         }
1684
1685 graft:
1686         err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1687         if (err) {
1688                 if (q)
1689                         qdisc_put(q);
1690                 return err;
1691         }
1692
1693         return 0;
1694 }
1695
1696 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1697                               struct netlink_callback *cb,
1698                               int *q_idx_p, int s_q_idx, bool recur,
1699                               bool dump_invisible)
1700 {
1701         int ret = 0, q_idx = *q_idx_p;
1702         struct Qdisc *q;
1703         int b;
1704
1705         if (!root)
1706                 return 0;
1707
1708         q = root;
1709         if (q_idx < s_q_idx) {
1710                 q_idx++;
1711         } else {
1712                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1713                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1714                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1715                                   RTM_NEWQDISC) <= 0)
1716                         goto done;
1717                 q_idx++;
1718         }
1719
1720         /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1721          * itself has already been dumped.
1722          *
1723          * If we've already dumped the top-level (ingress) qdisc above and the global
1724          * qdisc hashtable, we don't want to hit it again
1725          */
1726         if (!qdisc_dev(root) || !recur)
1727                 goto out;
1728
1729         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1730                 if (q_idx < s_q_idx) {
1731                         q_idx++;
1732                         continue;
1733                 }
1734                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1735                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1736                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1737                                   RTM_NEWQDISC) <= 0)
1738                         goto done;
1739                 q_idx++;
1740         }
1741
1742 out:
1743         *q_idx_p = q_idx;
1744         return ret;
1745 done:
1746         ret = -1;
1747         goto out;
1748 }
1749
1750 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1751 {
1752         struct net *net = sock_net(skb->sk);
1753         int idx, q_idx;
1754         int s_idx, s_q_idx;
1755         struct net_device *dev;
1756         const struct nlmsghdr *nlh = cb->nlh;
1757         struct nlattr *tca[TCA_MAX + 1];
1758         int err;
1759
1760         s_idx = cb->args[0];
1761         s_q_idx = q_idx = cb->args[1];
1762
1763         idx = 0;
1764         ASSERT_RTNL();
1765
1766         err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1767                                      rtm_tca_policy, cb->extack);
1768         if (err < 0)
1769                 return err;
1770
1771         for_each_netdev(net, dev) {
1772                 struct netdev_queue *dev_queue;
1773
1774                 if (idx < s_idx)
1775                         goto cont;
1776                 if (idx > s_idx)
1777                         s_q_idx = 0;
1778                 q_idx = 0;
1779
1780                 if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
1781                                        skb, cb, &q_idx, s_q_idx,
1782                                        true, tca[TCA_DUMP_INVISIBLE]) < 0)
1783                         goto done;
1784
1785                 dev_queue = dev_ingress_queue(dev);
1786                 if (dev_queue &&
1787                     tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1788                                        &q_idx, s_q_idx, false,
1789                                        tca[TCA_DUMP_INVISIBLE]) < 0)
1790                         goto done;
1791
1792 cont:
1793                 idx++;
1794         }
1795
1796 done:
1797         cb->args[0] = idx;
1798         cb->args[1] = q_idx;
1799
1800         return skb->len;
1801 }
1802
1803
1804
1805 /************************************************
1806  *      Traffic classes manipulation.           *
1807  ************************************************/
1808
1809 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1810                           unsigned long cl,
1811                           u32 portid, u32 seq, u16 flags, int event)
1812 {
1813         struct tcmsg *tcm;
1814         struct nlmsghdr  *nlh;
1815         unsigned char *b = skb_tail_pointer(skb);
1816         struct gnet_dump d;
1817         const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1818
1819         cond_resched();
1820         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1821         if (!nlh)
1822                 goto out_nlmsg_trim;
1823         tcm = nlmsg_data(nlh);
1824         tcm->tcm_family = AF_UNSPEC;
1825         tcm->tcm__pad1 = 0;
1826         tcm->tcm__pad2 = 0;
1827         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1828         tcm->tcm_parent = q->handle;
1829         tcm->tcm_handle = q->handle;
1830         tcm->tcm_info = 0;
1831         if (nla_put_string(skb, TCA_KIND, q->ops->id))
1832                 goto nla_put_failure;
1833         if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1834                 goto nla_put_failure;
1835
1836         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1837                                          NULL, &d, TCA_PAD) < 0)
1838                 goto nla_put_failure;
1839
1840         if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1841                 goto nla_put_failure;
1842
1843         if (gnet_stats_finish_copy(&d) < 0)
1844                 goto nla_put_failure;
1845
1846         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1847         return skb->len;
1848
1849 out_nlmsg_trim:
1850 nla_put_failure:
1851         nlmsg_trim(skb, b);
1852         return -1;
1853 }
1854
1855 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1856                          struct nlmsghdr *n, struct Qdisc *q,
1857                          unsigned long cl, int event)
1858 {
1859         struct sk_buff *skb;
1860         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1861
1862         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1863         if (!skb)
1864                 return -ENOBUFS;
1865
1866         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1867                 kfree_skb(skb);
1868                 return -EINVAL;
1869         }
1870
1871         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1872                               n->nlmsg_flags & NLM_F_ECHO);
1873 }
1874
1875 static int tclass_del_notify(struct net *net,
1876                              const struct Qdisc_class_ops *cops,
1877                              struct sk_buff *oskb, struct nlmsghdr *n,
1878                              struct Qdisc *q, unsigned long cl,
1879                              struct netlink_ext_ack *extack)
1880 {
1881         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1882         struct sk_buff *skb;
1883         int err = 0;
1884
1885         if (!cops->delete)
1886                 return -EOPNOTSUPP;
1887
1888         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1889         if (!skb)
1890                 return -ENOBUFS;
1891
1892         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1893                            RTM_DELTCLASS) < 0) {
1894                 kfree_skb(skb);
1895                 return -EINVAL;
1896         }
1897
1898         err = cops->delete(q, cl, extack);
1899         if (err) {
1900                 kfree_skb(skb);
1901                 return err;
1902         }
1903
1904         err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1905                              n->nlmsg_flags & NLM_F_ECHO);
1906         return err;
1907 }
1908
1909 #ifdef CONFIG_NET_CLS
1910
/*
 * Argument bundle handed to a classifier's ->walk() by
 * tc_bind_class_walker() and unpacked again in tcf_node_bind().
 */
struct tcf_bind_args {
	struct tcf_walker w;	/* must stay first: tcf_node_bind() casts the walker pointer back to this struct */
	unsigned long base;	/* class currently being walked; last argument to ->bind_class() */
	unsigned long cl;	/* class value passed as the 'cl' argument to ->bind_class() */
	u32 classid;		/* class id passed to ->bind_class() */
};
1917
1918 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1919 {
1920         struct tcf_bind_args *a = (void *)arg;
1921
1922         if (tp->ops->bind_class) {
1923                 struct Qdisc *q = tcf_block_q(tp->chain->block);
1924
1925                 sch_tree_lock(q);
1926                 tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
1927                 sch_tree_unlock(q);
1928         }
1929         return 0;
1930 }
1931
/*
 * Argument bundle handed to a qdisc's class ->walk() by tc_bind_tclass()
 * and unpacked again in tc_bind_class_walker().
 */
struct tc_bind_class_args {
	struct qdisc_walker w;	/* must stay first: tc_bind_class_walker() casts the walker pointer back to this struct */
	unsigned long new_cl;	/* class value forwarded as tcf_bind_args.cl */
	u32 portid;		/* stored by tc_bind_tclass(); not read by tc_bind_class_walker() */
	u32 clid;		/* class id forwarded as tcf_bind_args.classid */
};
1938
1939 static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
1940                                 struct qdisc_walker *w)
1941 {
1942         struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
1943         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1944         struct tcf_block *block;
1945         struct tcf_chain *chain;
1946
1947         block = cops->tcf_block(q, cl, NULL);
1948         if (!block)
1949                 return 0;
1950         for (chain = tcf_get_next_chain(block, NULL);
1951              chain;
1952              chain = tcf_get_next_chain(block, chain)) {
1953                 struct tcf_proto *tp;
1954
1955                 for (tp = tcf_get_next_proto(chain, NULL);
1956                      tp; tp = tcf_get_next_proto(chain, tp)) {
1957                         struct tcf_bind_args arg = {};
1958
1959                         arg.w.fn = tcf_node_bind;
1960                         arg.classid = a->clid;
1961                         arg.base = cl;
1962                         arg.cl = a->new_cl;
1963                         tp->ops->walk(tp, &arg.w, true);
1964                 }
1965         }
1966
1967         return 0;
1968 }
1969
1970 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1971                            unsigned long new_cl)
1972 {
1973         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1974         struct tc_bind_class_args args = {};
1975
1976         if (!cops->tcf_block)
1977                 return;
1978         args.portid = portid;
1979         args.clid = clid;
1980         args.new_cl = new_cl;
1981         args.w.fn = tc_bind_class_walker;
1982         q->ops->cl_ops->walk(q, &args.w);
1983 }
1984
1985 #else
1986
/* Without CONFIG_NET_CLS this is a no-op stub. */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}
1991
1992 #endif
1993
1994 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1995                          struct netlink_ext_ack *extack)
1996 {
1997         struct net *net = sock_net(skb->sk);
1998         struct tcmsg *tcm = nlmsg_data(n);
1999         struct nlattr *tca[TCA_MAX + 1];
2000         struct net_device *dev;
2001         struct Qdisc *q = NULL;
2002         const struct Qdisc_class_ops *cops;
2003         unsigned long cl = 0;
2004         unsigned long new_cl;
2005         u32 portid;
2006         u32 clid;
2007         u32 qid;
2008         int err;
2009
2010         if ((n->nlmsg_type != RTM_GETTCLASS) &&
2011             !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
2012                 return -EPERM;
2013
2014         err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
2015                                      rtm_tca_policy, extack);
2016         if (err < 0)
2017                 return err;
2018
2019         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2020         if (!dev)
2021                 return -ENODEV;
2022
2023         /*
2024            parent == TC_H_UNSPEC - unspecified parent.
2025            parent == TC_H_ROOT   - class is root, which has no parent.
2026            parent == X:0         - parent is root class.
2027            parent == X:Y         - parent is a node in hierarchy.
2028            parent == 0:Y         - parent is X:Y, where X:0 is qdisc.
2029
2030            handle == 0:0         - generate handle from kernel pool.
2031            handle == 0:Y         - class is X:Y, where X:0 is qdisc.
2032            handle == X:Y         - clear.
2033            handle == X:0         - root class.
2034          */
2035
2036         /* Step 1. Determine qdisc handle X:0 */
2037
2038         portid = tcm->tcm_parent;
2039         clid = tcm->tcm_handle;
2040         qid = TC_H_MAJ(clid);
2041
2042         if (portid != TC_H_ROOT) {
2043                 u32 qid1 = TC_H_MAJ(portid);
2044
2045                 if (qid && qid1) {
2046                         /* If both majors are known, they must be identical. */
2047                         if (qid != qid1)
2048                                 return -EINVAL;
2049                 } else if (qid1) {
2050                         qid = qid1;
2051                 } else if (qid == 0)
2052                         qid = rtnl_dereference(dev->qdisc)->handle;
2053
2054                 /* Now qid is genuine qdisc handle consistent
2055                  * both with parent and child.
2056                  *
2057                  * TC_H_MAJ(portid) still may be unspecified, complete it now.
2058                  */
2059                 if (portid)
2060                         portid = TC_H_MAKE(qid, portid);
2061         } else {
2062                 if (qid == 0)
2063                         qid = rtnl_dereference(dev->qdisc)->handle;
2064         }
2065
2066         /* OK. Locate qdisc */
2067         q = qdisc_lookup(dev, qid);
2068         if (!q)
2069                 return -ENOENT;
2070
2071         /* An check that it supports classes */
2072         cops = q->ops->cl_ops;
2073         if (cops == NULL)
2074                 return -EINVAL;
2075
2076         /* Now try to get class */
2077         if (clid == 0) {
2078                 if (portid == TC_H_ROOT)
2079                         clid = qid;
2080         } else
2081                 clid = TC_H_MAKE(qid, clid);
2082
2083         if (clid)
2084                 cl = cops->find(q, clid);
2085
2086         if (cl == 0) {
2087                 err = -ENOENT;
2088                 if (n->nlmsg_type != RTM_NEWTCLASS ||
2089                     !(n->nlmsg_flags & NLM_F_CREATE))
2090                         goto out;
2091         } else {
2092                 switch (n->nlmsg_type) {
2093                 case RTM_NEWTCLASS:
2094                         err = -EEXIST;
2095                         if (n->nlmsg_flags & NLM_F_EXCL)
2096                                 goto out;
2097                         break;
2098                 case RTM_DELTCLASS:
2099                         err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
2100                         /* Unbind the class with flilters with 0 */
2101                         tc_bind_tclass(q, portid, clid, 0);
2102                         goto out;
2103                 case RTM_GETTCLASS:
2104                         err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
2105                         goto out;
2106                 default:
2107                         err = -EINVAL;
2108                         goto out;
2109                 }
2110         }
2111
2112         if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2113                 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2114                 return -EOPNOTSUPP;
2115         }
2116
2117         new_cl = cl;
2118         err = -EOPNOTSUPP;
2119         if (cops->change)
2120                 err = cops->change(q, clid, portid, tca, &new_cl, extack);
2121         if (err == 0) {
2122                 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
2123                 /* We just create a new class, need to do reverse binding. */
2124                 if (cl != new_cl)
2125                         tc_bind_tclass(q, portid, clid, new_cl);
2126         }
2127 out:
2128         return err;
2129 }
2130
/*
 * Walker state used when dumping the classes of one qdisc: set up in
 * tc_dump_tclass_qdisc() and read back in qdisc_class_dump().
 */
struct qdisc_dump_args {
	struct qdisc_walker	w;	/* must stay first: qdisc_class_dump() casts the walker pointer back to this struct */
	struct sk_buff		*skb;	/* message buffer the classes are dumped into */
	struct netlink_callback	*cb;	/* dump callback state (source of portid and sequence number) */
};
2136
2137 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2138                             struct qdisc_walker *arg)
2139 {
2140         struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2141
2142         return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2143                               a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2144                               RTM_NEWTCLASS);
2145 }
2146
2147 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2148                                 struct tcmsg *tcm, struct netlink_callback *cb,
2149                                 int *t_p, int s_t)
2150 {
2151         struct qdisc_dump_args arg;
2152
2153         if (tc_qdisc_dump_ignore(q, false) ||
2154             *t_p < s_t || !q->ops->cl_ops ||
2155             (tcm->tcm_parent &&
2156              TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2157                 (*t_p)++;
2158                 return 0;
2159         }
2160         if (*t_p > s_t)
2161                 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2162         arg.w.fn = qdisc_class_dump;
2163         arg.skb = skb;
2164         arg.cb = cb;
2165         arg.w.stop  = 0;
2166         arg.w.skip = cb->args[1];
2167         arg.w.count = 0;
2168         q->ops->cl_ops->walk(q, &arg.w);
2169         cb->args[1] = arg.w.count;
2170         if (arg.w.stop)
2171                 return -1;
2172         (*t_p)++;
2173         return 0;
2174 }
2175
2176 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2177                                struct tcmsg *tcm, struct netlink_callback *cb,
2178                                int *t_p, int s_t, bool recur)
2179 {
2180         struct Qdisc *q;
2181         int b;
2182
2183         if (!root)
2184                 return 0;
2185
2186         if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2187                 return -1;
2188
2189         if (!qdisc_dev(root) || !recur)
2190                 return 0;
2191
2192         if (tcm->tcm_parent) {
2193                 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2194                 if (q && q != root &&
2195                     tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2196                         return -1;
2197                 return 0;
2198         }
2199         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2200                 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2201                         return -1;
2202         }
2203
2204         return 0;
2205 }
2206
2207 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2208 {
2209         struct tcmsg *tcm = nlmsg_data(cb->nlh);
2210         struct net *net = sock_net(skb->sk);
2211         struct netdev_queue *dev_queue;
2212         struct net_device *dev;
2213         int t, s_t;
2214
2215         if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2216                 return 0;
2217         dev = dev_get_by_index(net, tcm->tcm_ifindex);
2218         if (!dev)
2219                 return 0;
2220
2221         s_t = cb->args[0];
2222         t = 0;
2223
2224         if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
2225                                 skb, tcm, cb, &t, s_t, true) < 0)
2226                 goto done;
2227
2228         dev_queue = dev_ingress_queue(dev);
2229         if (dev_queue &&
2230             tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
2231                                 &t, s_t, false) < 0)
2232                 goto done;
2233
2234 done:
2235         cb->args[0] = t;
2236
2237         dev_put(dev);
2238         return skb->len;
2239 }
2240
2241 #ifdef CONFIG_PROC_FS
2242 static int psched_show(struct seq_file *seq, void *v)
2243 {
2244         seq_printf(seq, "%08x %08x %08x %08x\n",
2245                    (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2246                    1000000,
2247                    (u32)NSEC_PER_SEC / hrtimer_resolution);
2248
2249         return 0;
2250 }
2251
2252 static int __net_init psched_net_init(struct net *net)
2253 {
2254         struct proc_dir_entry *e;
2255
2256         e = proc_create_single("psched", 0, net->proc_net, psched_show);
2257         if (e == NULL)
2258                 return -ENOMEM;
2259
2260         return 0;
2261 }
2262
/* Per-netns exit: remove the "psched" entry created by psched_net_init(). */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
2267 #else
/* Without CONFIG_PROC_FS there is nothing to create; always succeeds. */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}
2272
/* Without CONFIG_PROC_FS there is nothing to remove. */
static void __net_exit psched_net_exit(struct net *net)
{
}
2276 #endif
2277
/* Per-network-namespace hooks, registered from pktsched_init(). */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
2282
2283 static int __init pktsched_init(void)
2284 {
2285         int err;
2286
2287         err = register_pernet_subsys(&psched_net_ops);
2288         if (err) {
2289                 pr_err("pktsched_init: "
2290                        "cannot initialize per netns operations\n");
2291                 return err;
2292         }
2293
2294         register_qdisc(&pfifo_fast_ops);
2295         register_qdisc(&pfifo_qdisc_ops);
2296         register_qdisc(&bfifo_qdisc_ops);
2297         register_qdisc(&pfifo_head_drop_qdisc_ops);
2298         register_qdisc(&mq_qdisc_ops);
2299         register_qdisc(&noqueue_qdisc_ops);
2300
2301         rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2302         rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2303         rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2304                       0);
2305         rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2306         rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2307         rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2308                       0);
2309
2310         return 0;
2311 }
2312
2313 subsys_initcall(pktsched_init);