1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
4  *
5  * Authors:     Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *                      HTB support at LARTC mailing list
10  *              Ondrej Kraus, <krauso@barr.cz>
11  *                      found missing INIT_QDISC(htb)
12  *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *                      helped a lot to locate nasty class stall bug
14  *              Andi Kleen, Jamal Hadi, Bert Hubert
15  *                      code review and helpful comments on shaping
16  *              Tomasz Wrona, <tw@eter.tym.pl>
17  *                      created test case so that I was able to fix nasty bug
18  *              Wilfried Weissmann
19  *                      spotted bug in dequeue code and helped with fix
20  *              Jiri Fojtasek
21  *                      fixed requeue routine
22  *              and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows assigning a priority to each class in the hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47
48     Levels:
49     Each class is assigned a level. Leaves ALWAYS have level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51     one less than their parent.
52 */
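/* Illustrative sketch of the level numbering described above (assuming
 * TC_HTB_MAXDEPTH == 8, as in current pkt_sched.h):
 *
 *   root class              level 7   (TC_HTB_MAXDEPTH - 1)
 *     interior child        level 6   (one less than its parent)
 *       ...
 *         leaf classes      level 0   (always)
 */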
53
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011         /* major must be matched with number supplied by TC as version */
56
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64
65 static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
68
69 /* used internally to keep the status of a single class */
70 enum htb_cmode {
71         HTB_CANT_SEND,          /* class can't send and can't borrow */
72         HTB_MAY_BORROW,         /* class can't send but may borrow */
73         HTB_CAN_SEND            /* class can send */
74 };
75
76 struct htb_prio {
77         union {
78                 struct rb_root  row;
79                 struct rb_root  feed;
80         };
81         struct rb_node  *ptr;
82         /* When class changes from state 1->2 and disconnects from
83          * parent's feed then we lose the ptr value and start from the
84          * first child again. Here we store classid of the
85          * last valid ptr (used when ptr is NULL).
86          */
87         u32             last_ptr_id;
88 };
89
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95         struct Qdisc_class_common common;
96         struct psched_ratecfg   rate;
97         struct psched_ratecfg   ceil;
98         s64                     buffer, cbuffer;/* token bucket depth/rate */
99         s64                     mbuffer;        /* max wait time */
100         u32                     prio;           /* these two are used only by leaves... */
101         int                     quantum;        /* but stored for parent-to-leaf return */
102
103         struct tcf_proto __rcu  *filter_list;   /* class attached filters */
104         struct tcf_block        *block;
105         int                     filter_cnt;
106
107         int                     level;          /* our level (see above) */
108         unsigned int            children;
109         struct htb_class        *parent;        /* parent class */
110
111         struct net_rate_estimator __rcu *rate_est;
112
113         /*
114          * Written often fields
115          */
116         struct gnet_stats_basic_sync bstats;
117         struct gnet_stats_basic_sync bstats_bias;
118         struct tc_htb_xstats    xstats; /* our special stats */
119
120         /* token bucket parameters */
121         s64                     tokens, ctokens;/* current number of tokens */
122         s64                     t_c;            /* checkpoint time */
123
124         union {
125                 struct htb_class_leaf {
126                         int             deficit[TC_HTB_MAXDEPTH];
127                         struct Qdisc    *q;
128                         struct netdev_queue *offload_queue;
129                 } leaf;
130                 struct htb_class_inner {
131                         struct htb_prio clprio[TC_HTB_NUMPRIO];
132                 } inner;
133         };
134         s64                     pq_key;
135
136         int                     prio_activity;  /* for which prios are we active */
137         enum htb_cmode          cmode;          /* current mode of the class */
138         struct rb_node          pq_node;        /* node for event queue */
139         struct rb_node          node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
140
141         unsigned int drops ____cacheline_aligned_in_smp;
142         unsigned int            overlimits;
143 };
144
145 struct htb_level {
146         struct rb_root  wait_pq;
147         struct htb_prio hprio[TC_HTB_NUMPRIO];
148 };
149
150 struct htb_sched {
151         struct Qdisc_class_hash clhash;
152         int                     defcls;         /* class where unclassified flows go to */
153         int                     rate2quantum;   /* quant = rate / rate2quantum */
154
155         /* filters for qdisc itself */
156         struct tcf_proto __rcu  *filter_list;
157         struct tcf_block        *block;
158
159 #define HTB_WARN_TOOMANYEVENTS  0x1
160         unsigned int            warned; /* only one warning */
161         int                     direct_qlen;
162         struct work_struct      work;
163
164         /* non-shaped skbs; let them go directly through */
165         struct qdisc_skb_head   direct_queue;
166         u32                     direct_pkts;
167         u32                     overlimits;
168
169         struct qdisc_watchdog   watchdog;
170
171         s64                     now;    /* cached dequeue time */
172
173         /* time of nearest event per level (row) */
174         s64                     near_ev_cache[TC_HTB_MAXDEPTH];
175
176         int                     row_mask[TC_HTB_MAXDEPTH];
177
178         struct htb_level        hlevel[TC_HTB_MAXDEPTH];
179
180         struct Qdisc            **direct_qdiscs;
181         unsigned int            num_direct_qdiscs;
182
183         bool                    offload;
184 };
185
186 /* find class in global hash table using given handle */
187 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
188 {
189         struct htb_sched *q = qdisc_priv(sch);
190         struct Qdisc_class_common *clc;
191
192         clc = qdisc_class_find(&q->clhash, handle);
193         if (clc == NULL)
194                 return NULL;
195         return container_of(clc, struct htb_class, common);
196 }
197
198 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
199 {
200         return (unsigned long)htb_find(handle, sch);
201 }
202 /**
203  * htb_classify - classify a packet into class
204  *
205  * It returns NULL if the packet should be dropped or -1 if the packet
206  * should be passed directly through. In all other cases a leaf class is returned.
207  * We allow direct class selection by a classid in skb->priority. Then we examine
208  * filters in the qdisc and in inner nodes (if a higher filter points to an inner
209  * node). If we end up with classid MAJOR:0 we enqueue the skb into the special
210  * internal fifo (direct). These packets then go directly through. If we still
211  * have no valid leaf we try to use the MAJOR:default leaf. If still unsuccessful,
212  * we finish and return the direct queue.
213  */
214 #define HTB_DIRECT ((struct htb_class *)-1L)
215
216 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
217                                       int *qerr)
218 {
219         struct htb_sched *q = qdisc_priv(sch);
220         struct htb_class *cl;
221         struct tcf_result res;
222         struct tcf_proto *tcf;
223         int result;
224
225         /* allow to select class by setting skb->priority to valid classid;
226          * note that nfmark can be used too by attaching filter fw with no
227          * rules in it
228          */
229         if (skb->priority == sch->handle)
230                 return HTB_DIRECT;      /* X:0 (direct flow) selected */
231         cl = htb_find(skb->priority, sch);
232         if (cl) {
233                 if (cl->level == 0)
234                         return cl;
235                 /* Start with inner filter chain if a non-leaf class is selected */
236                 tcf = rcu_dereference_bh(cl->filter_list);
237         } else {
238                 tcf = rcu_dereference_bh(q->filter_list);
239         }
240
241         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
242         while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
243 #ifdef CONFIG_NET_CLS_ACT
244                 switch (result) {
245                 case TC_ACT_QUEUED:
246                 case TC_ACT_STOLEN:
247                 case TC_ACT_TRAP:
248                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
249                         fallthrough;
250                 case TC_ACT_SHOT:
251                         return NULL;
252                 }
253 #endif
254                 cl = (void *)res.class;
255                 if (!cl) {
256                         if (res.classid == sch->handle)
257                                 return HTB_DIRECT;      /* X:0 (direct flow) */
258                         cl = htb_find(res.classid, sch);
259                         if (!cl)
260                                 break;  /* filter selected invalid classid */
261                 }
262                 if (!cl->level)
263                         return cl;      /* we hit leaf; return it */
264
265                 /* we have got inner class; apply inner filter chain */
266                 tcf = rcu_dereference_bh(cl->filter_list);
267         }
268         /* classification failed; try to use default class */
269         cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
270         if (!cl || cl->level)
271                 return HTB_DIRECT;      /* bad default .. this is safe bet */
272         return cl;
273 }
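/* Illustrative walkthrough: with a qdisc configured roughly as
 * "tc qdisc add dev eth0 root handle 1: htb default 20", a packet whose
 * skb->priority matches no class and which hits no filter falls out of the
 * loop above and is mapped to class 1:20; if that class is missing or is an
 * inner class, HTB_DIRECT is returned and the skb goes to the direct queue.
 */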
274
275 /**
276  * htb_add_to_id_tree - adds class to the round robin list
277  * @root: the root of the tree
278  * @cl: the class to add
279  * @prio: the given prio in class
280  *
281  * Routine adds class to the list (actually tree) sorted by classid.
282  * Make sure that class is not already on such list for given prio.
283  */
284 static void htb_add_to_id_tree(struct rb_root *root,
285                                struct htb_class *cl, int prio)
286 {
287         struct rb_node **p = &root->rb_node, *parent = NULL;
288
289         while (*p) {
290                 struct htb_class *c;
291                 parent = *p;
292                 c = rb_entry(parent, struct htb_class, node[prio]);
293
294                 if (cl->common.classid > c->common.classid)
295                         p = &parent->rb_right;
296                 else
297                         p = &parent->rb_left;
298         }
299         rb_link_node(&cl->node[prio], parent, p);
300         rb_insert_color(&cl->node[prio], root);
301 }
302
303 /**
304  * htb_add_to_wait_tree - adds class to the event queue with delay
305  * @q: the priority event queue
306  * @cl: the class to add
307  * @delay: delay in nanoseconds
308  *
309  * The class is added to the priority event queue to indicate that the class
310  * will change its mode at time cl->pq_key (in nanoseconds). Make sure that
311  * the class is not already in the queue.
312  */
313 static void htb_add_to_wait_tree(struct htb_sched *q,
314                                  struct htb_class *cl, s64 delay)
315 {
316         struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
317
318         cl->pq_key = q->now + delay;
319         if (cl->pq_key == q->now)
320                 cl->pq_key++;
321
322         /* update the nearest event cache */
323         if (q->near_ev_cache[cl->level] > cl->pq_key)
324                 q->near_ev_cache[cl->level] = cl->pq_key;
325
326         while (*p) {
327                 struct htb_class *c;
328                 parent = *p;
329                 c = rb_entry(parent, struct htb_class, pq_node);
330                 if (cl->pq_key >= c->pq_key)
331                         p = &parent->rb_right;
332                 else
333                         p = &parent->rb_left;
334         }
335         rb_link_node(&cl->pq_node, parent, p);
336         rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
337 }
338
339 /**
340  * htb_next_rb_node - finds next node in binary tree
341  * @n: the current node in binary tree
342  *
343  * When we are past last key we return NULL.
344  * Average complexity is 2 steps per call.
345  */
346 static inline void htb_next_rb_node(struct rb_node **n)
347 {
348         *n = rb_next(*n);
349 }
350
351 /**
352  * htb_add_class_to_row - add class to its row
353  * @q: the priority event queue
354  * @cl: the class to add
355  * @mask: the given priorities in class in bitmap
356  *
357  * The class is added to row at priorities marked in mask.
358  * It does nothing if mask == 0.
359  */
360 static inline void htb_add_class_to_row(struct htb_sched *q,
361                                         struct htb_class *cl, int mask)
362 {
363         q->row_mask[cl->level] |= mask;
364         while (mask) {
365                 int prio = ffz(~mask);
366                 mask &= ~(1 << prio);
367                 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
368         }
369 }
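/* Note on the bit walks used in htb_add_class_to_row() above and in the mask
 * loops below: ffz(~mask) yields the index of the lowest set bit of mask
 * (find-first-zero of its complement).  For example, mask == 0x5 visits
 * prio 0 and then prio 2, each bit being cleared with mask &= ~(1 << prio)
 * until mask is empty.
 */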
370
371 /* If this triggers, it is a bug in this code, but it need not be fatal */
372 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
373 {
374         if (RB_EMPTY_NODE(rb)) {
375                 WARN_ON(1);
376         } else {
377                 rb_erase(rb, root);
378                 RB_CLEAR_NODE(rb);
379         }
380 }
381
382
383 /**
384  * htb_remove_class_from_row - removes class from its row
385  * @q: the priority event queue
386  * @cl: the class to remove
387  * @mask: the given priorities in class in bitmap
388  *
389  * The class is removed from row at priorities marked in mask.
390  * It does nothing if mask == 0.
391  */
392 static inline void htb_remove_class_from_row(struct htb_sched *q,
393                                                  struct htb_class *cl, int mask)
394 {
395         int m = 0;
396         struct htb_level *hlevel = &q->hlevel[cl->level];
397
398         while (mask) {
399                 int prio = ffz(~mask);
400                 struct htb_prio *hprio = &hlevel->hprio[prio];
401
402                 mask &= ~(1 << prio);
403                 if (hprio->ptr == cl->node + prio)
404                         htb_next_rb_node(&hprio->ptr);
405
406                 htb_safe_rb_erase(cl->node + prio, &hprio->row);
407                 if (!hprio->row.rb_node)
408                         m |= 1 << prio;
409         }
410         q->row_mask[cl->level] &= ~m;
411 }
412
413 /**
414  * htb_activate_prios - creates active class's feed chain
415  * @q: the priority event queue
416  * @cl: the class to activate
417  *
418  * The class is connected to ancestors and/or appropriate rows
419  * for the priorities it participates in. cl->cmode must be the new
420  * (activated) mode. It does nothing if cl->prio_activity == 0.
421  */
422 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
423 {
424         struct htb_class *p = cl->parent;
425         long m, mask = cl->prio_activity;
426
427         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
428                 m = mask;
429                 while (m) {
430                         unsigned int prio = ffz(~m);
431
432                         if (WARN_ON_ONCE(prio >= ARRAY_SIZE(p->inner.clprio)))
433                                 break;
434                         m &= ~(1 << prio);
435
436                         if (p->inner.clprio[prio].feed.rb_node)
437                                 /* parent already has its feed in use so that
438                                  * reset bit in mask as parent is already ok
439                                  */
440                                 mask &= ~(1 << prio);
441
442                         htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
443                 }
444                 p->prio_activity |= mask;
445                 cl = p;
446                 p = cl->parent;
447
448         }
449         if (cl->cmode == HTB_CAN_SEND && mask)
450                 htb_add_class_to_row(q, cl, mask);
451 }
452
453 /**
454  * htb_deactivate_prios - remove class from feed chain
455  * @q: the priority event queue
456  * @cl: the class to deactivate
457  *
458  * cl->cmode must represent old mode (before deactivation). It does
459  * nothing if cl->prio_activity == 0. Class is removed from all feed
460  * chains and rows.
461  */
462 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
463 {
464         struct htb_class *p = cl->parent;
465         long m, mask = cl->prio_activity;
466
467         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
468                 m = mask;
469                 mask = 0;
470                 while (m) {
471                         int prio = ffz(~m);
472                         m &= ~(1 << prio);
473
474                         if (p->inner.clprio[prio].ptr == cl->node + prio) {
475                                 /* we are removing child which is pointed to from
476                                  * parent feed - forget the pointer but remember
477                                  * classid
478                                  */
479                                 p->inner.clprio[prio].last_ptr_id = cl->common.classid;
480                                 p->inner.clprio[prio].ptr = NULL;
481                         }
482
483                         htb_safe_rb_erase(cl->node + prio,
484                                           &p->inner.clprio[prio].feed);
485
486                         if (!p->inner.clprio[prio].feed.rb_node)
487                                 mask |= 1 << prio;
488                 }
489
490                 p->prio_activity &= ~mask;
491                 cl = p;
492                 p = cl->parent;
493
494         }
495         if (cl->cmode == HTB_CAN_SEND && mask)
496                 htb_remove_class_from_row(q, cl, mask);
497 }
498
499 static inline s64 htb_lowater(const struct htb_class *cl)
500 {
501         if (htb_hysteresis)
502                 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
503         else
504                 return 0;
505 }
506 static inline s64 htb_hiwater(const struct htb_class *cl)
507 {
508         if (htb_hysteresis)
509                 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
510         else
511                 return 0;
512 }
513
514
515 /**
516  * htb_class_mode - computes and returns current class mode
517  * @cl: the target class
518  * @diff: diff time in nanoseconds
519  *
520  * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
521  * is not HTB_CAN_SEND then *diff is updated to the time difference
522  * from now to the time when cl will change its state.
523  * It is also worth noting that the class mode doesn't change simply
524  * at cl->{c,}tokens == 0; rather there is a hysteresis range of
525  * 0 .. -cl->{c,}buffer. It is meant to limit the number of
526  * mode transitions per time unit. The speed gain is about 1/6.
527  */
528 static inline enum htb_cmode
529 htb_class_mode(struct htb_class *cl, s64 *diff)
530 {
531         s64 toks;
532
533         if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
534                 *diff = -toks;
535                 return HTB_CANT_SEND;
536         }
537
538         if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
539                 return HTB_CAN_SEND;
540
541         *diff = -toks;
542         return HTB_MAY_BORROW;
543 }
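/* Worked example (illustrative): with htb_hysteresis enabled and a class
 * currently in HTB_CAN_SEND, htb_hiwater() evaluates to -cl->buffer, so the
 * class keeps HTB_CAN_SEND (as long as the ceil bucket allows) until
 * cl->tokens + *diff drops below -cl->buffer instead of below 0; the extra
 * slack suppresses rapid mode flapping around the zero-token point.
 */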
544
545 /**
546  * htb_change_class_mode - changes class's mode
547  * @q: the priority event queue
548  * @cl: the target class
549  * @diff: diff time in nanoseconds
550  *
551  * This should be the only way to change a class's mode under normal
552  * circumstances. The routine will update the feed list linkage, change the
553  * mode and add the class to the wait event queue if appropriate. The new
554  * mode should be different from the old one and cl->pq_key has to be valid
555  * if changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
556  */
557 static void
558 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
559 {
560         enum htb_cmode new_mode = htb_class_mode(cl, diff);
561
562         if (new_mode == cl->cmode)
563                 return;
564
565         if (new_mode == HTB_CANT_SEND) {
566                 cl->overlimits++;
567                 q->overlimits++;
568         }
569
570         if (cl->prio_activity) {        /* not necessary: speed optimization */
571                 if (cl->cmode != HTB_CANT_SEND)
572                         htb_deactivate_prios(q, cl);
573                 cl->cmode = new_mode;
574                 if (new_mode != HTB_CANT_SEND)
575                         htb_activate_prios(q, cl);
576         } else
577                 cl->cmode = new_mode;
578 }
579
580 /**
581  * htb_activate - inserts leaf cl into appropriate active feeds
582  * @q: the priority event queue
583  * @cl: the target class
584  *
585  * Routine learns the (new) priority of the leaf and activates the feed
586  * chain for the prio. It can safely be called on an already
587  * active leaf.
588  */
589 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
590 {
591         WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
592
593         if (!cl->prio_activity) {
594                 cl->prio_activity = 1 << cl->prio;
595                 htb_activate_prios(q, cl);
596         }
597 }
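/* For a leaf, prio_activity is a single bit, 1 << cl->prio; for inner classes
 * it accumulates the priority bits of the active descendants feeding them, as
 * maintained by htb_activate_prios()/htb_deactivate_prios() above.
 */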
598
599 /**
600  * htb_deactivate - remove leaf cl from active feeds
601  * @q: the priority event queue
602  * @cl: the target class
603  *
604  * Make sure that the leaf is active. In other words it can't be called
605  * with a non-active leaf.
606  */
607 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
608 {
609         WARN_ON(!cl->prio_activity);
610
611         htb_deactivate_prios(q, cl);
612         cl->prio_activity = 0;
613 }
614
615 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
616                        struct sk_buff **to_free)
617 {
618         int ret;
619         unsigned int len = qdisc_pkt_len(skb);
620         struct htb_sched *q = qdisc_priv(sch);
621         struct htb_class *cl = htb_classify(skb, sch, &ret);
622
623         if (cl == HTB_DIRECT) {
624                 /* enqueue to helper queue */
625                 if (q->direct_queue.qlen < q->direct_qlen) {
626                         __qdisc_enqueue_tail(skb, &q->direct_queue);
627                         q->direct_pkts++;
628                 } else {
629                         return qdisc_drop(skb, sch, to_free);
630                 }
631 #ifdef CONFIG_NET_CLS_ACT
632         } else if (!cl) {
633                 if (ret & __NET_XMIT_BYPASS)
634                         qdisc_qstats_drop(sch);
635                 __qdisc_drop(skb, to_free);
636                 return ret;
637 #endif
638         } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
639                                         to_free)) != NET_XMIT_SUCCESS) {
640                 if (net_xmit_drop_count(ret)) {
641                         qdisc_qstats_drop(sch);
642                         cl->drops++;
643                 }
644                 return ret;
645         } else {
646                 htb_activate(q, cl);
647         }
648
649         sch->qstats.backlog += len;
650         sch->q.qlen++;
651         return NET_XMIT_SUCCESS;
652 }
653
654 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
655 {
656         s64 toks = diff + cl->tokens;
657
658         if (toks > cl->buffer)
659                 toks = cl->buffer;
660         toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
661         if (toks <= -cl->mbuffer)
662                 toks = 1 - cl->mbuffer;
663
664         cl->tokens = toks;
665 }
666
667 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
668 {
669         s64 toks = diff + cl->ctokens;
670
671         if (toks > cl->cbuffer)
672                 toks = cl->cbuffer;
673         toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
674         if (toks <= -cl->mbuffer)
675                 toks = 1 - cl->mbuffer;
676
677         cl->ctokens = toks;
678 }
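/* Token accounting sketch (illustrative figures): if cl->buffer corresponds to
 * 10ms worth of tokens and 50ms elapsed since the last checkpoint, toks is
 * first clamped to cl->buffer (a class cannot save up more than one burst),
 * then the transmission time of the packet at the configured rate/ceil is
 * subtracted, and the result is bounded from below at roughly -cl->mbuffer.
 */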
679
680 /**
681  * htb_charge_class - charges amount "bytes" to leaf and ancestors
682  * @q: the priority event queue
683  * @cl: the class to start iterating from
684  * @level: the minimum level to account
685  * @skb: the socket buffer
686  *
687  * Routine assumes that packet "bytes" long was dequeued from leaf cl
688  * borrowing from "level". It accounts bytes to ceil leaky bucket for
689  * leaf and all ancestors and to rate bucket for ancestors at levels
690  * "level" and higher. It also handles possible change of mode resulting
691  * from the update. Note that mode can also increase here (MAY_BORROW to
692  * CAN_SEND) because we can use a more precise clock than the event queue here.
693  * In such a case we remove the class from the event queue first.
694  */
695 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
696                              int level, struct sk_buff *skb)
697 {
698         int bytes = qdisc_pkt_len(skb);
699         enum htb_cmode old_mode;
700         s64 diff;
701
702         while (cl) {
703                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
704                 if (cl->level >= level) {
705                         if (cl->level == level)
706                                 cl->xstats.lends++;
707                         htb_accnt_tokens(cl, bytes, diff);
708                 } else {
709                         cl->xstats.borrows++;
710                         cl->tokens += diff;     /* we moved t_c; update tokens */
711                 }
712                 htb_accnt_ctokens(cl, bytes, diff);
713                 cl->t_c = q->now;
714
715                 old_mode = cl->cmode;
716                 diff = 0;
717                 htb_change_class_mode(q, cl, &diff);
718                 if (old_mode != cl->cmode) {
719                         if (old_mode != HTB_CAN_SEND)
720                                 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
721                         if (cl->cmode != HTB_CAN_SEND)
722                                 htb_add_to_wait_tree(q, cl, diff);
723                 }
724
725                 /* update basic stats except for leaves which are already updated */
726                 if (cl->level)
727                         bstats_update(&cl->bstats, skb);
728
729                 cl = cl->parent;
730         }
731 }
732
733 /**
734  * htb_do_events - make mode changes to classes at the level
735  * @q: the priority event queue
736  * @level: which wait_pq in 'q->hlevel'
737  * @start: start jiffies
738  *
739  * Scans event queue for pending events and applies them. Returns time of
740  * next pending event (0 for no event in pq, q->now for too many events).
741  * Note: Only events with cl->pq_key <= q->now are applied.
742  */
743 static s64 htb_do_events(struct htb_sched *q, const int level,
744                          unsigned long start)
745 {
746         /* don't run for longer than 2 jiffies; 2 is used instead of
747          * 1 to simplify things when jiffy is going to be incremented
748          * too soon
749          */
750         unsigned long stop_at = start + 2;
751         struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
752
753         while (time_before(jiffies, stop_at)) {
754                 struct htb_class *cl;
755                 s64 diff;
756                 struct rb_node *p = rb_first(wait_pq);
757
758                 if (!p)
759                         return 0;
760
761                 cl = rb_entry(p, struct htb_class, pq_node);
762                 if (cl->pq_key > q->now)
763                         return cl->pq_key;
764
765                 htb_safe_rb_erase(p, wait_pq);
766                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
767                 htb_change_class_mode(q, cl, &diff);
768                 if (cl->cmode != HTB_CAN_SEND)
769                         htb_add_to_wait_tree(q, cl, diff);
770         }
771
772         /* too much load - let's continue after a break for scheduling */
773         if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
774                 pr_warn("htb: too many events!\n");
775                 q->warned |= HTB_WARN_TOOMANYEVENTS;
776         }
777
778         return q->now;
779 }
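/* When the 2-jiffy budget above runs out, returning q->now makes the caller's
 * next_event <= q->now, so htb_dequeue() falls back to schedule_work() instead
 * of arming the watchdog, and the remaining events are handled shortly on a
 * later dequeue rather than by looping here.
 */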
780
781 /* Returns class->node+prio from the id-tree where the class's id is >= id.
782  * NULL if no such one exists.
783  */
784 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
785                                               u32 id)
786 {
787         struct rb_node *r = NULL;
788         while (n) {
789                 struct htb_class *cl =
790                     rb_entry(n, struct htb_class, node[prio]);
791
792                 if (id > cl->common.classid) {
793                         n = n->rb_right;
794                 } else if (id < cl->common.classid) {
795                         r = n;
796                         n = n->rb_left;
797                 } else {
798                         return n;
799                 }
800         }
801         return r;
802 }
803
804 /**
805  * htb_lookup_leaf - returns next leaf class in DRR order
806  * @hprio: the row (htb_prio) to search
807  * @prio: which prio in class
808  *
809  * Find the leaf the current feed pointer points to.
810  */
811 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
812 {
813         int i;
814         struct {
815                 struct rb_node *root;
816                 struct rb_node **pptr;
817                 u32 *pid;
818         } stk[TC_HTB_MAXDEPTH], *sp = stk;
819
820         BUG_ON(!hprio->row.rb_node);
821         sp->root = hprio->row.rb_node;
822         sp->pptr = &hprio->ptr;
823         sp->pid = &hprio->last_ptr_id;
824
825         for (i = 0; i < 65535; i++) {
826                 if (!*sp->pptr && *sp->pid) {
827                         /* ptr was invalidated but id is valid - try to recover
828                          * the original or next ptr
829                          */
830                         *sp->pptr =
831                             htb_id_find_next_upper(prio, sp->root, *sp->pid);
832                 }
833                 *sp->pid = 0;   /* ptr is valid now, so remove this hint as
834                                  * it can become out of date quickly
835                                  */
836                 if (!*sp->pptr) {       /* we are at right end; rewind & go up */
837                         *sp->pptr = sp->root;
838                         while ((*sp->pptr)->rb_left)
839                                 *sp->pptr = (*sp->pptr)->rb_left;
840                         if (sp > stk) {
841                                 sp--;
842                                 if (!*sp->pptr) {
843                                         WARN_ON(1);
844                                         return NULL;
845                                 }
846                                 htb_next_rb_node(sp->pptr);
847                         }
848                 } else {
849                         struct htb_class *cl;
850                         struct htb_prio *clp;
851
852                         cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
853                         if (!cl->level)
854                                 return cl;
855                         clp = &cl->inner.clprio[prio];
856                         (++sp)->root = clp->feed.rb_node;
857                         sp->pptr = &clp->ptr;
858                         sp->pid = &clp->last_ptr_id;
859                 }
860         }
861         WARN_ON(1);
862         return NULL;
863 }
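/* The loop above is an iterative descent through up to TC_HTB_MAXDEPTH nested
 * feed trees: at each step it either repairs/advances the per-prio ptr inside
 * the current rb-tree or pushes the chosen inner class's feed tree onto the
 * small explicit stack; the 65535 iteration cap is only a safety net against
 * a corrupted tree.
 */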
864
865 /* dequeues packet at given priority and level; call only if
866  * you are sure that there is active class at prio/level
867  */
868 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
869                                         const int level)
870 {
871         struct sk_buff *skb = NULL;
872         struct htb_class *cl, *start;
873         struct htb_level *hlevel = &q->hlevel[level];
874         struct htb_prio *hprio = &hlevel->hprio[prio];
875
876         /* look initial class up in the row */
877         start = cl = htb_lookup_leaf(hprio, prio);
878
879         do {
880 next:
881                 if (unlikely(!cl))
882                         return NULL;
883
884                 /* class can be empty - it is unlikely but can be true if leaf
885                  * qdisc drops packets in enqueue routine or if someone used
886                  * graft operation on the leaf since last dequeue;
887                  * simply deactivate and skip such class
888                  */
889                 if (unlikely(cl->leaf.q->q.qlen == 0)) {
890                         struct htb_class *next;
891                         htb_deactivate(q, cl);
892
893                         /* row/level might become empty */
894                         if ((q->row_mask[level] & (1 << prio)) == 0)
895                                 return NULL;
896
897                         next = htb_lookup_leaf(hprio, prio);
898
899                         if (cl == start)        /* fix start if we just deleted it */
900                                 start = next;
901                         cl = next;
902                         goto next;
903                 }
904
905                 skb = cl->leaf.q->dequeue(cl->leaf.q);
906                 if (likely(skb != NULL))
907                         break;
908
909                 qdisc_warn_nonwc("htb", cl->leaf.q);
910                 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
911                                          &q->hlevel[0].hprio[prio].ptr);
912                 cl = htb_lookup_leaf(hprio, prio);
913
914         } while (cl != start);
915
916         if (likely(skb != NULL)) {
917                 bstats_update(&cl->bstats, skb);
918                 cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
919                 if (cl->leaf.deficit[level] < 0) {
920                         cl->leaf.deficit[level] += cl->quantum;
921                         htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
922                                                  &q->hlevel[0].hprio[prio].ptr);
923                 }
924                 /* this used to be after charge_class but this constellation
925                  * gives us slightly better performance
926                  */
927                 if (!cl->leaf.q->q.qlen)
928                         htb_deactivate(q, cl);
929                 htb_charge_class(q, cl, level, skb);
930         }
931         return skb;
932 }
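/* DRR note (illustrative): a leaf keeps being selected until its
 * leaf.deficit[level] goes negative, at which point cl->quantum is added back
 * and the round-robin pointer advances, so classes with a larger quantum
 * receive proportionally more bytes per round at that level.
 */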
933
934 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
935 {
936         struct sk_buff *skb;
937         struct htb_sched *q = qdisc_priv(sch);
938         int level;
939         s64 next_event;
940         unsigned long start_at;
941
942         /* try to dequeue direct packets as high prio (!) to minimize cpu work */
943         skb = __qdisc_dequeue_head(&q->direct_queue);
944         if (skb != NULL) {
945 ok:
946                 qdisc_bstats_update(sch, skb);
947                 qdisc_qstats_backlog_dec(sch, skb);
948                 sch->q.qlen--;
949                 return skb;
950         }
951
952         if (!sch->q.qlen)
953                 goto fin;
954         q->now = ktime_get_ns();
955         start_at = jiffies;
956
957         next_event = q->now + 5LLU * NSEC_PER_SEC;
958
959         for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
960                 /* common case optimization - skip event handler quickly */
961                 int m;
962                 s64 event = q->near_ev_cache[level];
963
964                 if (q->now >= event) {
965                         event = htb_do_events(q, level, start_at);
966                         if (!event)
967                                 event = q->now + NSEC_PER_SEC;
968                         q->near_ev_cache[level] = event;
969                 }
970
971                 if (next_event > event)
972                         next_event = event;
973
974                 m = ~q->row_mask[level];
975                 while (m != (int)(-1)) {
976                         int prio = ffz(m);
977
978                         m |= 1 << prio;
979                         skb = htb_dequeue_tree(q, prio, level);
980                         if (likely(skb != NULL))
981                                 goto ok;
982                 }
983         }
984         if (likely(next_event > q->now))
985                 qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
986         else
987                 schedule_work(&q->work);
988 fin:
989         return skb;
990 }
991
992 /* reset all classes */
993 /* always called under BH & queue lock */
994 static void htb_reset(struct Qdisc *sch)
995 {
996         struct htb_sched *q = qdisc_priv(sch);
997         struct htb_class *cl;
998         unsigned int i;
999
1000         for (i = 0; i < q->clhash.hashsize; i++) {
1001                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1002                         if (cl->level)
1003                                 memset(&cl->inner, 0, sizeof(cl->inner));
1004                         else {
1005                                 if (cl->leaf.q && !q->offload)
1006                                         qdisc_reset(cl->leaf.q);
1007                         }
1008                         cl->prio_activity = 0;
1009                         cl->cmode = HTB_CAN_SEND;
1010                 }
1011         }
1012         qdisc_watchdog_cancel(&q->watchdog);
1013         __qdisc_reset_queue(&q->direct_queue);
1014         memset(q->hlevel, 0, sizeof(q->hlevel));
1015         memset(q->row_mask, 0, sizeof(q->row_mask));
1016 }
1017
1018 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1019         [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1020         [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
1021         [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1022         [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1023         [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1024         [TCA_HTB_RATE64] = { .type = NLA_U64 },
1025         [TCA_HTB_CEIL64] = { .type = NLA_U64 },
1026         [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1027 };
1028
1029 static void htb_work_func(struct work_struct *work)
1030 {
1031         struct htb_sched *q = container_of(work, struct htb_sched, work);
1032         struct Qdisc *sch = q->watchdog.qdisc;
1033
1034         rcu_read_lock();
1035         __netif_schedule(qdisc_root(sch));
1036         rcu_read_unlock();
1037 }
1038
1039 static void htb_set_lockdep_class_child(struct Qdisc *q)
1040 {
1041         static struct lock_class_key child_key;
1042
1043         lockdep_set_class(qdisc_lock(q), &child_key);
1044 }
1045
1046 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1047 {
1048         return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1049 }
1050
1051 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1052                     struct netlink_ext_ack *extack)
1053 {
1054         struct net_device *dev = qdisc_dev(sch);
1055         struct tc_htb_qopt_offload offload_opt;
1056         struct htb_sched *q = qdisc_priv(sch);
1057         struct nlattr *tb[TCA_HTB_MAX + 1];
1058         struct tc_htb_glob *gopt;
1059         unsigned int ntx;
1060         bool offload;
1061         int err;
1062
1063         qdisc_watchdog_init(&q->watchdog, sch);
1064         INIT_WORK(&q->work, htb_work_func);
1065
1066         if (!opt)
1067                 return -EINVAL;
1068
1069         err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1070         if (err)
1071                 return err;
1072
1073         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1074                                           NULL);
1075         if (err < 0)
1076                 return err;
1077
1078         if (!tb[TCA_HTB_INIT])
1079                 return -EINVAL;
1080
1081         gopt = nla_data(tb[TCA_HTB_INIT]);
1082         if (gopt->version != HTB_VER >> 16)
1083                 return -EINVAL;
1084
1085         offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1086
1087         if (offload) {
1088                 if (sch->parent != TC_H_ROOT) {
1089                         NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
1090                         return -EOPNOTSUPP;
1091                 }
1092
1093                 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
1094                         NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
1095                         return -EOPNOTSUPP;
1096                 }
1097
1098                 q->num_direct_qdiscs = dev->real_num_tx_queues;
1099                 q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1100                                            sizeof(*q->direct_qdiscs),
1101                                            GFP_KERNEL);
1102                 if (!q->direct_qdiscs)
1103                         return -ENOMEM;
1104         }
1105
1106         err = qdisc_class_hash_init(&q->clhash);
1107         if (err < 0)
1108                 return err;
1109
1110         if (tb[TCA_HTB_DIRECT_QLEN])
1111                 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1112         else
1113                 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1114
1115         if ((q->rate2quantum = gopt->rate2quantum) < 1)
1116                 q->rate2quantum = 1;
1117         q->defcls = gopt->defcls;
1118
1119         if (!offload)
1120                 return 0;
1121
1122         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1123                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1124                 struct Qdisc *qdisc;
1125
1126                 qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1127                                           TC_H_MAKE(sch->handle, 0), extack);
1128                 if (!qdisc) {
1129                         return -ENOMEM;
1130                 }
1131
1132                 htb_set_lockdep_class_child(qdisc);
1133                 q->direct_qdiscs[ntx] = qdisc;
1134                 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1135         }
1136
1137         sch->flags |= TCQ_F_MQROOT;
1138
1139         offload_opt = (struct tc_htb_qopt_offload) {
1140                 .command = TC_HTB_CREATE,
1141                 .parent_classid = TC_H_MAJ(sch->handle) >> 16,
1142                 .classid = TC_H_MIN(q->defcls),
1143                 .extack = extack,
1144         };
1145         err = htb_offload(dev, &offload_opt);
1146         if (err)
1147                 return err;
1148
1149         /* Defer this assignment, so that htb_destroy skips offload-related
1150          * parts (especially calling ndo_setup_tc) on errors.
1151          */
1152         q->offload = true;
1153
1154         return 0;
1155 }
1156
1157 static void htb_attach_offload(struct Qdisc *sch)
1158 {
1159         struct net_device *dev = qdisc_dev(sch);
1160         struct htb_sched *q = qdisc_priv(sch);
1161         unsigned int ntx;
1162
1163         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1164                 struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1165
1166                 old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1167                 qdisc_put(old);
1168                 qdisc_hash_add(qdisc, false);
1169         }
1170         for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1171                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1172                 struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1173
1174                 qdisc_put(old);
1175         }
1176
1177         kfree(q->direct_qdiscs);
1178         q->direct_qdiscs = NULL;
1179 }
1180
1181 static void htb_attach_software(struct Qdisc *sch)
1182 {
1183         struct net_device *dev = qdisc_dev(sch);
1184         unsigned int ntx;
1185
1186         /* Resemble qdisc_graft behavior. */
1187         for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1188                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1189                 struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1190
1191                 qdisc_refcount_inc(sch);
1192
1193                 qdisc_put(old);
1194         }
1195 }
1196
1197 static void htb_attach(struct Qdisc *sch)
1198 {
1199         struct htb_sched *q = qdisc_priv(sch);
1200
1201         if (q->offload)
1202                 htb_attach_offload(sch);
1203         else
1204                 htb_attach_software(sch);
1205 }
1206
1207 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1208 {
1209         struct htb_sched *q = qdisc_priv(sch);
1210         struct nlattr *nest;
1211         struct tc_htb_glob gopt;
1212
1213         if (q->offload)
1214                 sch->flags |= TCQ_F_OFFLOADED;
1215         else
1216                 sch->flags &= ~TCQ_F_OFFLOADED;
1217
1218         sch->qstats.overlimits = q->overlimits;
1219         /* It's safe to not acquire the qdisc lock. As we hold RTNL,
1220          * no change can happen on the qdisc parameters.
1221          */
1222
1223         gopt.direct_pkts = q->direct_pkts;
1224         gopt.version = HTB_VER;
1225         gopt.rate2quantum = q->rate2quantum;
1226         gopt.defcls = q->defcls;
1227         gopt.debug = 0;
1228
1229         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1230         if (nest == NULL)
1231                 goto nla_put_failure;
1232         if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1233             nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1234                 goto nla_put_failure;
1235         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1236                 goto nla_put_failure;
1237
1238         return nla_nest_end(skb, nest);
1239
1240 nla_put_failure:
1241         nla_nest_cancel(skb, nest);
1242         return -1;
1243 }
1244
1245 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1246                           struct sk_buff *skb, struct tcmsg *tcm)
1247 {
1248         struct htb_class *cl = (struct htb_class *)arg;
1249         struct htb_sched *q = qdisc_priv(sch);
1250         struct nlattr *nest;
1251         struct tc_htb_opt opt;
1252
1253         /* It's safe to not acquire the qdisc lock. As we hold RTNL,
1254          * no change can happen on the class parameters.
1255          */
1256         tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1257         tcm->tcm_handle = cl->common.classid;
1258         if (!cl->level && cl->leaf.q)
1259                 tcm->tcm_info = cl->leaf.q->handle;
1260
1261         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1262         if (nest == NULL)
1263                 goto nla_put_failure;
1264
1265         memset(&opt, 0, sizeof(opt));
1266
1267         psched_ratecfg_getrate(&opt.rate, &cl->rate);
1268         opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1269         psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1270         opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1271         opt.quantum = cl->quantum;
1272         opt.prio = cl->prio;
1273         opt.level = cl->level;
1274         if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1275                 goto nla_put_failure;
1276         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1277                 goto nla_put_failure;
1278         if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1279             nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1280                               TCA_HTB_PAD))
1281                 goto nla_put_failure;
1282         if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1283             nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1284                               TCA_HTB_PAD))
1285                 goto nla_put_failure;
1286
1287         return nla_nest_end(skb, nest);
1288
1289 nla_put_failure:
1290         nla_nest_cancel(skb, nest);
1291         return -1;
1292 }
1293
1294 static void htb_offload_aggregate_stats(struct htb_sched *q,
1295                                         struct htb_class *cl)
1296 {
1297         u64 bytes = 0, packets = 0;
1298         struct htb_class *c;
1299         unsigned int i;
1300
1301         gnet_stats_basic_sync_init(&cl->bstats);
1302
1303         for (i = 0; i < q->clhash.hashsize; i++) {
1304                 hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1305                         struct htb_class *p = c;
1306
1307                         while (p && p->level < cl->level)
1308                                 p = p->parent;
1309
1310                         if (p != cl)
1311                                 continue;
1312
1313                         bytes += u64_stats_read(&c->bstats_bias.bytes);
1314                         packets += u64_stats_read(&c->bstats_bias.packets);
1315                         if (c->level == 0) {
1316                                 bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
1317                                 packets += u64_stats_read(&c->leaf.q->bstats.packets);
1318                         }
1319                 }
1320         }
1321         _bstats_update(&cl->bstats, bytes, packets);
1322 }
1323
1324 static int
1325 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1326 {
1327         struct htb_class *cl = (struct htb_class *)arg;
1328         struct htb_sched *q = qdisc_priv(sch);
1329         struct gnet_stats_queue qs = {
1330                 .drops = cl->drops,
1331                 .overlimits = cl->overlimits,
1332         };
1333         __u32 qlen = 0;
1334
1335         if (!cl->level && cl->leaf.q)
1336                 qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1337
1338         cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1339                                     INT_MIN, INT_MAX);
1340         cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1341                                      INT_MIN, INT_MAX);
1342
1343         if (q->offload) {
1344                 if (!cl->level) {
1345                         if (cl->leaf.q)
1346                                 cl->bstats = cl->leaf.q->bstats;
1347                         else
1348                                 gnet_stats_basic_sync_init(&cl->bstats);
1349                         _bstats_update(&cl->bstats,
1350                                        u64_stats_read(&cl->bstats_bias.bytes),
1351                                        u64_stats_read(&cl->bstats_bias.packets));
1352                 } else {
1353                         htb_offload_aggregate_stats(q, cl);
1354                 }
1355         }
1356
1357         if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
1358             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1359             gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1360                 return -1;
1361
1362         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1363 }
1364
1365 static struct netdev_queue *
1366 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1367 {
1368         struct net_device *dev = qdisc_dev(sch);
1369         struct tc_htb_qopt_offload offload_opt;
1370         struct htb_sched *q = qdisc_priv(sch);
1371         int err;
1372
1373         if (!q->offload)
1374                 return sch->dev_queue;
1375
1376         offload_opt = (struct tc_htb_qopt_offload) {
1377                 .command = TC_HTB_LEAF_QUERY_QUEUE,
1378                 .classid = TC_H_MIN(tcm->tcm_parent),
1379         };
1380         err = htb_offload(dev, &offload_opt);
1381         if (err || offload_opt.qid >= dev->num_tx_queues)
1382                 return NULL;
1383         return netdev_get_tx_queue(dev, offload_opt.qid);
1384 }
1385
1386 static struct Qdisc *
1387 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1388 {
1389         struct net_device *dev = dev_queue->dev;
1390         struct Qdisc *old_q;
1391
1392         if (dev->flags & IFF_UP)
1393                 dev_deactivate(dev);
1394         old_q = dev_graft_qdisc(dev_queue, new_q);
1395         if (new_q)
1396                 new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1397         if (dev->flags & IFF_UP)
1398                 dev_activate(dev);
1399
1400         return old_q;
1401 }
1402
1403 static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
1404 {
1405         struct netdev_queue *queue;
1406
1407         queue = cl->leaf.offload_queue;
1408         if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
1409                 WARN_ON(cl->leaf.q->dev_queue != queue);
1410
1411         return queue;
1412 }
1413
1414 static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
1415                                    struct htb_class *cl_new, bool destroying)
1416 {
1417         struct netdev_queue *queue_old, *queue_new;
1418         struct net_device *dev = qdisc_dev(sch);
1419
1420         queue_old = htb_offload_get_queue(cl_old);
1421         queue_new = htb_offload_get_queue(cl_new);
1422
1423         if (!destroying) {
1424                 struct Qdisc *qdisc;
1425
1426                 if (dev->flags & IFF_UP)
1427                         dev_deactivate(dev);
1428                 qdisc = dev_graft_qdisc(queue_old, NULL);
1429                 WARN_ON(qdisc != cl_old->leaf.q);
1430         }
1431
1432         if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
1433                 cl_old->leaf.q->dev_queue = queue_new;
1434         cl_old->leaf.offload_queue = queue_new;
1435
1436         if (!destroying) {
1437                 struct Qdisc *qdisc;
1438
1439                 qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
1440                 if (dev->flags & IFF_UP)
1441                         dev_activate(dev);
1442                 WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1443         }
1444 }
1445
1446 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1447                      struct Qdisc **old, struct netlink_ext_ack *extack)
1448 {
1449         struct netdev_queue *dev_queue = sch->dev_queue;
1450         struct htb_class *cl = (struct htb_class *)arg;
1451         struct htb_sched *q = qdisc_priv(sch);
1452         struct Qdisc *old_q;
1453
1454         if (cl->level)
1455                 return -EINVAL;
1456
1457         if (q->offload)
1458                 dev_queue = htb_offload_get_queue(cl);
1459
1460         if (!new) {
1461                 new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1462                                         cl->common.classid, extack);
1463                 if (!new)
1464                         return -ENOBUFS;
1465         }
1466
1467         if (q->offload) {
1468                 htb_set_lockdep_class_child(new);
1469                 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1470                 qdisc_refcount_inc(new);
1471                 old_q = htb_graft_helper(dev_queue, new);
1472         }
1473
1474         *old = qdisc_replace(sch, new, &cl->leaf.q);
1475
1476         if (q->offload) {
1477                 WARN_ON(old_q != *old);
1478                 qdisc_put(old_q);
1479         }
1480
1481         return 0;
1482 }
1483
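     /* Return the leaf qdisc of a leaf class, or NULL for an inner class. */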
1484 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1485 {
1486         struct htb_class *cl = (struct htb_class *)arg;
1487         return !cl->level ? cl->leaf.q : NULL;
1488 }
1489
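     /* Called when the class's leaf qdisc runs empty: take the class off
      * the active feeds so it is no longer scheduled for dequeue.
      */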
1490 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1491 {
1492         struct htb_class *cl = (struct htb_class *)arg;
1493
1494         htb_deactivate(qdisc_priv(sch), cl);
1495 }
1496
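     /* Return 1 if @cl is the only child of its (non-root) parent, i.e.
      * deleting it would leave the parent with no children.
      */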
1497 static inline int htb_parent_last_child(struct htb_class *cl)
1498 {
1499         if (!cl->parent)
1500                 /* the root class */
1501                 return 0;
1502         if (cl->parent->children > 1)
1503                 /* not the last child */
1504                 return 0;
1505         return 1;
1506 }
1507
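     /* Convert @cl's parent back into a leaf: drop it from the wait queue if
      * needed, reset level, tokens and mode, and attach @new_q (or noop_qdisc)
      * as its leaf qdisc. In offload mode the parent inherits @cl's TX queue.
      */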
1508 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1509                                struct Qdisc *new_q)
1510 {
1511         struct htb_sched *q = qdisc_priv(sch);
1512         struct htb_class *parent = cl->parent;
1513
1514         WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1515
1516         if (parent->cmode != HTB_CAN_SEND)
1517                 htb_safe_rb_erase(&parent->pq_node,
1518                                   &q->hlevel[parent->level].wait_pq);
1519
1520         parent->level = 0;
1521         memset(&parent->inner, 0, sizeof(parent->inner));
1522         parent->leaf.q = new_q ? new_q : &noop_qdisc;
1523         parent->tokens = parent->buffer;
1524         parent->ctokens = parent->cbuffer;
1525         parent->t_c = ktime_get_ns();
1526         parent->cmode = HTB_CAN_SEND;
1527         if (q->offload)
1528                 parent->leaf.offload_queue = cl->leaf.offload_queue;
1529 }
1530
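     /* Offload counterpart of htb_parent_to_leaf(): graft @new_q onto the TX
      * queue the parent takes over; only a builtin qdisc is expected to be
      * attached there at this point.
      */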
1531 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1532                                        struct netdev_queue *dev_queue,
1533                                        struct Qdisc *new_q)
1534 {
1535         struct Qdisc *old_q;
1536
1537         /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1538         if (new_q)
1539                 qdisc_refcount_inc(new_q);
1540         old_q = htb_graft_helper(dev_queue, new_q);
1541         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1542 }
1543
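     /* Tear down the offload state of a leaf class: detach its qdisc (unless
      * qdisc_graft() already did, while destroying), fold its stats into the
      * parent's bstats_bias and ask the driver to delete the leaf. If the
      * driver moved another class onto the freed queue, re-home that class's
      * qdisc as well.
      */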
1544 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1545                                      bool last_child, bool destroying,
1546                                      struct netlink_ext_ack *extack)
1547 {
1548         struct tc_htb_qopt_offload offload_opt;
1549         struct netdev_queue *dev_queue;
1550         struct Qdisc *q = cl->leaf.q;
1551         struct Qdisc *old;
1552         int err;
1553
1554         if (cl->level)
1555                 return -EINVAL;
1556
1557         WARN_ON(!q);
1558         dev_queue = htb_offload_get_queue(cl);
1559         /* When destroying, the caller (qdisc_graft) grafts the new qdisc and
1560          * invokes qdisc_put for the qdisc being destroyed, so
1561          * htb_destroy_class_offload does not need to graft or put it again.
1562          */
1563         if (!destroying) {
1564                 old = htb_graft_helper(dev_queue, NULL);
1565                 /* Last qdisc grafted should be the same as cl->leaf.q when
1566                  * calling htb_delete.
1567                  */
1568                 WARN_ON(old != q);
1569         }
1570
1571         if (cl->parent) {
1572                 _bstats_update(&cl->parent->bstats_bias,
1573                                u64_stats_read(&q->bstats.bytes),
1574                                u64_stats_read(&q->bstats.packets));
1575         }
1576
1577         offload_opt = (struct tc_htb_qopt_offload) {
1578                 .command = !last_child ? TC_HTB_LEAF_DEL :
1579                            destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1580                            TC_HTB_LEAF_DEL_LAST,
1581                 .classid = cl->common.classid,
1582                 .extack = extack,
1583         };
1584         err = htb_offload(qdisc_dev(sch), &offload_opt);
1585
1586         if (!destroying) {
1587                 if (!err)
1588                         qdisc_put(old);
1589                 else
1590                         htb_graft_helper(dev_queue, old);
1591         }
1592
1593         if (last_child)
1594                 return err;
1595
1596         if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
1597                 u32 classid = TC_H_MAJ(sch->handle) |
1598                               TC_H_MIN(offload_opt.classid);
1599                 struct htb_class *moved_cl = htb_find(classid, sch);
1600
1601                 htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
1602         }
1603
1604         return err;
1605 }
1606
1607 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1608 {
1609         if (!cl->level) {
1610                 WARN_ON(!cl->leaf.q);
1611                 qdisc_put(cl->leaf.q);
1612         }
1613         gen_kill_estimator(&cl->rate_est);
1614         tcf_block_put(cl->block);
1615         kfree(cl);
1616 }
1617
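     /* Qdisc teardown: cancel deferred work, release all filter blocks, then
      * destroy every class. With offload enabled, leaf classes must be
      * destroyed before their parents, hence the loop below repeats until no
      * leaf remains.
      */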
1618 static void htb_destroy(struct Qdisc *sch)
1619 {
1620         struct net_device *dev = qdisc_dev(sch);
1621         struct tc_htb_qopt_offload offload_opt;
1622         struct htb_sched *q = qdisc_priv(sch);
1623         struct hlist_node *next;
1624         bool nonempty, changed;
1625         struct htb_class *cl;
1626         unsigned int i;
1627
1628         cancel_work_sync(&q->work);
1629         qdisc_watchdog_cancel(&q->watchdog);
1630         /* This line used to be after the htb_destroy_class call below
1631          * and surprisingly it worked in 2.4. But it must precede it,
1632          * because filters need their target class alive to be able to call
1633          * unbind_filter on it (without an Oops).
1634          */
1635         tcf_block_put(q->block);
1636
1637         for (i = 0; i < q->clhash.hashsize; i++) {
1638                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1639                         tcf_block_put(cl->block);
1640                         cl->block = NULL;
1641                 }
1642         }
1643
1644         do {
1645                 nonempty = false;
1646                 changed = false;
1647                 for (i = 0; i < q->clhash.hashsize; i++) {
1648                         hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1649                                                   common.hnode) {
1650                                 bool last_child;
1651
1652                                 if (!q->offload) {
1653                                         htb_destroy_class(sch, cl);
1654                                         continue;
1655                                 }
1656
1657                                 nonempty = true;
1658
1659                                 if (cl->level)
1660                                         continue;
1661
1662                                 changed = true;
1663
1664                                 last_child = htb_parent_last_child(cl);
1665                                 htb_destroy_class_offload(sch, cl, last_child,
1666                                                           true, NULL);
1667                                 qdisc_class_hash_remove(&q->clhash,
1668                                                         &cl->common);
1669                                 if (cl->parent)
1670                                         cl->parent->children--;
1671                                 if (last_child)
1672                                         htb_parent_to_leaf(sch, cl, NULL);
1673                                 htb_destroy_class(sch, cl);
1674                         }
1675                 }
1676         } while (changed);
1677         WARN_ON(nonempty);
1678
1679         qdisc_class_hash_destroy(&q->clhash);
1680         __qdisc_reset_queue(&q->direct_queue);
1681
1682         if (q->offload) {
1683                 offload_opt = (struct tc_htb_qopt_offload) {
1684                         .command = TC_HTB_DESTROY,
1685                 };
1686                 htb_offload(dev, &offload_opt);
1687         }
1688
1689         if (!q->direct_qdiscs)
1690                 return;
1691         for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1692                 qdisc_put(q->direct_qdiscs[i]);
1693         kfree(q->direct_qdiscs);
1694 }
1695
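     /* Delete a single class. Classes that still have children or filters
      * bound to them cannot be removed. If this was the parent's last child,
      * the parent becomes a leaf again with a fresh default qdisc.
      */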
1696 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1697                       struct netlink_ext_ack *extack)
1698 {
1699         struct htb_sched *q = qdisc_priv(sch);
1700         struct htb_class *cl = (struct htb_class *)arg;
1701         struct Qdisc *new_q = NULL;
1702         int last_child = 0;
1703         int err;
1704
1705         /* TODO: why don't we allow deleting a whole subtree? references? does
1706          * the tc subsystem guarantee that in htb_destroy it holds no class
1707          * refs, so that we can remove children safely there?
1708          */
1709         if (cl->children || cl->filter_cnt)
1710                 return -EBUSY;
1711
1712         if (!cl->level && htb_parent_last_child(cl))
1713                 last_child = 1;
1714
1715         if (q->offload) {
1716                 err = htb_destroy_class_offload(sch, cl, last_child, false,
1717                                                 extack);
1718                 if (err)
1719                         return err;
1720         }
1721
1722         if (last_child) {
1723                 struct netdev_queue *dev_queue = sch->dev_queue;
1724
1725                 if (q->offload)
1726                         dev_queue = htb_offload_get_queue(cl);
1727
1728                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1729                                           cl->parent->common.classid,
1730                                           NULL);
1731                 if (q->offload) {
1732                         if (new_q)
1733                                 htb_set_lockdep_class_child(new_q);
1734                         htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1735                 }
1736         }
1737
1738         sch_tree_lock(sch);
1739
1740         if (!cl->level)
1741                 qdisc_purge_queue(cl->leaf.q);
1742
1743         /* delete from hash and active; the rest is done in htb_destroy_class */
1744         qdisc_class_hash_remove(&q->clhash, &cl->common);
1745         if (cl->parent)
1746                 cl->parent->children--;
1747
1748         if (cl->prio_activity)
1749                 htb_deactivate(q, cl);
1750
1751         if (cl->cmode != HTB_CAN_SEND)
1752                 htb_safe_rb_erase(&cl->pq_node,
1753                                   &q->hlevel[cl->level].wait_pq);
1754
1755         if (last_child)
1756                 htb_parent_to_leaf(sch, cl, new_q);
1757
1758         sch_tree_unlock(sch);
1759
1760         htb_destroy_class(sch, cl);
1761         return 0;
1762 }
1763
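     /* Create a new class or modify an existing one. Creating a class may
      * turn its parent from a leaf into an inner node and, with offload, may
      * allocate or reassign a device TX queue via ndo_setup_tc.
      */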
1764 static int htb_change_class(struct Qdisc *sch, u32 classid,
1765                             u32 parentid, struct nlattr **tca,
1766                             unsigned long *arg, struct netlink_ext_ack *extack)
1767 {
1768         int err = -EINVAL;
1769         struct htb_sched *q = qdisc_priv(sch);
1770         struct htb_class *cl = (struct htb_class *)*arg, *parent;
1771         struct tc_htb_qopt_offload offload_opt;
1772         struct nlattr *opt = tca[TCA_OPTIONS];
1773         struct nlattr *tb[TCA_HTB_MAX + 1];
1774         struct Qdisc *parent_qdisc = NULL;
1775         struct netdev_queue *dev_queue;
1776         struct tc_htb_opt *hopt;
1777         u64 rate64, ceil64;
1778         int warn = 0;
1779
1780         /* extract all subattrs from opt attr */
1781         if (!opt)
1782                 goto failure;
1783
1784         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1785                                           NULL);
1786         if (err < 0)
1787                 goto failure;
1788
1789         err = -EINVAL;
1790         if (tb[TCA_HTB_PARMS] == NULL)
1791                 goto failure;
1792
1793         parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1794
1795         hopt = nla_data(tb[TCA_HTB_PARMS]);
1796         if (!hopt->rate.rate || !hopt->ceil.rate)
1797                 goto failure;
1798
1799         if (q->offload) {
1800                 /* Options not supported by the offload. */
1801                 if (hopt->rate.overhead || hopt->ceil.overhead) {
1802                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
1803                         goto failure;
1804                 }
1805                 if (hopt->rate.mpu || hopt->ceil.mpu) {
1806                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
1807                         goto failure;
1808                 }
1809                 if (hopt->quantum) {
1810                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
1811                         goto failure;
1812                 }
1813                 if (hopt->prio) {
1814                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
1815                         goto failure;
1816                 }
1817         }
1818
1819         /* Keep backward compatibility with rate_table based iproute2 tc */
1820         if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1821                 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1822                                               NULL));
1823
1824         if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1825                 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1826                                               NULL));
1827
1828         rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1829         ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1830
1831         if (!cl) {              /* new class */
1832                 struct net_device *dev = qdisc_dev(sch);
1833                 struct Qdisc *new_q, *old_q;
1834                 int prio;
1835                 struct {
1836                         struct nlattr           nla;
1837                         struct gnet_estimator   opt;
1838                 } est = {
1839                         .nla = {
1840                                 .nla_len        = nla_attr_size(sizeof(est.opt)),
1841                                 .nla_type       = TCA_RATE,
1842                         },
1843                         .opt = {
1844                                 /* 4s interval, 16s averaging constant */
1845                                 .interval       = 2,
1846                                 .ewma_log       = 2,
1847                         },
1848                 };
1849
1850                 /* check for valid classid */
1851                 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1852                     htb_find(classid, sch))
1853                         goto failure;
1854
1855                 /* check maximal depth */
1856                 if (parent && parent->parent && parent->parent->level < 2) {
1857                         pr_err("htb: tree is too deep\n");
1858                         goto failure;
1859                 }
1860                 err = -ENOBUFS;
1861                 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1862                 if (!cl)
1863                         goto failure;
1864
1865                 gnet_stats_basic_sync_init(&cl->bstats);
1866                 gnet_stats_basic_sync_init(&cl->bstats_bias);
1867
1868                 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1869                 if (err) {
1870                         kfree(cl);
1871                         goto failure;
1872                 }
1873                 if (htb_rate_est || tca[TCA_RATE]) {
1874                         err = gen_new_estimator(&cl->bstats, NULL,
1875                                                 &cl->rate_est,
1876                                                 NULL,
1877                                                 true,
1878                                                 tca[TCA_RATE] ? : &est.nla);
1879                         if (err)
1880                                 goto err_block_put;
1881                 }
1882
1883                 cl->children = 0;
1884                 RB_CLEAR_NODE(&cl->pq_node);
1885
1886                 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1887                         RB_CLEAR_NODE(&cl->node[prio]);
1888
1889                 cl->common.classid = classid;
1890
1891                 /* Make sure nothing interrupts us between the two
1892                  * ndo_setup_tc calls.
1893                  */
1894                 ASSERT_RTNL();
1895
1896                 /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1897                  * which can't be used inside sch_tree_lock()
1898                  * -- thanks to Karlis Peisenieks
1899                  */
1900                 if (!q->offload) {
1901                         dev_queue = sch->dev_queue;
1902                 } else if (!(parent && !parent->level)) {
1903                         /* Assign a dev_queue to this classid. */
1904                         offload_opt = (struct tc_htb_qopt_offload) {
1905                                 .command = TC_HTB_LEAF_ALLOC_QUEUE,
1906                                 .classid = cl->common.classid,
1907                                 .parent_classid = parent ?
1908                                         TC_H_MIN(parent->common.classid) :
1909                                         TC_HTB_CLASSID_ROOT,
1910                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1911                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1912                                 .extack = extack,
1913                         };
1914                         err = htb_offload(dev, &offload_opt);
1915                         if (err) {
1916                                 pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1917                                        err);
1918                                 goto err_kill_estimator;
1919                         }
1920                         dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1921                 } else { /* First child. */
1922                         dev_queue = htb_offload_get_queue(parent);
1923                         old_q = htb_graft_helper(dev_queue, NULL);
1924                         WARN_ON(old_q != parent->leaf.q);
1925                         offload_opt = (struct tc_htb_qopt_offload) {
1926                                 .command = TC_HTB_LEAF_TO_INNER,
1927                                 .classid = cl->common.classid,
1928                                 .parent_classid =
1929                                         TC_H_MIN(parent->common.classid),
1930                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1931                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1932                                 .extack = extack,
1933                         };
1934                         err = htb_offload(dev, &offload_opt);
1935                         if (err) {
1936                                 pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1937                                        err);
1938                                 htb_graft_helper(dev_queue, old_q);
1939                                 goto err_kill_estimator;
1940                         }
1941                         _bstats_update(&parent->bstats_bias,
1942                                        u64_stats_read(&old_q->bstats.bytes),
1943                                        u64_stats_read(&old_q->bstats.packets));
1944                         qdisc_put(old_q);
1945                 }
1946                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1947                                           classid, NULL);
1948                 if (q->offload) {
1949                         if (new_q) {
1950                                 htb_set_lockdep_class_child(new_q);
1951                                 /* One ref for cl->leaf.q, the other for
1952                                  * dev_queue->qdisc.
1953                                  */
1954                                 qdisc_refcount_inc(new_q);
1955                         }
1956                         old_q = htb_graft_helper(dev_queue, new_q);
1957                         /* No qdisc_put needed. */
1958                         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1959                 }
1960                 sch_tree_lock(sch);
1961                 if (parent && !parent->level) {
1962                         /* turn parent into inner node */
1963                         qdisc_purge_queue(parent->leaf.q);
1964                         parent_qdisc = parent->leaf.q;
1965                         if (parent->prio_activity)
1966                                 htb_deactivate(q, parent);
1967
1968                         /* remove from the event list because of the level change */
1969                         if (parent->cmode != HTB_CAN_SEND) {
1970                                 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1971                                 parent->cmode = HTB_CAN_SEND;
1972                         }
1973                         parent->level = (parent->parent ? parent->parent->level
1974                                          : TC_HTB_MAXDEPTH) - 1;
1975                         memset(&parent->inner, 0, sizeof(parent->inner));
1976                 }
1977
1978                 /* the leaf (this class) needs an elementary qdisc */
1979                 cl->leaf.q = new_q ? new_q : &noop_qdisc;
1980                 if (q->offload)
1981                         cl->leaf.offload_queue = dev_queue;
1982
1983                 cl->parent = parent;
1984
1985                 /* set class to be in HTB_CAN_SEND state */
1986                 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1987                 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1988                 cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1989                 cl->t_c = ktime_get_ns();
1990                 cl->cmode = HTB_CAN_SEND;
1991
1992                 /* attach to the hash list and parent's family */
1993                 qdisc_class_hash_insert(&q->clhash, &cl->common);
1994                 if (parent)
1995                         parent->children++;
1996                 if (cl->leaf.q != &noop_qdisc)
1997                         qdisc_hash_add(cl->leaf.q, true);
1998         } else {
1999                 if (tca[TCA_RATE]) {
2000                         err = gen_replace_estimator(&cl->bstats, NULL,
2001                                                     &cl->rate_est,
2002                                                     NULL,
2003                                                     true,
2004                                                     tca[TCA_RATE]);
2005                         if (err)
2006                                 return err;
2007                 }
2008
2009                 if (q->offload) {
2010                         struct net_device *dev = qdisc_dev(sch);
2011
2012                         offload_opt = (struct tc_htb_qopt_offload) {
2013                                 .command = TC_HTB_NODE_MODIFY,
2014                                 .classid = cl->common.classid,
2015                                 .rate = max_t(u64, hopt->rate.rate, rate64),
2016                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
2017                                 .extack = extack,
2018                         };
2019                         err = htb_offload(dev, &offload_opt);
2020                         if (err)
2021                                 /* Estimator was replaced, and rollback may fail
2022                                  * as well, so we don't try to recover it, and
2023                                  * the estimator won't work properly with the
2024                                  * offload anyway, because bstats are updated
2025                                  * only when the stats are queried.
2026                                  */
2027                                 return err;
2028                 }
2029
2030                 sch_tree_lock(sch);
2031         }
2032
2033         psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
2034         psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
2035
2036         /* there used to be a nasty bug here: we have to check that the node
2037          * is really a leaf before changing cl->leaf!
2038          */
2039         if (!cl->level) {
2040                 u64 quantum = cl->rate.rate_bytes_ps;
2041
2042                 do_div(quantum, q->rate2quantum);
2043                 cl->quantum = min_t(u64, quantum, INT_MAX);
2044
2045                 if (!hopt->quantum && cl->quantum < 1000) {
2046                         warn = -1;
2047                         cl->quantum = 1000;
2048                 }
2049                 if (!hopt->quantum && cl->quantum > 200000) {
2050                         warn = 1;
2051                         cl->quantum = 200000;
2052                 }
2053                 if (hopt->quantum)
2054                         cl->quantum = hopt->quantum;
2055                 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2056                         cl->prio = TC_HTB_NUMPRIO - 1;
2057         }
2058
2059         cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2060         cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2061
2062         sch_tree_unlock(sch);
2063         qdisc_put(parent_qdisc);
2064
2065         if (warn)
2066                 pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
2067                             cl->common.classid, (warn == -1 ? "small" : "big"));
2068
2069         qdisc_class_hash_grow(sch, &q->clhash);
2070
2071         *arg = (unsigned long)cl;
2072         return 0;
2073
2074 err_kill_estimator:
2075         gen_kill_estimator(&cl->rate_est);
2076 err_block_put:
2077         tcf_block_put(cl->block);
2078         kfree(cl);
2079 failure:
2080         return err;
2081 }
2082
2083 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2084                                        struct netlink_ext_ack *extack)
2085 {
2086         struct htb_sched *q = qdisc_priv(sch);
2087         struct htb_class *cl = (struct htb_class *)arg;
2088
2089         return cl ? cl->block : q->block;
2090 }
2091
2092 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2093                                      u32 classid)
2094 {
2095         struct htb_class *cl = htb_find(classid, sch);
2096
2097         /*if (cl && !cl->level) return 0;
2098          * The line above used to be there to prevent attaching filters to
2099          * leaves. But at least the tc_index filter uses this just to get the
2100          * class for other reasons, so we have to allow it.
2101          * ----
2102          * 19.6.2002 As Werner explained, it is OK - bind_filter is just
2103          * another way to "lock" the class - unlike "get", this lock can
2104          * be broken by the class during destroy, IIUC.
2105          */
2106         if (cl)
2107                 cl->filter_cnt++;
2108         return (unsigned long)cl;
2109 }
2110
2111 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2112 {
2113         struct htb_class *cl = (struct htb_class *)arg;
2114
2115         if (cl)
2116                 cl->filter_cnt--;
2117 }
2118
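     /* Dump all classes; tc_qdisc_stats_dump() takes care of the walker's
      * skip/count bookkeeping.
      */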
2119 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2120 {
2121         struct htb_sched *q = qdisc_priv(sch);
2122         struct htb_class *cl;
2123         unsigned int i;
2124
2125         if (arg->stop)
2126                 return;
2127
2128         for (i = 0; i < q->clhash.hashsize; i++) {
2129                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2130                         if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
2131                                 return;
2132                 }
2133         }
2134 }
2135
2136 static const struct Qdisc_class_ops htb_class_ops = {
2137         .select_queue   =       htb_select_queue,
2138         .graft          =       htb_graft,
2139         .leaf           =       htb_leaf,
2140         .qlen_notify    =       htb_qlen_notify,
2141         .find           =       htb_search,
2142         .change         =       htb_change_class,
2143         .delete         =       htb_delete,
2144         .walk           =       htb_walk,
2145         .tcf_block      =       htb_tcf_block,
2146         .bind_tcf       =       htb_bind_filter,
2147         .unbind_tcf     =       htb_unbind_filter,
2148         .dump           =       htb_dump_class,
2149         .dump_stats     =       htb_dump_class_stats,
2150 };
2151
2152 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2153         .cl_ops         =       &htb_class_ops,
2154         .id             =       "htb",
2155         .priv_size      =       sizeof(struct htb_sched),
2156         .enqueue        =       htb_enqueue,
2157         .dequeue        =       htb_dequeue,
2158         .peek           =       qdisc_peek_dequeued,
2159         .init           =       htb_init,
2160         .attach         =       htb_attach,
2161         .reset          =       htb_reset,
2162         .destroy        =       htb_destroy,
2163         .dump           =       htb_dump,
2164         .owner          =       THIS_MODULE,
2165 };
2166
2167 static int __init htb_module_init(void)
2168 {
2169         return register_qdisc(&htb_qdisc_ops);
2170 }
2171 static void __exit htb_module_exit(void)
2172 {
2173         unregister_qdisc(&htb_qdisc_ops);
2174 }
2175
2176 module_init(htb_module_init)
2177 module_exit(htb_module_exit)
2178 MODULE_LICENSE("GPL");