1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
4  *
5  * Authors:     Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *                      HTB support at LARTC mailing list
10  *              Ondrej Kraus, <krauso@barr.cz>
11  *                      found missing INIT_QDISC(htb)
12  *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *                      helped a lot to locate nasty class stall bug
14  *              Andi Kleen, Jamal Hadi, Bert Hubert
15  *                      code review and helpful comments on shaping
16  *              Tomasz Wrona, <tw@eter.tym.pl>
17  *                      created test case so that I was able to fix nasty bug
18  *              Wilfried Weissmann
19  *                      spotted bug in dequeue code and helped with fix
20  *              Jiri Fojtasek
21  *                      fixed requeue routine
22  *              and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows assigning a priority to each class in the hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47
48     Levels:
49     Each class is assigned a level. Leaves ALWAYS have level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51     one less than their parent.
52 */
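/* A small worked example of the level numbering described above, using
 * hypothetical classids and assuming TC_HTB_MAXDEPTH == 8:
 *
 *      1:1   root class          -> level TC_HTB_MAXDEPTH-1 == 7
 *      1:10  interior child      -> level 6 (one less than its parent)
 *      1:100 leaf under 1:10     -> level 0 (leaves are always level 0)
 *
 * The level index selects which row (q->hlevel[]) a class participates in.
 */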
53
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011         /* major must be matched with number supplied by TC as version */
56
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64
65 static int htb_rate_est = 0; /* htb classes have a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
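/* Both knobs above are plain module parameters, so (sketch of a hypothetical
 * shell session) they can be set at load time or, thanks to the 0640 mode,
 * read and written by root through sysfs:
 *
 *     modprobe sch_htb htb_hysteresis=1
 *     echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
 *     cat /sys/module/sch_htb/parameters/htb_rate_est
 */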
68
69 /* used internally to keep the status of a single class */
70 enum htb_cmode {
71         HTB_CANT_SEND,          /* class can't send and can't borrow */
72         HTB_MAY_BORROW,         /* class can't send but may borrow */
73         HTB_CAN_SEND            /* class can send */
74 };
75
76 struct htb_prio {
77         union {
78                 struct rb_root  row;
79                 struct rb_root  feed;
80         };
81         struct rb_node  *ptr;
82         /* When class changes from state 1->2 and disconnects from
83          * parent's feed then we lose the ptr value and start from the
84          * first child again. Here we store the classid of the
85          * last valid ptr (used when ptr is NULL).
86          */
87         u32             last_ptr_id;
88 };
89
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95         struct Qdisc_class_common common;
96         struct psched_ratecfg   rate;
97         struct psched_ratecfg   ceil;
98         s64                     buffer, cbuffer;/* token bucket depth/rate */
99         s64                     mbuffer;        /* max wait time */
100         u32                     prio;           /* these two are used only by leaves... */
101         int                     quantum;        /* but stored for parent-to-leaf return */
102
103         struct tcf_proto __rcu  *filter_list;   /* class attached filters */
104         struct tcf_block        *block;
105         int                     filter_cnt;
106
107         int                     level;          /* our level (see above) */
108         unsigned int            children;
109         struct htb_class        *parent;        /* parent class */
110
111         struct net_rate_estimator __rcu *rate_est;
112
113         /*
114          * Written often fields
115          */
116         struct gnet_stats_basic_packed bstats;
117         struct gnet_stats_basic_packed bstats_bias;
118         struct tc_htb_xstats    xstats; /* our special stats */
119
120         /* token bucket parameters */
121         s64                     tokens, ctokens;/* current number of tokens */
122         s64                     t_c;            /* checkpoint time */
123
124         union {
125                 struct htb_class_leaf {
126                         int             deficit[TC_HTB_MAXDEPTH];
127                         struct Qdisc    *q;
128                         struct netdev_queue *offload_queue;
129                 } leaf;
130                 struct htb_class_inner {
131                         struct htb_prio clprio[TC_HTB_NUMPRIO];
132                 } inner;
133         };
134         s64                     pq_key;
135
136         int                     prio_activity;  /* for which prios are we active */
137         enum htb_cmode          cmode;          /* current mode of the class */
138         struct rb_node          pq_node;        /* node for event queue */
139         struct rb_node          node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
140
141         unsigned int drops ____cacheline_aligned_in_smp;
142         unsigned int            overlimits;
143 };
144
145 struct htb_level {
146         struct rb_root  wait_pq;
147         struct htb_prio hprio[TC_HTB_NUMPRIO];
148 };
149
150 struct htb_sched {
151         struct Qdisc_class_hash clhash;
152         int                     defcls;         /* class where unclassified flows go to */
153         int                     rate2quantum;   /* quant = rate / rate2quantum */
154
155         /* filters for qdisc itself */
156         struct tcf_proto __rcu  *filter_list;
157         struct tcf_block        *block;
158
159 #define HTB_WARN_TOOMANYEVENTS  0x1
160         unsigned int            warned; /* only one warning */
161         int                     direct_qlen;
162         struct work_struct      work;
163
164         /* non shaped skbs; let them go directly thru */
165         struct qdisc_skb_head   direct_queue;
166         u32                     direct_pkts;
167         u32                     overlimits;
168
169         struct qdisc_watchdog   watchdog;
170
171         s64                     now;    /* cached dequeue time */
172
173         /* time of nearest event per level (row) */
174         s64                     near_ev_cache[TC_HTB_MAXDEPTH];
175
176         int                     row_mask[TC_HTB_MAXDEPTH];
177
178         struct htb_level        hlevel[TC_HTB_MAXDEPTH];
179
180         struct Qdisc            **direct_qdiscs;
181         unsigned int            num_direct_qdiscs;
182
183         bool                    offload;
184 };
185
186 /* find class in global hash table using given handle */
187 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
188 {
189         struct htb_sched *q = qdisc_priv(sch);
190         struct Qdisc_class_common *clc;
191
192         clc = qdisc_class_find(&q->clhash, handle);
193         if (clc == NULL)
194                 return NULL;
195         return container_of(clc, struct htb_class, common);
196 }
197
198 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
199 {
200         return (unsigned long)htb_find(handle, sch);
201 }
202 /**
203  * htb_classify - classify a packet into class
204  *
205  * It returns NULL if the packet should be dropped or -1 if the packet
206  * should be passed directly thru. In all other cases leaf class is returned.
207  * We allow direct class selection by classid in priority. Then we examine
208  * filters in qdisc and in inner nodes (if higher filter points to the inner
209  * node). If we end up with classid MAJOR:0 we enqueue the skb into special
210  * internal fifo (direct). These packets then go directly thru. If we still
211  * have no valid leaf we try to use MAJOR:default leaf. If that is still
212  * unsuccessful, we finish and return the direct queue.
213  */
214 #define HTB_DIRECT ((struct htb_class *)-1L)
215
216 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
217                                       int *qerr)
218 {
219         struct htb_sched *q = qdisc_priv(sch);
220         struct htb_class *cl;
221         struct tcf_result res;
222         struct tcf_proto *tcf;
223         int result;
224
225         /* allow selecting the class by setting skb->priority to a valid classid;
226          * note that nfmark can be used too by attaching filter fw with no
227          * rules in it
228          */
229         if (skb->priority == sch->handle)
230                 return HTB_DIRECT;      /* X:0 (direct flow) selected */
231         cl = htb_find(skb->priority, sch);
232         if (cl) {
233                 if (cl->level == 0)
234                         return cl;
235                 /* Start with inner filter chain if a non-leaf class is selected */
236                 tcf = rcu_dereference_bh(cl->filter_list);
237         } else {
238                 tcf = rcu_dereference_bh(q->filter_list);
239         }
240
241         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
242         while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
243 #ifdef CONFIG_NET_CLS_ACT
244                 switch (result) {
245                 case TC_ACT_QUEUED:
246                 case TC_ACT_STOLEN:
247                 case TC_ACT_TRAP:
248                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
249                         fallthrough;
250                 case TC_ACT_SHOT:
251                         return NULL;
252                 }
253 #endif
254                 cl = (void *)res.class;
255                 if (!cl) {
256                         if (res.classid == sch->handle)
257                                 return HTB_DIRECT;      /* X:0 (direct flow) */
258                         cl = htb_find(res.classid, sch);
259                         if (!cl)
260                                 break;  /* filter selected invalid classid */
261                 }
262                 if (!cl->level)
263                         return cl;      /* we hit leaf; return it */
264
265                 /* we have got inner class; apply inner filter chain */
266                 tcf = rcu_dereference_bh(cl->filter_list);
267         }
268         /* classification failed; try to use default class */
269         cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
270         if (!cl || cl->level)
271                 return HTB_DIRECT;      /* bad default .. this is safe bet */
272         return cl;
273 }
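/* Illustrative userspace sketch of the "direct class selection by classid in
 * priority" path handled above (socket fd and classid values are made up): a
 * process that sets SO_PRIORITY to classid 1:10, i.e. 0x00010010, bypasses
 * the filter chain and is enqueued straight into leaf 1:10 if it exists:
 *
 *     int prio = 0x00010010;    // major 1, minor 0x10
 *     setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio));
 *
 * Using the qdisc handle itself (1:0) selects the direct queue instead.
 */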
274
275 /**
276  * htb_add_to_id_tree - adds class to the round robin list
277  * @root: the root of the tree
278  * @cl: the class to add
279  * @prio: the given prio in the class
280  *
281  * Routine adds class to the list (actually tree) sorted by classid.
282  * Make sure that class is not already on such list for given prio.
283  */
284 static void htb_add_to_id_tree(struct rb_root *root,
285                                struct htb_class *cl, int prio)
286 {
287         struct rb_node **p = &root->rb_node, *parent = NULL;
288
289         while (*p) {
290                 struct htb_class *c;
291                 parent = *p;
292                 c = rb_entry(parent, struct htb_class, node[prio]);
293
294                 if (cl->common.classid > c->common.classid)
295                         p = &parent->rb_right;
296                 else
297                         p = &parent->rb_left;
298         }
299         rb_link_node(&cl->node[prio], parent, p);
300         rb_insert_color(&cl->node[prio], root);
301 }
302
303 /**
304  * htb_add_to_wait_tree - adds class to the event queue with delay
305  * @q: the priority event queue
306  * @cl: the class to add
307  * @delay: delay in nanoseconds
308  *
309  * The class is added to priority event queue to indicate that class will
310  * change its mode at time cl->pq_key (nanoseconds). Make sure that class is not
311  * already in the queue.
312  */
313 static void htb_add_to_wait_tree(struct htb_sched *q,
314                                  struct htb_class *cl, s64 delay)
315 {
316         struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
317
318         cl->pq_key = q->now + delay;
319         if (cl->pq_key == q->now)
320                 cl->pq_key++;
321
322         /* update the nearest event cache */
323         if (q->near_ev_cache[cl->level] > cl->pq_key)
324                 q->near_ev_cache[cl->level] = cl->pq_key;
325
326         while (*p) {
327                 struct htb_class *c;
328                 parent = *p;
329                 c = rb_entry(parent, struct htb_class, pq_node);
330                 if (cl->pq_key >= c->pq_key)
331                         p = &parent->rb_right;
332                 else
333                         p = &parent->rb_left;
334         }
335         rb_link_node(&cl->pq_node, parent, p);
336         rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
337 }
338
339 /**
340  * htb_next_rb_node - finds next node in binary tree
341  * @n: the current node in binary tree
342  *
343  * When we are past the last key we return NULL.
344  * Average complexity is 2 steps per call.
345  */
346 static inline void htb_next_rb_node(struct rb_node **n)
347 {
348         *n = rb_next(*n);
349 }
350
351 /**
352  * htb_add_class_to_row - add class to its row
353  * @q: the priority event queue
354  * @cl: the class to add
355  * @mask: the given priorities in class in bitmap
356  *
357  * The class is added to row at priorities marked in mask.
358  * It does nothing if mask == 0.
359  */
360 static inline void htb_add_class_to_row(struct htb_sched *q,
361                                         struct htb_class *cl, int mask)
362 {
363         q->row_mask[cl->level] |= mask;
364         while (mask) {
365                 int prio = ffz(~mask);
366                 mask &= ~(1 << prio);
367                 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
368         }
369 }
370
371 /* If this triggers, it is a bug in this code, but it need not be fatal */
372 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
373 {
374         if (RB_EMPTY_NODE(rb)) {
375                 WARN_ON(1);
376         } else {
377                 rb_erase(rb, root);
378                 RB_CLEAR_NODE(rb);
379         }
380 }
381
382
383 /**
384  * htb_remove_class_from_row - removes class from its row
385  * @q: the priority event queue
386  * @cl: the class to remove
387  * @mask: the given priorities in class in bitmap
388  *
389  * The class is removed from row at priorities marked in mask.
390  * It does nothing if mask == 0.
391  */
392 static inline void htb_remove_class_from_row(struct htb_sched *q,
393                                                  struct htb_class *cl, int mask)
394 {
395         int m = 0;
396         struct htb_level *hlevel = &q->hlevel[cl->level];
397
398         while (mask) {
399                 int prio = ffz(~mask);
400                 struct htb_prio *hprio = &hlevel->hprio[prio];
401
402                 mask &= ~(1 << prio);
403                 if (hprio->ptr == cl->node + prio)
404                         htb_next_rb_node(&hprio->ptr);
405
406                 htb_safe_rb_erase(cl->node + prio, &hprio->row);
407                 if (!hprio->row.rb_node)
408                         m |= 1 << prio;
409         }
410         q->row_mask[cl->level] &= ~m;
411 }
412
413 /**
414  * htb_activate_prios - creates active class's feed chain
415  * @q: the priority event queue
416  * @cl: the class to activate
417  *
418  * The class is connected to ancestors and/or appropriate rows
419  * for priorities it is participating in. cl->cmode must be the new
420  * (activated) mode. It does nothing if cl->prio_activity == 0.
421  */
422 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
423 {
424         struct htb_class *p = cl->parent;
425         long m, mask = cl->prio_activity;
426
427         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
428                 m = mask;
429                 while (m) {
430                         int prio = ffz(~m);
431                         m &= ~(1 << prio);
432
433                         if (p->inner.clprio[prio].feed.rb_node)
434                                 /* parent already has its feed in use, so
435                                  * reset the bit in mask as parent is already ok
436                                  */
437                                 mask &= ~(1 << prio);
438
439                         htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
440                 }
441                 p->prio_activity |= mask;
442                 cl = p;
443                 p = cl->parent;
444
445         }
446         if (cl->cmode == HTB_CAN_SEND && mask)
447                 htb_add_class_to_row(q, cl, mask);
448 }
449
450 /**
451  * htb_deactivate_prios - remove class from feed chain
452  * @q: the priority event queue
453  * @cl: the class to deactivate
454  *
455  * cl->cmode must represent the old mode (before deactivation). It does
456  * nothing if cl->prio_activity == 0. Class is removed from all feed
457  * chains and rows.
458  */
459 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
460 {
461         struct htb_class *p = cl->parent;
462         long m, mask = cl->prio_activity;
463
464         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
465                 m = mask;
466                 mask = 0;
467                 while (m) {
468                         int prio = ffz(~m);
469                         m &= ~(1 << prio);
470
471                         if (p->inner.clprio[prio].ptr == cl->node + prio) {
472                                 /* we are removing a child which is pointed to from
473                                  * the parent feed - forget the pointer but remember
474                                  * the classid
475                                  */
476                                 p->inner.clprio[prio].last_ptr_id = cl->common.classid;
477                                 p->inner.clprio[prio].ptr = NULL;
478                         }
479
480                         htb_safe_rb_erase(cl->node + prio,
481                                           &p->inner.clprio[prio].feed);
482
483                         if (!p->inner.clprio[prio].feed.rb_node)
484                                 mask |= 1 << prio;
485                 }
486
487                 p->prio_activity &= ~mask;
488                 cl = p;
489                 p = cl->parent;
490
491         }
492         if (cl->cmode == HTB_CAN_SEND && mask)
493                 htb_remove_class_from_row(q, cl, mask);
494 }
495
496 static inline s64 htb_lowater(const struct htb_class *cl)
497 {
498         if (htb_hysteresis)
499                 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
500         else
501                 return 0;
502 }
503 static inline s64 htb_hiwater(const struct htb_class *cl)
504 {
505         if (htb_hysteresis)
506                 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
507         else
508                 return 0;
509 }
510
511
512 /**
513  * htb_class_mode - computes and returns current class mode
514  * @cl: the target class
515  * @diff: diff time in nanoseconds
516  *
517  * It computes cl's mode at time cl->t_c+diff and returns it. If mode
518  * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
519  * from now to time when cl will change its state.
520  * Also it is worth noting that the class mode doesn't change simply
521  * at cl->{c,}tokens == 0 but rather there can be a hysteresis in the
522  * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
523  * mode transitions per time unit. The speed gain is about 1/6.
524  */
525 static inline enum htb_cmode
526 htb_class_mode(struct htb_class *cl, s64 *diff)
527 {
528         s64 toks;
529
530         if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
531                 *diff = -toks;
532                 return HTB_CANT_SEND;
533         }
534
535         if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
536                 return HTB_CAN_SEND;
537
538         *diff = -toks;
539         return HTB_MAY_BORROW;
540 }
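/* Worked example of the mode computation above (illustrative numbers, with
 * hysteresis disabled so both water marks are 0): if cl->ctokens + *diff is
 * -5000 ns, the first test fails and the class is HTB_CANT_SEND with *diff
 * rewritten to 5000, the time until the ceil bucket reaches zero again. If
 * instead cl->ctokens + *diff were +1 but cl->tokens + *diff were -3000, the
 * class may not use its own rate yet but can still borrow: HTB_MAY_BORROW
 * with *diff = 3000.
 */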
541
542 /**
543  * htb_change_class_mode - changes class's mode
544  * @q: the priority event queue
545  * @cl: the target class
546  * @diff: diff time in nanoseconds
547  *
548  * This should be the only way to change a class's mode under normal
549  * circumstances. The routine will update feed list linkage, change the mode
550  * and add class to the wait event queue if appropriate. New mode should
551  * be different from old one and cl->pq_key has to be valid if changing
552  * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
553  */
554 static void
555 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
556 {
557         enum htb_cmode new_mode = htb_class_mode(cl, diff);
558
559         if (new_mode == cl->cmode)
560                 return;
561
562         if (new_mode == HTB_CANT_SEND) {
563                 cl->overlimits++;
564                 q->overlimits++;
565         }
566
567         if (cl->prio_activity) {        /* not necessary: speed optimization */
568                 if (cl->cmode != HTB_CANT_SEND)
569                         htb_deactivate_prios(q, cl);
570                 cl->cmode = new_mode;
571                 if (new_mode != HTB_CANT_SEND)
572                         htb_activate_prios(q, cl);
573         } else
574                 cl->cmode = new_mode;
575 }
576
577 /**
578  * htb_activate - inserts leaf cl into appropriate active feeds
579  * @q: the priority event queue
580  * @cl: the target class
581  *
582  * The routine learns the (new) priority of the leaf and activates the feed
583  * chain for the prio. It can be called on an already active leaf safely.
585  */
586 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
587 {
588         WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
589
590         if (!cl->prio_activity) {
591                 cl->prio_activity = 1 << cl->prio;
592                 htb_activate_prios(q, cl);
593         }
594 }
595
596 /**
597  * htb_deactivate - remove leaf cl from active feeds
598  * @q: the priority event queue
599  * @cl: the target class
600  *
601  * Make sure that the leaf is active. In other words it can't be called
602  * with a non-active leaf.
603  */
604 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
605 {
606         WARN_ON(!cl->prio_activity);
607
608         htb_deactivate_prios(q, cl);
609         cl->prio_activity = 0;
610 }
611
612 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
613                        struct sk_buff **to_free)
614 {
615         int ret;
616         unsigned int len = qdisc_pkt_len(skb);
617         struct htb_sched *q = qdisc_priv(sch);
618         struct htb_class *cl = htb_classify(skb, sch, &ret);
619
620         if (cl == HTB_DIRECT) {
621                 /* enqueue to helper queue */
622                 if (q->direct_queue.qlen < q->direct_qlen) {
623                         __qdisc_enqueue_tail(skb, &q->direct_queue);
624                         q->direct_pkts++;
625                 } else {
626                         return qdisc_drop(skb, sch, to_free);
627                 }
628 #ifdef CONFIG_NET_CLS_ACT
629         } else if (!cl) {
630                 if (ret & __NET_XMIT_BYPASS)
631                         qdisc_qstats_drop(sch);
632                 __qdisc_drop(skb, to_free);
633                 return ret;
634 #endif
635         } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
636                                         to_free)) != NET_XMIT_SUCCESS) {
637                 if (net_xmit_drop_count(ret)) {
638                         qdisc_qstats_drop(sch);
639                         cl->drops++;
640                 }
641                 return ret;
642         } else {
643                 htb_activate(q, cl);
644         }
645
646         sch->qstats.backlog += len;
647         sch->q.qlen++;
648         return NET_XMIT_SUCCESS;
649 }
650
651 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
652 {
653         s64 toks = diff + cl->tokens;
654
655         if (toks > cl->buffer)
656                 toks = cl->buffer;
657         toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
658         if (toks <= -cl->mbuffer)
659                 toks = 1 - cl->mbuffer;
660
661         cl->tokens = toks;
662 }
663
664 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
665 {
666         s64 toks = diff + cl->ctokens;
667
668         if (toks > cl->cbuffer)
669                 toks = cl->cbuffer;
670         toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
671         if (toks <= -cl->mbuffer)
672                 toks = 1 - cl->mbuffer;
673
674         cl->ctokens = toks;
675 }
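/* Rough arithmetic behind the two helpers above (figures are illustrative):
 * psched_l2t_ns() converts a packet length into the time the configured rate
 * needs to transmit it. At 1 Mbit/s a 1500 byte packet costs roughly
 * 1500 * 8 / 1e6 s = 12 ms = 12,000,000 ns of tokens; that amount is
 * subtracted after topping the bucket up by "diff" and clamping it to
 * cl->buffer (or cl->cbuffer), and the result is floored just above -mbuffer.
 */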
676
677 /**
678  * htb_charge_class - charges amount "bytes" to leaf and ancestors
679  * @q: the priority event queue
680  * @cl: the class to start iterating from
681  * @level: the minimum level to account
682  * @skb: the socket buffer
683  *
684  * Routine assumes that packet "bytes" long was dequeued from leaf cl
685  * borrowing from "level". It accounts bytes to ceil leaky bucket for
686  * leaf and all ancestors and to rate bucket for ancestors at levels
687  * "level" and higher. It also handles possible change of mode resulting
688  * from the update. Note that mode can also increase here (MAY_BORROW to
689  * CAN_SEND) because we can use a more precise clock than the event queue here.
690  * In such a case we remove the class from the event queue first.
691  */
692 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
693                              int level, struct sk_buff *skb)
694 {
695         int bytes = qdisc_pkt_len(skb);
696         enum htb_cmode old_mode;
697         s64 diff;
698
699         while (cl) {
700                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
701                 if (cl->level >= level) {
702                         if (cl->level == level)
703                                 cl->xstats.lends++;
704                         htb_accnt_tokens(cl, bytes, diff);
705                 } else {
706                         cl->xstats.borrows++;
707                         cl->tokens += diff;     /* we moved t_c; update tokens */
708                 }
709                 htb_accnt_ctokens(cl, bytes, diff);
710                 cl->t_c = q->now;
711
712                 old_mode = cl->cmode;
713                 diff = 0;
714                 htb_change_class_mode(q, cl, &diff);
715                 if (old_mode != cl->cmode) {
716                         if (old_mode != HTB_CAN_SEND)
717                                 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
718                         if (cl->cmode != HTB_CAN_SEND)
719                                 htb_add_to_wait_tree(q, cl, diff);
720                 }
721
722                 /* update basic stats except for leaves which are already updated */
723                 if (cl->level)
724                         bstats_update(&cl->bstats, skb);
725
726                 cl = cl->parent;
727         }
728 }
729
730 /**
731  * htb_do_events - make mode changes to classes at the level
732  * @q: the priority event queue
733  * @level: which wait_pq in 'q->hlevel'
734  * @start: start jiffies
735  *
736  * Scans event queue for pending events and applies them. Returns time of
737  * next pending event (0 for no event in pq, q->now for too many events).
738  * Note: Applied are events which have cl->pq_key <= q->now.
739  */
740 static s64 htb_do_events(struct htb_sched *q, const int level,
741                          unsigned long start)
742 {
743         /* don't run for longer than 2 jiffies; 2 is used instead of
744          * 1 to simplify things when jiffy is going to be incremented
745          * too soon
746          */
747         unsigned long stop_at = start + 2;
748         struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
749
750         while (time_before(jiffies, stop_at)) {
751                 struct htb_class *cl;
752                 s64 diff;
753                 struct rb_node *p = rb_first(wait_pq);
754
755                 if (!p)
756                         return 0;
757
758                 cl = rb_entry(p, struct htb_class, pq_node);
759                 if (cl->pq_key > q->now)
760                         return cl->pq_key;
761
762                 htb_safe_rb_erase(p, wait_pq);
763                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
764                 htb_change_class_mode(q, cl, &diff);
765                 if (cl->cmode != HTB_CAN_SEND)
766                         htb_add_to_wait_tree(q, cl, diff);
767         }
768
769         /* too much load - let's continue after a break for scheduling */
770         if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
771                 pr_warn("htb: too many events!\n");
772                 q->warned |= HTB_WARN_TOOMANYEVENTS;
773         }
774
775         return q->now;
776 }
777
778 /* Returns class->node+prio from id-tree where the class's id is >= id. NULL
779  * if no such one exists.
780  */
781 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
782                                               u32 id)
783 {
784         struct rb_node *r = NULL;
785         while (n) {
786                 struct htb_class *cl =
787                     rb_entry(n, struct htb_class, node[prio]);
788
789                 if (id > cl->common.classid) {
790                         n = n->rb_right;
791                 } else if (id < cl->common.classid) {
792                         r = n;
793                         n = n->rb_left;
794                 } else {
795                         return n;
796                 }
797         }
798         return r;
799 }
800
801 /**
802  * htb_lookup_leaf - returns next leaf class in DRR order
803  * @hprio: the current one
804  * @prio: which prio in class
805  *
806  * Find the leaf where the current feed pointer points to.
807  */
808 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
809 {
810         int i;
811         struct {
812                 struct rb_node *root;
813                 struct rb_node **pptr;
814                 u32 *pid;
815         } stk[TC_HTB_MAXDEPTH], *sp = stk;
816
817         BUG_ON(!hprio->row.rb_node);
818         sp->root = hprio->row.rb_node;
819         sp->pptr = &hprio->ptr;
820         sp->pid = &hprio->last_ptr_id;
821
822         for (i = 0; i < 65535; i++) {
823                 if (!*sp->pptr && *sp->pid) {
824                         /* ptr was invalidated but id is valid - try to recover
825                          * the original or next ptr
826                          */
827                         *sp->pptr =
828                             htb_id_find_next_upper(prio, sp->root, *sp->pid);
829                 }
830                 *sp->pid = 0;   /* ptr is valid now, so remove this hint as it
831                                  * can become out of date quickly
832                                  */
833                 if (!*sp->pptr) {       /* we are at right end; rewind & go up */
834                         *sp->pptr = sp->root;
835                         while ((*sp->pptr)->rb_left)
836                                 *sp->pptr = (*sp->pptr)->rb_left;
837                         if (sp > stk) {
838                                 sp--;
839                                 if (!*sp->pptr) {
840                                         WARN_ON(1);
841                                         return NULL;
842                                 }
843                                 htb_next_rb_node(sp->pptr);
844                         }
845                 } else {
846                         struct htb_class *cl;
847                         struct htb_prio *clp;
848
849                         cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
850                         if (!cl->level)
851                                 return cl;
852                         clp = &cl->inner.clprio[prio];
853                         (++sp)->root = clp->feed.rb_node;
854                         sp->pptr = &clp->ptr;
855                         sp->pid = &clp->last_ptr_id;
856                 }
857         }
858         WARN_ON(1);
859         return NULL;
860 }
861
862 /* dequeues packet at given priority and level; call only if
863  * you are sure that there is active class at prio/level
864  */
865 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
866                                         const int level)
867 {
868         struct sk_buff *skb = NULL;
869         struct htb_class *cl, *start;
870         struct htb_level *hlevel = &q->hlevel[level];
871         struct htb_prio *hprio = &hlevel->hprio[prio];
872
873         /* look initial class up in the row */
874         start = cl = htb_lookup_leaf(hprio, prio);
875
876         do {
877 next:
878                 if (unlikely(!cl))
879                         return NULL;
880
881                 /* class can be empty - it is unlikely but can be true if leaf
882                  * qdisc drops packets in enqueue routine or if someone used
883                  * graft operation on the leaf since last dequeue;
884                  * simply deactivate and skip such class
885                  */
886                 if (unlikely(cl->leaf.q->q.qlen == 0)) {
887                         struct htb_class *next;
888                         htb_deactivate(q, cl);
889
890                         /* row/level might become empty */
891                         if ((q->row_mask[level] & (1 << prio)) == 0)
892                                 return NULL;
893
894                         next = htb_lookup_leaf(hprio, prio);
895
896                         if (cl == start)        /* fix start if we just deleted it */
897                                 start = next;
898                         cl = next;
899                         goto next;
900                 }
901
902                 skb = cl->leaf.q->dequeue(cl->leaf.q);
903                 if (likely(skb != NULL))
904                         break;
905
906                 qdisc_warn_nonwc("htb", cl->leaf.q);
907                 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
908                                          &q->hlevel[0].hprio[prio].ptr);
909                 cl = htb_lookup_leaf(hprio, prio);
910
911         } while (cl != start);
912
913         if (likely(skb != NULL)) {
914                 bstats_update(&cl->bstats, skb);
915                 cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
916                 if (cl->leaf.deficit[level] < 0) {
917                         cl->leaf.deficit[level] += cl->quantum;
918                         htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
919                                                  &q->hlevel[0].hprio[prio].ptr);
920                 }
921                 /* this used to be after charge_class but this constellation
922                  * gives us slightly better performance
923                  */
924                 if (!cl->leaf.q->q.qlen)
925                         htb_deactivate(q, cl);
926                 htb_charge_class(q, cl, level, skb);
927         }
928         return skb;
929 }
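/* Sketch of the deficit round-robin step above with made-up numbers: assume
 * cl->quantum == 1500 and cl->leaf.deficit[level] == 200. Dequeuing a 1000
 * byte packet drives the deficit to -800; since that is negative, the quantum
 * is added back (deficit becomes 700) and the round-robin pointer for this
 * prio/level advances to the next class. A larger quantum therefore lets a
 * class send proportionally more bytes per round.
 */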
930
931 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
932 {
933         struct sk_buff *skb;
934         struct htb_sched *q = qdisc_priv(sch);
935         int level;
936         s64 next_event;
937         unsigned long start_at;
938
939         /* try to dequeue direct packets as high prio (!) to minimize cpu work */
940         skb = __qdisc_dequeue_head(&q->direct_queue);
941         if (skb != NULL) {
942 ok:
943                 qdisc_bstats_update(sch, skb);
944                 qdisc_qstats_backlog_dec(sch, skb);
945                 sch->q.qlen--;
946                 return skb;
947         }
948
949         if (!sch->q.qlen)
950                 goto fin;
951         q->now = ktime_get_ns();
952         start_at = jiffies;
953
954         next_event = q->now + 5LLU * NSEC_PER_SEC;
955
956         for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
957                 /* common case optimization - skip event handler quickly */
958                 int m;
959                 s64 event = q->near_ev_cache[level];
960
961                 if (q->now >= event) {
962                         event = htb_do_events(q, level, start_at);
963                         if (!event)
964                                 event = q->now + NSEC_PER_SEC;
965                         q->near_ev_cache[level] = event;
966                 }
967
968                 if (next_event > event)
969                         next_event = event;
970
971                 m = ~q->row_mask[level];
972                 while (m != (int)(-1)) {
973                         int prio = ffz(m);
974
975                         m |= 1 << prio;
976                         skb = htb_dequeue_tree(q, prio, level);
977                         if (likely(skb != NULL))
978                                 goto ok;
979                 }
980         }
981         if (likely(next_event > q->now))
982                 qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
983         else
984                 schedule_work(&q->work);
985 fin:
986         return skb;
987 }
988
989 /* reset all classes */
990 /* always called under BH & queue lock */
991 static void htb_reset(struct Qdisc *sch)
992 {
993         struct htb_sched *q = qdisc_priv(sch);
994         struct htb_class *cl;
995         unsigned int i;
996
997         for (i = 0; i < q->clhash.hashsize; i++) {
998                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
999                         if (cl->level)
1000                                 memset(&cl->inner, 0, sizeof(cl->inner));
1001                         else {
1002                                 if (cl->leaf.q && !q->offload)
1003                                         qdisc_reset(cl->leaf.q);
1004                         }
1005                         cl->prio_activity = 0;
1006                         cl->cmode = HTB_CAN_SEND;
1007                 }
1008         }
1009         qdisc_watchdog_cancel(&q->watchdog);
1010         __qdisc_reset_queue(&q->direct_queue);
1011         sch->q.qlen = 0;
1012         sch->qstats.backlog = 0;
1013         memset(q->hlevel, 0, sizeof(q->hlevel));
1014         memset(q->row_mask, 0, sizeof(q->row_mask));
1015 }
1016
1017 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1018         [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1019         [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
1020         [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1021         [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1022         [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1023         [TCA_HTB_RATE64] = { .type = NLA_U64 },
1024         [TCA_HTB_CEIL64] = { .type = NLA_U64 },
1025         [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1026 };
1027
1028 static void htb_work_func(struct work_struct *work)
1029 {
1030         struct htb_sched *q = container_of(work, struct htb_sched, work);
1031         struct Qdisc *sch = q->watchdog.qdisc;
1032
1033         rcu_read_lock();
1034         __netif_schedule(qdisc_root(sch));
1035         rcu_read_unlock();
1036 }
1037
1038 static void htb_set_lockdep_class_child(struct Qdisc *q)
1039 {
1040         static struct lock_class_key child_key;
1041
1042         lockdep_set_class(qdisc_lock(q), &child_key);
1043 }
1044
1045 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1046 {
1047         return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1048 }
1049
1050 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1051                     struct netlink_ext_ack *extack)
1052 {
1053         struct net_device *dev = qdisc_dev(sch);
1054         struct tc_htb_qopt_offload offload_opt;
1055         struct htb_sched *q = qdisc_priv(sch);
1056         struct nlattr *tb[TCA_HTB_MAX + 1];
1057         struct tc_htb_glob *gopt;
1058         unsigned int ntx;
1059         bool offload;
1060         int err;
1061
1062         qdisc_watchdog_init(&q->watchdog, sch);
1063         INIT_WORK(&q->work, htb_work_func);
1064
1065         if (!opt)
1066                 return -EINVAL;
1067
1068         err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1069         if (err)
1070                 return err;
1071
1072         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1073                                           NULL);
1074         if (err < 0)
1075                 return err;
1076
1077         if (!tb[TCA_HTB_INIT])
1078                 return -EINVAL;
1079
1080         gopt = nla_data(tb[TCA_HTB_INIT]);
1081         if (gopt->version != HTB_VER >> 16)
1082                 return -EINVAL;
1083
1084         offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1085
1086         if (offload) {
1087                 if (sch->parent != TC_H_ROOT)
1088                         return -EOPNOTSUPP;
1089
1090                 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1091                         return -EOPNOTSUPP;
1092
1093                 q->num_direct_qdiscs = dev->real_num_tx_queues;
1094                 q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1095                                            sizeof(*q->direct_qdiscs),
1096                                            GFP_KERNEL);
1097                 if (!q->direct_qdiscs)
1098                         return -ENOMEM;
1099         }
1100
1101         err = qdisc_class_hash_init(&q->clhash);
1102         if (err < 0)
1103                 goto err_free_direct_qdiscs;
1104
1105         qdisc_skb_head_init(&q->direct_queue);
1106
1107         if (tb[TCA_HTB_DIRECT_QLEN])
1108                 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1109         else
1110                 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1111
1112         if ((q->rate2quantum = gopt->rate2quantum) < 1)
1113                 q->rate2quantum = 1;
1114         q->defcls = gopt->defcls;
1115
1116         if (!offload)
1117                 return 0;
1118
1119         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1120                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1121                 struct Qdisc *qdisc;
1122
1123                 qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1124                                           TC_H_MAKE(sch->handle, 0), extack);
1125                 if (!qdisc) {
1126                         err = -ENOMEM;
1127                         goto err_free_qdiscs;
1128                 }
1129
1130                 htb_set_lockdep_class_child(qdisc);
1131                 q->direct_qdiscs[ntx] = qdisc;
1132                 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1133         }
1134
1135         sch->flags |= TCQ_F_MQROOT;
1136
1137         offload_opt = (struct tc_htb_qopt_offload) {
1138                 .command = TC_HTB_CREATE,
1139                 .parent_classid = TC_H_MAJ(sch->handle) >> 16,
1140                 .classid = TC_H_MIN(q->defcls),
1141                 .extack = extack,
1142         };
1143         err = htb_offload(dev, &offload_opt);
1144         if (err)
1145                 goto err_free_qdiscs;
1146
1147         /* Defer this assignment, so that htb_destroy skips offload-related
1148          * parts (especially calling ndo_setup_tc) on errors.
1149          */
1150         q->offload = true;
1151
1152         return 0;
1153
1154 err_free_qdiscs:
1155         for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
1156              ntx++)
1157                 qdisc_put(q->direct_qdiscs[ntx]);
1158
1159         qdisc_class_hash_destroy(&q->clhash);
1160         /* Prevent use-after-free and double-free when htb_destroy gets called.
1161          */
1162         q->clhash.hash = NULL;
1163         q->clhash.hashsize = 0;
1164
1165 err_free_direct_qdiscs:
1166         kfree(q->direct_qdiscs);
1167         q->direct_qdiscs = NULL;
1168         return err;
1169 }
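/* Hypothetical tc(8) invocations matching the two init paths above (device
 * name is made up): the plain software qdisc, and the offloaded variant which
 * must be attached at the root of a device whose driver implements
 * ndo_setup_tc for TC_SETUP_QDISC_HTB:
 *
 *     tc qdisc add dev eth0 root handle 1: htb default 10
 *     tc qdisc replace dev eth0 root handle 1: htb offload
 *
 * In the offload case one pfifo per real TX queue is pre-allocated here and
 * grafted onto the queues later in htb_attach_offload().
 */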
1170
1171 static void htb_attach_offload(struct Qdisc *sch)
1172 {
1173         struct net_device *dev = qdisc_dev(sch);
1174         struct htb_sched *q = qdisc_priv(sch);
1175         unsigned int ntx;
1176
1177         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1178                 struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1179
1180                 old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1181                 qdisc_put(old);
1182                 qdisc_hash_add(qdisc, false);
1183         }
1184         for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1185                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1186                 struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1187
1188                 qdisc_put(old);
1189         }
1190
1191         kfree(q->direct_qdiscs);
1192         q->direct_qdiscs = NULL;
1193 }
1194
1195 static void htb_attach_software(struct Qdisc *sch)
1196 {
1197         struct net_device *dev = qdisc_dev(sch);
1198         unsigned int ntx;
1199
1200         /* Resemble qdisc_graft behavior. */
1201         for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1202                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1203                 struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1204
1205                 qdisc_refcount_inc(sch);
1206
1207                 qdisc_put(old);
1208         }
1209 }
1210
1211 static void htb_attach(struct Qdisc *sch)
1212 {
1213         struct htb_sched *q = qdisc_priv(sch);
1214
1215         if (q->offload)
1216                 htb_attach_offload(sch);
1217         else
1218                 htb_attach_software(sch);
1219 }
1220
1221 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1222 {
1223         struct htb_sched *q = qdisc_priv(sch);
1224         struct nlattr *nest;
1225         struct tc_htb_glob gopt;
1226
1227         if (q->offload)
1228                 sch->flags |= TCQ_F_OFFLOADED;
1229         else
1230                 sch->flags &= ~TCQ_F_OFFLOADED;
1231
1232         sch->qstats.overlimits = q->overlimits;
1233         /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1234          * no change can happen to the qdisc parameters.
1235          */
1236
1237         gopt.direct_pkts = q->direct_pkts;
1238         gopt.version = HTB_VER;
1239         gopt.rate2quantum = q->rate2quantum;
1240         gopt.defcls = q->defcls;
1241         gopt.debug = 0;
1242
1243         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1244         if (nest == NULL)
1245                 goto nla_put_failure;
1246         if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1247             nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1248                 goto nla_put_failure;
1249         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1250                 goto nla_put_failure;
1251
1252         return nla_nest_end(skb, nest);
1253
1254 nla_put_failure:
1255         nla_nest_cancel(skb, nest);
1256         return -1;
1257 }
1258
1259 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1260                           struct sk_buff *skb, struct tcmsg *tcm)
1261 {
1262         struct htb_class *cl = (struct htb_class *)arg;
1263         struct htb_sched *q = qdisc_priv(sch);
1264         struct nlattr *nest;
1265         struct tc_htb_opt opt;
1266
1267         /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1268          * no change can happen to the class parameters.
1269          */
1270         tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1271         tcm->tcm_handle = cl->common.classid;
1272         if (!cl->level && cl->leaf.q)
1273                 tcm->tcm_info = cl->leaf.q->handle;
1274
1275         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1276         if (nest == NULL)
1277                 goto nla_put_failure;
1278
1279         memset(&opt, 0, sizeof(opt));
1280
1281         psched_ratecfg_getrate(&opt.rate, &cl->rate);
1282         opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1283         psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1284         opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1285         opt.quantum = cl->quantum;
1286         opt.prio = cl->prio;
1287         opt.level = cl->level;
1288         if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1289                 goto nla_put_failure;
1290         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1291                 goto nla_put_failure;
1292         if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1293             nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1294                               TCA_HTB_PAD))
1295                 goto nla_put_failure;
1296         if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1297             nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1298                               TCA_HTB_PAD))
1299                 goto nla_put_failure;
1300
1301         return nla_nest_end(skb, nest);
1302
1303 nla_put_failure:
1304         nla_nest_cancel(skb, nest);
1305         return -1;
1306 }
1307
1308 static void htb_offload_aggregate_stats(struct htb_sched *q,
1309                                         struct htb_class *cl)
1310 {
1311         struct htb_class *c;
1312         unsigned int i;
1313
1314         memset(&cl->bstats, 0, sizeof(cl->bstats));
1315
1316         for (i = 0; i < q->clhash.hashsize; i++) {
1317                 hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1318                         struct htb_class *p = c;
1319
1320                         while (p && p->level < cl->level)
1321                                 p = p->parent;
1322
1323                         if (p != cl)
1324                                 continue;
1325
1326                         cl->bstats.bytes += c->bstats_bias.bytes;
1327                         cl->bstats.packets += c->bstats_bias.packets;
1328                         if (c->level == 0) {
1329                                 cl->bstats.bytes += c->leaf.q->bstats.bytes;
1330                                 cl->bstats.packets += c->leaf.q->bstats.packets;
1331                         }
1332                 }
1333         }
1334 }
1335
1336 static int
1337 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1338 {
1339         struct htb_class *cl = (struct htb_class *)arg;
1340         struct htb_sched *q = qdisc_priv(sch);
1341         struct gnet_stats_queue qs = {
1342                 .drops = cl->drops,
1343                 .overlimits = cl->overlimits,
1344         };
1345         __u32 qlen = 0;
1346
1347         if (!cl->level && cl->leaf.q)
1348                 qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1349
1350         cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1351                                     INT_MIN, INT_MAX);
1352         cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1353                                      INT_MIN, INT_MAX);
1354
1355         if (q->offload) {
1356                 if (!cl->level) {
1357                         if (cl->leaf.q)
1358                                 cl->bstats = cl->leaf.q->bstats;
1359                         else
1360                                 memset(&cl->bstats, 0, sizeof(cl->bstats));
1361                         cl->bstats.bytes += cl->bstats_bias.bytes;
1362                         cl->bstats.packets += cl->bstats_bias.packets;
1363                 } else {
1364                         htb_offload_aggregate_stats(q, cl);
1365                 }
1366         }
1367
1368         if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1369                                   d, NULL, &cl->bstats) < 0 ||
1370             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1371             gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1372                 return -1;
1373
1374         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1375 }
1376
1377 static struct netdev_queue *
1378 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1379 {
1380         struct net_device *dev = qdisc_dev(sch);
1381         struct tc_htb_qopt_offload offload_opt;
1382         struct htb_sched *q = qdisc_priv(sch);
1383         int err;
1384
1385         if (!q->offload)
1386                 return sch->dev_queue;
1387
1388         offload_opt = (struct tc_htb_qopt_offload) {
1389                 .command = TC_HTB_LEAF_QUERY_QUEUE,
1390                 .classid = TC_H_MIN(tcm->tcm_parent),
1391         };
1392         err = htb_offload(dev, &offload_opt);
1393         if (err || offload_opt.qid >= dev->num_tx_queues)
1394                 return NULL;
1395         return netdev_get_tx_queue(dev, offload_opt.qid);
1396 }
1397
1398 static struct Qdisc *
1399 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1400 {
1401         struct net_device *dev = dev_queue->dev;
1402         struct Qdisc *old_q;
1403
1404         if (dev->flags & IFF_UP)
1405                 dev_deactivate(dev);
1406         old_q = dev_graft_qdisc(dev_queue, new_q);
1407         if (new_q)
1408                 new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1409         if (dev->flags & IFF_UP)
1410                 dev_activate(dev);
1411
1412         return old_q;
1413 }
1414
1415 static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
1416 {
1417         struct netdev_queue *queue;
1418
1419         queue = cl->leaf.offload_queue;
1420         if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
1421                 WARN_ON(cl->leaf.q->dev_queue != queue);
1422
1423         return queue;
1424 }
1425
1426 static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
1427                                    struct htb_class *cl_new, bool destroying)
1428 {
1429         struct netdev_queue *queue_old, *queue_new;
1430         struct net_device *dev = qdisc_dev(sch);
1431
1432         queue_old = htb_offload_get_queue(cl_old);
1433         queue_new = htb_offload_get_queue(cl_new);
1434
1435         if (!destroying) {
1436                 struct Qdisc *qdisc;
1437
1438                 if (dev->flags & IFF_UP)
1439                         dev_deactivate(dev);
1440                 qdisc = dev_graft_qdisc(queue_old, NULL);
1441                 WARN_ON(qdisc != cl_old->leaf.q);
1442         }
1443
1444         if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
1445                 cl_old->leaf.q->dev_queue = queue_new;
1446         cl_old->leaf.offload_queue = queue_new;
1447
1448         if (!destroying) {
1449                 struct Qdisc *qdisc;
1450
1451                 qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
1452                 if (dev->flags & IFF_UP)
1453                         dev_activate(dev);
1454                 WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1455         }
1456 }
1457
1458 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1459                      struct Qdisc **old, struct netlink_ext_ack *extack)
1460 {
1461         struct netdev_queue *dev_queue = sch->dev_queue;
1462         struct htb_class *cl = (struct htb_class *)arg;
1463         struct htb_sched *q = qdisc_priv(sch);
1464         struct Qdisc *old_q;
1465
1466         if (cl->level)
1467                 return -EINVAL;
1468
1469         if (q->offload)
1470                 dev_queue = htb_offload_get_queue(cl);
1471
1472         if (!new) {
1473                 new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1474                                         cl->common.classid, extack);
1475                 if (!new)
1476                         return -ENOBUFS;
1477         }
1478
1479         if (q->offload) {
1480                 htb_set_lockdep_class_child(new);
1481                 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1482                 qdisc_refcount_inc(new);
1483                 old_q = htb_graft_helper(dev_queue, new);
1484         }
1485
1486         *old = qdisc_replace(sch, new, &cl->leaf.q);
1487
1488         if (q->offload) {
1489                 WARN_ON(old_q != *old);
1490                 qdisc_put(old_q);
1491         }
1492
1493         return 0;
1494 }
1495
1496 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1497 {
1498         struct htb_class *cl = (struct htb_class *)arg;
1499         return !cl->level ? cl->leaf.q : NULL;
1500 }
1501
1502 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1503 {
1504         struct htb_class *cl = (struct htb_class *)arg;
1505
1506         htb_deactivate(qdisc_priv(sch), cl);
1507 }
1508
1509 static inline int htb_parent_last_child(struct htb_class *cl)
1510 {
1511         if (!cl->parent)
1512                 /* the root class */
1513                 return 0;
1514         if (cl->parent->children > 1)
1515                 /* not the last child */
1516                 return 0;
1517         return 1;
1518 }
1519
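/* Convert @cl's parent back into a leaf: @cl was the parent's last child, so
 * the parent regains a leaf qdisc (@new_q, or noop_qdisc if none was created),
 * its tokens are refilled and it is put back into HTB_CAN_SEND.
 */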
1520 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1521                                struct Qdisc *new_q)
1522 {
1523         struct htb_sched *q = qdisc_priv(sch);
1524         struct htb_class *parent = cl->parent;
1525
1526         WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1527
1528         if (parent->cmode != HTB_CAN_SEND)
1529                 htb_safe_rb_erase(&parent->pq_node,
1530                                   &q->hlevel[parent->level].wait_pq);
1531
1532         parent->level = 0;
1533         memset(&parent->inner, 0, sizeof(parent->inner));
1534         parent->leaf.q = new_q ? new_q : &noop_qdisc;
1535         parent->tokens = parent->buffer;
1536         parent->ctokens = parent->cbuffer;
1537         parent->t_c = ktime_get_ns();
1538         parent->cmode = HTB_CAN_SEND;
1539         if (q->offload)
1540                 parent->leaf.offload_queue = cl->leaf.offload_queue;
1541 }
1542
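/* Offload counterpart of htb_parent_to_leaf(): graft the parent's new leaf
 * qdisc onto the TX queue inherited from its last deleted child.
 */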
1543 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1544                                        struct netdev_queue *dev_queue,
1545                                        struct Qdisc *new_q)
1546 {
1547         struct Qdisc *old_q;
1548
1549         /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1550         if (new_q)
1551                 qdisc_refcount_inc(new_q);
1552         old_q = htb_graft_helper(dev_queue, new_q);
1553         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1554 }
1555
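/* Tear down the offloaded state of leaf @cl. The driver command depends on
 * context: TC_HTB_LEAF_DEL for an ordinary leaf, TC_HTB_LEAF_DEL_LAST when
 * @cl is its parent's last child (the parent becomes a leaf again), and
 * TC_HTB_LEAF_DEL_LAST_FORCE when it is the last child and the whole qdisc
 * is being destroyed.
 */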
1556 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1557                                      bool last_child, bool destroying,
1558                                      struct netlink_ext_ack *extack)
1559 {
1560         struct tc_htb_qopt_offload offload_opt;
1561         struct netdev_queue *dev_queue;
1562         struct Qdisc *q = cl->leaf.q;
1563         struct Qdisc *old = NULL;
1564         int err;
1565
1566         if (cl->level)
1567                 return -EINVAL;
1568
1569         WARN_ON(!q);
1570         dev_queue = htb_offload_get_queue(cl);
1571         old = htb_graft_helper(dev_queue, NULL);
1572         if (destroying)
1573                 /* Before HTB is destroyed, the kernel grafts noop_qdisc to
1574                  * all queues.
1575                  */
1576                 WARN_ON(!(old->flags & TCQ_F_BUILTIN));
1577         else
1578                 WARN_ON(old != q);
1579
1580         if (cl->parent) {
1581                 cl->parent->bstats_bias.bytes += q->bstats.bytes;
1582                 cl->parent->bstats_bias.packets += q->bstats.packets;
1583         }
1584
1585         offload_opt = (struct tc_htb_qopt_offload) {
1586                 .command = !last_child ? TC_HTB_LEAF_DEL :
1587                            destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1588                            TC_HTB_LEAF_DEL_LAST,
1589                 .classid = cl->common.classid,
1590                 .extack = extack,
1591         };
1592         err = htb_offload(qdisc_dev(sch), &offload_opt);
1593
1594         if (!err || destroying)
1595                 qdisc_put(old);
1596         else
1597                 htb_graft_helper(dev_queue, old);
1598
1599         if (last_child)
1600                 return err;
1601
1602         if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
1603                 u32 classid = TC_H_MAJ(sch->handle) |
1604                               TC_H_MIN(offload_opt.classid);
1605                 struct htb_class *moved_cl = htb_find(classid, sch);
1606
1607                 htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
1608         }
1609
1610         return err;
1611 }
1612
1613 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1614 {
1615         if (!cl->level) {
1616                 WARN_ON(!cl->leaf.q);
1617                 qdisc_put(cl->leaf.q);
1618         }
1619         gen_kill_estimator(&cl->rate_est);
1620         tcf_block_put(cl->block);
1621         kfree(cl);
1622 }
1623
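/* Destruction order matters in offload mode: only leaf classes can be removed
 * through the driver, so the loop below repeatedly deletes leaves (converting
 * their parents back into leaves) and re-scans until a pass makes no progress;
 * WARN_ON(nonempty) catches any classes that could not be drained.
 */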
1624 static void htb_destroy(struct Qdisc *sch)
1625 {
1626         struct net_device *dev = qdisc_dev(sch);
1627         struct tc_htb_qopt_offload offload_opt;
1628         struct htb_sched *q = qdisc_priv(sch);
1629         struct hlist_node *next;
1630         bool nonempty, changed;
1631         struct htb_class *cl;
1632         unsigned int i;
1633
1634         cancel_work_sync(&q->work);
1635         qdisc_watchdog_cancel(&q->watchdog);
1636         /* This line used to be after the htb_destroy_class call below,
1637          * and surprisingly it worked in 2.4. But it must precede it,
1638          * because filters need their target class alive to be able to
1639          * call unbind_filter on it (without an Oops).
1640          */
1641         tcf_block_put(q->block);
1642
1643         for (i = 0; i < q->clhash.hashsize; i++) {
1644                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1645                         tcf_block_put(cl->block);
1646                         cl->block = NULL;
1647                 }
1648         }
1649
1650         do {
1651                 nonempty = false;
1652                 changed = false;
1653                 for (i = 0; i < q->clhash.hashsize; i++) {
1654                         hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1655                                                   common.hnode) {
1656                                 bool last_child;
1657
1658                                 if (!q->offload) {
1659                                         htb_destroy_class(sch, cl);
1660                                         continue;
1661                                 }
1662
1663                                 nonempty = true;
1664
1665                                 if (cl->level)
1666                                         continue;
1667
1668                                 changed = true;
1669
1670                                 last_child = htb_parent_last_child(cl);
1671                                 htb_destroy_class_offload(sch, cl, last_child,
1672                                                           true, NULL);
1673                                 qdisc_class_hash_remove(&q->clhash,
1674                                                         &cl->common);
1675                                 if (cl->parent)
1676                                         cl->parent->children--;
1677                                 if (last_child)
1678                                         htb_parent_to_leaf(sch, cl, NULL);
1679                                 htb_destroy_class(sch, cl);
1680                         }
1681                 }
1682         } while (changed);
1683         WARN_ON(nonempty);
1684
1685         qdisc_class_hash_destroy(&q->clhash);
1686         __qdisc_reset_queue(&q->direct_queue);
1687
1688         if (!q->offload)
1689                 return;
1690
1691         offload_opt = (struct tc_htb_qopt_offload) {
1692                 .command = TC_HTB_DESTROY,
1693         };
1694         htb_offload(dev, &offload_opt);
1695
1696         if (!q->direct_qdiscs)
1697                 return;
1698         for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1699                 qdisc_put(q->direct_qdiscs[i]);
1700         kfree(q->direct_qdiscs);
1701 }
1702
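/* Delete a single class. Classes with children or bound filters are refused
 * with -EBUSY (subtrees cannot be deleted in one go, see the TODO below).
 * If the class is its parent's last child, the parent is turned back into a
 * leaf and gets a fresh default pfifo qdisc.
 */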
1703 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1704                       struct netlink_ext_ack *extack)
1705 {
1706         struct htb_sched *q = qdisc_priv(sch);
1707         struct htb_class *cl = (struct htb_class *)arg;
1708         struct Qdisc *new_q = NULL;
1709         int last_child = 0;
1710         int err;
1711
1712         /* TODO: why don't we allow deleting a subtree? References? Does
1713          * the tc subsystem guarantee that in htb_destroy it holds no class
1714          * refs, so that we can remove children safely there?
1715          */
1716         if (cl->children || cl->filter_cnt)
1717                 return -EBUSY;
1718
1719         if (!cl->level && htb_parent_last_child(cl))
1720                 last_child = 1;
1721
1722         if (q->offload) {
1723                 err = htb_destroy_class_offload(sch, cl, last_child, false,
1724                                                 extack);
1725                 if (err)
1726                         return err;
1727         }
1728
1729         if (last_child) {
1730                 struct netdev_queue *dev_queue = sch->dev_queue;
1731
1732                 if (q->offload)
1733                         dev_queue = htb_offload_get_queue(cl);
1734
1735                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1736                                           cl->parent->common.classid,
1737                                           NULL);
1738                 if (q->offload) {
1739                         if (new_q)
1740                                 htb_set_lockdep_class_child(new_q);
1741                         htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1742                 }
1743         }
1744
1745         sch_tree_lock(sch);
1746
1747         if (!cl->level)
1748                 qdisc_purge_queue(cl->leaf.q);
1749
1750         /* delete from hash and active list; the rest happens in htb_destroy_class */
1751         qdisc_class_hash_remove(&q->clhash, &cl->common);
1752         if (cl->parent)
1753                 cl->parent->children--;
1754
1755         if (cl->prio_activity)
1756                 htb_deactivate(q, cl);
1757
1758         if (cl->cmode != HTB_CAN_SEND)
1759                 htb_safe_rb_erase(&cl->pq_node,
1760                                   &q->hlevel[cl->level].wait_pq);
1761
1762         if (last_child)
1763                 htb_parent_to_leaf(sch, cl, new_q);
1764
1765         sch_tree_unlock(sch);
1766
1767         htb_destroy_class(sch, cl);
1768         return 0;
1769 }
1770
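/* Create a new class or modify an existing one. For reference, an
 * illustrative (not prescriptive) iproute2 sequence that exercises both
 * paths, on a hypothetical device eth0:
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20
 *   tc class add dev eth0 parent 1: classid 1:1 htb rate 100mbit ceil 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:10 htb rate 30mbit ceil 100mbit
 *   tc class change dev eth0 parent 1:1 classid 1:10 htb rate 50mbit ceil 100mbit
 *
 * The "add" commands take the !cl (new class) branch below; "change" takes
 * the existing-class branch, which updates the estimator and, with offload,
 * issues TC_HTB_NODE_MODIFY.
 */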
1771 static int htb_change_class(struct Qdisc *sch, u32 classid,
1772                             u32 parentid, struct nlattr **tca,
1773                             unsigned long *arg, struct netlink_ext_ack *extack)
1774 {
1775         int err = -EINVAL;
1776         struct htb_sched *q = qdisc_priv(sch);
1777         struct htb_class *cl = (struct htb_class *)*arg, *parent;
1778         struct tc_htb_qopt_offload offload_opt;
1779         struct nlattr *opt = tca[TCA_OPTIONS];
1780         struct nlattr *tb[TCA_HTB_MAX + 1];
1781         struct Qdisc *parent_qdisc = NULL;
1782         struct netdev_queue *dev_queue;
1783         struct tc_htb_opt *hopt;
1784         u64 rate64, ceil64;
1785         int warn = 0;
1786
1787         /* extract all sub-attributes from the options attribute */
1788         if (!opt)
1789                 goto failure;
1790
1791         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1792                                           NULL);
1793         if (err < 0)
1794                 goto failure;
1795
1796         err = -EINVAL;
1797         if (tb[TCA_HTB_PARMS] == NULL)
1798                 goto failure;
1799
1800         parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1801
1802         hopt = nla_data(tb[TCA_HTB_PARMS]);
1803         if (!hopt->rate.rate || !hopt->ceil.rate)
1804                 goto failure;
1805
1806         if (q->offload) {
1807                 /* Options not supported by the offload. */
1808                 if (hopt->rate.overhead || hopt->ceil.overhead) {
1809                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
1810                         goto failure;
1811                 }
1812                 if (hopt->rate.mpu || hopt->ceil.mpu) {
1813                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
1814                         goto failure;
1815                 }
1816                 if (hopt->quantum) {
1817                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
1818                         goto failure;
1819                 }
1820                 if (hopt->prio) {
1821                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
1822                         goto failure;
1823                 }
1824         }
1825
1826         /* Keep backward compatibility with rate_table based iproute2 tc */
1827         if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1828                 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1829                                               NULL));
1830
1831         if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1832                 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1833                                               NULL));
1834
1835         rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1836         ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1837
1838         if (!cl) {              /* new class */
1839                 struct net_device *dev = qdisc_dev(sch);
1840                 struct Qdisc *new_q, *old_q;
1841                 int prio;
1842                 struct {
1843                         struct nlattr           nla;
1844                         struct gnet_estimator   opt;
1845                 } est = {
1846                         .nla = {
1847                                 .nla_len        = nla_attr_size(sizeof(est.opt)),
1848                                 .nla_type       = TCA_RATE,
1849                         },
1850                         .opt = {
1851                                 /* 4s interval, 16s averaging constant */
1852                                 .interval       = 2,
1853                                 .ewma_log       = 2,
1854                         },
1855                 };
1856
1857                 /* check for valid classid */
1858                 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1859                     htb_find(classid, sch))
1860                         goto failure;
1861
1862                 /* check maximal depth */
1863                 if (parent && parent->parent && parent->parent->level < 2) {
1864                         pr_err("htb: tree is too deep\n");
1865                         goto failure;
1866                 }
1867                 err = -ENOBUFS;
1868                 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1869                 if (!cl)
1870                         goto failure;
1871
1872                 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1873                 if (err) {
1874                         kfree(cl);
1875                         goto failure;
1876                 }
1877                 if (htb_rate_est || tca[TCA_RATE]) {
1878                         err = gen_new_estimator(&cl->bstats, NULL,
1879                                                 &cl->rate_est,
1880                                                 NULL,
1881                                                 qdisc_root_sleeping_running(sch),
1882                                                 tca[TCA_RATE] ? : &est.nla);
1883                         if (err)
1884                                 goto err_block_put;
1885                 }
1886
1887                 cl->children = 0;
1888                 RB_CLEAR_NODE(&cl->pq_node);
1889
1890                 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1891                         RB_CLEAR_NODE(&cl->node[prio]);
1892
1893                 cl->common.classid = classid;
1894
1895                 /* Make sure nothing interrupts us between the two
1896                  * ndo_setup_tc calls.
1897                  */
1898                 ASSERT_RTNL();
1899
1900                 /* Create the leaf qdisc early because it uses
1901                  * kmalloc(GFP_KERNEL), which can't be used inside
1902                  * sch_tree_lock -- thanks to Karlis Peisenieks
1903                  */
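                /* Three ways to pick dev_queue for the new leaf:
                 *  - no offload: use the qdisc's own dev_queue;
                 *  - offload, parent absent or already an inner class: ask
                 *    the driver to allocate a fresh TX queue
                 *    (TC_HTB_LEAF_ALLOC_QUEUE);
                 *  - offload, parent is currently a leaf: this is its first
                 *    child, so the parent's queue is inherited and the parent
                 *    is converted to an inner node (TC_HTB_LEAF_TO_INNER).
                 */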
1904                 if (!q->offload) {
1905                         dev_queue = sch->dev_queue;
1906                 } else if (!(parent && !parent->level)) {
1907                         /* Assign a dev_queue to this classid. */
1908                         offload_opt = (struct tc_htb_qopt_offload) {
1909                                 .command = TC_HTB_LEAF_ALLOC_QUEUE,
1910                                 .classid = cl->common.classid,
1911                                 .parent_classid = parent ?
1912                                         TC_H_MIN(parent->common.classid) :
1913                                         TC_HTB_CLASSID_ROOT,
1914                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1915                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1916                                 .extack = extack,
1917                         };
1918                         err = htb_offload(dev, &offload_opt);
1919                         if (err) {
1920                                 pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1921                                        err);
1922                                 goto err_kill_estimator;
1923                         }
1924                         dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1925                 } else { /* First child. */
1926                         dev_queue = htb_offload_get_queue(parent);
1927                         old_q = htb_graft_helper(dev_queue, NULL);
1928                         WARN_ON(old_q != parent->leaf.q);
1929                         offload_opt = (struct tc_htb_qopt_offload) {
1930                                 .command = TC_HTB_LEAF_TO_INNER,
1931                                 .classid = cl->common.classid,
1932                                 .parent_classid =
1933                                         TC_H_MIN(parent->common.classid),
1934                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1935                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1936                                 .extack = extack,
1937                         };
1938                         err = htb_offload(dev, &offload_opt);
1939                         if (err) {
1940                                 pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1941                                        err);
1942                                 htb_graft_helper(dev_queue, old_q);
1943                                 goto err_kill_estimator;
1944                         }
1945                         parent->bstats_bias.bytes += old_q->bstats.bytes;
1946                         parent->bstats_bias.packets += old_q->bstats.packets;
1947                         qdisc_put(old_q);
1948                 }
1949                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1950                                           classid, NULL);
1951                 if (q->offload) {
1952                         if (new_q) {
1953                                 htb_set_lockdep_class_child(new_q);
1954                                 /* One ref for cl->leaf.q, the other for
1955                                  * dev_queue->qdisc.
1956                                  */
1957                                 qdisc_refcount_inc(new_q);
1958                         }
1959                         old_q = htb_graft_helper(dev_queue, new_q);
1960                         /* No qdisc_put needed. */
1961                         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1962                 }
1963                 sch_tree_lock(sch);
1964                 if (parent && !parent->level) {
1965                         /* turn parent into inner node */
1966                         qdisc_purge_queue(parent->leaf.q);
1967                         parent_qdisc = parent->leaf.q;
1968                         if (parent->prio_activity)
1969                                 htb_deactivate(q, parent);
1970
1971                         /* remove from evt list because of level change */
1972                         if (parent->cmode != HTB_CAN_SEND) {
1973                                 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1974                                 parent->cmode = HTB_CAN_SEND;
1975                         }
1976                         parent->level = (parent->parent ? parent->parent->level
1977                                          : TC_HTB_MAXDEPTH) - 1;
1978                         memset(&parent->inner, 0, sizeof(parent->inner));
1979                 }
1980
1981                 /* the leaf (us) needs an elementary qdisc */
1982                 cl->leaf.q = new_q ? new_q : &noop_qdisc;
1983                 if (q->offload)
1984                         cl->leaf.offload_queue = dev_queue;
1985
1986                 cl->parent = parent;
1987
1988                 /* set class to be in HTB_CAN_SEND state */
1989                 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1990                 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1991                 cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1992                 cl->t_c = ktime_get_ns();
1993                 cl->cmode = HTB_CAN_SEND;
1994
1995                 /* attach to the hash list and parent's family */
1996                 qdisc_class_hash_insert(&q->clhash, &cl->common);
1997                 if (parent)
1998                         parent->children++;
1999                 if (cl->leaf.q != &noop_qdisc)
2000                         qdisc_hash_add(cl->leaf.q, true);
2001         } else {
2002                 if (tca[TCA_RATE]) {
2003                         err = gen_replace_estimator(&cl->bstats, NULL,
2004                                                     &cl->rate_est,
2005                                                     NULL,
2006                                                     qdisc_root_sleeping_running(sch),
2007                                                     tca[TCA_RATE]);
2008                         if (err)
2009                                 return err;
2010                 }
2011
2012                 if (q->offload) {
2013                         struct net_device *dev = qdisc_dev(sch);
2014
2015                         offload_opt = (struct tc_htb_qopt_offload) {
2016                                 .command = TC_HTB_NODE_MODIFY,
2017                                 .classid = cl->common.classid,
2018                                 .rate = max_t(u64, hopt->rate.rate, rate64),
2019                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
2020                                 .extack = extack,
2021                         };
2022                         err = htb_offload(dev, &offload_opt);
2023                         if (err)
2024                                 /* The estimator was replaced, and rollback
2025                                  * may fail as well, so we don't try to
2026                                  * recover it; the estimator won't work
2027                                  * properly with the offload anyway, since
2028                                  * bstats are updated only at query time.
2029                                  */
2030                                 return err;
2031                 }
2032
2033                 sch_tree_lock(sch);
2034         }
2035
2036         psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
2037         psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
2038
2039         /* There used to be a nasty bug here: we have to check that the
2040          * node is really a leaf before changing cl->leaf!
2041          */
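        /* Worked example (assuming the default r2q of 10): a class with
         * rate 100mbit has rate_bytes_ps = 12,500,000, giving a raw quantum
         * of 1,250,000 bytes; that exceeds 200000, so it is clamped and the
         * "quantum ... is big" warning below suggests changing r2q. A 64kbit
         * class (8,000 bytes/s) would compute 800 and be clamped up to 1000.
         */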
2042         if (!cl->level) {
2043                 u64 quantum = cl->rate.rate_bytes_ps;
2044
2045                 do_div(quantum, q->rate2quantum);
2046                 cl->quantum = min_t(u64, quantum, INT_MAX);
2047
2048                 if (!hopt->quantum && cl->quantum < 1000) {
2049                         warn = -1;
2050                         cl->quantum = 1000;
2051                 }
2052                 if (!hopt->quantum && cl->quantum > 200000) {
2053                         warn = 1;
2054                         cl->quantum = 200000;
2055                 }
2056                 if (hopt->quantum)
2057                         cl->quantum = hopt->quantum;
2058                 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2059                         cl->prio = TC_HTB_NUMPRIO - 1;
2060         }
2061
2062         cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2063         cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2064
2065         sch_tree_unlock(sch);
2066         qdisc_put(parent_qdisc);
2067
2068         if (warn)
2069                 pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
2070                             cl->common.classid, (warn == -1 ? "small" : "big"));
2071
2072         qdisc_class_hash_grow(sch, &q->clhash);
2073
2074         *arg = (unsigned long)cl;
2075         return 0;
2076
2077 err_kill_estimator:
2078         gen_kill_estimator(&cl->rate_est);
2079 err_block_put:
2080         tcf_block_put(cl->block);
2081         kfree(cl);
2082 failure:
2083         return err;
2084 }
2085
2086 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2087                                        struct netlink_ext_ack *extack)
2088 {
2089         struct htb_sched *q = qdisc_priv(sch);
2090         struct htb_class *cl = (struct htb_class *)arg;
2091
2092         return cl ? cl->block : q->block;
2093 }
2094
2095 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2096                                      u32 classid)
2097 {
2098         struct htb_class *cl = htb_find(classid, sch);
2099
2100         /*if (cl && !cl->level) return 0;
2101          * The line above used to be there to prevent attaching filters to
2102          * leaves. But at least the tc_index filter uses this just to get
2103          * the class for other reasons, so we have to allow it.
2104          * ----
2105          * 19.6.2002 As Werner explained, it is OK - bind_filter is just
2106          * another way to "lock" the class - unlike "get", this lock can
2107          * be broken by the class during destroy, IIUC.
2108          */
2109         if (cl)
2110                 cl->filter_cnt++;
2111         return (unsigned long)cl;
2112 }
2113
2114 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2115 {
2116         struct htb_class *cl = (struct htb_class *)arg;
2117
2118         if (cl)
2119                 cl->filter_cnt--;
2120 }
2121
2122 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2123 {
2124         struct htb_sched *q = qdisc_priv(sch);
2125         struct htb_class *cl;
2126         unsigned int i;
2127
2128         if (arg->stop)
2129                 return;
2130
2131         for (i = 0; i < q->clhash.hashsize; i++) {
2132                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2133                         if (arg->count < arg->skip) {
2134                                 arg->count++;
2135                                 continue;
2136                         }
2137                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2138                                 arg->stop = 1;
2139                                 return;
2140                         }
2141                         arg->count++;
2142                 }
2143         }
2144 }
2145
2146 static const struct Qdisc_class_ops htb_class_ops = {
2147         .select_queue   =       htb_select_queue,
2148         .graft          =       htb_graft,
2149         .leaf           =       htb_leaf,
2150         .qlen_notify    =       htb_qlen_notify,
2151         .find           =       htb_search,
2152         .change         =       htb_change_class,
2153         .delete         =       htb_delete,
2154         .walk           =       htb_walk,
2155         .tcf_block      =       htb_tcf_block,
2156         .bind_tcf       =       htb_bind_filter,
2157         .unbind_tcf     =       htb_unbind_filter,
2158         .dump           =       htb_dump_class,
2159         .dump_stats     =       htb_dump_class_stats,
2160 };
2161
2162 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2163         .cl_ops         =       &htb_class_ops,
2164         .id             =       "htb",
2165         .priv_size      =       sizeof(struct htb_sched),
2166         .enqueue        =       htb_enqueue,
2167         .dequeue        =       htb_dequeue,
2168         .peek           =       qdisc_peek_dequeued,
2169         .init           =       htb_init,
2170         .attach         =       htb_attach,
2171         .reset          =       htb_reset,
2172         .destroy        =       htb_destroy,
2173         .dump           =       htb_dump,
2174         .owner          =       THIS_MODULE,
2175 };
2176
2177 static int __init htb_module_init(void)
2178 {
2179         return register_qdisc(&htb_qdisc_ops);
2180 }
2181 static void __exit htb_module_exit(void)
2182 {
2183         unregister_qdisc(&htb_qdisc_ops);
2184 }
2185
2186 module_init(htb_module_init)
2187 module_exit(htb_module_exit)
2188 MODULE_LICENSE("GPL");