1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
4  *
5  * Authors:     Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *                      HTB support at LARTC mailing list
10  *              Ondrej Kraus, <krauso@barr.cz>
11  *                      found missing INIT_QDISC(htb)
12  *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *                      helped a lot to locate nasty class stall bug
14  *              Andi Kleen, Jamal Hadi, Bert Hubert
15  *                      code review and helpful comments on shaping
16  *              Tomasz Wrona, <tw@eter.tym.pl>
17  *                      created test case so that I was able to fix nasty bug
18  *              Wilfried Weissmann
19  *                      spotted bug in dequeue code and helped with fix
20  *              Jiri Fojtasek
21  *                      fixed requeue routine
22  *              and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows assigning a priority to each class in the hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47
48     Levels:
49     Each class is assigned a level. A leaf ALWAYS has level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51     one less than their parent.
52 */
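/*
 * Illustrative example (not part of the original source): with
 * TC_HTB_MAXDEPTH of 8, a root class that has children sits at level 7,
 * an interior class directly below it at level 6, and any class without
 * children is a leaf at level 0, however deep it is attached.
 */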
53
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011         /* major must be matched with number supplied by TC as version */
56
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64
65 static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
68
69 /* used internally to keep the status of a single class */
70 enum htb_cmode {
71         HTB_CANT_SEND,          /* class can't send and can't borrow */
72         HTB_MAY_BORROW,         /* class can't send but may borrow */
73         HTB_CAN_SEND            /* class can send */
74 };
75
76 struct htb_prio {
77         union {
78                 struct rb_root  row;
79                 struct rb_root  feed;
80         };
81         struct rb_node  *ptr;
82         /* When a class changes from state 1->2 and disconnects from
83          * its parent's feed, we lose the ptr value and would start from
84          * the first child again. Here we store the classid of the
85          * last valid ptr (used when ptr is NULL).
86          */
87         u32             last_ptr_id;
88 };
89
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95         struct Qdisc_class_common common;
96         struct psched_ratecfg   rate;
97         struct psched_ratecfg   ceil;
98         s64                     buffer, cbuffer;/* token bucket depth/rate */
99         s64                     mbuffer;        /* max wait time */
100         u32                     prio;           /* these two are used only by leaves... */
101         int                     quantum;        /* but stored for parent-to-leaf return */
102
103         struct tcf_proto __rcu  *filter_list;   /* class attached filters */
104         struct tcf_block        *block;
105         int                     filter_cnt;
106
107         int                     level;          /* our level (see above) */
108         unsigned int            children;
109         struct htb_class        *parent;        /* parent class */
110
111         struct net_rate_estimator __rcu *rate_est;
112
113         /*
114          * Written often fields
115          */
116         struct gnet_stats_basic_sync bstats;
117         struct gnet_stats_basic_sync bstats_bias;
118         struct tc_htb_xstats    xstats; /* our special stats */
119
120         /* token bucket parameters */
121         s64                     tokens, ctokens;/* current number of tokens */
122         s64                     t_c;            /* checkpoint time */
123
124         union {
125                 struct htb_class_leaf {
126                         int             deficit[TC_HTB_MAXDEPTH];
127                         struct Qdisc    *q;
128                         struct netdev_queue *offload_queue;
129                 } leaf;
130                 struct htb_class_inner {
131                         struct htb_prio clprio[TC_HTB_NUMPRIO];
132                 } inner;
133         };
134         s64                     pq_key;
135
136         int                     prio_activity;  /* for which prios are we active */
137         enum htb_cmode          cmode;          /* current mode of the class */
138         struct rb_node          pq_node;        /* node for event queue */
139         struct rb_node          node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
140
141         unsigned int drops ____cacheline_aligned_in_smp;
142         unsigned int            overlimits;
143 };
144
145 struct htb_level {
146         struct rb_root  wait_pq;
147         struct htb_prio hprio[TC_HTB_NUMPRIO];
148 };
149
150 struct htb_sched {
151         struct Qdisc_class_hash clhash;
152         int                     defcls;         /* class where unclassified flows go to */
153         int                     rate2quantum;   /* quant = rate / rate2quantum */
154
155         /* filters for qdisc itself */
156         struct tcf_proto __rcu  *filter_list;
157         struct tcf_block        *block;
158
159 #define HTB_WARN_TOOMANYEVENTS  0x1
160         unsigned int            warned; /* only one warning */
161         int                     direct_qlen;
162         struct work_struct      work;
163
164         /* non shaped skbs; let them go directly thru */
165         struct qdisc_skb_head   direct_queue;
166         u32                     direct_pkts;
167         u32                     overlimits;
168
169         struct qdisc_watchdog   watchdog;
170
171         s64                     now;    /* cached dequeue time */
172
173         /* time of nearest event per level (row) */
174         s64                     near_ev_cache[TC_HTB_MAXDEPTH];
175
176         int                     row_mask[TC_HTB_MAXDEPTH];
177
178         struct htb_level        hlevel[TC_HTB_MAXDEPTH];
179
180         struct Qdisc            **direct_qdiscs;
181         unsigned int            num_direct_qdiscs;
182
183         bool                    offload;
184 };
185
186 /* find class in global hash table using given handle */
187 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
188 {
189         struct htb_sched *q = qdisc_priv(sch);
190         struct Qdisc_class_common *clc;
191
192         clc = qdisc_class_find(&q->clhash, handle);
193         if (clc == NULL)
194                 return NULL;
195         return container_of(clc, struct htb_class, common);
196 }
197
198 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
199 {
200         return (unsigned long)htb_find(handle, sch);
201 }
202 /**
203  * htb_classify - classify a packet into class
204  *
205  * It returns NULL if the packet should be dropped or HTB_DIRECT (-1) if the
206  * packet should be passed directly thru. In all other cases a leaf class is
207  * returned. We allow direct class selection by classid in skb->priority. Then
208  * we examine filters in the qdisc and in inner nodes (if a higher filter
209  * points to the inner node). If we end up with classid MAJOR:0 we enqueue the
210  * skb into the special internal fifo (direct). These packets then go directly
211  * thru. If we still have no valid leaf we try to use the MAJOR:default leaf.
212  * If that is still unsuccessful, we finish and return the direct queue.
213  */
214 #define HTB_DIRECT ((struct htb_class *)-1L)
215
216 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
217                                       int *qerr)
218 {
219         struct htb_sched *q = qdisc_priv(sch);
220         struct htb_class *cl;
221         struct tcf_result res;
222         struct tcf_proto *tcf;
223         int result;
224
225         /* allow to select class by setting skb->priority to valid classid;
226          * note that nfmark can be used too by attaching filter fw with no
227          * rules in it
228          */
229         if (skb->priority == sch->handle)
230                 return HTB_DIRECT;      /* X:0 (direct flow) selected */
231         cl = htb_find(skb->priority, sch);
232         if (cl) {
233                 if (cl->level == 0)
234                         return cl;
235                 /* Start with inner filter chain if a non-leaf class is selected */
236                 tcf = rcu_dereference_bh(cl->filter_list);
237         } else {
238                 tcf = rcu_dereference_bh(q->filter_list);
239         }
240
241         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
242         while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
243 #ifdef CONFIG_NET_CLS_ACT
244                 switch (result) {
245                 case TC_ACT_QUEUED:
246                 case TC_ACT_STOLEN:
247                 case TC_ACT_TRAP:
248                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
249                         fallthrough;
250                 case TC_ACT_SHOT:
251                         return NULL;
252                 }
253 #endif
254                 cl = (void *)res.class;
255                 if (!cl) {
256                         if (res.classid == sch->handle)
257                                 return HTB_DIRECT;      /* X:0 (direct flow) */
258                         cl = htb_find(res.classid, sch);
259                         if (!cl)
260                                 break;  /* filter selected invalid classid */
261                 }
262                 if (!cl->level)
263                         return cl;      /* we hit leaf; return it */
264
265                 /* we have got inner class; apply inner filter chain */
266                 tcf = rcu_dereference_bh(cl->filter_list);
267         }
268         /* classification failed; try to use default class */
269         cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
270         if (!cl || cl->level)
271                 return HTB_DIRECT;      /* bad default .. this is safe bet */
272         return cl;
273 }
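/*
 * Illustrative trace of htb_classify() with hypothetical handles (not
 * taken from the original source): assume an HTB qdisc with handle 1:,
 * default class 20 and an existing leaf class 1:10.
 *
 *	skb->priority == 1:0 (the qdisc handle) -> HTB_DIRECT
 *	skb->priority == 1:10 (a leaf classid)  -> class 1:10
 *	a tc filter returns classid 1:10        -> class 1:10
 *	nothing matches at all                  -> class 1:20 if it exists
 *	                                           and is a leaf, otherwise
 *	                                           HTB_DIRECT
 */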
274
275 /**
276  * htb_add_to_id_tree - adds class to the round robin list
277  * @root: the root of the tree
278  * @cl: the class to add
279  * @prio: the given prio in the class
280  *
281  * The routine adds the class to the list (actually a tree) sorted by classid.
282  * Make sure that the class is not already on such a list for the given prio.
283  */
284 static void htb_add_to_id_tree(struct rb_root *root,
285                                struct htb_class *cl, int prio)
286 {
287         struct rb_node **p = &root->rb_node, *parent = NULL;
288
289         while (*p) {
290                 struct htb_class *c;
291                 parent = *p;
292                 c = rb_entry(parent, struct htb_class, node[prio]);
293
294                 if (cl->common.classid > c->common.classid)
295                         p = &parent->rb_right;
296                 else
297                         p = &parent->rb_left;
298         }
299         rb_link_node(&cl->node[prio], parent, p);
300         rb_insert_color(&cl->node[prio], root);
301 }
302
303 /**
304  * htb_add_to_wait_tree - adds class to the event queue with delay
305  * @q: the priority event queue
306  * @cl: the class to add
307  * @delay: delay in nanoseconds
308  *
309  * The class is added to the priority event queue to indicate that the class
310  * will change its mode at time cl->pq_key (in nanoseconds). Make sure that
311  * the class is not already in the queue.
312  */
313 static void htb_add_to_wait_tree(struct htb_sched *q,
314                                  struct htb_class *cl, s64 delay)
315 {
316         struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
317
318         cl->pq_key = q->now + delay;
319         if (cl->pq_key == q->now)
320                 cl->pq_key++;
321
322         /* update the nearest event cache */
323         if (q->near_ev_cache[cl->level] > cl->pq_key)
324                 q->near_ev_cache[cl->level] = cl->pq_key;
325
326         while (*p) {
327                 struct htb_class *c;
328                 parent = *p;
329                 c = rb_entry(parent, struct htb_class, pq_node);
330                 if (cl->pq_key >= c->pq_key)
331                         p = &parent->rb_right;
332                 else
333                         p = &parent->rb_left;
334         }
335         rb_link_node(&cl->pq_node, parent, p);
336         rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
337 }
338
339 /**
340  * htb_next_rb_node - finds next node in binary tree
341  * @n: the current node in binary tree
342  *
343  * When we are past the last key, *n is set to NULL.
344  * Average complexity is 2 steps per call.
345  */
346 static inline void htb_next_rb_node(struct rb_node **n)
347 {
348         *n = rb_next(*n);
349 }
350
351 /**
352  * htb_add_class_to_row - add class to its row
353  * @q: the priority event queue
354  * @cl: the class to add
355  * @mask: the given priorities in class in bitmap
356  *
357  * The class is added to row at priorities marked in mask.
358  * It does nothing if mask == 0.
359  */
360 static inline void htb_add_class_to_row(struct htb_sched *q,
361                                         struct htb_class *cl, int mask)
362 {
363         q->row_mask[cl->level] |= mask;
364         while (mask) {
365                 int prio = ffz(~mask);
366                 mask &= ~(1 << prio);
367                 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
368         }
369 }
370
371 /* If this triggers, it is a bug in this code, but it need not be fatal */
372 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
373 {
374         if (RB_EMPTY_NODE(rb)) {
375                 WARN_ON(1);
376         } else {
377                 rb_erase(rb, root);
378                 RB_CLEAR_NODE(rb);
379         }
380 }
381
382
383 /**
384  * htb_remove_class_from_row - removes class from its row
385  * @q: the priority event queue
386  * @cl: the class to remove
387  * @mask: the given priorities in class in bitmap
388  *
389  * The class is removed from row at priorities marked in mask.
390  * It does nothing if mask == 0.
391  */
392 static inline void htb_remove_class_from_row(struct htb_sched *q,
393                                                  struct htb_class *cl, int mask)
394 {
395         int m = 0;
396         struct htb_level *hlevel = &q->hlevel[cl->level];
397
398         while (mask) {
399                 int prio = ffz(~mask);
400                 struct htb_prio *hprio = &hlevel->hprio[prio];
401
402                 mask &= ~(1 << prio);
403                 if (hprio->ptr == cl->node + prio)
404                         htb_next_rb_node(&hprio->ptr);
405
406                 htb_safe_rb_erase(cl->node + prio, &hprio->row);
407                 if (!hprio->row.rb_node)
408                         m |= 1 << prio;
409         }
410         q->row_mask[cl->level] &= ~m;
411 }
412
413 /**
414  * htb_activate_prios - creates the active class's feed chain
415  * @q: the priority event queue
416  * @cl: the class to activate
417  *
418  * The class is connected to its ancestors and/or the appropriate rows
419  * for the priorities it participates in. cl->cmode must be the new
420  * (activated) mode. It does nothing if cl->prio_activity == 0.
421  */
422 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
423 {
424         struct htb_class *p = cl->parent;
425         long m, mask = cl->prio_activity;
426
427         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
428                 m = mask;
429                 while (m) {
430                         int prio = ffz(~m);
431                         m &= ~(1 << prio);
432
433                         if (p->inner.clprio[prio].feed.rb_node)
434                                 /* parent already has its feed in use, so
435                                  * reset the bit in mask; the parent is ok
436                                  */
437                                 mask &= ~(1 << prio);
438
439                         htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
440                 }
441                 p->prio_activity |= mask;
442                 cl = p;
443                 p = cl->parent;
444
445         }
446         if (cl->cmode == HTB_CAN_SEND && mask)
447                 htb_add_class_to_row(q, cl, mask);
448 }
449
450 /**
451  * htb_deactivate_prios - remove class from feed chain
452  * @q: the priority event queue
453  * @cl: the class to deactivate
454  *
455  * cl->cmode must represent the old mode (before deactivation). It does
456  * nothing if cl->prio_activity == 0. The class is removed from all feed
457  * chains and rows.
458  */
459 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
460 {
461         struct htb_class *p = cl->parent;
462         long m, mask = cl->prio_activity;
463
464         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
465                 m = mask;
466                 mask = 0;
467                 while (m) {
468                         int prio = ffz(~m);
469                         m &= ~(1 << prio);
470
471                         if (p->inner.clprio[prio].ptr == cl->node + prio) {
472                                 /* we are removing a child which is pointed to
473                                  * from the parent feed; forget the pointer but
474                                  * remember the classid
475                                  */
476                                 p->inner.clprio[prio].last_ptr_id = cl->common.classid;
477                                 p->inner.clprio[prio].ptr = NULL;
478                         }
479
480                         htb_safe_rb_erase(cl->node + prio,
481                                           &p->inner.clprio[prio].feed);
482
483                         if (!p->inner.clprio[prio].feed.rb_node)
484                                 mask |= 1 << prio;
485                 }
486
487                 p->prio_activity &= ~mask;
488                 cl = p;
489                 p = cl->parent;
490
491         }
492         if (cl->cmode == HTB_CAN_SEND && mask)
493                 htb_remove_class_from_row(q, cl, mask);
494 }
495
496 static inline s64 htb_lowater(const struct htb_class *cl)
497 {
498         if (htb_hysteresis)
499                 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
500         else
501                 return 0;
502 }
503 static inline s64 htb_hiwater(const struct htb_class *cl)
504 {
505         if (htb_hysteresis)
506                 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
507         else
508                 return 0;
509 }
510
511
512 /**
513  * htb_class_mode - computes and returns current class mode
514  * @cl: the target class
515  * @diff: diff time in nanoseconds
516  *
517  * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
518  * is not HTB_CAN_SEND then *diff is updated to the time difference from
519  * now to the time when cl will change its state.
520  * Also it is worth noting that the class mode doesn't change simply
521  * at cl->{c,}tokens == 0; rather there can be a hysteresis in the
522  * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
523  * mode transitions per time unit. The speed gain is about 1/6.
524  */
525 static inline enum htb_cmode
526 htb_class_mode(struct htb_class *cl, s64 *diff)
527 {
528         s64 toks;
529
530         if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
531                 *diff = -toks;
532                 return HTB_CANT_SEND;
533         }
534
535         if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
536                 return HTB_CAN_SEND;
537
538         *diff = -toks;
539         return HTB_MAY_BORROW;
540 }
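/*
 * Worked example (illustrative, not from the original source): with
 * htb_hysteresis == 0 both water marks above are 0, so the decision in
 * htb_class_mode() reduces to
 *
 *	cl->ctokens + *diff < 0   -> HTB_CANT_SEND
 *	cl->tokens  + *diff >= 0  -> HTB_CAN_SEND
 *	otherwise                 -> HTB_MAY_BORROW
 *
 * With htb_hysteresis enabled, the first test uses -cl->cbuffer while the
 * class is not in HTB_CANT_SEND and the second uses -cl->buffer while it
 * is in HTB_CAN_SEND, so the tokens must drift a full buffer below zero
 * before the mode flips, which is what limits the transition rate.
 */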
541
542 /**
543  * htb_change_class_mode - changes the class's mode
544  * @q: the priority event queue
545  * @cl: the target class
546  * @diff: diff time in nanoseconds
547  *
548  * This should be the only way to change a class's mode under normal
549  * circumstances. The routine will update the feed list linkage, change the
550  * mode and add the class to the wait event queue if appropriate. The new mode
551  * should be different from the old one and cl->pq_key has to be valid if
552  * changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
553  */
554 static void
555 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
556 {
557         enum htb_cmode new_mode = htb_class_mode(cl, diff);
558
559         if (new_mode == cl->cmode)
560                 return;
561
562         if (new_mode == HTB_CANT_SEND) {
563                 cl->overlimits++;
564                 q->overlimits++;
565         }
566
567         if (cl->prio_activity) {        /* not necessary: speed optimization */
568                 if (cl->cmode != HTB_CANT_SEND)
569                         htb_deactivate_prios(q, cl);
570                 cl->cmode = new_mode;
571                 if (new_mode != HTB_CANT_SEND)
572                         htb_activate_prios(q, cl);
573         } else
574                 cl->cmode = new_mode;
575 }
576
577 /**
578  * htb_activate - inserts leaf cl into appropriate active feeds
579  * @q: the priority event queue
580  * @cl: the target class
581  *
582  * The routine learns the (new) priority of the leaf and activates the
583  * feed chain for that prio. It can safely be called on an already
584  * active leaf.
585  */
586 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
587 {
588         WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
589
590         if (!cl->prio_activity) {
591                 cl->prio_activity = 1 << cl->prio;
592                 htb_activate_prios(q, cl);
593         }
594 }
595
596 /**
597  * htb_deactivate - remove leaf cl from active feeds
598  * @q: the priority event queue
599  * @cl: the target class
600  *
601  * Make sure that the leaf is active. In other words, it can't be called
602  * with a non-active leaf.
603  */
604 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
605 {
606         WARN_ON(!cl->prio_activity);
607
608         htb_deactivate_prios(q, cl);
609         cl->prio_activity = 0;
610 }
611
612 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
613                        struct sk_buff **to_free)
614 {
615         int ret;
616         unsigned int len = qdisc_pkt_len(skb);
617         struct htb_sched *q = qdisc_priv(sch);
618         struct htb_class *cl = htb_classify(skb, sch, &ret);
619
620         if (cl == HTB_DIRECT) {
621                 /* enqueue to helper queue */
622                 if (q->direct_queue.qlen < q->direct_qlen) {
623                         __qdisc_enqueue_tail(skb, &q->direct_queue);
624                         q->direct_pkts++;
625                 } else {
626                         return qdisc_drop(skb, sch, to_free);
627                 }
628 #ifdef CONFIG_NET_CLS_ACT
629         } else if (!cl) {
630                 if (ret & __NET_XMIT_BYPASS)
631                         qdisc_qstats_drop(sch);
632                 __qdisc_drop(skb, to_free);
633                 return ret;
634 #endif
635         } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
636                                         to_free)) != NET_XMIT_SUCCESS) {
637                 if (net_xmit_drop_count(ret)) {
638                         qdisc_qstats_drop(sch);
639                         cl->drops++;
640                 }
641                 return ret;
642         } else {
643                 htb_activate(q, cl);
644         }
645
646         sch->qstats.backlog += len;
647         sch->q.qlen++;
648         return NET_XMIT_SUCCESS;
649 }
650
651 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
652 {
653         s64 toks = diff + cl->tokens;
654
655         if (toks > cl->buffer)
656                 toks = cl->buffer;
657         toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
658         if (toks <= -cl->mbuffer)
659                 toks = 1 - cl->mbuffer;
660
661         cl->tokens = toks;
662 }
663
664 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
665 {
666         s64 toks = diff + cl->ctokens;
667
668         if (toks > cl->cbuffer)
669                 toks = cl->cbuffer;
670         toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
671         if (toks <= -cl->mbuffer)
672                 toks = 1 - cl->mbuffer;
673
674         cl->ctokens = toks;
675 }
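/*
 * Worked example (illustrative, with hypothetical numbers): for a class
 * whose rate is 1 Mbit/s (125000 bytes/s), charging a 1250 byte packet
 * via htb_accnt_tokens() removes 10 ms worth of tokens, assuming
 * psched_l2t_ns() returns the wire time of "bytes" at the configured
 * rate:
 *
 *	s64 toks = diff + cl->tokens;      // refill since last checkpoint
 *	if (toks > cl->buffer)
 *		toks = cl->buffer;         // cap at the bucket depth
 *	toks -= 10 * NSEC_PER_MSEC;        // 1250 B / 125000 B/s = 10 ms
 *	if (toks <= -cl->mbuffer)
 *		toks = 1 - cl->mbuffer;    // clamp at the low end
 *	cl->tokens = toks;
 */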
676
677 /**
678  * htb_charge_class - charges amount "bytes" to leaf and ancestors
679  * @q: the priority event queue
680  * @cl: the class to start iterating from
681  * @level: the minimum level to account
682  * @skb: the socket buffer
683  *
684  * The routine assumes that a packet "bytes" long was dequeued from leaf cl
685  * borrowing from "level". It accounts the bytes to the ceil leaky bucket for
686  * the leaf and all ancestors, and to the rate bucket for ancestors at levels
687  * "level" and higher. It also handles a possible change of mode resulting
688  * from the update. Note that the mode can also increase here (MAY_BORROW to
689  * CAN_SEND) because we can use a more precise clock than the event queue
690  * here. In such a case we remove the class from the event queue first.
691  */
692 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
693                              int level, struct sk_buff *skb)
694 {
695         int bytes = qdisc_pkt_len(skb);
696         enum htb_cmode old_mode;
697         s64 diff;
698
699         while (cl) {
700                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
701                 if (cl->level >= level) {
702                         if (cl->level == level)
703                                 cl->xstats.lends++;
704                         htb_accnt_tokens(cl, bytes, diff);
705                 } else {
706                         cl->xstats.borrows++;
707                         cl->tokens += diff;     /* we moved t_c; update tokens */
708                 }
709                 htb_accnt_ctokens(cl, bytes, diff);
710                 cl->t_c = q->now;
711
712                 old_mode = cl->cmode;
713                 diff = 0;
714                 htb_change_class_mode(q, cl, &diff);
715                 if (old_mode != cl->cmode) {
716                         if (old_mode != HTB_CAN_SEND)
717                                 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
718                         if (cl->cmode != HTB_CAN_SEND)
719                                 htb_add_to_wait_tree(q, cl, diff);
720                 }
721
722                 /* update basic stats except for leaves which are already updated */
723                 if (cl->level)
724                         bstats_update(&cl->bstats, skb);
725
726                 cl = cl->parent;
727         }
728 }
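/*
 * Example of the split above (illustrative): suppose a leaf at level 0
 * borrowed from an ancestor at level 2. Walking up from the leaf:
 *
 *	level 0 (leaf)   - xstats.borrows++, rate tokens only refilled,
 *	                   ceil tokens charged
 *	level 1 (parent) - same as the leaf
 *	level 2 (lender) - xstats.lends++, rate and ceil tokens charged
 *	level 3 and up   - rate and ceil tokens charged
 */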
729
730 /**
731  * htb_do_events - make mode changes to classes at the level
732  * @q: the priority event queue
733  * @level: which wait_pq in 'q->hlevel'
734  * @start: start jiffies
735  *
736  * Scans the event queue for pending events and applies them. Returns the time
737  * of the next pending event (0 if there is no event in the pq, q->now if there
738  * were too many events). Note: only events with cl->pq_key <= q->now are applied.
739  */
740 static s64 htb_do_events(struct htb_sched *q, const int level,
741                          unsigned long start)
742 {
743         /* don't run for longer than 2 jiffies; 2 is used instead of
744          * 1 to simplify things when jiffy is going to be incremented
745          * too soon
746          */
747         unsigned long stop_at = start + 2;
748         struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
749
750         while (time_before(jiffies, stop_at)) {
751                 struct htb_class *cl;
752                 s64 diff;
753                 struct rb_node *p = rb_first(wait_pq);
754
755                 if (!p)
756                         return 0;
757
758                 cl = rb_entry(p, struct htb_class, pq_node);
759                 if (cl->pq_key > q->now)
760                         return cl->pq_key;
761
762                 htb_safe_rb_erase(p, wait_pq);
763                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
764                 htb_change_class_mode(q, cl, &diff);
765                 if (cl->cmode != HTB_CAN_SEND)
766                         htb_add_to_wait_tree(q, cl, diff);
767         }
768
769         /* too much load - let's continue after a break for scheduling */
770         if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
771                 pr_warn("htb: too many events!\n");
772                 q->warned |= HTB_WARN_TOOMANYEVENTS;
773         }
774
775         return q->now;
776 }
777
778 /* Returns class->node+prio from the id-tree where the class's id is >= id.
779  * NULL if no such one exists.
780  */
781 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
782                                               u32 id)
783 {
784         struct rb_node *r = NULL;
785         while (n) {
786                 struct htb_class *cl =
787                     rb_entry(n, struct htb_class, node[prio]);
788
789                 if (id > cl->common.classid) {
790                         n = n->rb_right;
791                 } else if (id < cl->common.classid) {
792                         r = n;
793                         n = n->rb_left;
794                 } else {
795                         return n;
796                 }
797         }
798         return r;
799 }
800
801 /**
802  * htb_lookup_leaf - returns next leaf class in DRR order
803  * @hprio: the current one
804  * @prio: which prio in the class
805  *
806  * Find the leaf where the current feed pointer points.
807  */
808 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
809 {
810         int i;
811         struct {
812                 struct rb_node *root;
813                 struct rb_node **pptr;
814                 u32 *pid;
815         } stk[TC_HTB_MAXDEPTH], *sp = stk;
816
817         BUG_ON(!hprio->row.rb_node);
818         sp->root = hprio->row.rb_node;
819         sp->pptr = &hprio->ptr;
820         sp->pid = &hprio->last_ptr_id;
821
822         for (i = 0; i < 65535; i++) {
823                 if (!*sp->pptr && *sp->pid) {
824                         /* ptr was invalidated but id is valid - try to recover
825                          * the original or next ptr
826                          */
827                         *sp->pptr =
828                             htb_id_find_next_upper(prio, sp->root, *sp->pid);
829                 }
830                 *sp->pid = 0;   /* ptr is valid now, so remove this hint as it
831                                  * can become out of date quickly
832                                  */
833                 if (!*sp->pptr) {       /* we are at right end; rewind & go up */
834                         *sp->pptr = sp->root;
835                         while ((*sp->pptr)->rb_left)
836                                 *sp->pptr = (*sp->pptr)->rb_left;
837                         if (sp > stk) {
838                                 sp--;
839                                 if (!*sp->pptr) {
840                                         WARN_ON(1);
841                                         return NULL;
842                                 }
843                                 htb_next_rb_node(sp->pptr);
844                         }
845                 } else {
846                         struct htb_class *cl;
847                         struct htb_prio *clp;
848
849                         cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
850                         if (!cl->level)
851                                 return cl;
852                         clp = &cl->inner.clprio[prio];
853                         (++sp)->root = clp->feed.rb_node;
854                         sp->pptr = &clp->ptr;
855                         sp->pid = &clp->last_ptr_id;
856                 }
857         }
858         WARN_ON(1);
859         return NULL;
860 }
861
862 /* dequeues a packet at the given priority and level; call only if
863  * you are sure that there is an active class at prio/level
864  */
865 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
866                                         const int level)
867 {
868         struct sk_buff *skb = NULL;
869         struct htb_class *cl, *start;
870         struct htb_level *hlevel = &q->hlevel[level];
871         struct htb_prio *hprio = &hlevel->hprio[prio];
872
873         /* look initial class up in the row */
874         start = cl = htb_lookup_leaf(hprio, prio);
875
876         do {
877 next:
878                 if (unlikely(!cl))
879                         return NULL;
880
881                 /* the class can be empty - it is unlikely but can be true if the
882                  * leaf qdisc drops packets in its enqueue routine or if someone
883                  * used a graft operation on the leaf since the last dequeue;
884                  * simply deactivate and skip such a class
885                  */
886                 if (unlikely(cl->leaf.q->q.qlen == 0)) {
887                         struct htb_class *next;
888                         htb_deactivate(q, cl);
889
890                         /* row/level might become empty */
891                         if ((q->row_mask[level] & (1 << prio)) == 0)
892                                 return NULL;
893
894                         next = htb_lookup_leaf(hprio, prio);
895
896                         if (cl == start)        /* fix start if we just deleted it */
897                                 start = next;
898                         cl = next;
899                         goto next;
900                 }
901
902                 skb = cl->leaf.q->dequeue(cl->leaf.q);
903                 if (likely(skb != NULL))
904                         break;
905
906                 qdisc_warn_nonwc("htb", cl->leaf.q);
907                 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
908                                          &q->hlevel[0].hprio[prio].ptr);
909                 cl = htb_lookup_leaf(hprio, prio);
910
911         } while (cl != start);
912
913         if (likely(skb != NULL)) {
914                 bstats_update(&cl->bstats, skb);
915                 cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
916                 if (cl->leaf.deficit[level] < 0) {
917                         cl->leaf.deficit[level] += cl->quantum;
918                         htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
919                                                  &q->hlevel[0].hprio[prio].ptr);
920                 }
921                 /* this used to be after charge_class but this constellation
922                  * gives us slightly better performance
923                  */
924                 if (!cl->leaf.q->q.qlen)
925                         htb_deactivate(q, cl);
926                 htb_charge_class(q, cl, level, skb);
927         }
928         return skb;
929 }
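/*
 * Illustrative DRR trace (hypothetical numbers): a leaf with quantum 1500
 * sending 1000 byte packets, starting from a deficit of 0 at this level:
 *
 *	dequeue 1000 -> deficit -1000 < 0 -> deficit 500,  pointer advances
 *	dequeue 1000 -> deficit  -500 < 0 -> deficit 1000, pointer advances
 *	dequeue 1000 -> deficit     0     -> pointer stays, so the class is
 *	                visited again before the round robin moves on
 *
 * On average the class therefore receives one quantum (1500 bytes) of
 * service per round at this prio/level.
 */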
930
931 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
932 {
933         struct sk_buff *skb;
934         struct htb_sched *q = qdisc_priv(sch);
935         int level;
936         s64 next_event;
937         unsigned long start_at;
938
939         /* try to dequeue direct packets as high prio (!) to minimize cpu work */
940         skb = __qdisc_dequeue_head(&q->direct_queue);
941         if (skb != NULL) {
942 ok:
943                 qdisc_bstats_update(sch, skb);
944                 qdisc_qstats_backlog_dec(sch, skb);
945                 sch->q.qlen--;
946                 return skb;
947         }
948
949         if (!sch->q.qlen)
950                 goto fin;
951         q->now = ktime_get_ns();
952         start_at = jiffies;
953
954         next_event = q->now + 5LLU * NSEC_PER_SEC;
955
956         for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
957                 /* common case optimization - skip event handler quickly */
958                 int m;
959                 s64 event = q->near_ev_cache[level];
960
961                 if (q->now >= event) {
962                         event = htb_do_events(q, level, start_at);
963                         if (!event)
964                                 event = q->now + NSEC_PER_SEC;
965                         q->near_ev_cache[level] = event;
966                 }
967
968                 if (next_event > event)
969                         next_event = event;
970
971                 m = ~q->row_mask[level];
972                 while (m != (int)(-1)) {
973                         int prio = ffz(m);
974
975                         m |= 1 << prio;
976                         skb = htb_dequeue_tree(q, prio, level);
977                         if (likely(skb != NULL))
978                                 goto ok;
979                 }
980         }
981         if (likely(next_event > q->now))
982                 qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
983         else
984                 schedule_work(&q->work);
985 fin:
986         return skb;
987 }
988
989 /* reset all classes */
990 /* always called under BH & queue lock */
991 static void htb_reset(struct Qdisc *sch)
992 {
993         struct htb_sched *q = qdisc_priv(sch);
994         struct htb_class *cl;
995         unsigned int i;
996
997         for (i = 0; i < q->clhash.hashsize; i++) {
998                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
999                         if (cl->level)
1000                                 memset(&cl->inner, 0, sizeof(cl->inner));
1001                         else {
1002                                 if (cl->leaf.q && !q->offload)
1003                                         qdisc_reset(cl->leaf.q);
1004                         }
1005                         cl->prio_activity = 0;
1006                         cl->cmode = HTB_CAN_SEND;
1007                 }
1008         }
1009         qdisc_watchdog_cancel(&q->watchdog);
1010         __qdisc_reset_queue(&q->direct_queue);
1011         memset(q->hlevel, 0, sizeof(q->hlevel));
1012         memset(q->row_mask, 0, sizeof(q->row_mask));
1013 }
1014
1015 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1016         [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1017         [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
1018         [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1019         [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1020         [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1021         [TCA_HTB_RATE64] = { .type = NLA_U64 },
1022         [TCA_HTB_CEIL64] = { .type = NLA_U64 },
1023         [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1024 };
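/*
 * For reference (illustrative, hypothetical device and values): commands
 * such as
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 20
 *	tc class add dev eth0 parent 1: classid 1:10 htb rate 1mbit ceil 2mbit
 *
 * arrive here as TCA_HTB_INIT (struct tc_htb_glob, carrying defcls and
 * rate2quantum) for the qdisc and TCA_HTB_PARMS (struct tc_htb_opt,
 * carrying rate/ceil/buffer/prio/quantum) for the class, both validated
 * against htb_policy above.
 */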
1025
1026 static void htb_work_func(struct work_struct *work)
1027 {
1028         struct htb_sched *q = container_of(work, struct htb_sched, work);
1029         struct Qdisc *sch = q->watchdog.qdisc;
1030
1031         rcu_read_lock();
1032         __netif_schedule(qdisc_root(sch));
1033         rcu_read_unlock();
1034 }
1035
1036 static void htb_set_lockdep_class_child(struct Qdisc *q)
1037 {
1038         static struct lock_class_key child_key;
1039
1040         lockdep_set_class(qdisc_lock(q), &child_key);
1041 }
1042
1043 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1044 {
1045         return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1046 }
1047
1048 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1049                     struct netlink_ext_ack *extack)
1050 {
1051         struct net_device *dev = qdisc_dev(sch);
1052         struct tc_htb_qopt_offload offload_opt;
1053         struct htb_sched *q = qdisc_priv(sch);
1054         struct nlattr *tb[TCA_HTB_MAX + 1];
1055         struct tc_htb_glob *gopt;
1056         unsigned int ntx;
1057         bool offload;
1058         int err;
1059
1060         qdisc_watchdog_init(&q->watchdog, sch);
1061         INIT_WORK(&q->work, htb_work_func);
1062
1063         if (!opt)
1064                 return -EINVAL;
1065
1066         err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1067         if (err)
1068                 return err;
1069
1070         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1071                                           NULL);
1072         if (err < 0)
1073                 return err;
1074
1075         if (!tb[TCA_HTB_INIT])
1076                 return -EINVAL;
1077
1078         gopt = nla_data(tb[TCA_HTB_INIT]);
1079         if (gopt->version != HTB_VER >> 16)
1080                 return -EINVAL;
1081
1082         offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1083
1084         if (offload) {
1085                 if (sch->parent != TC_H_ROOT) {
1086                         NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
1087                         return -EOPNOTSUPP;
1088                 }
1089
1090                 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
1091                         NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
1092                         return -EOPNOTSUPP;
1093                 }
1094
1095                 q->num_direct_qdiscs = dev->real_num_tx_queues;
1096                 q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1097                                            sizeof(*q->direct_qdiscs),
1098                                            GFP_KERNEL);
1099                 if (!q->direct_qdiscs)
1100                         return -ENOMEM;
1101         }
1102
1103         err = qdisc_class_hash_init(&q->clhash);
1104         if (err < 0)
1105                 return err;
1106
1107         if (tb[TCA_HTB_DIRECT_QLEN])
1108                 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1109         else
1110                 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1111
1112         if ((q->rate2quantum = gopt->rate2quantum) < 1)
1113                 q->rate2quantum = 1;
1114         q->defcls = gopt->defcls;
1115
1116         if (!offload)
1117                 return 0;
1118
1119         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1120                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1121                 struct Qdisc *qdisc;
1122
1123                 qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1124                                           TC_H_MAKE(sch->handle, 0), extack);
1125                 if (!qdisc) {
1126                         return -ENOMEM;
1127                 }
1128
1129                 htb_set_lockdep_class_child(qdisc);
1130                 q->direct_qdiscs[ntx] = qdisc;
1131                 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1132         }
1133
1134         sch->flags |= TCQ_F_MQROOT;
1135
1136         offload_opt = (struct tc_htb_qopt_offload) {
1137                 .command = TC_HTB_CREATE,
1138                 .parent_classid = TC_H_MAJ(sch->handle) >> 16,
1139                 .classid = TC_H_MIN(q->defcls),
1140                 .extack = extack,
1141         };
1142         err = htb_offload(dev, &offload_opt);
1143         if (err)
1144                 return err;
1145
1146         /* Defer this assignment, so that htb_destroy skips offload-related
1147          * parts (especially calling ndo_setup_tc) on errors.
1148          */
1149         q->offload = true;
1150
1151         return 0;
1152 }
1153
1154 static void htb_attach_offload(struct Qdisc *sch)
1155 {
1156         struct net_device *dev = qdisc_dev(sch);
1157         struct htb_sched *q = qdisc_priv(sch);
1158         unsigned int ntx;
1159
1160         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1161                 struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1162
1163                 old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1164                 qdisc_put(old);
1165                 qdisc_hash_add(qdisc, false);
1166         }
1167         for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1168                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1169                 struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1170
1171                 qdisc_put(old);
1172         }
1173
1174         kfree(q->direct_qdiscs);
1175         q->direct_qdiscs = NULL;
1176 }
1177
1178 static void htb_attach_software(struct Qdisc *sch)
1179 {
1180         struct net_device *dev = qdisc_dev(sch);
1181         unsigned int ntx;
1182
1183         /* Resemble qdisc_graft behavior. */
1184         for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1185                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1186                 struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1187
1188                 qdisc_refcount_inc(sch);
1189
1190                 qdisc_put(old);
1191         }
1192 }
1193
1194 static void htb_attach(struct Qdisc *sch)
1195 {
1196         struct htb_sched *q = qdisc_priv(sch);
1197
1198         if (q->offload)
1199                 htb_attach_offload(sch);
1200         else
1201                 htb_attach_software(sch);
1202 }
1203
1204 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1205 {
1206         struct htb_sched *q = qdisc_priv(sch);
1207         struct nlattr *nest;
1208         struct tc_htb_glob gopt;
1209
1210         if (q->offload)
1211                 sch->flags |= TCQ_F_OFFLOADED;
1212         else
1213                 sch->flags &= ~TCQ_F_OFFLOADED;
1214
1215         sch->qstats.overlimits = q->overlimits;
1216         /* It's safe to not acquire the qdisc lock. As we hold RTNL,
1217          * no change can happen on the qdisc parameters.
1218          */
1219
1220         gopt.direct_pkts = q->direct_pkts;
1221         gopt.version = HTB_VER;
1222         gopt.rate2quantum = q->rate2quantum;
1223         gopt.defcls = q->defcls;
1224         gopt.debug = 0;
1225
1226         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1227         if (nest == NULL)
1228                 goto nla_put_failure;
1229         if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1230             nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1231                 goto nla_put_failure;
1232         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1233                 goto nla_put_failure;
1234
1235         return nla_nest_end(skb, nest);
1236
1237 nla_put_failure:
1238         nla_nest_cancel(skb, nest);
1239         return -1;
1240 }
1241
1242 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1243                           struct sk_buff *skb, struct tcmsg *tcm)
1244 {
1245         struct htb_class *cl = (struct htb_class *)arg;
1246         struct htb_sched *q = qdisc_priv(sch);
1247         struct nlattr *nest;
1248         struct tc_htb_opt opt;
1249
1250         /* It's safe to not acquire the qdisc lock. As we hold RTNL,
1251          * no change can happen on the class parameters.
1252          */
1253         tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1254         tcm->tcm_handle = cl->common.classid;
1255         if (!cl->level && cl->leaf.q)
1256                 tcm->tcm_info = cl->leaf.q->handle;
1257
1258         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1259         if (nest == NULL)
1260                 goto nla_put_failure;
1261
1262         memset(&opt, 0, sizeof(opt));
1263
1264         psched_ratecfg_getrate(&opt.rate, &cl->rate);
1265         opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1266         psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1267         opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1268         opt.quantum = cl->quantum;
1269         opt.prio = cl->prio;
1270         opt.level = cl->level;
1271         if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1272                 goto nla_put_failure;
1273         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1274                 goto nla_put_failure;
1275         if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1276             nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1277                               TCA_HTB_PAD))
1278                 goto nla_put_failure;
1279         if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1280             nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1281                               TCA_HTB_PAD))
1282                 goto nla_put_failure;
1283
1284         return nla_nest_end(skb, nest);
1285
1286 nla_put_failure:
1287         nla_nest_cancel(skb, nest);
1288         return -1;
1289 }
1290
1291 static void htb_offload_aggregate_stats(struct htb_sched *q,
1292                                         struct htb_class *cl)
1293 {
1294         u64 bytes = 0, packets = 0;
1295         struct htb_class *c;
1296         unsigned int i;
1297
1298         gnet_stats_basic_sync_init(&cl->bstats);
1299
1300         for (i = 0; i < q->clhash.hashsize; i++) {
1301                 hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1302                         struct htb_class *p = c;
1303
1304                         while (p && p->level < cl->level)
1305                                 p = p->parent;
1306
1307                         if (p != cl)
1308                                 continue;
1309
1310                         bytes += u64_stats_read(&c->bstats_bias.bytes);
1311                         packets += u64_stats_read(&c->bstats_bias.packets);
1312                         if (c->level == 0) {
1313                                 bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
1314                                 packets += u64_stats_read(&c->leaf.q->bstats.packets);
1315                         }
1316                 }
1317         }
1318         _bstats_update(&cl->bstats, bytes, packets);
1319 }
1320
1321 static int
1322 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1323 {
1324         struct htb_class *cl = (struct htb_class *)arg;
1325         struct htb_sched *q = qdisc_priv(sch);
1326         struct gnet_stats_queue qs = {
1327                 .drops = cl->drops,
1328                 .overlimits = cl->overlimits,
1329         };
1330         __u32 qlen = 0;
1331
1332         if (!cl->level && cl->leaf.q)
1333                 qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1334
1335         cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1336                                     INT_MIN, INT_MAX);
1337         cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1338                                      INT_MIN, INT_MAX);
1339
1340         if (q->offload) {
1341                 if (!cl->level) {
1342                         if (cl->leaf.q)
1343                                 cl->bstats = cl->leaf.q->bstats;
1344                         else
1345                                 gnet_stats_basic_sync_init(&cl->bstats);
1346                         _bstats_update(&cl->bstats,
1347                                        u64_stats_read(&cl->bstats_bias.bytes),
1348                                        u64_stats_read(&cl->bstats_bias.packets));
1349                 } else {
1350                         htb_offload_aggregate_stats(q, cl);
1351                 }
1352         }
1353
1354         if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
1355             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1356             gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1357                 return -1;
1358
1359         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1360 }
1361
1362 static struct netdev_queue *
1363 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1364 {
1365         struct net_device *dev = qdisc_dev(sch);
1366         struct tc_htb_qopt_offload offload_opt;
1367         struct htb_sched *q = qdisc_priv(sch);
1368         int err;
1369
1370         if (!q->offload)
1371                 return sch->dev_queue;
1372
1373         offload_opt = (struct tc_htb_qopt_offload) {
1374                 .command = TC_HTB_LEAF_QUERY_QUEUE,
1375                 .classid = TC_H_MIN(tcm->tcm_parent),
1376         };
1377         err = htb_offload(dev, &offload_opt);
1378         if (err || offload_opt.qid >= dev->num_tx_queues)
1379                 return NULL;
1380         return netdev_get_tx_queue(dev, offload_opt.qid);
1381 }
1382
1383 static struct Qdisc *
1384 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1385 {
1386         struct net_device *dev = dev_queue->dev;
1387         struct Qdisc *old_q;
1388
1389         if (dev->flags & IFF_UP)
1390                 dev_deactivate(dev);
1391         old_q = dev_graft_qdisc(dev_queue, new_q);
1392         if (new_q)
1393                 new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1394         if (dev->flags & IFF_UP)
1395                 dev_activate(dev);
1396
1397         return old_q;
1398 }
1399
1400 static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
1401 {
1402         struct netdev_queue *queue;
1403
1404         queue = cl->leaf.offload_queue;
1405         if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
1406                 WARN_ON(cl->leaf.q->dev_queue != queue);
1407
1408         return queue;
1409 }
1410
1411 static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
1412                                    struct htb_class *cl_new, bool destroying)
1413 {
1414         struct netdev_queue *queue_old, *queue_new;
1415         struct net_device *dev = qdisc_dev(sch);
1416
1417         queue_old = htb_offload_get_queue(cl_old);
1418         queue_new = htb_offload_get_queue(cl_new);
1419
1420         if (!destroying) {
1421                 struct Qdisc *qdisc;
1422
1423                 if (dev->flags & IFF_UP)
1424                         dev_deactivate(dev);
1425                 qdisc = dev_graft_qdisc(queue_old, NULL);
1426                 WARN_ON(qdisc != cl_old->leaf.q);
1427         }
1428
1429         if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
1430                 cl_old->leaf.q->dev_queue = queue_new;
1431         cl_old->leaf.offload_queue = queue_new;
1432
1433         if (!destroying) {
1434                 struct Qdisc *qdisc;
1435
1436                 qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
1437                 if (dev->flags & IFF_UP)
1438                         dev_activate(dev);
1439                 WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1440         }
1441 }
1442
1443 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1444                      struct Qdisc **old, struct netlink_ext_ack *extack)
1445 {
1446         struct netdev_queue *dev_queue = sch->dev_queue;
1447         struct htb_class *cl = (struct htb_class *)arg;
1448         struct htb_sched *q = qdisc_priv(sch);
1449         struct Qdisc *old_q;
1450
1451         if (cl->level)
1452                 return -EINVAL;
1453
1454         if (q->offload)
1455                 dev_queue = htb_offload_get_queue(cl);
1456
1457         if (!new) {
1458                 new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1459                                         cl->common.classid, extack);
1460                 if (!new)
1461                         return -ENOBUFS;
1462         }
1463
1464         if (q->offload) {
1465                 htb_set_lockdep_class_child(new);
1466                 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1467                 qdisc_refcount_inc(new);
1468                 old_q = htb_graft_helper(dev_queue, new);
1469         }
1470
1471         *old = qdisc_replace(sch, new, &cl->leaf.q);
1472
1473         if (q->offload) {
1474                 WARN_ON(old_q != *old);
1475                 qdisc_put(old_q);
1476         }
1477
1478         return 0;
1479 }
1480
1481 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1482 {
1483         struct htb_class *cl = (struct htb_class *)arg;
1484         return !cl->level ? cl->leaf.q : NULL;
1485 }
1486
1487 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1488 {
1489         struct htb_class *cl = (struct htb_class *)arg;
1490
1491         htb_deactivate(qdisc_priv(sch), cl);
1492 }
1493
1494 static inline int htb_parent_last_child(struct htb_class *cl)
1495 {
1496         if (!cl->parent)
1497                 /* the root class */
1498                 return 0;
1499         if (cl->parent->children > 1)
1500                 /* not the last child */
1501                 return 0;
1502         return 1;
1503 }
1504
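/* cl is the last child being removed: turn its parent back into a leaf
 * class, attach new_q (or noop_qdisc) as its elementary qdisc and reset
 * its token state so it starts out in HTB_CAN_SEND.
 */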
1505 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1506                                struct Qdisc *new_q)
1507 {
1508         struct htb_sched *q = qdisc_priv(sch);
1509         struct htb_class *parent = cl->parent;
1510
1511         WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1512
1513         if (parent->cmode != HTB_CAN_SEND)
1514                 htb_safe_rb_erase(&parent->pq_node,
1515                                   &q->hlevel[parent->level].wait_pq);
1516
1517         parent->level = 0;
1518         memset(&parent->inner, 0, sizeof(parent->inner));
1519         parent->leaf.q = new_q ? new_q : &noop_qdisc;
1520         parent->tokens = parent->buffer;
1521         parent->ctokens = parent->cbuffer;
1522         parent->t_c = ktime_get_ns();
1523         parent->cmode = HTB_CAN_SEND;
1524         if (q->offload)
1525                 parent->leaf.offload_queue = cl->leaf.offload_queue;
1526 }
1527
1528 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1529                                        struct netdev_queue *dev_queue,
1530                                        struct Qdisc *new_q)
1531 {
1532         struct Qdisc *old_q;
1533
1534         /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1535         if (new_q)
1536                 qdisc_refcount_inc(new_q);
1537         old_q = htb_graft_helper(dev_queue, new_q);
1538         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1539 }
1540
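/* Tear down the offloaded state of a leaf class: detach its qdisc from the
 * TX queue, fold its byte/packet counters into the parent's bias stats and
 * tell the driver to delete the leaf.  If the driver moved another class
 * onto the freed queue, re-home that class's qdisc as well.
 */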
1541 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1542                                      bool last_child, bool destroying,
1543                                      struct netlink_ext_ack *extack)
1544 {
1545         struct tc_htb_qopt_offload offload_opt;
1546         struct netdev_queue *dev_queue;
1547         struct Qdisc *q = cl->leaf.q;
1548         struct Qdisc *old = NULL;
1549         int err;
1550
1551         if (cl->level)
1552                 return -EINVAL;
1553
1554         WARN_ON(!q);
1555         dev_queue = htb_offload_get_queue(cl);
1556         old = htb_graft_helper(dev_queue, NULL);
1557         if (destroying)
1558                 /* Before HTB is destroyed, the kernel grafts noop_qdisc to
1559                  * all queues.
1560                  */
1561                 WARN_ON(!(old->flags & TCQ_F_BUILTIN));
1562         else
1563                 WARN_ON(old != q);
1564
1565         if (cl->parent) {
1566                 _bstats_update(&cl->parent->bstats_bias,
1567                                u64_stats_read(&q->bstats.bytes),
1568                                u64_stats_read(&q->bstats.packets));
1569         }
1570
1571         offload_opt = (struct tc_htb_qopt_offload) {
1572                 .command = !last_child ? TC_HTB_LEAF_DEL :
1573                            destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1574                            TC_HTB_LEAF_DEL_LAST,
1575                 .classid = cl->common.classid,
1576                 .extack = extack,
1577         };
1578         err = htb_offload(qdisc_dev(sch), &offload_opt);
1579
1580         if (!err || destroying)
1581                 qdisc_put(old);
1582         else
1583                 htb_graft_helper(dev_queue, old);
1584
1585         if (last_child)
1586                 return err;
1587
1588         if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
1589                 u32 classid = TC_H_MAJ(sch->handle) |
1590                               TC_H_MIN(offload_opt.classid);
1591                 struct htb_class *moved_cl = htb_find(classid, sch);
1592
1593                 htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
1594         }
1595
1596         return err;
1597 }
1598
1599 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1600 {
1601         if (!cl->level) {
1602                 WARN_ON(!cl->leaf.q);
1603                 qdisc_put(cl->leaf.q);
1604         }
1605         gen_kill_estimator(&cl->rate_est);
1606         tcf_block_put(cl->block);
1607         kfree(cl);
1608 }
1609
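/* Destroy the whole qdisc.  In offload mode only leaves can be deleted, and
 * inner classes become leaves as their children go away, so the class hash
 * is walked repeatedly, deleting the current leaves on each pass, until no
 * class remains.
 */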
1610 static void htb_destroy(struct Qdisc *sch)
1611 {
1612         struct net_device *dev = qdisc_dev(sch);
1613         struct tc_htb_qopt_offload offload_opt;
1614         struct htb_sched *q = qdisc_priv(sch);
1615         struct hlist_node *next;
1616         bool nonempty, changed;
1617         struct htb_class *cl;
1618         unsigned int i;
1619
1620         cancel_work_sync(&q->work);
1621         qdisc_watchdog_cancel(&q->watchdog);
1622         /* This line used to be after htb_destroy_class call below
1623          * and surprisingly it worked in 2.4. But it must precede it
1624          * because filters need their target class alive to be able to call
1625          * unbind_filter on it (without an Oops).
1626          */
1627         tcf_block_put(q->block);
1628
1629         for (i = 0; i < q->clhash.hashsize; i++) {
1630                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1631                         tcf_block_put(cl->block);
1632                         cl->block = NULL;
1633                 }
1634         }
1635
1636         do {
1637                 nonempty = false;
1638                 changed = false;
1639                 for (i = 0; i < q->clhash.hashsize; i++) {
1640                         hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1641                                                   common.hnode) {
1642                                 bool last_child;
1643
1644                                 if (!q->offload) {
1645                                         htb_destroy_class(sch, cl);
1646                                         continue;
1647                                 }
1648
1649                                 nonempty = true;
1650
1651                                 if (cl->level)
1652                                         continue;
1653
1654                                 changed = true;
1655
1656                                 last_child = htb_parent_last_child(cl);
1657                                 htb_destroy_class_offload(sch, cl, last_child,
1658                                                           true, NULL);
1659                                 qdisc_class_hash_remove(&q->clhash,
1660                                                         &cl->common);
1661                                 if (cl->parent)
1662                                         cl->parent->children--;
1663                                 if (last_child)
1664                                         htb_parent_to_leaf(sch, cl, NULL);
1665                                 htb_destroy_class(sch, cl);
1666                         }
1667                 }
1668         } while (changed);
1669         WARN_ON(nonempty);
1670
1671         qdisc_class_hash_destroy(&q->clhash);
1672         __qdisc_reset_queue(&q->direct_queue);
1673
1674         if (q->offload) {
1675                 offload_opt = (struct tc_htb_qopt_offload) {
1676                         .command = TC_HTB_DESTROY,
1677                 };
1678                 htb_offload(dev, &offload_opt);
1679         }
1680
1681         if (!q->direct_qdiscs)
1682                 return;
1683         for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1684                 qdisc_put(q->direct_qdiscs[i]);
1685         kfree(q->direct_qdiscs);
1686 }
1687
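/* Delete a single class.  Only childless classes with no attached filters
 * may be removed; if the class is its parent's last child, the parent
 * reverts to a leaf (see htb_parent_to_leaf()).
 */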
1688 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1689                       struct netlink_ext_ack *extack)
1690 {
1691         struct htb_sched *q = qdisc_priv(sch);
1692         struct htb_class *cl = (struct htb_class *)arg;
1693         struct Qdisc *new_q = NULL;
1694         int last_child = 0;
1695         int err;
1696
1697         /* TODO: why don't we allow deleting a subtree? References? Does the
1698          * tc subsystem guarantee that in htb_destroy it holds no class
1699          * refs, so that we could remove the children safely there?
1700          */
1701         if (cl->children || cl->filter_cnt)
1702                 return -EBUSY;
1703
1704         if (!cl->level && htb_parent_last_child(cl))
1705                 last_child = 1;
1706
1707         if (q->offload) {
1708                 err = htb_destroy_class_offload(sch, cl, last_child, false,
1709                                                 extack);
1710                 if (err)
1711                         return err;
1712         }
1713
1714         if (last_child) {
1715                 struct netdev_queue *dev_queue = sch->dev_queue;
1716
1717                 if (q->offload)
1718                         dev_queue = htb_offload_get_queue(cl);
1719
1720                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1721                                           cl->parent->common.classid,
1722                                           NULL);
1723                 if (q->offload) {
1724                         if (new_q)
1725                                 htb_set_lockdep_class_child(new_q);
1726                         htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1727                 }
1728         }
1729
1730         sch_tree_lock(sch);
1731
1732         if (!cl->level)
1733                 qdisc_purge_queue(cl->leaf.q);
1734
1735         /* delete from the hash and active lists; the rest is done in htb_destroy_class */
1736         qdisc_class_hash_remove(&q->clhash, &cl->common);
1737         if (cl->parent)
1738                 cl->parent->children--;
1739
1740         if (cl->prio_activity)
1741                 htb_deactivate(q, cl);
1742
1743         if (cl->cmode != HTB_CAN_SEND)
1744                 htb_safe_rb_erase(&cl->pq_node,
1745                                   &q->hlevel[cl->level].wait_pq);
1746
1747         if (last_child)
1748                 htb_parent_to_leaf(sch, cl, new_q);
1749
1750         sch_tree_unlock(sch);
1751
1752         htb_destroy_class(sch, cl);
1753         return 0;
1754 }
1755
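/* Create a new class or modify an existing one (tc class add/change).
 * Validates TCA_HTB_PARMS, programs the offload if enabled, and finally
 * updates rate/ceil, buffers, quantum and prio under sch_tree_lock.
 */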
1756 static int htb_change_class(struct Qdisc *sch, u32 classid,
1757                             u32 parentid, struct nlattr **tca,
1758                             unsigned long *arg, struct netlink_ext_ack *extack)
1759 {
1760         int err = -EINVAL;
1761         struct htb_sched *q = qdisc_priv(sch);
1762         struct htb_class *cl = (struct htb_class *)*arg, *parent;
1763         struct tc_htb_qopt_offload offload_opt;
1764         struct nlattr *opt = tca[TCA_OPTIONS];
1765         struct nlattr *tb[TCA_HTB_MAX + 1];
1766         struct Qdisc *parent_qdisc = NULL;
1767         struct netdev_queue *dev_queue;
1768         struct tc_htb_opt *hopt;
1769         u64 rate64, ceil64;
1770         int warn = 0;
1771
1772         /* extract all subattrs from opt attr */
1773         if (!opt)
1774                 goto failure;
1775
1776         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1777                                           NULL);
1778         if (err < 0)
1779                 goto failure;
1780
1781         err = -EINVAL;
1782         if (tb[TCA_HTB_PARMS] == NULL)
1783                 goto failure;
1784
1785         parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1786
1787         hopt = nla_data(tb[TCA_HTB_PARMS]);
1788         if (!hopt->rate.rate || !hopt->ceil.rate)
1789                 goto failure;
1790
1791         if (q->offload) {
1792                 /* Options not supported by the offload. */
1793                 if (hopt->rate.overhead || hopt->ceil.overhead) {
1794                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
1795                         goto failure;
1796                 }
1797                 if (hopt->rate.mpu || hopt->ceil.mpu) {
1798                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
1799                         goto failure;
1800                 }
1801                 if (hopt->quantum) {
1802                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
1803                         goto failure;
1804                 }
1805                 if (hopt->prio) {
1806                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
1807                         goto failure;
1808                 }
1809         }
1810
1811         /* Keep backward compatibility with rate_table based iproute2 tc */
1812         if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1813                 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1814                                               NULL));
1815
1816         if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1817                 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1818                                               NULL));
1819
1820         rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1821         ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1822
1823         if (!cl) {              /* new class */
1824                 struct net_device *dev = qdisc_dev(sch);
1825                 struct Qdisc *new_q, *old_q;
1826                 int prio;
1827                 struct {
1828                         struct nlattr           nla;
1829                         struct gnet_estimator   opt;
1830                 } est = {
1831                         .nla = {
1832                                 .nla_len        = nla_attr_size(sizeof(est.opt)),
1833                                 .nla_type       = TCA_RATE,
1834                         },
1835                         .opt = {
1836                                 /* 4s interval, 16s averaging constant */
1837                                 .interval       = 2,
1838                                 .ewma_log       = 2,
1839                         },
1840                 };
1841
1842                 /* check for valid classid */
1843                 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1844                     htb_find(classid, sch))
1845                         goto failure;
1846
1847                 /* check maximal depth */
1848                 if (parent && parent->parent && parent->parent->level < 2) {
1849                         pr_err("htb: tree is too deep\n");
1850                         goto failure;
1851                 }
1852                 err = -ENOBUFS;
1853                 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1854                 if (!cl)
1855                         goto failure;
1856
1857                 gnet_stats_basic_sync_init(&cl->bstats);
1858                 gnet_stats_basic_sync_init(&cl->bstats_bias);
1859
1860                 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1861                 if (err) {
1862                         kfree(cl);
1863                         goto failure;
1864                 }
1865                 if (htb_rate_est || tca[TCA_RATE]) {
1866                         err = gen_new_estimator(&cl->bstats, NULL,
1867                                                 &cl->rate_est,
1868                                                 NULL,
1869                                                 true,
1870                                                 tca[TCA_RATE] ? : &est.nla);
1871                         if (err)
1872                                 goto err_block_put;
1873                 }
1874
1875                 cl->children = 0;
1876                 RB_CLEAR_NODE(&cl->pq_node);
1877
1878                 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1879                         RB_CLEAR_NODE(&cl->node[prio]);
1880
1881                 cl->common.classid = classid;
1882
1883                 /* Make sure nothing interrupts us between the two
1884                  * ndo_setup_tc calls.
1885                  */
1886                 ASSERT_RTNL();
1887
1888                 /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1889                  * which can't be used inside of sch_tree_lock
1890                  * -- thanks to Karlis Peisenieks
1891                  */
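                /* Pick the TX queue backing the new leaf: the software path
                 * always uses sch->dev_queue; with offload we either ask the
                 * driver for a fresh queue (TC_HTB_LEAF_ALLOC_QUEUE) or, when
                 * the parent is currently a leaf, convert it to an inner node
                 * (TC_HTB_LEAF_TO_INNER) and inherit its queue.
                 */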
1892                 if (!q->offload) {
1893                         dev_queue = sch->dev_queue;
1894                 } else if (!(parent && !parent->level)) {
1895                         /* Assign a dev_queue to this classid. */
1896                         offload_opt = (struct tc_htb_qopt_offload) {
1897                                 .command = TC_HTB_LEAF_ALLOC_QUEUE,
1898                                 .classid = cl->common.classid,
1899                                 .parent_classid = parent ?
1900                                         TC_H_MIN(parent->common.classid) :
1901                                         TC_HTB_CLASSID_ROOT,
1902                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1903                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1904                                 .extack = extack,
1905                         };
1906                         err = htb_offload(dev, &offload_opt);
1907                         if (err) {
1908                                 pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1909                                        err);
1910                                 goto err_kill_estimator;
1911                         }
1912                         dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1913                 } else { /* First child. */
1914                         dev_queue = htb_offload_get_queue(parent);
1915                         old_q = htb_graft_helper(dev_queue, NULL);
1916                         WARN_ON(old_q != parent->leaf.q);
1917                         offload_opt = (struct tc_htb_qopt_offload) {
1918                                 .command = TC_HTB_LEAF_TO_INNER,
1919                                 .classid = cl->common.classid,
1920                                 .parent_classid =
1921                                         TC_H_MIN(parent->common.classid),
1922                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1923                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1924                                 .extack = extack,
1925                         };
1926                         err = htb_offload(dev, &offload_opt);
1927                         if (err) {
1928                                 pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1929                                        err);
1930                                 htb_graft_helper(dev_queue, old_q);
1931                                 goto err_kill_estimator;
1932                         }
1933                         _bstats_update(&parent->bstats_bias,
1934                                        u64_stats_read(&old_q->bstats.bytes),
1935                                        u64_stats_read(&old_q->bstats.packets));
1936                         qdisc_put(old_q);
1937                 }
1938                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1939                                           classid, NULL);
1940                 if (q->offload) {
1941                         if (new_q) {
1942                                 htb_set_lockdep_class_child(new_q);
1943                                 /* One ref for cl->leaf.q, the other for
1944                                  * dev_queue->qdisc.
1945                                  */
1946                                 qdisc_refcount_inc(new_q);
1947                         }
1948                         old_q = htb_graft_helper(dev_queue, new_q);
1949                         /* No qdisc_put needed. */
1950                         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1951                 }
1952                 sch_tree_lock(sch);
1953                 if (parent && !parent->level) {
1954                         /* turn parent into inner node */
1955                         qdisc_purge_queue(parent->leaf.q);
1956                         parent_qdisc = parent->leaf.q;
1957                         if (parent->prio_activity)
1958                                 htb_deactivate(q, parent);
1959
1960                         /* remove from evt list because of level change */
1961                         if (parent->cmode != HTB_CAN_SEND) {
1962                                 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1963                                 parent->cmode = HTB_CAN_SEND;
1964                         }
1965                         parent->level = (parent->parent ? parent->parent->level
1966                                          : TC_HTB_MAXDEPTH) - 1;
1967                         memset(&parent->inner, 0, sizeof(parent->inner));
1968                 }
1969
1970                 /* the leaf (i.e. us) needs an elementary qdisc */
1971                 cl->leaf.q = new_q ? new_q : &noop_qdisc;
1972                 if (q->offload)
1973                         cl->leaf.offload_queue = dev_queue;
1974
1975                 cl->parent = parent;
1976
1977                 /* set class to be in HTB_CAN_SEND state */
1978                 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1979                 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1980                 cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1981                 cl->t_c = ktime_get_ns();
1982                 cl->cmode = HTB_CAN_SEND;
1983
1984                 /* attach to the hash list and parent's family */
1985                 qdisc_class_hash_insert(&q->clhash, &cl->common);
1986                 if (parent)
1987                         parent->children++;
1988                 if (cl->leaf.q != &noop_qdisc)
1989                         qdisc_hash_add(cl->leaf.q, true);
1990         } else {
1991                 if (tca[TCA_RATE]) {
1992                         err = gen_replace_estimator(&cl->bstats, NULL,
1993                                                     &cl->rate_est,
1994                                                     NULL,
1995                                                     true,
1996                                                     tca[TCA_RATE]);
1997                         if (err)
1998                                 return err;
1999                 }
2000
2001                 if (q->offload) {
2002                         struct net_device *dev = qdisc_dev(sch);
2003
2004                         offload_opt = (struct tc_htb_qopt_offload) {
2005                                 .command = TC_HTB_NODE_MODIFY,
2006                                 .classid = cl->common.classid,
2007                                 .rate = max_t(u64, hopt->rate.rate, rate64),
2008                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
2009                                 .extack = extack,
2010                         };
2011                         err = htb_offload(dev, &offload_opt);
2012                         if (err)
2013                                 /* Estimator was replaced, and rollback may fail
2014                                  * as well, so we don't try to recover it, and
2015                                  * the estimator won't work properly with the
2016                                  * offload anyway, because bstats are updated
2017                                  * only when the stats are queried.
2018                                  */
2019                                 return err;
2020                 }
2021
2022                 sch_tree_lock(sch);
2023         }
2024
2025         psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
2026         psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
2027
2028         /* there used to be a nasty bug here: we have to check that the node
2029          * is really a leaf before changing cl->leaf!
2030          */
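        /* For leaves, derive the quantum (bytes served per round when
         * borrowing) from the configured rate: quantum = rate_bytes_ps / r2q,
         * clamped to [1000, 200000] bytes unless an explicit quantum was given.
         */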
2031         if (!cl->level) {
2032                 u64 quantum = cl->rate.rate_bytes_ps;
2033
2034                 do_div(quantum, q->rate2quantum);
2035                 cl->quantum = min_t(u64, quantum, INT_MAX);
2036
2037                 if (!hopt->quantum && cl->quantum < 1000) {
2038                         warn = -1;
2039                         cl->quantum = 1000;
2040                 }
2041                 if (!hopt->quantum && cl->quantum > 200000) {
2042                         warn = 1;
2043                         cl->quantum = 200000;
2044                 }
2045                 if (hopt->quantum)
2046                         cl->quantum = hopt->quantum;
2047                 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2048                         cl->prio = TC_HTB_NUMPRIO - 1;
2049         }
2050
2051         cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2052         cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2053
2054         sch_tree_unlock(sch);
2055         qdisc_put(parent_qdisc);
2056
2057         if (warn)
2058                 pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
2059                             cl->common.classid, (warn == -1 ? "small" : "big"));
2060
2061         qdisc_class_hash_grow(sch, &q->clhash);
2062
2063         *arg = (unsigned long)cl;
2064         return 0;
2065
2066 err_kill_estimator:
2067         gen_kill_estimator(&cl->rate_est);
2068 err_block_put:
2069         tcf_block_put(cl->block);
2070         kfree(cl);
2071 failure:
2072         return err;
2073 }
2074
2075 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2076                                        struct netlink_ext_ack *extack)
2077 {
2078         struct htb_sched *q = qdisc_priv(sch);
2079         struct htb_class *cl = (struct htb_class *)arg;
2080
2081         return cl ? cl->block : q->block;
2082 }
2083
2084 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2085                                      u32 classid)
2086 {
2087         struct htb_class *cl = htb_find(classid, sch);
2088
2089         /*if (cl && !cl->level) return 0;
2090          * The line above used to be there to prevent attaching filters to
2091          * leaves. But at least the tc_index filter uses this just to get the
2092          * class for other reasons, so we have to allow it.
2093          * ----
2094          * 19.6.2002 As Werner explained it is ok - bind filter is just
2095          * another way to "lock" the class - unlike "get", this lock can
2096          * be broken by the class during destroy, IIUC.
2097          */
2098         if (cl)
2099                 cl->filter_cnt++;
2100         return (unsigned long)cl;
2101 }
2102
2103 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2104 {
2105         struct htb_class *cl = (struct htb_class *)arg;
2106
2107         if (cl)
2108                 cl->filter_cnt--;
2109 }
2110
2111 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2112 {
2113         struct htb_sched *q = qdisc_priv(sch);
2114         struct htb_class *cl;
2115         unsigned int i;
2116
2117         if (arg->stop)
2118                 return;
2119
2120         for (i = 0; i < q->clhash.hashsize; i++) {
2121                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2122                         if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
2123                                 return;
2124                 }
2125         }
2126 }
2127
2128 static const struct Qdisc_class_ops htb_class_ops = {
2129         .select_queue   =       htb_select_queue,
2130         .graft          =       htb_graft,
2131         .leaf           =       htb_leaf,
2132         .qlen_notify    =       htb_qlen_notify,
2133         .find           =       htb_search,
2134         .change         =       htb_change_class,
2135         .delete         =       htb_delete,
2136         .walk           =       htb_walk,
2137         .tcf_block      =       htb_tcf_block,
2138         .bind_tcf       =       htb_bind_filter,
2139         .unbind_tcf     =       htb_unbind_filter,
2140         .dump           =       htb_dump_class,
2141         .dump_stats     =       htb_dump_class_stats,
2142 };
2143
2144 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2145         .cl_ops         =       &htb_class_ops,
2146         .id             =       "htb",
2147         .priv_size      =       sizeof(struct htb_sched),
2148         .enqueue        =       htb_enqueue,
2149         .dequeue        =       htb_dequeue,
2150         .peek           =       qdisc_peek_dequeued,
2151         .init           =       htb_init,
2152         .attach         =       htb_attach,
2153         .reset          =       htb_reset,
2154         .destroy        =       htb_destroy,
2155         .dump           =       htb_dump,
2156         .owner          =       THIS_MODULE,
2157 };
2158
2159 static int __init htb_module_init(void)
2160 {
2161         return register_qdisc(&htb_qdisc_ops);
2162 }
2163 static void __exit htb_module_exit(void)
2164 {
2165         unregister_qdisc(&htb_qdisc_ops);
2166 }
2167
2168 module_init(htb_module_init)
2169 module_exit(htb_module_exit)
2170 MODULE_LICENSE("GPL");
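/* Illustrative iproute2 usage (a sketch, not taken from this file; exact
 * syntax depends on the tc version):
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20
 *   tc class add dev eth0 parent 1: classid 1:1 htb rate 100mbit ceil 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:20 htb rate 30mbit ceil 100mbit
 *
 * Newer iproute2 also accepts an "offload" flag on the qdisc, which selects
 * the hardware path handled by htb_offload() above (driver support required).
 */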