net/sched/sch_htb.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
4  *
5  * Authors:     Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *                      HTB support at LARTC mailing list
10  *              Ondrej Kraus, <krauso@barr.cz>
11  *                      found missing INIT_QDISC(htb)
12  *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *                      helped a lot to locate nasty class stall bug
14  *              Andi Kleen, Jamal Hadi, Bert Hubert
15  *                      code review and helpful comments on shaping
16  *              Tomasz Wrona, <tw@eter.tym.pl>
17  *                      created test case so that I was able to fix nasty bug
18  *              Wilfried Weissmann
19  *                      spotted bug in dequeue code and helped with fix
20  *              Jiri Fojtasek
21  *                      fixed requeue routine
22  *              and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows assigning a priority to each class in the hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47
48     Levels:
49     Each class is assigned a level. Leaves ALWAYS have level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51     one less than their parent.
52 */
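/* For illustration (assuming TC_HTB_MAXDEPTH is 8, as defined in
 * include/uapi/linux/pkt_sched.h), a simple three-deep hierarchy would
 * carry these levels:
 *
 *     1:1    root class      level 7  (TC_HTB_MAXDEPTH - 1)
 *      |
 *     1:10   interior class  level 6  (one less than its parent)
 *      |
 *     1:100  leaf class      level 0  (leaves are always level 0)
 */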
53
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011         /* major must match the version number supplied by TC */
56
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
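/* For illustration: HTB_VER 0x30011 encodes protocol major 3 in the upper
 * 16 bits (0x30011 >> 16 == 3) and a minor revision of 0x0011 in the lower
 * bits; the preprocessor check above refuses to build against a pkt_sched.h
 * whose TC_HTB_PROTOVER advertises a different major version.
 */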
60
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64
65 static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
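/* For illustration, both knobs can be set at module load time or, given the
 * 0640 permissions above, tweaked later through sysfs (paths assume the
 * module is built as sch_htb):
 *
 *   modprobe sch_htb htb_hysteresis=1
 *   echo 1 > /sys/module/sch_htb/parameters/htb_rate_est
 */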
68
69 /* used internally to keep the status of a single class */
70 enum htb_cmode {
71         HTB_CANT_SEND,          /* class can't send and can't borrow */
72         HTB_MAY_BORROW,         /* class can't send but may borrow */
73         HTB_CAN_SEND            /* class can send */
74 };
75
76 struct htb_prio {
77         union {
78                 struct rb_root  row;
79                 struct rb_root  feed;
80         };
81         struct rb_node  *ptr;
82         /* When a class changes from state 1->2 and disconnects from
83          * its parent's feed, we lose the ptr value and start from the
84          * first child again. Here we store the classid of the
85          * last valid ptr (used when ptr is NULL).
86          */
87         u32             last_ptr_id;
88 };
89
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95         struct Qdisc_class_common common;
96         struct psched_ratecfg   rate;
97         struct psched_ratecfg   ceil;
98         s64                     buffer, cbuffer;/* token bucket depth/rate */
99         s64                     mbuffer;        /* max wait time */
100         u32                     prio;           /* these two are used only by leaves... */
101         int                     quantum;        /* but stored for parent-to-leaf return */
102
103         struct tcf_proto __rcu  *filter_list;   /* class attached filters */
104         struct tcf_block        *block;
105         int                     filter_cnt;
106
107         int                     level;          /* our level (see above) */
108         unsigned int            children;
109         struct htb_class        *parent;        /* parent class */
110
111         struct net_rate_estimator __rcu *rate_est;
112
113         /*
114          * Written often fields
115          */
116         struct gnet_stats_basic_packed bstats;
117         struct gnet_stats_basic_packed bstats_bias;
118         struct tc_htb_xstats    xstats; /* our special stats */
119
120         /* token bucket parameters */
121         s64                     tokens, ctokens;/* current number of tokens */
122         s64                     t_c;            /* checkpoint time */
123
124         union {
125                 struct htb_class_leaf {
126                         int             deficit[TC_HTB_MAXDEPTH];
127                         struct Qdisc    *q;
128                         struct netdev_queue *offload_queue;
129                 } leaf;
130                 struct htb_class_inner {
131                         struct htb_prio clprio[TC_HTB_NUMPRIO];
132                 } inner;
133         };
134         s64                     pq_key;
135
136         int                     prio_activity;  /* for which prios are we active */
137         enum htb_cmode          cmode;          /* current mode of the class */
138         struct rb_node          pq_node;        /* node for event queue */
139         struct rb_node          node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
140
141         unsigned int drops ____cacheline_aligned_in_smp;
142         unsigned int            overlimits;
143 };
144
145 struct htb_level {
146         struct rb_root  wait_pq;
147         struct htb_prio hprio[TC_HTB_NUMPRIO];
148 };
149
150 struct htb_sched {
151         struct Qdisc_class_hash clhash;
152         int                     defcls;         /* class where unclassified flows go to */
153         int                     rate2quantum;   /* quant = rate / rate2quantum */
154
155         /* filters for qdisc itself */
156         struct tcf_proto __rcu  *filter_list;
157         struct tcf_block        *block;
158
159 #define HTB_WARN_TOOMANYEVENTS  0x1
160         unsigned int            warned; /* only one warning */
161         int                     direct_qlen;
162         struct work_struct      work;
163
164         /* non-shaped skbs; let them go directly through */
165         struct qdisc_skb_head   direct_queue;
166         u32                     direct_pkts;
167         u32                     overlimits;
168
169         struct qdisc_watchdog   watchdog;
170
171         s64                     now;    /* cached dequeue time */
172
173         /* time of nearest event per level (row) */
174         s64                     near_ev_cache[TC_HTB_MAXDEPTH];
175
176         int                     row_mask[TC_HTB_MAXDEPTH];
177
178         struct htb_level        hlevel[TC_HTB_MAXDEPTH];
179
180         struct Qdisc            **direct_qdiscs;
181         unsigned int            num_direct_qdiscs;
182
183         bool                    offload;
184 };
185
186 /* find class in global hash table using given handle */
187 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
188 {
189         struct htb_sched *q = qdisc_priv(sch);
190         struct Qdisc_class_common *clc;
191
192         clc = qdisc_class_find(&q->clhash, handle);
193         if (clc == NULL)
194                 return NULL;
195         return container_of(clc, struct htb_class, common);
196 }
197
198 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
199 {
200         return (unsigned long)htb_find(handle, sch);
201 }
202 /**
203  * htb_classify - classify a packet into a class
204  *
205  * It returns NULL if the packet should be dropped or -1 if the packet
206  * should be passed directly through. In all other cases a leaf class is returned.
207  * We allow direct class selection by a valid classid in skb->priority. Then we
208  * examine filters in the qdisc and in inner nodes (if a higher filter points to
209  * the inner node). If we end up with classid MAJOR:0 we enqueue the skb into the
210  * special internal fifo (direct). These packets then go directly through. If we
211  * still have no valid leaf we try to use the MAJOR:default leaf. If that is still
212  * unsuccessful, we finish and return the direct queue.
213  */
214 #define HTB_DIRECT ((struct htb_class *)-1L)
215
216 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
217                                       int *qerr)
218 {
219         struct htb_sched *q = qdisc_priv(sch);
220         struct htb_class *cl;
221         struct tcf_result res;
222         struct tcf_proto *tcf;
223         int result;
224
225         /* allow selecting the class by setting skb->priority to a valid classid;
226          * note that nfmark can be used too, by attaching an fw filter with no
227          * rules in it
228          */
229         if (skb->priority == sch->handle)
230                 return HTB_DIRECT;      /* X:0 (direct flow) selected */
231         cl = htb_find(skb->priority, sch);
232         if (cl) {
233                 if (cl->level == 0)
234                         return cl;
235                 /* Start with inner filter chain if a non-leaf class is selected */
236                 tcf = rcu_dereference_bh(cl->filter_list);
237         } else {
238                 tcf = rcu_dereference_bh(q->filter_list);
239         }
240
241         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
242         while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
243 #ifdef CONFIG_NET_CLS_ACT
244                 switch (result) {
245                 case TC_ACT_QUEUED:
246                 case TC_ACT_STOLEN:
247                 case TC_ACT_TRAP:
248                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
249                         fallthrough;
250                 case TC_ACT_SHOT:
251                         return NULL;
252                 }
253 #endif
254                 cl = (void *)res.class;
255                 if (!cl) {
256                         if (res.classid == sch->handle)
257                                 return HTB_DIRECT;      /* X:0 (direct flow) */
258                         cl = htb_find(res.classid, sch);
259                         if (!cl)
260                                 break;  /* filter selected invalid classid */
261                 }
262                 if (!cl->level)
263                         return cl;      /* we hit leaf; return it */
264
265                 /* we have got inner class; apply inner filter chain */
266                 tcf = rcu_dereference_bh(cl->filter_list);
267         }
268         /* classification failed; try to use default class */
269         cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
270         if (!cl || cl->level)
271                 return HTB_DIRECT;      /* bad default .. this is safe bet */
272         return cl;
273 }
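/* For illustration (assuming a qdisc handle of 1:0 and defcls 0x20):
 *
 *   skb->priority == 0x00010000 (1:0)   -> HTB_DIRECT, bypasses shaping
 *   skb->priority == 0x00010005 (1:5)   -> class 1:5 if it exists and is a leaf
 *   no usable priority, no filter match -> default class 1:20, or HTB_DIRECT
 *                                          if 1:20 is missing or not a leaf
 */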
274
275 /**
276  * htb_add_to_id_tree - adds class to the round robin list
277  * @root: the root of the tree
278  * @cl: the class to add
279  * @prio: the given prio in the class
280  *
281  * Routine adds class to the list (actually tree) sorted by classid.
282  * Make sure that class is not already on such list for given prio.
283  */
284 static void htb_add_to_id_tree(struct rb_root *root,
285                                struct htb_class *cl, int prio)
286 {
287         struct rb_node **p = &root->rb_node, *parent = NULL;
288
289         while (*p) {
290                 struct htb_class *c;
291                 parent = *p;
292                 c = rb_entry(parent, struct htb_class, node[prio]);
293
294                 if (cl->common.classid > c->common.classid)
295                         p = &parent->rb_right;
296                 else
297                         p = &parent->rb_left;
298         }
299         rb_link_node(&cl->node[prio], parent, p);
300         rb_insert_color(&cl->node[prio], root);
301 }
302
303 /**
304  * htb_add_to_wait_tree - adds class to the event queue with delay
305  * @q: the priority event queue
306  * @cl: the class to add
307  * @delay: delay in nanoseconds
308  *
309  * The class is added to the priority event queue to indicate that the class
310  * will change its mode at time cl->pq_key (in nanoseconds). Make sure that
311  * the class is not already in the queue.
312  */
313 static void htb_add_to_wait_tree(struct htb_sched *q,
314                                  struct htb_class *cl, s64 delay)
315 {
316         struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
317
318         cl->pq_key = q->now + delay;
319         if (cl->pq_key == q->now)
320                 cl->pq_key++;
321
322         /* update the nearest event cache */
323         if (q->near_ev_cache[cl->level] > cl->pq_key)
324                 q->near_ev_cache[cl->level] = cl->pq_key;
325
326         while (*p) {
327                 struct htb_class *c;
328                 parent = *p;
329                 c = rb_entry(parent, struct htb_class, pq_node);
330                 if (cl->pq_key >= c->pq_key)
331                         p = &parent->rb_right;
332                 else
333                         p = &parent->rb_left;
334         }
335         rb_link_node(&cl->pq_node, parent, p);
336         rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
337 }
338
339 /**
340  * htb_next_rb_node - finds next node in binary tree
341  * @n: the current node in binary tree
342  *
343  * When we are past the last key, *n is set to NULL.
344  * Average complexity is 2 steps per call.
345  */
346 static inline void htb_next_rb_node(struct rb_node **n)
347 {
348         *n = rb_next(*n);
349 }
350
351 /**
352  * htb_add_class_to_row - add class to its row
353  * @q: the priority event queue
354  * @cl: the class to add
355  * @mask: the given priorities in class in bitmap
356  *
357  * The class is added to row at priorities marked in mask.
358  * It does nothing if mask == 0.
359  */
360 static inline void htb_add_class_to_row(struct htb_sched *q,
361                                         struct htb_class *cl, int mask)
362 {
363         q->row_mask[cl->level] |= mask;
364         while (mask) {
365                 int prio = ffz(~mask);
366                 mask &= ~(1 << prio);
367                 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
368         }
369 }
370
371 /* If this triggers, it is a bug in this code, but it need not be fatal */
372 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
373 {
374         if (RB_EMPTY_NODE(rb)) {
375                 WARN_ON(1);
376         } else {
377                 rb_erase(rb, root);
378                 RB_CLEAR_NODE(rb);
379         }
380 }
381
382
383 /**
384  * htb_remove_class_from_row - removes class from its row
385  * @q: the priority event queue
386  * @cl: the class to remove
387  * @mask: the given priorities in class in bitmap
388  *
389  * The class is removed from row at priorities marked in mask.
390  * It does nothing if mask == 0.
391  */
392 static inline void htb_remove_class_from_row(struct htb_sched *q,
393                                                  struct htb_class *cl, int mask)
394 {
395         int m = 0;
396         struct htb_level *hlevel = &q->hlevel[cl->level];
397
398         while (mask) {
399                 int prio = ffz(~mask);
400                 struct htb_prio *hprio = &hlevel->hprio[prio];
401
402                 mask &= ~(1 << prio);
403                 if (hprio->ptr == cl->node + prio)
404                         htb_next_rb_node(&hprio->ptr);
405
406                 htb_safe_rb_erase(cl->node + prio, &hprio->row);
407                 if (!hprio->row.rb_node)
408                         m |= 1 << prio;
409         }
410         q->row_mask[cl->level] &= ~m;
411 }
412
413 /**
414  * htb_activate_prios - creates an active class's feed chain
415  * @q: the priority event queue
416  * @cl: the class to activate
417  *
418  * The class is connected to its ancestors and/or the appropriate rows
419  * for the priorities it participates in. cl->cmode must be the new
420  * (activated) mode. It does nothing if cl->prio_activity == 0.
421  */
422 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
423 {
424         struct htb_class *p = cl->parent;
425         long m, mask = cl->prio_activity;
426
427         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
428                 m = mask;
429                 while (m) {
430                         int prio = ffz(~m);
431                         m &= ~(1 << prio);
432
433                         if (p->inner.clprio[prio].feed.rb_node)
434                                 /* parent already has its feed in use, so
435                                  * reset the bit in mask as the parent is already ok
436                                  */
437                                 mask &= ~(1 << prio);
438
439                         htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
440                 }
441                 p->prio_activity |= mask;
442                 cl = p;
443                 p = cl->parent;
444
445         }
446         if (cl->cmode == HTB_CAN_SEND && mask)
447                 htb_add_class_to_row(q, cl, mask);
448 }
449
450 /**
451  * htb_deactivate_prios - remove class from feed chain
452  * @q: the priority event queue
453  * @cl: the class to deactivate
454  *
455  * cl->cmode must represent old mode (before deactivation). It does
456  * nothing if cl->prio_activity == 0. Class is removed from all feed
457  * chains and rows.
458  */
459 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
460 {
461         struct htb_class *p = cl->parent;
462         long m, mask = cl->prio_activity;
463
464         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
465                 m = mask;
466                 mask = 0;
467                 while (m) {
468                         int prio = ffz(~m);
469                         m &= ~(1 << prio);
470
471                         if (p->inner.clprio[prio].ptr == cl->node + prio) {
472                                 /* we are removing child which is pointed to from
473                                  * parent feed - forget the pointer but remember
474                                  * classid
475                                  */
476                                 p->inner.clprio[prio].last_ptr_id = cl->common.classid;
477                                 p->inner.clprio[prio].ptr = NULL;
478                         }
479
480                         htb_safe_rb_erase(cl->node + prio,
481                                           &p->inner.clprio[prio].feed);
482
483                         if (!p->inner.clprio[prio].feed.rb_node)
484                                 mask |= 1 << prio;
485                 }
486
487                 p->prio_activity &= ~mask;
488                 cl = p;
489                 p = cl->parent;
490
491         }
492         if (cl->cmode == HTB_CAN_SEND && mask)
493                 htb_remove_class_from_row(q, cl, mask);
494 }
495
496 static inline s64 htb_lowater(const struct htb_class *cl)
497 {
498         if (htb_hysteresis)
499                 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
500         else
501                 return 0;
502 }
503 static inline s64 htb_hiwater(const struct htb_class *cl)
504 {
505         if (htb_hysteresis)
506                 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
507         else
508                 return 0;
509 }
510
511
512 /**
513  * htb_class_mode - computes and returns current class mode
514  * @cl: the target class
515  * @diff: diff time in nanoseconds
516  *
517  * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
518  * is not HTB_CAN_SEND then *diff is updated to the time difference
519  * from now to the time when cl will change its state.
520  * It is also worth noting that the class mode doesn't change simply
521  * at cl->{c,}tokens == 0; rather, there can be a hysteresis in the
522  * range 0 .. -cl->{c,}buffer. It is meant to limit the number of
523  * mode transitions per time unit. The speed gain is about 1/6.
524  */
525 static inline enum htb_cmode
526 htb_class_mode(struct htb_class *cl, s64 *diff)
527 {
528         s64 toks;
529
530         if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
531                 *diff = -toks;
532                 return HTB_CANT_SEND;
533         }
534
535         if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
536                 return HTB_CAN_SEND;
537
538         *diff = -toks;
539         return HTB_MAY_BORROW;
540 }
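/* Worked example (illustrative): assume cl->buffer == cl->cbuffer == 10000 ns
 * worth of tokens, the class is currently HTB_CAN_SEND and htb_hysteresis is
 * enabled. With cl->ctokens + *diff == 500 and cl->tokens + *diff == -2000:
 *
 *   500   >= htb_lowater() == -10000  -> not HTB_CANT_SEND
 *   -2000 >= htb_hiwater() == -10000  -> the class stays HTB_CAN_SEND
 *
 * With hysteresis disabled both water marks are 0, so the same numbers would
 * already yield HTB_MAY_BORROW and cost an extra pair of mode transitions.
 */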
541
542 /**
543  * htb_change_class_mode - changes a class's mode
544  * @q: the priority event queue
545  * @cl: the target class
546  * @diff: diff time in nanoseconds
547  *
548  * This should be the only way to change a class's mode under normal
549  * circumstances. The routine will update the feed list linkage, change the
550  * mode and add the class to the wait event queue if appropriate. The new mode
551  * should be different from the old one and cl->pq_key has to be valid if
552  * changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
553  */
554 static void
555 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
556 {
557         enum htb_cmode new_mode = htb_class_mode(cl, diff);
558
559         if (new_mode == cl->cmode)
560                 return;
561
562         if (new_mode == HTB_CANT_SEND) {
563                 cl->overlimits++;
564                 q->overlimits++;
565         }
566
567         if (cl->prio_activity) {        /* not necessary: speed optimization */
568                 if (cl->cmode != HTB_CANT_SEND)
569                         htb_deactivate_prios(q, cl);
570                 cl->cmode = new_mode;
571                 if (new_mode != HTB_CANT_SEND)
572                         htb_activate_prios(q, cl);
573         } else
574                 cl->cmode = new_mode;
575 }
576
577 /**
578  * htb_activate - inserts leaf cl into appropriate active feeds
579  * @q: the priority event queue
580  * @cl: the target class
581  *
582  * The routine learns the (new) priority of the leaf and activates the
583  * feed chain for that prio. It can safely be called on an already
584  * active leaf.
585  */
586 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
587 {
588         WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
589
590         if (!cl->prio_activity) {
591                 cl->prio_activity = 1 << cl->prio;
592                 htb_activate_prios(q, cl);
593         }
594 }
595
596 /**
597  * htb_deactivate - remove leaf cl from active feeds
598  * @q: the priority event queue
599  * @cl: the target class
600  *
601  * Make sure that the leaf is active. In other words, it can't be called
602  * with a non-active leaf.
603  */
604 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
605 {
606         WARN_ON(!cl->prio_activity);
607
608         htb_deactivate_prios(q, cl);
609         cl->prio_activity = 0;
610 }
611
612 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
613                        struct sk_buff **to_free)
614 {
615         int ret;
616         unsigned int len = qdisc_pkt_len(skb);
617         struct htb_sched *q = qdisc_priv(sch);
618         struct htb_class *cl = htb_classify(skb, sch, &ret);
619
620         if (cl == HTB_DIRECT) {
621                 /* enqueue to helper queue */
622                 if (q->direct_queue.qlen < q->direct_qlen) {
623                         __qdisc_enqueue_tail(skb, &q->direct_queue);
624                         q->direct_pkts++;
625                 } else {
626                         return qdisc_drop(skb, sch, to_free);
627                 }
628 #ifdef CONFIG_NET_CLS_ACT
629         } else if (!cl) {
630                 if (ret & __NET_XMIT_BYPASS)
631                         qdisc_qstats_drop(sch);
632                 __qdisc_drop(skb, to_free);
633                 return ret;
634 #endif
635         } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
636                                         to_free)) != NET_XMIT_SUCCESS) {
637                 if (net_xmit_drop_count(ret)) {
638                         qdisc_qstats_drop(sch);
639                         cl->drops++;
640                 }
641                 return ret;
642         } else {
643                 htb_activate(q, cl);
644         }
645
646         sch->qstats.backlog += len;
647         sch->q.qlen++;
648         return NET_XMIT_SUCCESS;
649 }
650
651 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
652 {
653         s64 toks = diff + cl->tokens;
654
655         if (toks > cl->buffer)
656                 toks = cl->buffer;
657         toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
658         if (toks <= -cl->mbuffer)
659                 toks = 1 - cl->mbuffer;
660
661         cl->tokens = toks;
662 }
663
664 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
665 {
666         s64 toks = diff + cl->ctokens;
667
668         if (toks > cl->cbuffer)
669                 toks = cl->cbuffer;
670         toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
671         if (toks <= -cl->mbuffer)
672                 toks = 1 - cl->mbuffer;
673
674         cl->ctokens = toks;
675 }
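/* For illustration: with diff == 4000, cl->tokens == 1000 and
 * cl->buffer == 3000, the refill in htb_accnt_tokens() is first clamped to
 * the bucket depth (toks becomes 3000, not 5000); a packet whose transmit
 * time psched_l2t_ns(&cl->rate, bytes) is 2500 then leaves cl->tokens == 500.
 * The lower clamp at 1 - cl->mbuffer bounds how deep into debt a class can
 * go when a packet costs much more than the tokens it has left.
 */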
676
677 /**
678  * htb_charge_class - charges amount "bytes" to leaf and ancestors
679  * @q: the priority event queue
680  * @cl: the class to start iterating from
681  * @level: the minimum level to account
682  * @skb: the socket buffer
683  *
684  * The routine assumes that a packet "bytes" long was dequeued from leaf cl
685  * borrowing from "level". It accounts the bytes to the ceil leaky bucket for
686  * the leaf and all ancestors and to the rate bucket for ancestors at levels
687  * "level" and higher. It also handles a possible change of mode resulting
688  * from the update. Note that the mode can also increase here (MAY_BORROW to
689  * CAN_SEND) because we can use a more precise clock than the event queue here.
690  * In such a case we remove the class from the event queue first.
691  */
692 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
693                              int level, struct sk_buff *skb)
694 {
695         int bytes = qdisc_pkt_len(skb);
696         enum htb_cmode old_mode;
697         s64 diff;
698
699         while (cl) {
700                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
701                 if (cl->level >= level) {
702                         if (cl->level == level)
703                                 cl->xstats.lends++;
704                         htb_accnt_tokens(cl, bytes, diff);
705                 } else {
706                         cl->xstats.borrows++;
707                         cl->tokens += diff;     /* we moved t_c; update tokens */
708                 }
709                 htb_accnt_ctokens(cl, bytes, diff);
710                 cl->t_c = q->now;
711
712                 old_mode = cl->cmode;
713                 diff = 0;
714                 htb_change_class_mode(q, cl, &diff);
715                 if (old_mode != cl->cmode) {
716                         if (old_mode != HTB_CAN_SEND)
717                                 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
718                         if (cl->cmode != HTB_CAN_SEND)
719                                 htb_add_to_wait_tree(q, cl, diff);
720                 }
721
722                 /* update basic stats except for leaves which are already updated */
723                 if (cl->level)
724                         bstats_update(&cl->bstats, skb);
725
726                 cl = cl->parent;
727         }
728 }
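/* For illustration: with a hierarchy leaf (level 0) -> inner (level 6) ->
 * root (level 7) and a dequeue that borrowed from "level" == 6, the loop
 * above charges the ceil bucket of all three classes, but the rate bucket
 * only of the inner and root classes (cl->level >= level). The leaf merely
 * refreshes its tokens for the elapsed time and counts a borrow, while the
 * inner class, whose level equals "level", counts a lend.
 */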
729
730 /**
731  * htb_do_events - make mode changes to classes at the level
732  * @q: the priority event queue
733  * @level: which wait_pq in 'q->hlevel'
734  * @start: start jiffies
735  *
736  * Scans event queue for pending events and applies them. Returns time of
737  * next pending event (0 for no event in pq, q->now for too many events).
738  * Note: Applied are events which have cl->pq_key <= q->now.
739  */
740 static s64 htb_do_events(struct htb_sched *q, const int level,
741                          unsigned long start)
742 {
743         /* don't run for longer than 2 jiffies; 2 is used instead of
744          * 1 to simplify things when jiffy is going to be incremented
745          * too soon
746          */
747         unsigned long stop_at = start + 2;
748         struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
749
750         while (time_before(jiffies, stop_at)) {
751                 struct htb_class *cl;
752                 s64 diff;
753                 struct rb_node *p = rb_first(wait_pq);
754
755                 if (!p)
756                         return 0;
757
758                 cl = rb_entry(p, struct htb_class, pq_node);
759                 if (cl->pq_key > q->now)
760                         return cl->pq_key;
761
762                 htb_safe_rb_erase(p, wait_pq);
763                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
764                 htb_change_class_mode(q, cl, &diff);
765                 if (cl->cmode != HTB_CAN_SEND)
766                         htb_add_to_wait_tree(q, cl, diff);
767         }
768
769         /* too much load - let's continue after a break for scheduling */
770         if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
771                 pr_warn("htb: too many events!\n");
772                 q->warned |= HTB_WARN_TOOMANYEVENTS;
773         }
774
775         return q->now;
776 }
777
778 /* Returns class->node+prio from the id-tree where the class's id is >= id.
779  * NULL if no such one exists.
780  */
781 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
782                                               u32 id)
783 {
784         struct rb_node *r = NULL;
785         while (n) {
786                 struct htb_class *cl =
787                     rb_entry(n, struct htb_class, node[prio]);
788
789                 if (id > cl->common.classid) {
790                         n = n->rb_right;
791                 } else if (id < cl->common.classid) {
792                         r = n;
793                         n = n->rb_left;
794                 } else {
795                         return n;
796                 }
797         }
798         return r;
799 }
800
801 /**
802  * htb_lookup_leaf - returns next leaf class in DRR order
803  * @hprio: the current one
804  * @prio: which prio in class
805  *
806  * Find the leaf the current feed pointer points to.
807  */
808 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
809 {
810         int i;
811         struct {
812                 struct rb_node *root;
813                 struct rb_node **pptr;
814                 u32 *pid;
815         } stk[TC_HTB_MAXDEPTH], *sp = stk;
816
817         BUG_ON(!hprio->row.rb_node);
818         sp->root = hprio->row.rb_node;
819         sp->pptr = &hprio->ptr;
820         sp->pid = &hprio->last_ptr_id;
821
822         for (i = 0; i < 65535; i++) {
823                 if (!*sp->pptr && *sp->pid) {
824                         /* ptr was invalidated but id is valid - try to recover
825                          * the original or next ptr
826                          */
827                         *sp->pptr =
828                             htb_id_find_next_upper(prio, sp->root, *sp->pid);
829                 }
830                 *sp->pid = 0;   /* ptr is valid now, so remove this hint as it
831                                  * can become out of date quickly
832                                  */
833                 if (!*sp->pptr) {       /* we are at right end; rewind & go up */
834                         *sp->pptr = sp->root;
835                         while ((*sp->pptr)->rb_left)
836                                 *sp->pptr = (*sp->pptr)->rb_left;
837                         if (sp > stk) {
838                                 sp--;
839                                 if (!*sp->pptr) {
840                                         WARN_ON(1);
841                                         return NULL;
842                                 }
843                                 htb_next_rb_node(sp->pptr);
844                         }
845                 } else {
846                         struct htb_class *cl;
847                         struct htb_prio *clp;
848
849                         cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
850                         if (!cl->level)
851                                 return cl;
852                         clp = &cl->inner.clprio[prio];
853                         (++sp)->root = clp->feed.rb_node;
854                         sp->pptr = &clp->ptr;
855                         sp->pid = &clp->last_ptr_id;
856                 }
857         }
858         WARN_ON(1);
859         return NULL;
860 }
861
862 /* dequeues a packet at the given priority and level; call only if
863  * you are sure that there is an active class at prio/level
864  */
865 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
866                                         const int level)
867 {
868         struct sk_buff *skb = NULL;
869         struct htb_class *cl, *start;
870         struct htb_level *hlevel = &q->hlevel[level];
871         struct htb_prio *hprio = &hlevel->hprio[prio];
872
873         /* look initial class up in the row */
874         start = cl = htb_lookup_leaf(hprio, prio);
875
876         do {
877 next:
878                 if (unlikely(!cl))
879                         return NULL;
880
881                 /* class can be empty - it is unlikely but can be true if leaf
882                  * qdisc drops packets in enqueue routine or if someone used
883                  * graft operation on the leaf since last dequeue;
884                  * simply deactivate and skip such class
885                  */
886                 if (unlikely(cl->leaf.q->q.qlen == 0)) {
887                         struct htb_class *next;
888                         htb_deactivate(q, cl);
889
890                         /* row/level might become empty */
891                         if ((q->row_mask[level] & (1 << prio)) == 0)
892                                 return NULL;
893
894                         next = htb_lookup_leaf(hprio, prio);
895
896                         if (cl == start)        /* fix start if we just deleted it */
897                                 start = next;
898                         cl = next;
899                         goto next;
900                 }
901
902                 skb = cl->leaf.q->dequeue(cl->leaf.q);
903                 if (likely(skb != NULL))
904                         break;
905
906                 qdisc_warn_nonwc("htb", cl->leaf.q);
907                 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
908                                          &q->hlevel[0].hprio[prio].ptr);
909                 cl = htb_lookup_leaf(hprio, prio);
910
911         } while (cl != start);
912
913         if (likely(skb != NULL)) {
914                 bstats_update(&cl->bstats, skb);
915                 cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
916                 if (cl->leaf.deficit[level] < 0) {
917                         cl->leaf.deficit[level] += cl->quantum;
918                         htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
919                                                  &q->hlevel[0].hprio[prio].ptr);
920                 }
921                 /* this used to be after charge_class but this constellation
922                  * gives us slightly better performance
923                  */
924                 if (!cl->leaf.q->q.qlen)
925                         htb_deactivate(q, cl);
926                 htb_charge_class(q, cl, level, skb);
927         }
928         return skb;
929 }
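/* For illustration: with cl->quantum == 1500 and a current deficit of 500 at
 * this level, dequeuing an 800-byte packet drives the deficit to -300; only
 * then is the quantum added back (deficit becomes 1200) and the round-robin
 * pointer advanced to the next class at this prio, which is what gives
 * classes with a larger quantum a proportionally larger share of each round.
 */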
930
931 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
932 {
933         struct sk_buff *skb;
934         struct htb_sched *q = qdisc_priv(sch);
935         int level;
936         s64 next_event;
937         unsigned long start_at;
938
939         /* try to dequeue direct packets as high prio (!) to minimize cpu work */
940         skb = __qdisc_dequeue_head(&q->direct_queue);
941         if (skb != NULL) {
942 ok:
943                 qdisc_bstats_update(sch, skb);
944                 qdisc_qstats_backlog_dec(sch, skb);
945                 sch->q.qlen--;
946                 return skb;
947         }
948
949         if (!sch->q.qlen)
950                 goto fin;
951         q->now = ktime_get_ns();
952         start_at = jiffies;
953
954         next_event = q->now + 5LLU * NSEC_PER_SEC;
955
956         for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
957                 /* common case optimization - skip event handler quickly */
958                 int m;
959                 s64 event = q->near_ev_cache[level];
960
961                 if (q->now >= event) {
962                         event = htb_do_events(q, level, start_at);
963                         if (!event)
964                                 event = q->now + NSEC_PER_SEC;
965                         q->near_ev_cache[level] = event;
966                 }
967
968                 if (next_event > event)
969                         next_event = event;
970
971                 m = ~q->row_mask[level];
972                 while (m != (int)(-1)) {
973                         int prio = ffz(m);
974
975                         m |= 1 << prio;
976                         skb = htb_dequeue_tree(q, prio, level);
977                         if (likely(skb != NULL))
978                                 goto ok;
979                 }
980         }
981         if (likely(next_event > q->now))
982                 qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
983         else
984                 schedule_work(&q->work);
985 fin:
986         return skb;
987 }
988
989 /* reset all classes */
990 /* always called under BH & queue lock */
991 static void htb_reset(struct Qdisc *sch)
992 {
993         struct htb_sched *q = qdisc_priv(sch);
994         struct htb_class *cl;
995         unsigned int i;
996
997         for (i = 0; i < q->clhash.hashsize; i++) {
998                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
999                         if (cl->level)
1000                                 memset(&cl->inner, 0, sizeof(cl->inner));
1001                         else {
1002                                 if (cl->leaf.q && !q->offload)
1003                                         qdisc_reset(cl->leaf.q);
1004                         }
1005                         cl->prio_activity = 0;
1006                         cl->cmode = HTB_CAN_SEND;
1007                 }
1008         }
1009         qdisc_watchdog_cancel(&q->watchdog);
1010         __qdisc_reset_queue(&q->direct_queue);
1011         memset(q->hlevel, 0, sizeof(q->hlevel));
1012         memset(q->row_mask, 0, sizeof(q->row_mask));
1013 }
1014
1015 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1016         [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1017         [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
1018         [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1019         [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1020         [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1021         [TCA_HTB_RATE64] = { .type = NLA_U64 },
1022         [TCA_HTB_CEIL64] = { .type = NLA_U64 },
1023         [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1024 };
1025
1026 static void htb_work_func(struct work_struct *work)
1027 {
1028         struct htb_sched *q = container_of(work, struct htb_sched, work);
1029         struct Qdisc *sch = q->watchdog.qdisc;
1030
1031         rcu_read_lock();
1032         __netif_schedule(qdisc_root(sch));
1033         rcu_read_unlock();
1034 }
1035
1036 static void htb_set_lockdep_class_child(struct Qdisc *q)
1037 {
1038         static struct lock_class_key child_key;
1039
1040         lockdep_set_class(qdisc_lock(q), &child_key);
1041 }
1042
1043 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1044 {
1045         return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1046 }
1047
1048 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1049                     struct netlink_ext_ack *extack)
1050 {
1051         struct net_device *dev = qdisc_dev(sch);
1052         struct tc_htb_qopt_offload offload_opt;
1053         struct htb_sched *q = qdisc_priv(sch);
1054         struct nlattr *tb[TCA_HTB_MAX + 1];
1055         struct tc_htb_glob *gopt;
1056         unsigned int ntx;
1057         bool offload;
1058         int err;
1059
1060         qdisc_watchdog_init(&q->watchdog, sch);
1061         INIT_WORK(&q->work, htb_work_func);
1062
1063         if (!opt)
1064                 return -EINVAL;
1065
1066         err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1067         if (err)
1068                 return err;
1069
1070         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1071                                           NULL);
1072         if (err < 0)
1073                 return err;
1074
1075         if (!tb[TCA_HTB_INIT])
1076                 return -EINVAL;
1077
1078         gopt = nla_data(tb[TCA_HTB_INIT]);
1079         if (gopt->version != HTB_VER >> 16)
1080                 return -EINVAL;
1081
1082         offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1083
1084         if (offload) {
1085                 if (sch->parent != TC_H_ROOT)
1086                         return -EOPNOTSUPP;
1087
1088                 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1089                         return -EOPNOTSUPP;
1090
1091                 q->num_direct_qdiscs = dev->real_num_tx_queues;
1092                 q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1093                                            sizeof(*q->direct_qdiscs),
1094                                            GFP_KERNEL);
1095                 if (!q->direct_qdiscs)
1096                         return -ENOMEM;
1097         }
1098
1099         err = qdisc_class_hash_init(&q->clhash);
1100         if (err < 0)
1101                 goto err_free_direct_qdiscs;
1102
1103         qdisc_skb_head_init(&q->direct_queue);
1104
1105         if (tb[TCA_HTB_DIRECT_QLEN])
1106                 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1107         else
1108                 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1109
1110         if ((q->rate2quantum = gopt->rate2quantum) < 1)
1111                 q->rate2quantum = 1;
1112         q->defcls = gopt->defcls;
1113
1114         if (!offload)
1115                 return 0;
1116
1117         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1118                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1119                 struct Qdisc *qdisc;
1120
1121                 qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1122                                           TC_H_MAKE(sch->handle, 0), extack);
1123                 if (!qdisc) {
1124                         err = -ENOMEM;
1125                         goto err_free_qdiscs;
1126                 }
1127
1128                 htb_set_lockdep_class_child(qdisc);
1129                 q->direct_qdiscs[ntx] = qdisc;
1130                 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1131         }
1132
1133         sch->flags |= TCQ_F_MQROOT;
1134
1135         offload_opt = (struct tc_htb_qopt_offload) {
1136                 .command = TC_HTB_CREATE,
1137                 .parent_classid = TC_H_MAJ(sch->handle) >> 16,
1138                 .classid = TC_H_MIN(q->defcls),
1139                 .extack = extack,
1140         };
1141         err = htb_offload(dev, &offload_opt);
1142         if (err)
1143                 goto err_free_qdiscs;
1144
1145         /* Defer this assignment, so that htb_destroy skips offload-related
1146          * parts (especially calling ndo_setup_tc) on errors.
1147          */
1148         q->offload = true;
1149
1150         return 0;
1151
1152 err_free_qdiscs:
1153         for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
1154              ntx++)
1155                 qdisc_put(q->direct_qdiscs[ntx]);
1156
1157         qdisc_class_hash_destroy(&q->clhash);
1158         /* Prevent use-after-free and double-free when htb_destroy gets called.
1159          */
1160         q->clhash.hash = NULL;
1161         q->clhash.hashsize = 0;
1162
1163 err_free_direct_qdiscs:
1164         kfree(q->direct_qdiscs);
1165         q->direct_qdiscs = NULL;
1166         return err;
1167 }
1168
1169 static void htb_attach_offload(struct Qdisc *sch)
1170 {
1171         struct net_device *dev = qdisc_dev(sch);
1172         struct htb_sched *q = qdisc_priv(sch);
1173         unsigned int ntx;
1174
1175         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1176                 struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1177
1178                 old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1179                 qdisc_put(old);
1180                 qdisc_hash_add(qdisc, false);
1181         }
1182         for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1183                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1184                 struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1185
1186                 qdisc_put(old);
1187         }
1188
1189         kfree(q->direct_qdiscs);
1190         q->direct_qdiscs = NULL;
1191 }
1192
1193 static void htb_attach_software(struct Qdisc *sch)
1194 {
1195         struct net_device *dev = qdisc_dev(sch);
1196         unsigned int ntx;
1197
1198         /* Resemble qdisc_graft behavior. */
1199         for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1200                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1201                 struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1202
1203                 qdisc_refcount_inc(sch);
1204
1205                 qdisc_put(old);
1206         }
1207 }
1208
1209 static void htb_attach(struct Qdisc *sch)
1210 {
1211         struct htb_sched *q = qdisc_priv(sch);
1212
1213         if (q->offload)
1214                 htb_attach_offload(sch);
1215         else
1216                 htb_attach_software(sch);
1217 }
1218
1219 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1220 {
1221         struct htb_sched *q = qdisc_priv(sch);
1222         struct nlattr *nest;
1223         struct tc_htb_glob gopt;
1224
1225         if (q->offload)
1226                 sch->flags |= TCQ_F_OFFLOADED;
1227         else
1228                 sch->flags &= ~TCQ_F_OFFLOADED;
1229
1230         sch->qstats.overlimits = q->overlimits;
1231         /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1232          * no change can happen to the qdisc parameters.
1233          */
1234
1235         gopt.direct_pkts = q->direct_pkts;
1236         gopt.version = HTB_VER;
1237         gopt.rate2quantum = q->rate2quantum;
1238         gopt.defcls = q->defcls;
1239         gopt.debug = 0;
1240
1241         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1242         if (nest == NULL)
1243                 goto nla_put_failure;
1244         if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1245             nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1246                 goto nla_put_failure;
1247         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1248                 goto nla_put_failure;
1249
1250         return nla_nest_end(skb, nest);
1251
1252 nla_put_failure:
1253         nla_nest_cancel(skb, nest);
1254         return -1;
1255 }
1256
1257 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1258                           struct sk_buff *skb, struct tcmsg *tcm)
1259 {
1260         struct htb_class *cl = (struct htb_class *)arg;
1261         struct htb_sched *q = qdisc_priv(sch);
1262         struct nlattr *nest;
1263         struct tc_htb_opt opt;
1264
1265         /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1266          * no change can happen to the class parameters.
1267          */
1268         tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1269         tcm->tcm_handle = cl->common.classid;
1270         if (!cl->level && cl->leaf.q)
1271                 tcm->tcm_info = cl->leaf.q->handle;
1272
1273         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1274         if (nest == NULL)
1275                 goto nla_put_failure;
1276
1277         memset(&opt, 0, sizeof(opt));
1278
1279         psched_ratecfg_getrate(&opt.rate, &cl->rate);
1280         opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1281         psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1282         opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1283         opt.quantum = cl->quantum;
1284         opt.prio = cl->prio;
1285         opt.level = cl->level;
1286         if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1287                 goto nla_put_failure;
1288         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1289                 goto nla_put_failure;
1290         if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1291             nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1292                               TCA_HTB_PAD))
1293                 goto nla_put_failure;
1294         if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1295             nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1296                               TCA_HTB_PAD))
1297                 goto nla_put_failure;
1298
1299         return nla_nest_end(skb, nest);
1300
1301 nla_put_failure:
1302         nla_nest_cancel(skb, nest);
1303         return -1;
1304 }
1305
1306 static void htb_offload_aggregate_stats(struct htb_sched *q,
1307                                         struct htb_class *cl)
1308 {
1309         struct htb_class *c;
1310         unsigned int i;
1311
1312         memset(&cl->bstats, 0, sizeof(cl->bstats));
1313
1314         for (i = 0; i < q->clhash.hashsize; i++) {
1315                 hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1316                         struct htb_class *p = c;
1317
1318                         while (p && p->level < cl->level)
1319                                 p = p->parent;
1320
1321                         if (p != cl)
1322                                 continue;
1323
1324                         cl->bstats.bytes += c->bstats_bias.bytes;
1325                         cl->bstats.packets += c->bstats_bias.packets;
1326                         if (c->level == 0) {
1327                                 cl->bstats.bytes += c->leaf.q->bstats.bytes;
1328                                 cl->bstats.packets += c->leaf.q->bstats.packets;
1329                         }
1330                 }
1331         }
1332 }
1333
1334 static int
1335 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1336 {
1337         struct htb_class *cl = (struct htb_class *)arg;
1338         struct htb_sched *q = qdisc_priv(sch);
1339         struct gnet_stats_queue qs = {
1340                 .drops = cl->drops,
1341                 .overlimits = cl->overlimits,
1342         };
1343         __u32 qlen = 0;
1344
1345         if (!cl->level && cl->leaf.q)
1346                 qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1347
1348         cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1349                                     INT_MIN, INT_MAX);
1350         cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1351                                      INT_MIN, INT_MAX);
1352
1353         if (q->offload) {
1354                 if (!cl->level) {
1355                         if (cl->leaf.q)
1356                                 cl->bstats = cl->leaf.q->bstats;
1357                         else
1358                                 memset(&cl->bstats, 0, sizeof(cl->bstats));
1359                         cl->bstats.bytes += cl->bstats_bias.bytes;
1360                         cl->bstats.packets += cl->bstats_bias.packets;
1361                 } else {
1362                         htb_offload_aggregate_stats(q, cl);
1363                 }
1364         }
1365
1366         if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1367                                   d, NULL, &cl->bstats) < 0 ||
1368             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1369             gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1370                 return -1;
1371
1372         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1373 }
1374
1375 static struct netdev_queue *
1376 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1377 {
1378         struct net_device *dev = qdisc_dev(sch);
1379         struct tc_htb_qopt_offload offload_opt;
1380         struct htb_sched *q = qdisc_priv(sch);
1381         int err;
1382
1383         if (!q->offload)
1384                 return sch->dev_queue;
1385
1386         offload_opt = (struct tc_htb_qopt_offload) {
1387                 .command = TC_HTB_LEAF_QUERY_QUEUE,
1388                 .classid = TC_H_MIN(tcm->tcm_parent),
1389         };
1390         err = htb_offload(dev, &offload_opt);
1391         if (err || offload_opt.qid >= dev->num_tx_queues)
1392                 return NULL;
1393         return netdev_get_tx_queue(dev, offload_opt.qid);
1394 }
1395
1396 static struct Qdisc *
1397 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1398 {
1399         struct net_device *dev = dev_queue->dev;
1400         struct Qdisc *old_q;
1401
1402         if (dev->flags & IFF_UP)
1403                 dev_deactivate(dev);
1404         old_q = dev_graft_qdisc(dev_queue, new_q);
1405         if (new_q)
1406                 new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1407         if (dev->flags & IFF_UP)
1408                 dev_activate(dev);
1409
1410         return old_q;
1411 }
1412
1413 static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
1414 {
1415         struct netdev_queue *queue;
1416
1417         queue = cl->leaf.offload_queue;
1418         if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
1419                 WARN_ON(cl->leaf.q->dev_queue != queue);
1420
1421         return queue;
1422 }
1423
1424 static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
1425                                    struct htb_class *cl_new, bool destroying)
1426 {
1427         struct netdev_queue *queue_old, *queue_new;
1428         struct net_device *dev = qdisc_dev(sch);
1429
1430         queue_old = htb_offload_get_queue(cl_old);
1431         queue_new = htb_offload_get_queue(cl_new);
1432
1433         if (!destroying) {
1434                 struct Qdisc *qdisc;
1435
1436                 if (dev->flags & IFF_UP)
1437                         dev_deactivate(dev);
1438                 qdisc = dev_graft_qdisc(queue_old, NULL);
1439                 WARN_ON(qdisc != cl_old->leaf.q);
1440         }
1441
1442         if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
1443                 cl_old->leaf.q->dev_queue = queue_new;
1444         cl_old->leaf.offload_queue = queue_new;
1445
1446         if (!destroying) {
1447                 struct Qdisc *qdisc;
1448
1449                 qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
1450                 if (dev->flags & IFF_UP)
1451                         dev_activate(dev);
1452                 WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1453         }
1454 }
1455
1456 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1457                      struct Qdisc **old, struct netlink_ext_ack *extack)
1458 {
1459         struct netdev_queue *dev_queue = sch->dev_queue;
1460         struct htb_class *cl = (struct htb_class *)arg;
1461         struct htb_sched *q = qdisc_priv(sch);
1462         struct Qdisc *old_q;
1463
1464         if (cl->level)
1465                 return -EINVAL;
1466
1467         if (q->offload)
1468                 dev_queue = htb_offload_get_queue(cl);
1469
1470         if (!new) {
1471                 new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1472                                         cl->common.classid, extack);
1473                 if (!new)
1474                         return -ENOBUFS;
1475         }
1476
1477         if (q->offload) {
1478                 htb_set_lockdep_class_child(new);
1479                 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1480                 qdisc_refcount_inc(new);
1481                 old_q = htb_graft_helper(dev_queue, new);
1482         }
1483
1484         *old = qdisc_replace(sch, new, &cl->leaf.q);
1485
1486         if (q->offload) {
1487                 WARN_ON(old_q != *old);
1488                 qdisc_put(old_q);
1489         }
1490
1491         return 0;
1492 }
1493
1494 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1495 {
1496         struct htb_class *cl = (struct htb_class *)arg;
1497         return !cl->level ? cl->leaf.q : NULL;
1498 }
1499
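     /* Class ops ->qlen_notify: the class's qdisc has run out of packets,
      * so remove the class from the active priority rows/feeds.
      */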
1500 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1501 {
1502         struct htb_class *cl = (struct htb_class *)arg;
1503
1504         htb_deactivate(qdisc_priv(sch), cl);
1505 }
1506
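     /* Return 1 if cl is the only child of its parent, i.e. deleting it
      * leaves the parent childless and the parent must become a leaf again;
      * root classes always return 0.
      */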
1507 static inline int htb_parent_last_child(struct htb_class *cl)
1508 {
1509         if (!cl->parent)
1510                 /* the root class */
1511                 return 0;
1512         if (cl->parent->children > 1)
1513                 /* not the last child */
1514                 return 0;
1515         return 1;
1516 }
1517
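     /* cl is the last child being removed: turn its parent back into a leaf,
      * dropping the parent from the event queue if needed, attaching new_q
      * (or noop_qdisc), refilling its tokens and putting it into
      * HTB_CAN_SEND. With offload the parent inherits the child's TX queue.
      */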
1518 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1519                                struct Qdisc *new_q)
1520 {
1521         struct htb_sched *q = qdisc_priv(sch);
1522         struct htb_class *parent = cl->parent;
1523
1524         WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1525
1526         if (parent->cmode != HTB_CAN_SEND)
1527                 htb_safe_rb_erase(&parent->pq_node,
1528                                   &q->hlevel[parent->level].wait_pq);
1529
1530         parent->level = 0;
1531         memset(&parent->inner, 0, sizeof(parent->inner));
1532         parent->leaf.q = new_q ? new_q : &noop_qdisc;
1533         parent->tokens = parent->buffer;
1534         parent->ctokens = parent->cbuffer;
1535         parent->t_c = ktime_get_ns();
1536         parent->cmode = HTB_CAN_SEND;
1537         if (q->offload)
1538                 parent->leaf.offload_queue = cl->leaf.offload_queue;
1539 }
1540
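     /* Offload counterpart of the parent-to-leaf conversion: graft new_q
      * onto the parent's TX queue (taking the extra reference the queue
      * holds) and expect that only a builtin qdisc was attached there.
      */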
1541 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1542                                        struct netdev_queue *dev_queue,
1543                                        struct Qdisc *new_q)
1544 {
1545         struct Qdisc *old_q;
1546
1547         /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1548         if (new_q)
1549                 qdisc_refcount_inc(new_q);
1550         old_q = htb_graft_helper(dev_queue, new_q);
1551         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1552 }
1553
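     /* Tear down the offload state of a leaf class: detach its qdisc from
      * the TX queue (unless the whole qdisc is being destroyed, where the
      * core does that), fold the leaf's byte/packet counters into the
      * parent's bstats_bias and issue the matching TC_HTB_LEAF_DEL* command.
      * If this was not the last child, the driver may report that it moved
      * another class onto the freed queue, in which case that class's
      * software qdisc is re-homed to match.
      */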
1554 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1555                                      bool last_child, bool destroying,
1556                                      struct netlink_ext_ack *extack)
1557 {
1558         struct tc_htb_qopt_offload offload_opt;
1559         struct netdev_queue *dev_queue;
1560         struct Qdisc *q = cl->leaf.q;
1561         struct Qdisc *old;
1562         int err;
1563
1564         if (cl->level)
1565                 return -EINVAL;
1566
1567         WARN_ON(!q);
1568         dev_queue = htb_offload_get_queue(cl);
1569         /* When destroying, the caller (qdisc_graft) grafts the new qdisc and
1570          * invokes qdisc_put for the qdisc being destroyed, so
1571          * htb_destroy_class_offload does not need to graft or qdisc_put it.
1572          */
1573         if (!destroying) {
1574                 old = htb_graft_helper(dev_queue, NULL);
1575                 /* Last qdisc grafted should be the same as cl->leaf.q when
1576                  * calling htb_delete.
1577                  */
1578                 WARN_ON(old != q);
1579         }
1580
1581         if (cl->parent) {
1582                 cl->parent->bstats_bias.bytes += q->bstats.bytes;
1583                 cl->parent->bstats_bias.packets += q->bstats.packets;
1584         }
1585
1586         offload_opt = (struct tc_htb_qopt_offload) {
1587                 .command = !last_child ? TC_HTB_LEAF_DEL :
1588                            destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1589                            TC_HTB_LEAF_DEL_LAST,
1590                 .classid = cl->common.classid,
1591                 .extack = extack,
1592         };
1593         err = htb_offload(qdisc_dev(sch), &offload_opt);
1594
1595         if (!destroying) {
1596                 if (!err)
1597                         qdisc_put(old);
1598                 else
1599                         htb_graft_helper(dev_queue, old);
1600         }
1601
1602         if (last_child)
1603                 return err;
1604
1605         if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
1606                 u32 classid = TC_H_MAJ(sch->handle) |
1607                               TC_H_MIN(offload_opt.classid);
1608                 struct htb_class *moved_cl = htb_find(classid, sch);
1609
1610                 htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
1611         }
1612
1613         return err;
1614 }
1615
1616 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1617 {
1618         if (!cl->level) {
1619                 WARN_ON(!cl->leaf.q);
1620                 qdisc_put(cl->leaf.q);
1621         }
1622         gen_kill_estimator(&cl->rate_est);
1623         tcf_block_put(cl->block);
1624         kfree(cl);
1625 }
1626
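     /* Qdisc ->destroy: cancel pending work and the watchdog, release the
      * filter blocks first (filters need their classes alive), then free all
      * classes. With offload, leaves must go before their parents, hence the
      * loop that destroys leaves, converts emptied parents back into leaves
      * and repeats until every class is gone, finishing with TC_HTB_DESTROY
      * and the per-queue direct qdiscs.
      */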
1627 static void htb_destroy(struct Qdisc *sch)
1628 {
1629         struct net_device *dev = qdisc_dev(sch);
1630         struct tc_htb_qopt_offload offload_opt;
1631         struct htb_sched *q = qdisc_priv(sch);
1632         struct hlist_node *next;
1633         bool nonempty, changed;
1634         struct htb_class *cl;
1635         unsigned int i;
1636
1637         cancel_work_sync(&q->work);
1638         qdisc_watchdog_cancel(&q->watchdog);
1639         /* This line used to be after the htb_destroy_class call below
1640          * and surprisingly it worked in 2.4. But it must precede it,
1641          * because filters need their target class alive to be able to
1642          * call unbind_filter on it (without an Oops).
1643          */
1644         tcf_block_put(q->block);
1645
1646         for (i = 0; i < q->clhash.hashsize; i++) {
1647                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1648                         tcf_block_put(cl->block);
1649                         cl->block = NULL;
1650                 }
1651         }
1652
1653         do {
1654                 nonempty = false;
1655                 changed = false;
1656                 for (i = 0; i < q->clhash.hashsize; i++) {
1657                         hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1658                                                   common.hnode) {
1659                                 bool last_child;
1660
1661                                 if (!q->offload) {
1662                                         htb_destroy_class(sch, cl);
1663                                         continue;
1664                                 }
1665
1666                                 nonempty = true;
1667
1668                                 if (cl->level)
1669                                         continue;
1670
1671                                 changed = true;
1672
1673                                 last_child = htb_parent_last_child(cl);
1674                                 htb_destroy_class_offload(sch, cl, last_child,
1675                                                           true, NULL);
1676                                 qdisc_class_hash_remove(&q->clhash,
1677                                                         &cl->common);
1678                                 if (cl->parent)
1679                                         cl->parent->children--;
1680                                 if (last_child)
1681                                         htb_parent_to_leaf(sch, cl, NULL);
1682                                 htb_destroy_class(sch, cl);
1683                         }
1684                 }
1685         } while (changed);
1686         WARN_ON(nonempty);
1687
1688         qdisc_class_hash_destroy(&q->clhash);
1689         __qdisc_reset_queue(&q->direct_queue);
1690
1691         if (!q->offload)
1692                 return;
1693
1694         offload_opt = (struct tc_htb_qopt_offload) {
1695                 .command = TC_HTB_DESTROY,
1696         };
1697         htb_offload(dev, &offload_opt);
1698
1699         if (!q->direct_qdiscs)
1700                 return;
1701         for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1702                 qdisc_put(q->direct_qdiscs[i]);
1703         kfree(q->direct_qdiscs);
1704 }
1705
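     /* Class ops ->delete: refuse if the class still has children or bound
      * filters. If it is its parent's last child, prepare a replacement
      * pfifo so the parent can become a leaf again (telling the driver first
      * in offload mode), then unlink the class under sch_tree_lock and
      * free it.
      */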
1706 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1707                       struct netlink_ext_ack *extack)
1708 {
1709         struct htb_sched *q = qdisc_priv(sch);
1710         struct htb_class *cl = (struct htb_class *)arg;
1711         struct Qdisc *new_q = NULL;
1712         int last_child = 0;
1713         int err;
1714
1715         /* TODO: why don't we allow deleting a subtree? References? Does
1716          * the tc subsystem guarantee us that in htb_destroy it holds no
1717          * class refs, so that we can remove children safely there?
1718          */
1719         if (cl->children || cl->filter_cnt)
1720                 return -EBUSY;
1721
1722         if (!cl->level && htb_parent_last_child(cl))
1723                 last_child = 1;
1724
1725         if (q->offload) {
1726                 err = htb_destroy_class_offload(sch, cl, last_child, false,
1727                                                 extack);
1728                 if (err)
1729                         return err;
1730         }
1731
1732         if (last_child) {
1733                 struct netdev_queue *dev_queue = sch->dev_queue;
1734
1735                 if (q->offload)
1736                         dev_queue = htb_offload_get_queue(cl);
1737
1738                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1739                                           cl->parent->common.classid,
1740                                           NULL);
1741                 if (q->offload) {
1742                         if (new_q)
1743                                 htb_set_lockdep_class_child(new_q);
1744                         htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1745                 }
1746         }
1747
1748         sch_tree_lock(sch);
1749
1750         if (!cl->level)
1751                 qdisc_purge_queue(cl->leaf.q);
1752
1753         /* delete from hash and active lists; the remainder is done in htb_destroy_class */
1754         qdisc_class_hash_remove(&q->clhash, &cl->common);
1755         if (cl->parent)
1756                 cl->parent->children--;
1757
1758         if (cl->prio_activity)
1759                 htb_deactivate(q, cl);
1760
1761         if (cl->cmode != HTB_CAN_SEND)
1762                 htb_safe_rb_erase(&cl->pq_node,
1763                                   &q->hlevel[cl->level].wait_pq);
1764
1765         if (last_child)
1766                 htb_parent_to_leaf(sch, cl, new_q);
1767
1768         sch_tree_unlock(sch);
1769
1770         htb_destroy_class(sch, cl);
1771         return 0;
1772 }
1773
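     /* Class ops ->change: create a new class or update an existing one.
      * Parameters are validated first (several options are rejected in
      * offload mode). For a new offloaded class a TX queue is allocated
      * (TC_HTB_LEAF_ALLOC_QUEUE) or, when the parent is currently a leaf,
      * the parent is converted to an inner node (TC_HTB_LEAF_TO_INNER);
      * existing classes are updated with TC_HTB_NODE_MODIFY. Rate, ceil,
      * quantum, prio and buffers are then applied under sch_tree_lock.
      */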
1774 static int htb_change_class(struct Qdisc *sch, u32 classid,
1775                             u32 parentid, struct nlattr **tca,
1776                             unsigned long *arg, struct netlink_ext_ack *extack)
1777 {
1778         int err = -EINVAL;
1779         struct htb_sched *q = qdisc_priv(sch);
1780         struct htb_class *cl = (struct htb_class *)*arg, *parent;
1781         struct tc_htb_qopt_offload offload_opt;
1782         struct nlattr *opt = tca[TCA_OPTIONS];
1783         struct nlattr *tb[TCA_HTB_MAX + 1];
1784         struct Qdisc *parent_qdisc = NULL;
1785         struct netdev_queue *dev_queue;
1786         struct tc_htb_opt *hopt;
1787         u64 rate64, ceil64;
1788         int warn = 0;
1789
1790         /* extract all subattrs from opt attr */
1791         if (!opt)
1792                 goto failure;
1793
1794         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1795                                           NULL);
1796         if (err < 0)
1797                 goto failure;
1798
1799         err = -EINVAL;
1800         if (tb[TCA_HTB_PARMS] == NULL)
1801                 goto failure;
1802
1803         parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1804
1805         hopt = nla_data(tb[TCA_HTB_PARMS]);
1806         if (!hopt->rate.rate || !hopt->ceil.rate)
1807                 goto failure;
1808
1809         if (q->offload) {
1810                 /* Options not supported by the offload. */
1811                 if (hopt->rate.overhead || hopt->ceil.overhead) {
1812                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
1813                         goto failure;
1814                 }
1815                 if (hopt->rate.mpu || hopt->ceil.mpu) {
1816                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
1817                         goto failure;
1818                 }
1819                 if (hopt->quantum) {
1820                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
1821                         goto failure;
1822                 }
1823                 if (hopt->prio) {
1824                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
1825                         goto failure;
1826                 }
1827         }
1828
1829         /* Keep backward compatibility with rate_table-based iproute2 tc */
1830         if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1831                 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1832                                               NULL));
1833
1834         if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1835                 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1836                                               NULL));
1837
1838         rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1839         ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1840
1841         if (!cl) {              /* new class */
1842                 struct net_device *dev = qdisc_dev(sch);
1843                 struct Qdisc *new_q, *old_q;
1844                 int prio;
1845                 struct {
1846                         struct nlattr           nla;
1847                         struct gnet_estimator   opt;
1848                 } est = {
1849                         .nla = {
1850                                 .nla_len        = nla_attr_size(sizeof(est.opt)),
1851                                 .nla_type       = TCA_RATE,
1852                         },
1853                         .opt = {
1854                                 /* 4s interval, 16s averaging constant */
1855                                 .interval       = 2,
1856                                 .ewma_log       = 2,
1857                         },
1858                 };
1859
1860                 /* check for valid classid */
1861                 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1862                     htb_find(classid, sch))
1863                         goto failure;
1864
1865                 /* check maximal depth */
1866                 if (parent && parent->parent && parent->parent->level < 2) {
1867                         pr_err("htb: tree is too deep\n");
1868                         goto failure;
1869                 }
1870                 err = -ENOBUFS;
1871                 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1872                 if (!cl)
1873                         goto failure;
1874
1875                 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1876                 if (err) {
1877                         kfree(cl);
1878                         goto failure;
1879                 }
1880                 if (htb_rate_est || tca[TCA_RATE]) {
1881                         err = gen_new_estimator(&cl->bstats, NULL,
1882                                                 &cl->rate_est,
1883                                                 NULL,
1884                                                 qdisc_root_sleeping_running(sch),
1885                                                 tca[TCA_RATE] ? : &est.nla);
1886                         if (err)
1887                                 goto err_block_put;
1888                 }
1889
1890                 cl->children = 0;
1891                 RB_CLEAR_NODE(&cl->pq_node);
1892
1893                 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1894                         RB_CLEAR_NODE(&cl->node[prio]);
1895
1896                 cl->common.classid = classid;
1897
1898                 /* Make sure nothing interrupts us between two
1899                  * ndo_setup_tc calls.
1900                  */
1901                 ASSERT_RTNL();
1902
1903                 /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1904                  * which can't be used inside of sch_tree_lock
1905                  * -- thanks to Karlis Peisenieks
1906                  */
1907                 if (!q->offload) {
1908                         dev_queue = sch->dev_queue;
1909                 } else if (!(parent && !parent->level)) {
1910                         /* Assign a dev_queue to this classid. */
1911                         offload_opt = (struct tc_htb_qopt_offload) {
1912                                 .command = TC_HTB_LEAF_ALLOC_QUEUE,
1913                                 .classid = cl->common.classid,
1914                                 .parent_classid = parent ?
1915                                         TC_H_MIN(parent->common.classid) :
1916                                         TC_HTB_CLASSID_ROOT,
1917                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1918                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1919                                 .extack = extack,
1920                         };
1921                         err = htb_offload(dev, &offload_opt);
1922                         if (err) {
1923                                 pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1924                                        err);
1925                                 goto err_kill_estimator;
1926                         }
1927                         dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1928                 } else { /* First child. */
1929                         dev_queue = htb_offload_get_queue(parent);
1930                         old_q = htb_graft_helper(dev_queue, NULL);
1931                         WARN_ON(old_q != parent->leaf.q);
1932                         offload_opt = (struct tc_htb_qopt_offload) {
1933                                 .command = TC_HTB_LEAF_TO_INNER,
1934                                 .classid = cl->common.classid,
1935                                 .parent_classid =
1936                                         TC_H_MIN(parent->common.classid),
1937                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1938                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1939                                 .extack = extack,
1940                         };
1941                         err = htb_offload(dev, &offload_opt);
1942                         if (err) {
1943                                 pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1944                                        err);
1945                                 htb_graft_helper(dev_queue, old_q);
1946                                 goto err_kill_estimator;
1947                         }
1948                         parent->bstats_bias.bytes += old_q->bstats.bytes;
1949                         parent->bstats_bias.packets += old_q->bstats.packets;
1950                         qdisc_put(old_q);
1951                 }
1952                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1953                                           classid, NULL);
1954                 if (q->offload) {
1955                         if (new_q) {
1956                                 htb_set_lockdep_class_child(new_q);
1957                                 /* One ref for cl->leaf.q, the other for
1958                                  * dev_queue->qdisc.
1959                                  */
1960                                 qdisc_refcount_inc(new_q);
1961                         }
1962                         old_q = htb_graft_helper(dev_queue, new_q);
1963                         /* No qdisc_put needed. */
1964                         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1965                 }
1966                 sch_tree_lock(sch);
1967                 if (parent && !parent->level) {
1968                         /* turn parent into inner node */
1969                         qdisc_purge_queue(parent->leaf.q);
1970                         parent_qdisc = parent->leaf.q;
1971                         if (parent->prio_activity)
1972                                 htb_deactivate(q, parent);
1973
1974                         /* remove from evt list because of level change */
1975                         if (parent->cmode != HTB_CAN_SEND) {
1976                                 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1977                                 parent->cmode = HTB_CAN_SEND;
1978                         }
1979                         parent->level = (parent->parent ? parent->parent->level
1980                                          : TC_HTB_MAXDEPTH) - 1;
1981                         memset(&parent->inner, 0, sizeof(parent->inner));
1982                 }
1983
1984                 /* the leaf (this class) needs an elementary qdisc */
1985                 cl->leaf.q = new_q ? new_q : &noop_qdisc;
1986                 if (q->offload)
1987                         cl->leaf.offload_queue = dev_queue;
1988
1989                 cl->parent = parent;
1990
1991                 /* set class to be in HTB_CAN_SEND state */
1992                 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1993                 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1994                 cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1995                 cl->t_c = ktime_get_ns();
1996                 cl->cmode = HTB_CAN_SEND;
1997
1998                 /* attach to the hash list and parent's family */
1999                 qdisc_class_hash_insert(&q->clhash, &cl->common);
2000                 if (parent)
2001                         parent->children++;
2002                 if (cl->leaf.q != &noop_qdisc)
2003                         qdisc_hash_add(cl->leaf.q, true);
2004         } else {
2005                 if (tca[TCA_RATE]) {
2006                         err = gen_replace_estimator(&cl->bstats, NULL,
2007                                                     &cl->rate_est,
2008                                                     NULL,
2009                                                     qdisc_root_sleeping_running(sch),
2010                                                     tca[TCA_RATE]);
2011                         if (err)
2012                                 return err;
2013                 }
2014
2015                 if (q->offload) {
2016                         struct net_device *dev = qdisc_dev(sch);
2017
2018                         offload_opt = (struct tc_htb_qopt_offload) {
2019                                 .command = TC_HTB_NODE_MODIFY,
2020                                 .classid = cl->common.classid,
2021                                 .rate = max_t(u64, hopt->rate.rate, rate64),
2022                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
2023                                 .extack = extack,
2024                         };
2025                         err = htb_offload(dev, &offload_opt);
2026                         if (err)
2027                                 /* Estimator was replaced, and rollback may fail
2028                                  * as well, so we don't try to recover it, and
2029                                  * the estimator won't work properly with the
2030                                  * offload anyway, because bstats are updated
2031                                  * only when the stats are queried.
2032                                  */
2033                                 return err;
2034                 }
2035
2036                 sch_tree_lock(sch);
2037         }
2038
2039         psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
2040         psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
2041
2042         /* There used to be a nasty bug here: we have to check that the
2043          * node is really a leaf before changing cl->leaf!
2044          */
2045         if (!cl->level) {
2046                 u64 quantum = cl->rate.rate_bytes_ps;
2047
2048                 do_div(quantum, q->rate2quantum);
2049                 cl->quantum = min_t(u64, quantum, INT_MAX);
2050
2051                 if (!hopt->quantum && cl->quantum < 1000) {
2052                         warn = -1;
2053                         cl->quantum = 1000;
2054                 }
2055                 if (!hopt->quantum && cl->quantum > 200000) {
2056                         warn = 1;
2057                         cl->quantum = 200000;
2058                 }
2059                 if (hopt->quantum)
2060                         cl->quantum = hopt->quantum;
2061                 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2062                         cl->prio = TC_HTB_NUMPRIO - 1;
2063         }
2064
2065         cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2066         cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2067
2068         sch_tree_unlock(sch);
2069         qdisc_put(parent_qdisc);
2070
2071         if (warn)
2072                 pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
2073                             cl->common.classid, (warn == -1 ? "small" : "big"));
2074
2075         qdisc_class_hash_grow(sch, &q->clhash);
2076
2077         *arg = (unsigned long)cl;
2078         return 0;
2079
2080 err_kill_estimator:
2081         gen_kill_estimator(&cl->rate_est);
2082 err_block_put:
2083         tcf_block_put(cl->block);
2084         kfree(cl);
2085 failure:
2086         return err;
2087 }
2088
2089 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2090                                        struct netlink_ext_ack *extack)
2091 {
2092         struct htb_sched *q = qdisc_priv(sch);
2093         struct htb_class *cl = (struct htb_class *)arg;
2094
2095         return cl ? cl->block : q->block;
2096 }
2097
2098 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2099                                      u32 classid)
2100 {
2101         struct htb_class *cl = htb_find(classid, sch);
2102
2103         /*if (cl && !cl->level) return 0;
2104          * The line above used to be there to prevent attaching filters to
2105          * leaves. But at least the tc_index filter uses this just to get the
2106          * class for other reasons, so we have to allow for it.
2107          * ----
2108          * 19.6.2002 As Werner explained, it is OK - binding a filter is just
2109          * another way to "lock" the class - unlike "get", this lock can
2110          * be broken by the class during destroy, IIUC.
2111          */
2112         if (cl)
2113                 cl->filter_cnt++;
2114         return (unsigned long)cl;
2115 }
2116
2117 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2118 {
2119         struct htb_class *cl = (struct htb_class *)arg;
2120
2121         if (cl)
2122                 cl->filter_cnt--;
2123 }
2124
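     /* Class ops ->walk: visit every class in the hash table, honouring the
      * walker's skip/count/stop protocol.
      */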
2125 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2126 {
2127         struct htb_sched *q = qdisc_priv(sch);
2128         struct htb_class *cl;
2129         unsigned int i;
2130
2131         if (arg->stop)
2132                 return;
2133
2134         for (i = 0; i < q->clhash.hashsize; i++) {
2135                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2136                         if (arg->count < arg->skip) {
2137                                 arg->count++;
2138                                 continue;
2139                         }
2140                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2141                                 arg->stop = 1;
2142                                 return;
2143                         }
2144                         arg->count++;
2145                 }
2146         }
2147 }
2148
2149 static const struct Qdisc_class_ops htb_class_ops = {
2150         .select_queue   =       htb_select_queue,
2151         .graft          =       htb_graft,
2152         .leaf           =       htb_leaf,
2153         .qlen_notify    =       htb_qlen_notify,
2154         .find           =       htb_search,
2155         .change         =       htb_change_class,
2156         .delete         =       htb_delete,
2157         .walk           =       htb_walk,
2158         .tcf_block      =       htb_tcf_block,
2159         .bind_tcf       =       htb_bind_filter,
2160         .unbind_tcf     =       htb_unbind_filter,
2161         .dump           =       htb_dump_class,
2162         .dump_stats     =       htb_dump_class_stats,
2163 };
2164
2165 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2166         .cl_ops         =       &htb_class_ops,
2167         .id             =       "htb",
2168         .priv_size      =       sizeof(struct htb_sched),
2169         .enqueue        =       htb_enqueue,
2170         .dequeue        =       htb_dequeue,
2171         .peek           =       qdisc_peek_dequeued,
2172         .init           =       htb_init,
2173         .attach         =       htb_attach,
2174         .reset          =       htb_reset,
2175         .destroy        =       htb_destroy,
2176         .dump           =       htb_dump,
2177         .owner          =       THIS_MODULE,
2178 };
2179
2180 static int __init htb_module_init(void)
2181 {
2182         return register_qdisc(&htb_qdisc_ops);
2183 }
2184 static void __exit htb_module_exit(void)
2185 {
2186         unregister_qdisc(&htb_qdisc_ops);
2187 }
2188
2189 module_init(htb_module_init)
2190 module_exit(htb_module_exit)
2191 MODULE_LICENSE("GPL");