net/sched/sch_cbq.c

   1 /*
   2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10  *
  11  */
  12
  13 #include <linux/module.h>
  14 #include <linux/slab.h>
  15 #include <linux/types.h>
  16 #include <linux/kernel.h>
  17 #include <linux/string.h>
  18 #include <linux/errno.h>
  19 #include <linux/skbuff.h>
  20 #include <net/netlink.h>
  21 #include <net/pkt_sched.h>
  22
  23
  24 /*      Class-Based Queueing (CBQ) algorithm.
  25         =======================================
  26
  27         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
  28                  Management Models for Packet Networks",
  29                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
  30
  31                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
  32
  33                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
  34                  Parameters", 1996
  35
  36                  [4] Sally Floyd and Michael Speer, "Experimental Results
  37                  for Class-Based Queueing", 1998, not published.
  38
  39         -----------------------------------------------------------------------
  40
  41         Algorithm skeleton was taken from NS simulator cbq.cc.
  42         If someone wants to check this code against the LBL version,
  43         he should take into account that ONLY the skeleton was borrowed,
  44         the implementation is different. Particularly:
  45
  46         --- The WRR algorithm is different. Our version looks more
  47         reasonable (I hope) and works when quanta are allowed to be
  48         less than MTU, which is always the case when real time classes
  49         have small rates. Note, that the statement of [3] is
  50         incomplete, delay may actually be estimated even if class
  51         per-round allotment is less than MTU. Namely, if per-round
  52         allotment is W*r_i, and r_1+...+r_k = r < 1
  53
  54         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
  55
  56         In the worst case we have IntServ estimate with D = W*r+k*MTU
  57         and C = MTU*r. The proof (if correct at all) is trivial.
  58
  59
  60         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
  61         interpret some places, which look like wrong translations
  62         from NS. Anyone is advised to find these differences
  63         and explain to me, why I am wrong 8).
  64
  65         --- Linux has no EOI event, so that we cannot estimate true class
  66         idle time. Workaround is to consider the next dequeue event
  67         as sign that previous packet is finished. This is wrong because of
  68         internal device queueing, but on a permanently loaded link it is true.
  69         Moreover, combined with clock integrator, this scheme looks
  70         very close to an ideal solution.  */
  71
  72 struct cbq_sched_data;
  73
  74
  75 struct cbq_class
  76 {
  77         struct Qdisc_class_common common;
  78         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
  79
  80 /* Parameters */
  81         unsigned char           priority;       /* class priority */
  82         unsigned char           priority2;      /* priority to be used after overlimit */
  83         unsigned char           ewma_log;       /* time constant for idle time calculation */
  84         unsigned char           ovl_strategy;
  85 #ifdef CONFIG_NET_CLS_ACT
  86         unsigned char           police;
  87 #endif
  88
  89         u32                     defmap;
  90
  91         /* Link-sharing scheduler parameters */
  92         long                    maxidle;        /* Class parameters: see below. */
  93         long                    offtime;
  94         long                    minidle;
  95         u32                     avpkt;
  96         struct qdisc_rate_table *R_tab;
  97
  98         /* Overlimit strategy parameters */
  99         void                    (*overlimit)(struct cbq_class *cl);
 100         psched_tdiff_t          penalty;
 101
 102         /* General scheduler (WRR) parameters */
 103         long                    allot;
 104         long                    quantum;        /* Allotment per WRR round */
 105         long                    weight;         /* Relative allotment: see below */
 106
 107         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
 108         struct cbq_class        *split;         /* Ptr to split node */
 109         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
 110         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
 111         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
 112                                                    parent otherwise */
 113         struct cbq_class        *sibling;       /* Sibling chain */
 114         struct cbq_class        *children;      /* Pointer to children chain */
 115
 116         struct Qdisc            *q;             /* Elementary queueing discipline */
 117
 118
 119 /* Variables */
 120         unsigned char           cpriority;      /* Effective priority */
 121         unsigned char           delayed;
 122         unsigned char           level;          /* level of the class in hierarchy:
 123                                                    0 for leaf classes, and maximal
 124                                                    level of children + 1 for nodes.
 125                                                  */
 126
 127         psched_time_t           last;           /* Last end of service */
 128         psched_time_t           undertime;
 129         long                    avgidle;
 130         long                    deficit;        /* Saved deficit for WRR */
 131         psched_time_t           penalized;
 132         struct gnet_stats_basic_packed bstats;
 133         struct gnet_stats_queue qstats;
 134         struct gnet_stats_rate_est rate_est;
 135         struct tc_cbq_xstats    xstats;
 136
 137         struct tcf_proto        *filter_list;
 138
 139         int                     refcnt;
 140         int                     filters;
 141
 142         struct cbq_class        *defaults[TC_PRIO_MAX+1];
 143 };
 144
 145 struct cbq_sched_data
 146 {
 147         struct Qdisc_class_hash clhash;                 /* Hash table of all classes */
 148         int                     nclasses[TC_CBQ_MAXPRIO+1];
 149         unsigned                quanta[TC_CBQ_MAXPRIO+1];
 150
 151         struct cbq_class        link;
 152
 153         unsigned                activemask;
 154         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
 155                                                                    with backlog */
 156
 157 #ifdef CONFIG_NET_CLS_ACT
 158         struct cbq_class        *rx_class;
 159 #endif
 160         struct cbq_class        *tx_class;
 161         struct cbq_class        *tx_borrowed;
 162         int                     tx_len;
 163         psched_time_t           now;            /* Cached timestamp */
 164         psched_time_t           now_rt;         /* Cached real time */
 165         unsigned                pmask;
 166
 167         struct hrtimer          delay_timer;
 168         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
 169                                                    started when CBQ has
 170                                                    backlog, but cannot
 171                                                    transmit just now */
 172         psched_tdiff_t          wd_expires;
 173         int                     toplevel;
 174         u32                     hgenerator;
 175 };
 176
 177
 178 #define L2T(cl,len)     qdisc_l2t((cl)->R_tab,len)
 179
 180 static __inline__ struct cbq_class *
 181 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 182 {
 183         struct Qdisc_class_common *clc;
 184
 185         clc = qdisc_class_find(&q->clhash, classid);
 186         if (clc == NULL)
 187                 return NULL;
 188         return container_of(clc, struct cbq_class, common);
 189 }
 190
 191 #ifdef CONFIG_NET_CLS_ACT
 192
 193 static struct cbq_class *
 194 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 195 {
 196         struct cbq_class *cl, *new;
 197
 198         for (cl = this->tparent; cl; cl = cl->tparent)
 199                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
 200                         return new;
 201
 202         return NULL;
 203 }
 204
 205 #endif
 206
 207 /* Classify packet. The procedure is pretty complicated, but
 208    it allows us to combine link sharing and priority scheduling
 209    transparently.
 210
 211    Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
 212    so that it resolves to split nodes. Then packets are classified
 213    by logical priority, or a more specific classifier may be attached
 214    to the split node.
 215  */
 216
 217 static struct cbq_class *
 218 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 219 {
 220         struct cbq_sched_data *q = qdisc_priv(sch);
 221         struct cbq_class *head = &q->link;
 222         struct cbq_class **defmap;
 223         struct cbq_class *cl = NULL;
 224         u32 prio = skb->priority;
 225         struct tcf_result res;
 226
 227         /*
 228          *  Step 1. If skb->priority points to one of our classes, use it.
 229          */
 230         if (TC_H_MAJ(prio^sch->handle) == 0 &&
 231             (cl = cbq_class_lookup(q, prio)) != NULL)
 232                 return cl;
 233
 234         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 235         for (;;) {
 236                 int result = 0;
 237                 defmap = head->defaults;
 238
 239                 /*
 240                  * Step 2+n. Apply classifier.
 241                  */
 242                 if (!head->filter_list ||
 243                     (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
 244                         goto fallback;
 245
 246                 if ((cl = (void*)res.class) == NULL) {
 247                         if (TC_H_MAJ(res.classid))
 248                                 cl = cbq_class_lookup(q, res.classid);
 249                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
 250                                 cl = defmap[TC_PRIO_BESTEFFORT];
 251
 252                         if (cl == NULL || cl->level >= head->level)
 253                                 goto fallback;
 254                 }
 255
 256 #ifdef CONFIG_NET_CLS_ACT
 257                 switch (result) {
 258                 case TC_ACT_QUEUED:
 259                 case TC_ACT_STOLEN:
 260                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 261                 case TC_ACT_SHOT:
 262                         return NULL;
 263                 case TC_ACT_RECLASSIFY:
 264                         return cbq_reclassify(skb, cl);
 265                 }
 266 #endif
 267                 if (cl->level == 0)
 268                         return cl;
 269
 270                 /*
 271                  * Step 3+n. If classifier selected a link sharing class,
 272                  *         apply agency specific classifier.
 273                  *         Repeat this procdure until we hit a leaf node.
 274                  */
 275                 head = cl;
 276         }
 277
 278 fallback:
 279         cl = head;
 280
 281         /*
 282          * Step 4. No success...
 283          */
 284         if (TC_H_MAJ(prio) == 0 &&
 285             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
 286             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 287                 return head;
 288
 289         return cl;
 290 }
 291
 292 /*
 293    A packet has just been enqueued on the empty class.
 294    cbq_activate_class adds it to the tail of active class list
 295    of its priority band.
 296  */
 297
 298 static __inline__ void cbq_activate_class(struct cbq_class *cl)
 299 {
 300         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 301         int prio = cl->cpriority;
 302         struct cbq_class *cl_tail;
 303
 304         cl_tail = q->active[prio];
 305         q->active[prio] = cl;
 306
 307         if (cl_tail != NULL) {
 308                 cl->next_alive = cl_tail->next_alive;
 309                 cl_tail->next_alive = cl;
 310         } else {
 311                 cl->next_alive = cl;
 312                 q->activemask |= (1<<prio);
 313         }
 314 }
 315
 316 /*
 317    Unlink class from active chain.
 318    Note that this same procedure is done directly in cbq_dequeue*
 319    during round-robin procedure.
 320  */
 321
 322 static void cbq_deactivate_class(struct cbq_class *this)
 323 {
 324         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
 325         int prio = this->cpriority;
 326         struct cbq_class *cl;
 327         struct cbq_class *cl_prev = q->active[prio];
 328
 329         do {
 330                 cl = cl_prev->next_alive;
 331                 if (cl == this) {
 332                         cl_prev->next_alive = cl->next_alive;
 333                         cl->next_alive = NULL;
 334
 335                         if (cl == q->active[prio]) {
 336                                 q->active[prio] = cl_prev;
 337                                 if (cl == q->active[prio]) {
 338                                         q->active[prio] = NULL;
 339                                         q->activemask &= ~(1<<prio);
 340                                         return;
 341                                 }
 342                         }
 343                         return;
 344                 }
 345         } while ((cl_prev = cl) != q->active[prio]);
 346 }
 347
 348 static void
 349 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 350 {
 351         int toplevel = q->toplevel;
 352
 353         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
 354                 psched_time_t now;
 355                 psched_tdiff_t incr;
 356
 357                 now = psched_get_time();
 358                 incr = now - q->now_rt;
 359                 now = q->now + incr;
 360
 361                 do {
 362                         if (cl->undertime < now) {
 363                                 q->toplevel = cl->level;
 364                                 return;
 365                         }
 366                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
 367         }
 368 }
 369
 370 static int
 371 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 372 {
 373         struct cbq_sched_data *q = qdisc_priv(sch);
 374         int uninitialized_var(ret);
 375         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 376
 377 #ifdef CONFIG_NET_CLS_ACT
 378         q->rx_class = cl;
 379 #endif
 380         if (cl == NULL) {
 381                 if (ret & __NET_XMIT_BYPASS)
 382                         sch->qstats.drops++;
 383                 kfree_skb(skb);
 384                 return ret;
 385         }
 386
 387 #ifdef CONFIG_NET_CLS_ACT
 388         cl->q->__parent = sch;
 389 #endif
 390         ret = qdisc_enqueue(skb, cl->q);
 391         if (ret == NET_XMIT_SUCCESS) {
 392                 sch->q.qlen++;
 393                 sch->bstats.packets++;
 394                 sch->bstats.bytes += qdisc_pkt_len(skb);
 395                 cbq_mark_toplevel(q, cl);
 396                 if (!cl->next_alive)
 397                         cbq_activate_class(cl);
 398                 return ret;
 399         }
 400
 401         if (net_xmit_drop_count(ret)) {
 402                 sch->qstats.drops++;
 403                 cbq_mark_toplevel(q, cl);
 404                 cl->qstats.drops++;
 405         }
 406         return ret;
 407 }
 408
 409 /* Overlimit actions */
 410
 411 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
 412
 413 static void cbq_ovl_classic(struct cbq_class *cl)
 414 {
 415         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 416         psched_tdiff_t delay = cl->undertime - q->now;
 417
 418         if (!cl->delayed) {
 419                 delay += cl->offtime;
 420
 421                 /*
 422                    Class goes to sleep, so that it will have no
 423                    chance to work avgidle. Let's forgive it 8)
 424
 425                    BTW cbq-2.0 has a crap in this
 426                    place, apparently they forgot to shift it by cl->ewma_log.
 427                  */
 428                 if (cl->avgidle < 0)
 429                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 430                 if (cl->avgidle < cl->minidle)
 431                         cl->avgidle = cl->minidle;
 432                 if (delay <= 0)
 433                         delay = 1;
 434                 cl->undertime = q->now + delay;
 435
 436                 cl->xstats.overactions++;
 437                 cl->delayed = 1;
 438         }
 439         if (q->wd_expires == 0 || q->wd_expires > delay)
 440                 q->wd_expires = delay;
 441
 442         /* Dirty work! We must schedule wakeups based on
 443            real available rate, rather than leaf rate,
 444            which may be tiny (even zero).
 445          */
 446         if (q->toplevel == TC_CBQ_MAXLEVEL) {
 447                 struct cbq_class *b;
 448                 psched_tdiff_t base_delay = q->wd_expires;
 449
 450                 for (b = cl->borrow; b; b = b->borrow) {
 451                         delay = b->undertime - q->now;
 452                         if (delay < base_delay) {
 453                                 if (delay <= 0)
 454                                         delay = 1;
 455                                 base_delay = delay;
 456                         }
 457                 }
 458
 459                 q->wd_expires = base_delay;
 460         }
 461 }
 462
 463 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
 464    they go overlimit
 465  */
 466
 467 static void cbq_ovl_rclassic(struct cbq_class *cl)
 468 {
 469         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 470         struct cbq_class *this = cl;
 471
 472         do {
 473                 if (cl->level > q->toplevel) {
 474                         cl = NULL;
 475                         break;
 476                 }
 477         } while ((cl = cl->borrow) != NULL);
 478
 479         if (cl == NULL)
 480                 cl = this;
 481         cbq_ovl_classic(cl);
 482 }
 483
 484 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
 485
 486 static void cbq_ovl_delay(struct cbq_class *cl)
 487 {
 488         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 489         psched_tdiff_t delay = cl->undertime - q->now;
 490
 491         if (test_bit(__QDISC_STATE_DEACTIVATED,
 492                      &qdisc_root_sleeping(cl->qdisc)->state))
 493                 return;
 494
 495         if (!cl->delayed) {
 496                 psched_time_t sched = q->now;
 497                 ktime_t expires;
 498
 499                 delay += cl->offtime;
 500                 if (cl->avgidle < 0)
 501                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 502                 if (cl->avgidle < cl->minidle)
 503                         cl->avgidle = cl->minidle;
 504                 cl->undertime = q->now + delay;
 505
 506                 if (delay > 0) {
 507                         sched += delay + cl->penalty;
 508                         cl->penalized = sched;
 509                         cl->cpriority = TC_CBQ_MAXPRIO;
 510                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
 511
 512                         expires = ktime_set(0, 0);
 513                         expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
 514                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
 515                             ktime_to_ns(ktime_sub(
 516                                         hrtimer_get_expires(&q->delay_timer),
 517                                         expires)) > 0)
 518                                 hrtimer_set_expires(&q->delay_timer, expires);
 519                         hrtimer_restart(&q->delay_timer);
 520                         cl->delayed = 1;
 521                         cl->xstats.overactions++;
 522                         return;
 523                 }
 524                 delay = 1;
 525         }
 526         if (q->wd_expires == 0 || q->wd_expires > delay)
 527                 q->wd_expires = delay;
 528 }
 529
 530 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
 531
 532 static void cbq_ovl_lowprio(struct cbq_class *cl)
 533 {
 534         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 535
 536         cl->penalized = q->now + cl->penalty;
 537
 538         if (cl->cpriority != cl->priority2) {
 539                 cl->cpriority = cl->priority2;
 540                 q->pmask |= (1<<cl->cpriority);
 541                 cl->xstats.overactions++;
 542         }
 543         cbq_ovl_classic(cl);
 544 }
 545
 546 /* TC_CBQ_OVL_DROP: penalize class by dropping */
 547
 548 static void cbq_ovl_drop(struct cbq_class *cl)
 549 {
 550         if (cl->q->ops->drop)
 551                 if (cl->q->ops->drop(cl->q))
 552                         cl->qdisc->q.qlen--;
 553         cl->xstats.overactions++;
 554         cbq_ovl_classic(cl);
 555 }
 556
 557 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
 558                                        psched_time_t now)
 559 {
 560         struct cbq_class *cl;
 561         struct cbq_class *cl_prev = q->active[prio];
 562         psched_time_t sched = now;
 563
 564         if (cl_prev == NULL)
 565                 return 0;
 566
 567         do {
 568                 cl = cl_prev->next_alive;
 569                 if (now - cl->penalized > 0) {
 570                         cl_prev->next_alive = cl->next_alive;
 571                         cl->next_alive = NULL;
 572                         cl->cpriority = cl->priority;
 573                         cl->delayed = 0;
 574                         cbq_activate_class(cl);
 575
 576                         if (cl == q->active[prio]) {
 577                                 q->active[prio] = cl_prev;
 578                                 if (cl == q->active[prio]) {
 579                                         q->active[prio] = NULL;
 580                                         return 0;
 581                                 }
 582                         }
 583
 584                         cl = cl_prev->next_alive;
 585                 } else if (sched - cl->penalized > 0)
 586                         sched = cl->penalized;
 587         } while ((cl_prev = cl) != q->active[prio]);
 588
 589         return sched - now;
 590 }
 591
 592 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 593 {
 594         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
 595                                                 delay_timer);
 596         struct Qdisc *sch = q->watchdog.qdisc;
 597         psched_time_t now;
 598         psched_tdiff_t delay = 0;
 599         unsigned pmask;
 600
 601         now = psched_get_time();
 602
 603         pmask = q->pmask;
 604         q->pmask = 0;
 605
 606         while (pmask) {
 607                 int prio = ffz(~pmask);
 608                 psched_tdiff_t tmp;
 609
 610                 pmask &= ~(1<<prio);
 611
 612                 tmp = cbq_undelay_prio(q, prio, now);
 613                 if (tmp > 0) {
 614                         q->pmask |= 1<<prio;
 615                         if (tmp < delay || delay == 0)
 616                                 delay = tmp;
 617                 }
 618         }
 619
 620         if (delay) {
 621                 ktime_t time;
 622
 623                 time = ktime_set(0, 0);
 624                 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
 625                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 626         }
 627
 628         sch->flags &= ~TCQ_F_THROTTLED;
 629         __netif_schedule(qdisc_root(sch));
 630         return HRTIMER_NORESTART;
 631 }
 632
 633 #ifdef CONFIG_NET_CLS_ACT
 634 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 635 {
 636         struct Qdisc *sch = child->__parent;
 637         struct cbq_sched_data *q = qdisc_priv(sch);
 638         struct cbq_class *cl = q->rx_class;
 639
 640         q->rx_class = NULL;
 641
 642         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
 643                 int ret;
 644
 645                 cbq_mark_toplevel(q, cl);
 646
 647                 q->rx_class = cl;
 648                 cl->q->__parent = sch;
 649
 650                 ret = qdisc_enqueue(skb, cl->q);
 651                 if (ret == NET_XMIT_SUCCESS) {
 652                         sch->q.qlen++;
 653                         sch->bstats.packets++;
 654                         sch->bstats.bytes += qdisc_pkt_len(skb);
 655                         if (!cl->next_alive)
 656                                 cbq_activate_class(cl);
 657                         return 0;
 658                 }
 659                 if (net_xmit_drop_count(ret))
 660                         sch->qstats.drops++;
 661                 return 0;
 662         }
 663
 664         sch->qstats.drops++;
 665         return -1;
 666 }
 667 #endif
 668
 669 /*
 670    It is mission critical procedure.
 671
 672    We "regenerate" toplevel cutoff, if transmitting class
 673    has backlog and it is not regulated. It is not part of
 674    original CBQ description, but looks more reasonable.
 675    Probably, it is wrong. This question needs further investigation.
 676 */
 677
 678 static __inline__ void
 679 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 680                     struct cbq_class *borrowed)
 681 {
 682         if (cl && q->toplevel >= borrowed->level) {
 683                 if (cl->q->q.qlen > 1) {
 684                         do {
 685                                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
 686                                         q->toplevel = borrowed->level;
 687                                         return;
 688                                 }
 689                         } while ((borrowed=borrowed->borrow) != NULL);
 690                 }
 691 #if 0
 692         /* It is not necessary now. Uncommenting it
 693            will save CPU cycles, but decrease fairness.
 694          */
 695                 q->toplevel = TC_CBQ_MAXLEVEL;
 696 #endif
 697         }
 698 }
 699
 700 static void
 701 cbq_update(struct cbq_sched_data *q)
 702 {
 703         struct cbq_class *this = q->tx_class;
 704         struct cbq_class *cl = this;
 705         int len = q->tx_len;
 706
 707         q->tx_class = NULL;
 708
 709         for ( ; cl; cl = cl->share) {
 710                 long avgidle = cl->avgidle;
 711                 long idle;
 712
 713                 cl->bstats.packets++;
 714                 cl->bstats.bytes += len;
 715
 716                 /*
 717                    (now - last) is total time between packet right edges.
 718                    (last_pktlen/rate) is "virtual" busy time, so that
 719
 720                          idle = (now - last) - last_pktlen/rate
 721                  */
 722
 723                 idle = q->now - cl->last;
 724                 if ((unsigned long)idle > 128*1024*1024) {
 725                         avgidle = cl->maxidle;
 726                 } else {
 727                         idle -= L2T(cl, len);
 728
 729                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
 730                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
 731                    cl->avgidle == true_avgidle/W,
 732                    hence:
 733                  */
 734                         avgidle += idle - (avgidle>>cl->ewma_log);
 735                 }
 736
 737                 if (avgidle <= 0) {
 738                         /* Overlimit or at-limit */
 739
 740                         if (avgidle < cl->minidle)
 741                                 avgidle = cl->minidle;
 742
 743                         cl->avgidle = avgidle;
 744
 745                         /* Calculate expected time, when this class
 746                            will be allowed to send.
 747                            It will occur, when:
 748                            (1-W)*true_avgidle + W*delay = 0, i.e.
 749                            idle = (1/W - 1)*(-true_avgidle)
 750                            or
 751                            idle = (1 - W)*(-cl->avgidle);
 752                          */
 753                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 754
 755                         /*
 756                            That is not all.
 757                            To maintain the rate allocated to the class,
 758                            we add to undertime virtual clock,
 759                            necessary to complete transmitted packet.
 760                            (len/phys_bandwidth has been already passed
 761                            to the moment of cbq_update)
 762                          */
 763
 764                         idle -= L2T(&q->link, len);
 765                         idle += L2T(cl, len);
 766
 767                         cl->undertime = q->now + idle;
 768                 } else {
 769                         /* Underlimit */
 770
 771                         cl->undertime = PSCHED_PASTPERFECT;
 772                         if (avgidle > cl->maxidle)
 773                                 cl->avgidle = cl->maxidle;
 774                         else
 775                                 cl->avgidle = avgidle;
 776                 }
 777                 cl->last = q->now;
 778         }
 779
 780         cbq_update_toplevel(q, this, q->tx_borrowed);
 781 }
 782
 783 static __inline__ struct cbq_class *
 784 cbq_under_limit(struct cbq_class *cl)
 785 {
 786         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 787         struct cbq_class *this_cl = cl;
 788
 789         if (cl->tparent == NULL)
 790                 return cl;
 791
 792         if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 793                 cl->delayed = 0;
 794                 return cl;
 795         }
 796
 797         do {
 798                 /* It is very suspicious place. Now overlimit
 799                    action is generated for not bounded classes
 800                    only if link is completely congested.
 801                    Though it is in agree with ancestor-only paradigm,
 802                    it looks very stupid. Particularly,
 803                    it means that this chunk of code will either
 804                    never be called or result in strong amplification
 805                    of burstiness. Dangerous, silly, and, however,
 806                    no another solution exists.
 807                  */
 808                 if ((cl = cl->borrow) == NULL) {
 809                         this_cl->qstats.overlimits++;
 810                         this_cl->overlimit(this_cl);
 811                         return NULL;
 812                 }
 813                 if (cl->level > q->toplevel)
 814                         return NULL;
 815         } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
 816
 817         cl->delayed = 0;
 818         return cl;
 819 }
 820
 821 static __inline__ struct sk_buff *
 822 cbq_dequeue_prio(struct Qdisc *sch, int prio)
 823 {
 824         struct cbq_sched_data *q = qdisc_priv(sch);
 825         struct cbq_class *cl_tail, *cl_prev, *cl;
 826         struct sk_buff *skb;
 827         int deficit;
 828
 829         cl_tail = cl_prev = q->active[prio];
 830         cl = cl_prev->next_alive;
 831
 832         do {
 833                 deficit = 0;
 834
 835                 /* Start round */
 836                 do {
 837                         struct cbq_class *borrow = cl;
 838
 839                         if (cl->q->q.qlen &&
 840                             (borrow = cbq_under_limit(cl)) == NULL)
 841                                 goto skip_class;
 842
 843                         if (cl->deficit <= 0) {
 844                                 /* Class exhausted its allotment per
 845                                    this round. Switch to the next one.
 846                                  */
 847                                 deficit = 1;
 848                                 cl->deficit += cl->quantum;
 849                                 goto next_class;
 850                         }
 851
 852                         skb = cl->q->dequeue(cl->q);
 853
 854                         /* Class did not give us any skb :-(
 855                            It could occur even if cl->q->q.qlen != 0
 856                            f.e. if cl->q == "tbf"
 857                          */
 858                         if (skb == NULL)
 859                                 goto skip_class;
 860
 861                         cl->deficit -= qdisc_pkt_len(skb);
 862                         q->tx_class = cl;
 863                         q->tx_borrowed = borrow;
 864                         if (borrow != cl) {
 865 #ifndef CBQ_XSTATS_BORROWS_BYTES
 866                                 borrow->xstats.borrows++;
 867                                 cl->xstats.borrows++;
 868 #else
 869                                 borrow->xstats.borrows += qdisc_pkt_len(skb);
 870                                 cl->xstats.borrows += qdisc_pkt_len(skb);
 871 #endif
 872                         }
 873                         q->tx_len = qdisc_pkt_len(skb);
 874
 875                         if (cl->deficit <= 0) {
 876                                 q->active[prio] = cl;
 877                                 cl = cl->next_alive;
 878                                 cl->deficit += cl->quantum;
 879                         }
 880                         return skb;
 881
 882 skip_class:
 883                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 884                                 /* Class is empty or penalized.
 885                                    Unlink it from active chain.
 886                                  */
 887                                 cl_prev->next_alive = cl->next_alive;
 888                                 cl->next_alive = NULL;
 889
 890                                 /* Did cl_tail point to it? */
 891                                 if (cl == cl_tail) {
 892                                         /* Repair it! */
 893                                         cl_tail = cl_prev;
 894
 895                                         /* Was it the last class in this band? */
 896                                         if (cl == cl_tail) {
 897                                                 /* Kill the band! */
 898                                                 q->active[prio] = NULL;
 899                                                 q->activemask &= ~(1<<prio);
 900                                                 if (cl->q->q.qlen)
 901                                                         cbq_activate_class(cl);
 902                                                 return NULL;
 903                                         }
 904
 905                                         q->active[prio] = cl_tail;
 906                                 }
 907                                 if (cl->q->q.qlen)
 908                                         cbq_activate_class(cl);
 909
 910                                 cl = cl_prev;
 911                         }
 912
 913 next_class:
 914                         cl_prev = cl;
 915                         cl = cl->next_alive;
 916                 } while (cl_prev != cl_tail);
 917         } while (deficit);
 918
 919         q->active[prio] = cl_prev;
 920
 921         return NULL;
 922 }
 923
 924 static __inline__ struct sk_buff *
 925 cbq_dequeue_1(struct Qdisc *sch)
 926 {
 927         struct cbq_sched_data *q = qdisc_priv(sch);
 928         struct sk_buff *skb;
 929         unsigned activemask;
 930
 931         activemask = q->activemask&0xFF;
 932         while (activemask) {
 933                 int prio = ffz(~activemask);
 934                 activemask &= ~(1<<prio);
 935                 skb = cbq_dequeue_prio(sch, prio);
 936                 if (skb)
 937                         return skb;
 938         }
 939         return NULL;
 940 }
 941
 942 static struct sk_buff *
 943 cbq_dequeue(struct Qdisc *sch)
 944 {
 945         struct sk_buff *skb;
 946         struct cbq_sched_data *q = qdisc_priv(sch);
 947         psched_time_t now;
 948         psched_tdiff_t incr;
 949
 950         now = psched_get_time();
 951         incr = now - q->now_rt;
 952
 953         if (q->tx_class) {
 954                 psched_tdiff_t incr2;
 955                 /* Time integrator. We calculate EOS time
 956                    by adding expected packet transmission time.
 957                    If real time is greater, we warp artificial clock,
 958                    so that:
 959
 960                    cbq_time = max(real_time, work);
 961                  */
 962                 incr2 = L2T(&q->link, q->tx_len);
 963                 q->now += incr2;
 964                 cbq_update(q);
 965                 if ((incr -= incr2) < 0)
 966                         incr = 0;
 967         }
 968         q->now += incr;
 969         q->now_rt = now;
 970
 971         for (;;) {
 972                 q->wd_expires = 0;
 973
 974                 skb = cbq_dequeue_1(sch);
 975                 if (skb) {
 976                         sch->q.qlen--;
 977                         sch->flags &= ~TCQ_F_THROTTLED;
 978                         return skb;
 979                 }
 980
 981                 /* All the classes are overlimit.
 982
 983                    It is possible, if:
 984
 985                    1. Scheduler is empty.
 986                    2. Toplevel cutoff inhibited borrowing.
 987                    3. Root class is overlimit.
 988
 989                    Reset 2d and 3d conditions and retry.
 990
 991                    Note, that NS and cbq-2.0 are buggy, peeking
 992                    an arbitrary class is appropriate for ancestor-only
 993                    sharing, but not for toplevel algorithm.
 994
 995                    Our version is better, but slower, because it requires
 996                    two passes, but it is unavoidable with top-level sharing.
 997                 */
 998
 999                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1000                     q->link.undertime == PSCHED_PASTPERFECT)
1001                         break;
1002
1003                 q->toplevel = TC_CBQ_MAXLEVEL;
1004                 q->link.undertime = PSCHED_PASTPERFECT;
1005         }
1006
1007         /* No packets in scheduler or nobody wants to give them to us :-(
1008            Sigh... start watchdog timer in the last case. */
1009
1010         if (sch->q.qlen) {
1011                 sch->qstats.overlimits++;
1012                 if (q->wd_expires)
1013                         qdisc_watchdog_schedule(&q->watchdog,
1014                                                 now + q->wd_expires);
1015         }
1016         return NULL;
1017 }
1018
1019 /* CBQ class maintanance routines */
1020
1021 static void cbq_adjust_levels(struct cbq_class *this)
1022 {
1023         if (this == NULL)
1024                 return;
1025
1026         do {
1027                 int level = 0;
1028                 struct cbq_class *cl;
1029
1030                 if ((cl = this->children) != NULL) {
1031                         do {
1032                                 if (cl->level > level)
1033                                         level = cl->level;
1034                         } while ((cl = cl->sibling) != this->children);
1035                 }
1036                 this->level = level+1;
1037         } while ((this = this->tparent) != NULL);
1038 }
1039
1040 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1041 {
1042         struct cbq_class *cl;
1043         struct hlist_node *n;
1044         unsigned int h;
1045
1046         if (q->quanta[prio] == 0)
1047                 return;
1048
1049         for (h = 0; h < q->clhash.hashsize; h++) {
1050                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1051                         /* BUGGGG... Beware! This expression suffer of
1052                            arithmetic overflows!
1053                          */
1054                         if (cl->priority == prio) {
1055                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1056                                         q->quanta[prio];
1057                         }
1058                         if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1059                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1060                                 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1061                         }
1062                 }
1063         }
1064 }
1065
1066 static void cbq_sync_defmap(struct cbq_class *cl)
1067 {
1068         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1069         struct cbq_class *split = cl->split;
1070         unsigned h;
1071         int i;
1072
1073         if (split == NULL)
1074                 return;
1075
1076         for (i=0; i<=TC_PRIO_MAX; i++) {
1077                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1078                         split->defaults[i] = NULL;
1079         }
1080
1081         for (i=0; i<=TC_PRIO_MAX; i++) {
1082                 int level = split->level;
1083
1084                 if (split->defaults[i])
1085                         continue;
1086
1087                 for (h = 0; h < q->clhash.hashsize; h++) {
1088                         struct hlist_node *n;
1089                         struct cbq_class *c;
1090
1091                         hlist_for_each_entry(c, n, &q->clhash.hash[h],
1092                                              common.hnode) {
1093                                 if (c->split == split && c->level < level &&
1094                                     c->defmap&(1<<i)) {
1095                                         split->defaults[i] = c;
1096                                         level = c->level;
1097                                 }
1098                         }
1099                 }
1100         }
1101 }
1102
1103 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1104 {
1105         struct cbq_class *split = NULL;
1106
1107         if (splitid == 0) {
1108                 if ((split = cl->split) == NULL)
1109                         return;
1110                 splitid = split->common.classid;
1111         }
1112
1113         if (split == NULL || split->common.classid != splitid) {
1114                 for (split = cl->tparent; split; split = split->tparent)
1115                         if (split->common.classid == splitid)
1116                                 break;
1117         }
1118
1119         if (split == NULL)
1120                 return;
1121
1122         if (cl->split != split) {
1123                 cl->defmap = 0;
1124                 cbq_sync_defmap(cl);
1125                 cl->split = split;
1126                 cl->defmap = def&mask;
1127         } else
1128                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1129
1130         cbq_sync_defmap(cl);
1131 }
1132
1133 static void cbq_unlink_class(struct cbq_class *this)
1134 {
1135         struct cbq_class *cl, **clp;
1136         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1137
1138         qdisc_class_hash_remove(&q->clhash, &this->common);
1139
1140         if (this->tparent) {
1141                 clp=&this->sibling;
1142                 cl = *clp;
1143                 do {
1144                         if (cl == this) {
1145                                 *clp = cl->sibling;
1146                                 break;
1147                         }
1148                         clp = &cl->sibling;
1149                 } while ((cl = *clp) != this->sibling);
1150
1151                 if (this->tparent->children == this) {
1152                         this->tparent->children = this->sibling;
1153                         if (this->sibling == this)
1154                                 this->tparent->children = NULL;
1155                 }
1156         } else {
1157                 WARN_ON(this->sibling != this);
1158         }
1159 }
1160
1161 static void cbq_link_class(struct cbq_class *this)
1162 {
1163         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1164         struct cbq_class *parent = this->tparent;
1165
1166         this->sibling = this;
1167         qdisc_class_hash_insert(&q->clhash, &this->common);
1168
1169         if (parent == NULL)
1170                 return;
1171
1172         if (parent->children == NULL) {
1173                 parent->children = this;
1174         } else {
1175                 this->sibling = parent->children->sibling;
1176                 parent->children->sibling = this;
1177         }
1178 }
1179
1180 static unsigned int cbq_drop(struct Qdisc* sch)
1181 {
1182         struct cbq_sched_data *q = qdisc_priv(sch);
1183         struct cbq_class *cl, *cl_head;
1184         int prio;
1185         unsigned int len;
1186
1187         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1188                 if ((cl_head = q->active[prio]) == NULL)
1189                         continue;
1190
1191                 cl = cl_head;
1192                 do {
1193                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1194                                 sch->q.qlen--;
1195                                 if (!cl->q->q.qlen)
1196                                         cbq_deactivate_class(cl);
1197                                 return len;
1198                         }
1199                 } while ((cl = cl->next_alive) != cl_head);
1200         }
1201         return 0;
1202 }
1203
1204 static void
1205 cbq_reset(struct Qdisc* sch)
1206 {
1207         struct cbq_sched_data *q = qdisc_priv(sch);
1208         struct cbq_class *cl;
1209         struct hlist_node *n;
1210         int prio;
1211         unsigned h;
1212
1213         q->activemask = 0;
1214         q->pmask = 0;
1215         q->tx_class = NULL;
1216         q->tx_borrowed = NULL;
1217         qdisc_watchdog_cancel(&q->watchdog);
1218         hrtimer_cancel(&q->delay_timer);
1219         q->toplevel = TC_CBQ_MAXLEVEL;
1220         q->now = psched_get_time();
1221         q->now_rt = q->now;
1222
1223         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1224                 q->active[prio] = NULL;
1225
1226         for (h = 0; h < q->clhash.hashsize; h++) {
1227                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1228                         qdisc_reset(cl->q);
1229
1230                         cl->next_alive = NULL;
1231                         cl->undertime = PSCHED_PASTPERFECT;
1232                         cl->avgidle = cl->maxidle;
1233                         cl->deficit = cl->quantum;
1234                         cl->cpriority = cl->priority;
1235                 }
1236         }
1237         sch->q.qlen = 0;
1238 }
1239
1240
1241 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1242 {
1243         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1244                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1245                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1246         }
1247         if (lss->change&TCF_CBQ_LSS_EWMA)
1248                 cl->ewma_log = lss->ewma_log;
1249         if (lss->change&TCF_CBQ_LSS_AVPKT)
1250                 cl->avpkt = lss->avpkt;
1251         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1252                 cl->minidle = -(long)lss->minidle;
1253         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1254                 cl->maxidle = lss->maxidle;
1255                 cl->avgidle = lss->maxidle;
1256         }
1257         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1258                 cl->offtime = lss->offtime;
1259         return 0;
1260 }
1261
1262 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1263 {
1264         q->nclasses[cl->priority]--;
1265         q->quanta[cl->priority] -= cl->weight;
1266         cbq_normalize_quanta(q, cl->priority);
1267 }
1268
1269 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1270 {
1271         q->nclasses[cl->priority]++;
1272         q->quanta[cl->priority] += cl->weight;
1273         cbq_normalize_quanta(q, cl->priority);
1274 }
1275
1276 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1277 {
1278         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1279
1280         if (wrr->allot)
1281                 cl->allot = wrr->allot;
1282         if (wrr->weight)
1283                 cl->weight = wrr->weight;
1284         if (wrr->priority) {
1285                 cl->priority = wrr->priority-1;
1286                 cl->cpriority = cl->priority;
1287                 if (cl->priority >= cl->priority2)
1288                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1289         }
1290
1291         cbq_addprio(q, cl);
1292         return 0;
1293 }
1294
1295 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1296 {
1297         switch (ovl->strategy) {
1298         case TC_CBQ_OVL_CLASSIC:
1299                 cl->overlimit = cbq_ovl_classic;
1300                 break;
1301         case TC_CBQ_OVL_DELAY:
1302                 cl->overlimit = cbq_ovl_delay;
1303                 break;
1304         case TC_CBQ_OVL_LOWPRIO:
1305                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1306                     ovl->priority2-1 <= cl->priority)
1307                         return -EINVAL;
1308                 cl->priority2 = ovl->priority2-1;
1309                 cl->overlimit = cbq_ovl_lowprio;
1310                 break;
1311         case TC_CBQ_OVL_DROP:
1312                 cl->overlimit = cbq_ovl_drop;
1313                 break;
1314         case TC_CBQ_OVL_RCLASSIC:
1315                 cl->overlimit = cbq_ovl_rclassic;
1316                 break;
1317         default:
1318                 return -EINVAL;
1319         }
1320         cl->penalty = ovl->penalty;
1321         return 0;
1322 }
1323
1324 #ifdef CONFIG_NET_CLS_ACT
1325 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1326 {
1327         cl->police = p->police;
1328
1329         if (cl->q->handle) {
1330                 if (p->police == TC_POLICE_RECLASSIFY)
1331                         cl->q->reshape_fail = cbq_reshape_fail;
1332                 else
1333                         cl->q->reshape_fail = NULL;
1334         }
1335         return 0;
1336 }
1337 #endif
1338
1339 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1340 {
1341         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1342         return 0;
1343 }
1344
1345 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1346         [TCA_CBQ_LSSOPT]        = { .len = sizeof(struct tc_cbq_lssopt) },
1347         [TCA_CBQ_WRROPT]        = { .len = sizeof(struct tc_cbq_wrropt) },
1348         [TCA_CBQ_FOPT]          = { .len = sizeof(struct tc_cbq_fopt) },
1349         [TCA_CBQ_OVL_STRATEGY]  = { .len = sizeof(struct tc_cbq_ovl) },
1350         [TCA_CBQ_RATE]          = { .len = sizeof(struct tc_ratespec) },
1351         [TCA_CBQ_RTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1352         [TCA_CBQ_POLICE]        = { .len = sizeof(struct tc_cbq_police) },
1353 };
1354
1355 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1356 {
1357         struct cbq_sched_data *q = qdisc_priv(sch);
1358         struct nlattr *tb[TCA_CBQ_MAX + 1];
1359         struct tc_ratespec *r;
1360         int err;
1361
1362         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1363         if (err < 0)
1364                 return err;
1365
1366         if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1367                 return -EINVAL;
1368
1369         r = nla_data(tb[TCA_CBQ_RATE]);
1370
1371         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1372                 return -EINVAL;
1373
1374         err = qdisc_class_hash_init(&q->clhash);
1375         if (err < 0)
1376                 goto put_rtab;
1377
1378         q->link.refcnt = 1;
1379         q->link.sibling = &q->link;
1380         q->link.common.classid = sch->handle;
1381         q->link.qdisc = sch;
1382         if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1383                                             &pfifo_qdisc_ops,
1384                                             sch->handle)))
1385                 q->link.q = &noop_qdisc;
1386
1387         q->link.priority = TC_CBQ_MAXPRIO-1;
1388         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1389         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1390         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1391         q->link.overlimit = cbq_ovl_classic;
1392         q->link.allot = psched_mtu(qdisc_dev(sch));
1393         q->link.quantum = q->link.allot;
1394         q->link.weight = q->link.R_tab->rate.rate;
1395
1396         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1397         q->link.avpkt = q->link.allot/2;
1398         q->link.minidle = -0x7FFFFFFF;
1399
1400         qdisc_watchdog_init(&q->watchdog, sch);
1401         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1402         q->delay_timer.function = cbq_undelay;
1403         q->toplevel = TC_CBQ_MAXLEVEL;
1404         q->now = psched_get_time();
1405         q->now_rt = q->now;
1406
1407         cbq_link_class(&q->link);
1408
1409         if (tb[TCA_CBQ_LSSOPT])
1410                 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1411
1412         cbq_addprio(q, &q->link);
1413         return 0;
1414
1415 put_rtab:
1416         qdisc_put_rtab(q->link.R_tab);
1417         return err;
1418 }
1419
1420 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1421 {
1422         unsigned char *b = skb_tail_pointer(skb);
1423
1424         NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1425         return skb->len;
1426
1427 nla_put_failure:
1428         nlmsg_trim(skb, b);
1429         return -1;
1430 }
1431
1432 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1433 {
1434         unsigned char *b = skb_tail_pointer(skb);
1435         struct tc_cbq_lssopt opt;
1436
1437         opt.flags = 0;
1438         if (cl->borrow == NULL)
1439                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1440         if (cl->share == NULL)
1441                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1442         opt.ewma_log = cl->ewma_log;
1443         opt.level = cl->level;
1444         opt.avpkt = cl->avpkt;
1445         opt.maxidle = cl->maxidle;
1446         opt.minidle = (u32)(-cl->minidle);
1447         opt.offtime = cl->offtime;
1448         opt.change = ~0;
1449         NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1450         return skb->len;
1451
1452 nla_put_failure:
1453         nlmsg_trim(skb, b);
1454         return -1;
1455 }
1456
1457 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1458 {
1459         unsigned char *b = skb_tail_pointer(skb);
1460         struct tc_cbq_wrropt opt;
1461
1462         opt.flags = 0;
1463         opt.allot = cl->allot;
1464         opt.priority = cl->priority+1;
1465         opt.cpriority = cl->cpriority+1;
1466         opt.weight = cl->weight;
1467         NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1468         return skb->len;
1469
1470 nla_put_failure:
1471         nlmsg_trim(skb, b);
1472         return -1;
1473 }
1474
1475 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1476 {
1477         unsigned char *b = skb_tail_pointer(skb);
1478         struct tc_cbq_ovl opt;
1479
1480         opt.strategy = cl->ovl_strategy;
1481         opt.priority2 = cl->priority2+1;
1482         opt.pad = 0;
1483         opt.penalty = cl->penalty;
1484         NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1485         return skb->len;
1486
1487 nla_put_failure:
1488         nlmsg_trim(skb, b);
1489         return -1;
1490 }
1491
1492 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1493 {
1494         unsigned char *b = skb_tail_pointer(skb);
1495         struct tc_cbq_fopt opt;
1496
1497         if (cl->split || cl->defmap) {
1498                 opt.split = cl->split ? cl->split->common.classid : 0;
1499                 opt.defmap = cl->defmap;
1500                 opt.defchange = ~0;
1501                 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1502         }
1503         return skb->len;
1504
1505 nla_put_failure:
1506         nlmsg_trim(skb, b);
1507         return -1;
1508 }
1509
1510 #ifdef CONFIG_NET_CLS_ACT
1511 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1512 {
1513         unsigned char *b = skb_tail_pointer(skb);
1514         struct tc_cbq_police opt;
1515
1516         if (cl->police) {
1517                 opt.police = cl->police;
1518                 opt.__res1 = 0;
1519                 opt.__res2 = 0;
1520                 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1521         }
1522         return skb->len;
1523
1524 nla_put_failure:
1525         nlmsg_trim(skb, b);
1526         return -1;
1527 }
1528 #endif
1529
1530 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1531 {
1532         if (cbq_dump_lss(skb, cl) < 0 ||
1533             cbq_dump_rate(skb, cl) < 0 ||
1534             cbq_dump_wrr(skb, cl) < 0 ||
1535             cbq_dump_ovl(skb, cl) < 0 ||
1536 #ifdef CONFIG_NET_CLS_ACT
1537             cbq_dump_police(skb, cl) < 0 ||
1538 #endif
1539             cbq_dump_fopt(skb, cl) < 0)
1540                 return -1;
1541         return 0;
1542 }
1543
1544 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1545 {
1546         struct cbq_sched_data *q = qdisc_priv(sch);
1547         struct nlattr *nest;
1548
1549         nest = nla_nest_start(skb, TCA_OPTIONS);
1550         if (nest == NULL)
1551                 goto nla_put_failure;
1552         if (cbq_dump_attr(skb, &q->link) < 0)
1553                 goto nla_put_failure;
1554         nla_nest_end(skb, nest);
1555         return skb->len;
1556
1557 nla_put_failure:
1558         nla_nest_cancel(skb, nest);
1559         return -1;
1560 }
1561
1562 static int
1563 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1564 {
1565         struct cbq_sched_data *q = qdisc_priv(sch);
1566
1567         q->link.xstats.avgidle = q->link.avgidle;
1568         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1569 }
1570
1571 static int
1572 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1573                struct sk_buff *skb, struct tcmsg *tcm)
1574 {
1575         struct cbq_class *cl = (struct cbq_class*)arg;
1576         struct nlattr *nest;
1577
1578         if (cl->tparent)
1579                 tcm->tcm_parent = cl->tparent->common.classid;
1580         else
1581                 tcm->tcm_parent = TC_H_ROOT;
1582         tcm->tcm_handle = cl->common.classid;
1583         tcm->tcm_info = cl->q->handle;
1584
1585         nest = nla_nest_start(skb, TCA_OPTIONS);
1586         if (nest == NULL)
1587                 goto nla_put_failure;
1588         if (cbq_dump_attr(skb, cl) < 0)
1589                 goto nla_put_failure;
1590         nla_nest_end(skb, nest);
1591         return skb->len;
1592
1593 nla_put_failure:
1594         nla_nest_cancel(skb, nest);
1595         return -1;
1596 }
1597
1598 static int
1599 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1600         struct gnet_dump *d)
1601 {
1602         struct cbq_sched_data *q = qdisc_priv(sch);
1603         struct cbq_class *cl = (struct cbq_class*)arg;
1604
1605         cl->qstats.qlen = cl->q->q.qlen;
1606         cl->xstats.avgidle = cl->avgidle;
1607         cl->xstats.undertime = 0;
1608
1609         if (cl->undertime != PSCHED_PASTPERFECT)
1610                 cl->xstats.undertime = cl->undertime - q->now;
1611
1612         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1613             gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1614             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1615                 return -1;
1616
1617         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1618 }
1619
1620 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1621                      struct Qdisc **old)
1622 {
1623         struct cbq_class *cl = (struct cbq_class*)arg;
1624
1625         if (new == NULL) {
1626                 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1627                                         &pfifo_qdisc_ops, cl->common.classid);
1628                 if (new == NULL)
1629                         return -ENOBUFS;
1630         } else {
1631 #ifdef CONFIG_NET_CLS_ACT
1632                 if (cl->police == TC_POLICE_RECLASSIFY)
1633                         new->reshape_fail = cbq_reshape_fail;
1634 #endif
1635         }
1636         sch_tree_lock(sch);
1637         *old = cl->q;
1638         cl->q = new;
1639         qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1640         qdisc_reset(*old);
1641         sch_tree_unlock(sch);
1642
1643         return 0;
1644 }
1645
1646 static struct Qdisc *
1647 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1648 {
1649         struct cbq_class *cl = (struct cbq_class*)arg;
1650
1651         return cl->q;
1652 }
1653
1654 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1655 {
1656         struct cbq_class *cl = (struct cbq_class *)arg;
1657
1658         if (cl->q->q.qlen == 0)
1659                 cbq_deactivate_class(cl);
1660 }
1661
1662 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1663 {
1664         struct cbq_sched_data *q = qdisc_priv(sch);
1665         struct cbq_class *cl = cbq_class_lookup(q, classid);
1666
1667         if (cl) {
1668                 cl->refcnt++;
1669                 return (unsigned long)cl;
1670         }
1671         return 0;
1672 }
1673
1674 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1675 {
1676         struct cbq_sched_data *q = qdisc_priv(sch);
1677
1678         WARN_ON(cl->filters);
1679
1680         tcf_destroy_chain(&cl->filter_list);
1681         qdisc_destroy(cl->q);
1682         qdisc_put_rtab(cl->R_tab);
1683         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1684         if (cl != &q->link)
1685                 kfree(cl);
1686 }
1687
1688 static void
1689 cbq_destroy(struct Qdisc* sch)
1690 {
1691         struct cbq_sched_data *q = qdisc_priv(sch);
1692         struct hlist_node *n, *next;
1693         struct cbq_class *cl;
1694         unsigned h;
1695
1696 #ifdef CONFIG_NET_CLS_ACT
1697         q->rx_class = NULL;
1698 #endif
1699         /*
1700          * Filters must be destroyed first because we don't destroy the
1701          * classes from root to leafs which means that filters can still
1702          * be bound to classes which have been destroyed already. --TGR '04
1703          */
1704         for (h = 0; h < q->clhash.hashsize; h++) {
1705                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1706                         tcf_destroy_chain(&cl->filter_list);
1707         }
1708         for (h = 0; h < q->clhash.hashsize; h++) {
1709                 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1710                                           common.hnode)
1711                         cbq_destroy_class(sch, cl);
1712         }
1713         qdisc_class_hash_destroy(&q->clhash);
1714 }
1715
1716 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1717 {
1718         struct cbq_class *cl = (struct cbq_class*)arg;
1719
1720         if (--cl->refcnt == 0) {
1721 #ifdef CONFIG_NET_CLS_ACT
1722                 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1723                 struct cbq_sched_data *q = qdisc_priv(sch);
1724
1725                 spin_lock_bh(root_lock);
1726                 if (q->rx_class == cl)
1727                         q->rx_class = NULL;
1728                 spin_unlock_bh(root_lock);
1729 #endif
1730
1731                 cbq_destroy_class(sch, cl);
1732         }
1733 }
1734
1735 static int
1736 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1737                  unsigned long *arg)
1738 {
1739         int err;
1740         struct cbq_sched_data *q = qdisc_priv(sch);
1741         struct cbq_class *cl = (struct cbq_class*)*arg;
1742         struct nlattr *opt = tca[TCA_OPTIONS];
1743         struct nlattr *tb[TCA_CBQ_MAX + 1];
1744         struct cbq_class *parent;
1745         struct qdisc_rate_table *rtab = NULL;
1746
1747         if (opt == NULL)
1748                 return -EINVAL;
1749
1750         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1751         if (err < 0)
1752                 return err;
1753
1754         if (cl) {
1755                 /* Check parent */
1756                 if (parentid) {
1757                         if (cl->tparent &&
1758                             cl->tparent->common.classid != parentid)
1759                                 return -EINVAL;
1760                         if (!cl->tparent && parentid != TC_H_ROOT)
1761                                 return -EINVAL;
1762                 }
1763
1764                 if (tb[TCA_CBQ_RATE]) {
1765                         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1766                                               tb[TCA_CBQ_RTAB]);
1767                         if (rtab == NULL)
1768                                 return -EINVAL;
1769                 }
1770
1771                 if (tca[TCA_RATE]) {
1772                         err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1773                                                     qdisc_root_sleeping_lock(sch),
1774                                                     tca[TCA_RATE]);
1775                         if (err) {
1776                                 if (rtab)
1777                                         qdisc_put_rtab(rtab);
1778                                 return err;
1779                         }
1780                 }
1781
1782                 /* Change class parameters */
1783                 sch_tree_lock(sch);
1784
1785                 if (cl->next_alive != NULL)
1786                         cbq_deactivate_class(cl);
1787
1788                 if (rtab) {
1789                         qdisc_put_rtab(cl->R_tab);
1790                         cl->R_tab = rtab;
1791                 }
1792
1793                 if (tb[TCA_CBQ_LSSOPT])
1794                         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1795
1796                 if (tb[TCA_CBQ_WRROPT]) {
1797                         cbq_rmprio(q, cl);
1798                         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1799                 }
1800
1801                 if (tb[TCA_CBQ_OVL_STRATEGY])
1802                         cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1803
1804 #ifdef CONFIG_NET_CLS_ACT
1805                 if (tb[TCA_CBQ_POLICE])
1806                         cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1807 #endif
1808
1809                 if (tb[TCA_CBQ_FOPT])
1810                         cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1811
1812                 if (cl->q->q.qlen)
1813                         cbq_activate_class(cl);
1814
1815                 sch_tree_unlock(sch);
1816
1817                 return 0;
1818         }
1819
1820         if (parentid == TC_H_ROOT)
1821                 return -EINVAL;
1822
1823         if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1824             tb[TCA_CBQ_LSSOPT] == NULL)
1825                 return -EINVAL;
1826
1827         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1828         if (rtab == NULL)
1829                 return -EINVAL;
1830
1831         if (classid) {
1832                 err = -EINVAL;
1833                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1834                         goto failure;
1835         } else {
1836                 int i;
1837                 classid = TC_H_MAKE(sch->handle,0x8000);
1838
1839                 for (i=0; i<0x8000; i++) {
1840                         if (++q->hgenerator >= 0x8000)
1841                                 q->hgenerator = 1;
1842                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1843                                 break;
1844                 }
1845                 err = -ENOSR;
1846                 if (i >= 0x8000)
1847                         goto failure;
1848                 classid = classid|q->hgenerator;
1849         }
1850
1851         parent = &q->link;
1852         if (parentid) {
1853                 parent = cbq_class_lookup(q, parentid);
1854                 err = -EINVAL;
1855                 if (parent == NULL)
1856                         goto failure;
1857         }
1858
1859         err = -ENOBUFS;
1860         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1861         if (cl == NULL)
1862                 goto failure;
1863
1864         if (tca[TCA_RATE]) {
1865                 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1866                                         qdisc_root_sleeping_lock(sch),
1867                                         tca[TCA_RATE]);
1868                 if (err) {
1869                         kfree(cl);
1870                         goto failure;
1871                 }
1872         }
1873
1874         cl->R_tab = rtab;
1875         rtab = NULL;
1876         cl->refcnt = 1;
1877         if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1878                                         &pfifo_qdisc_ops, classid)))
1879                 cl->q = &noop_qdisc;
1880         cl->common.classid = classid;
1881         cl->tparent = parent;
1882         cl->qdisc = sch;
1883         cl->allot = parent->allot;
1884         cl->quantum = cl->allot;
1885         cl->weight = cl->R_tab->rate.rate;
1886
1887         sch_tree_lock(sch);
1888         cbq_link_class(cl);
1889         cl->borrow = cl->tparent;
1890         if (cl->tparent != &q->link)
1891                 cl->share = cl->tparent;
1892         cbq_adjust_levels(parent);
1893         cl->minidle = -0x7FFFFFFF;
1894         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1895         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1896         if (cl->ewma_log==0)
1897                 cl->ewma_log = q->link.ewma_log;
1898         if (cl->maxidle==0)
1899                 cl->maxidle = q->link.maxidle;
1900         if (cl->avpkt==0)
1901                 cl->avpkt = q->link.avpkt;
1902         cl->overlimit = cbq_ovl_classic;
1903         if (tb[TCA_CBQ_OVL_STRATEGY])
1904                 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1905 #ifdef CONFIG_NET_CLS_ACT
1906         if (tb[TCA_CBQ_POLICE])
1907                 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1908 #endif
1909         if (tb[TCA_CBQ_FOPT])
1910                 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1911         sch_tree_unlock(sch);
1912
1913         qdisc_class_hash_grow(sch, &q->clhash);
1914
1915         *arg = (unsigned long)cl;
1916         return 0;
1917
1918 failure:
1919         qdisc_put_rtab(rtab);
1920         return err;
1921 }
1922
1923 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1924 {
1925         struct cbq_sched_data *q = qdisc_priv(sch);
1926         struct cbq_class *cl = (struct cbq_class*)arg;
1927         unsigned int qlen;
1928
1929         if (cl->filters || cl->children || cl == &q->link)
1930                 return -EBUSY;
1931
1932         sch_tree_lock(sch);
1933
1934         qlen = cl->q->q.qlen;
1935         qdisc_reset(cl->q);
1936         qdisc_tree_decrease_qlen(cl->q, qlen);
1937
1938         if (cl->next_alive)
1939                 cbq_deactivate_class(cl);
1940
1941         if (q->tx_borrowed == cl)
1942                 q->tx_borrowed = q->tx_class;
1943         if (q->tx_class == cl) {
1944                 q->tx_class = NULL;
1945                 q->tx_borrowed = NULL;
1946         }
1947 #ifdef CONFIG_NET_CLS_ACT
1948         if (q->rx_class == cl)
1949                 q->rx_class = NULL;
1950 #endif
1951
1952         cbq_unlink_class(cl);
1953         cbq_adjust_levels(cl->tparent);
1954         cl->defmap = 0;
1955         cbq_sync_defmap(cl);
1956
1957         cbq_rmprio(q, cl);
1958         sch_tree_unlock(sch);
1959
1960         BUG_ON(--cl->refcnt == 0);
1961         /*
1962          * This shouldn't happen: we "hold" one cops->get() when called
1963          * from tc_ctl_tclass; the destroy method is done from cops->put().
1964          */
1965
1966         return 0;
1967 }
1968
1969 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1970 {
1971         struct cbq_sched_data *q = qdisc_priv(sch);
1972         struct cbq_class *cl = (struct cbq_class *)arg;
1973
1974         if (cl == NULL)
1975                 cl = &q->link;
1976
1977         return &cl->filter_list;
1978 }
1979
1980 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1981                                      u32 classid)
1982 {
1983         struct cbq_sched_data *q = qdisc_priv(sch);
1984         struct cbq_class *p = (struct cbq_class*)parent;
1985         struct cbq_class *cl = cbq_class_lookup(q, classid);
1986
1987         if (cl) {
1988                 if (p && p->level <= cl->level)
1989                         return 0;
1990                 cl->filters++;
1991                 return (unsigned long)cl;
1992         }
1993         return 0;
1994 }
1995
1996 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1997 {
1998         struct cbq_class *cl = (struct cbq_class*)arg;
1999
2000         cl->filters--;
2001 }
2002
2003 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2004 {
2005         struct cbq_sched_data *q = qdisc_priv(sch);
2006         struct cbq_class *cl;
2007         struct hlist_node *n;
2008         unsigned h;
2009
2010         if (arg->stop)
2011                 return;
2012
2013         for (h = 0; h < q->clhash.hashsize; h++) {
2014                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2015                         if (arg->count < arg->skip) {
2016                                 arg->count++;
2017                                 continue;
2018                         }
2019                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2020                                 arg->stop = 1;
2021                                 return;
2022                         }
2023                         arg->count++;
2024                 }
2025         }
2026 }
2027
2028 static const struct Qdisc_class_ops cbq_class_ops = {
2029         .graft          =       cbq_graft,
2030         .leaf           =       cbq_leaf,
2031         .qlen_notify    =       cbq_qlen_notify,
2032         .get            =       cbq_get,
2033         .put            =       cbq_put,
2034         .change         =       cbq_change_class,
2035         .delete         =       cbq_delete,
2036         .walk           =       cbq_walk,
2037         .tcf_chain      =       cbq_find_tcf,
2038         .bind_tcf       =       cbq_bind_filter,
2039         .unbind_tcf     =       cbq_unbind_filter,
2040         .dump           =       cbq_dump_class,
2041         .dump_stats     =       cbq_dump_class_stats,
2042 };
2043
2044 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2045         .next           =       NULL,
2046         .cl_ops         =       &cbq_class_ops,
2047         .id             =       "cbq",
2048         .priv_size      =       sizeof(struct cbq_sched_data),
2049         .enqueue        =       cbq_enqueue,
2050         .dequeue        =       cbq_dequeue,
2051         .peek           =       qdisc_peek_dequeued,
2052         .drop           =       cbq_drop,
2053         .init           =       cbq_init,
2054         .reset          =       cbq_reset,
2055         .destroy        =       cbq_destroy,
2056         .change         =       NULL,
2057         .dump           =       cbq_dump,
2058         .dump_stats     =       cbq_dump_stats,
2059         .owner          =       THIS_MODULE,
2060 };
2061
2062 static int __init cbq_module_init(void)
2063 {
2064         return register_qdisc(&cbq_qdisc_ops);
2065 }
2066 static void __exit cbq_module_exit(void)
2067 {
2068         unregister_qdisc(&cbq_qdisc_ops);
2069 }
2070 module_init(cbq_module_init)
2071 module_exit(cbq_module_exit)
2072 MODULE_LICENSE("GPL");