/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, unpublished.

	-----------------------------------------------------------------------

	The algorithm skeleton was taken from the NS simulator cbq.cc.
	Anyone wanting to check this code against the LBL version
	should take into account that ONLY the skeleton was borrowed;
	the implementation is different. In particular:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real-time classes
	have small rates. Note that the statement in [3] is
	incomplete: delay may actually be estimated even if the class
	per-round allotment is less than MTU. Namely, if the per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1, then

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.


	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so we cannot estimate true class
	idle time. The workaround is to treat the next dequeue event
	as a sign that the previous packet has finished. This is wrong
	because of internal device queueing, but on a permanently loaded
	link it is true. Moreover, combined with the clock integrator,
	this scheme looks very close to an ideal solution.  */
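
/*	Purely illustrative instance of the delay bound above (the numbers
	are assumptions, not measurements): take MTU = 1500 B, B = 1 Gbit/s
	(125e6 B/s), k = 4 classes, a per-round allotment W*r_i = 500 B
	(below MTU, so the ceiling term [MTU/(W*r_i)] = 3) and W*r = 2000 B.
	Then

	delay_i <= (3*2000 + 2000 + 4*1500)/B = 14000 B / 125e6 B/s = 112 us

	i.e. a finite delay estimate exists even though the per-round
	allotment is only a third of the MTU.  */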

struct cbq_sched_data;


struct cbq_class
{
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};

struct cbq_sched_data
{
	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;		/* Cached timestamp */
	psched_time_t		now_rt;		/* Cached real time */
	unsigned		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};


#define L2T(cl,len)	qdisc_l2t((cl)->R_tab,len)
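
/* L2T ("length to time"): the transmission time of a len-byte packet at
 * this class's configured rate, looked up in the precomputed rate table
 * and expressed in psched ticks (a comment added for clarity).
 */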

static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct cbq_class, common);
}

#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;

	return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link-sharing rules (e.g. route-based) at the
   root of CBQ, so that they resolve to split nodes. Then packets
   are classified by logical priority, or a more specific classifier
   may be attached to the split node.
 */

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
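			/* fall through -- like TC_ACT_SHOT, no class is returned */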
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If the classifier selected a link-sharing class,
		 *	   apply the agency-specific classifier.
		 *	   Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}

/*
   A packet has just been enqueued to an empty class.
   cbq_activate_class adds it to the tail of the active class list
   of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}
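
/* Note on the list discipline (added for clarity): q->active[prio] always
 * points to the TAIL of a circular singly-linked list of backlogged
 * classes, so tail->next_alive is the head and tail insertion is O(1)
 * without a separate head pointer.
 */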

/*
   Unlink a class from the active chain.
   Note that the same unlinking is also done directly in cbq_dequeue*
   during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}
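
/* Refresh the top-level cutoff when a class receives a packet (a comment
 * added for clarity): walk the borrow chain and lower q->toplevel to the
 * level of the first class whose undertime has already passed, i.e. one
 * that is no longer regulated.
 */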
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
	}
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
		cbq_mark_toplevel(q, cl);
		cl->qstats.drops++;
	}
	return ret;
}

/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize the leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   The class goes to sleep, so it will have no chance
		   to work off its avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug in this place: apparently they
		   forgot to shift the term by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on the
	   real available rate, rather than the leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}

/* TC_CBQ_OVL_RCLASSIC: penalize, by offtime, classes in the hierarchy
   when they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(cl->qdisc)->state))
		return;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(
					hrtimer_get_expires(&q->delay_timer),
					expires)) > 0)
				hrtimer_set_expires(&q->delay_timer, expires);
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}
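
/* Note (added for clarity): q->delay_timer is shared by all delayed
 * classes, so the code above only moves its expiry earlier, never later,
 * before re-arming it; cbq_undelay() then reschedules the timer for the
 * next pending penalty event.
 */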

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}

static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	__netif_schedule(qdisc_root(sch));
	return HRTIMER_NORESTART;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
		int ret;

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		ret = qdisc_enqueue(skb, cl->q);
		if (ret == NET_XMIT_SUCCESS) {
			sch->q.qlen++;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif

/*
   This is a mission-critical procedure.

   We "regenerate" the top-level cutoff if the transmitting class
   has backlog and is not regulated. This is not part of the
   original CBQ description, but it looks more reasonable.
   It may well be wrong; the question needs further investigation.
*/

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed=borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}

static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		   (now - last) is the total time between packet right edges.
		   (last_pktlen/rate) is the "virtual" busy time, so that

			 idle = (now - last) - last_pktlen/rate
		 */

		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W,
		   hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate the expected time when this class
			   will be allowed to send again.
			   That occurs when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or, in scaled units:
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   That is not all.
			   To maintain the rate allocated to the class,
			   we add to undertime the virtual clock time
			   needed to complete the transmitted packet.
			   (len/phys_bandwidth has already elapsed by
			   the moment cbq_update is called.)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
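
/* Illustrative walk-through of the scaled EWMA above (the numbers are
 * assumptions, not from a real trace): with ewma_log = 2 we have W = 1/4,
 * and cl->avgidle stores true_avgidle/W. Suppose avgidle = 80 (so
 * true_avgidle = 20) and the new sample is idle = 40 ticks. Then
 *
 *	avgidle += 40 - (80 >> 2);	so avgidle = 80 + 40 - 20 = 100
 *
 * which corresponds to true_avgidle = 25 = (1 - 1/4)*20 + (1/4)*40,
 * exactly the update rule quoted in the comment above.
 */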

static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. As the code stands,
		   the overlimit action is generated for non-bounded
		   classes only if the link is completely congested.
		   Though this agrees with the ancestor-only paradigm,
		   it looks very dubious. In particular, it means that
		   this chunk of code will either never be called or
		   will strongly amplify burstiness. Dangerous, silly,
		   and yet no better solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}

static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment for
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   This can occur even if cl->q->q.qlen != 0,
			   e.g. if cl->q is a tbf qdisc.
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from the active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}

static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}
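
/* Note on the scan above (added for clarity): ffz(~mask) is the index of
 * the lowest set bit, so the active priority bands are served strictly in
 * order of increasing band number, i.e. highest priority first.
 */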

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate the EOS (end of service)
		   time by adding the expected packet transmission time.
		   If real time is greater, we warp the artificial clock,
		   so that:

		   cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			qdisc_bstats_update(sch, skb);
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   This is possible if:

		   1. The scheduler is empty.
		   2. The top-level cutoff inhibited borrowing.
		   3. The root class is overlimit.

		   Reset conditions 2 and 3 and retry.

		   Note that NS and cbq-2.0 are buggy: picking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the top-level algorithm.

		   Our version is better but slower, because it requires
		   two passes; that is unavoidable with top-level sharing.
		*/

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in the scheduler, or nobody wants to give them
	   to us :-( Sigh... start the watchdog timer in the latter case. */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}

/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflow!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
			}
		}
	}
}
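
/* Illustration of the overflow warned about above (assumed values, not
 * from a real configuration): cl->weight, cl->allot and q->nclasses[prio]
 * are multiplied before the division, so with weight = 100000, allot =
 * 60000 and 8 classes the intermediate product is 4.8e13, which no longer
 * fits in a 32-bit long and wraps before q->quanta[prio] divides it down.
 */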

static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned h;
	int i;

	if (split == NULL)
		return;

	for (i=0; i<=TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i=0; i<=TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h = 0; h < q->clhash.hashsize; h++) {
			struct hlist_node *n;
			struct cbq_class *c;

			hlist_for_each_entry(c, n, &q->clhash.hash[h],
					     common.hnode) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}
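
/* In other words (added for clarity): after a class's defmap changes,
 * each priority slot of the split node is repointed to the lowest-level
 * class that still claims that priority bit, so split->defaults[] always
 * mirrors the per-class defmap settings.
 */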

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->common.classid;
	}

	if (split == NULL || split->common.classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->common.classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}

static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	qdisc_class_hash_remove(&q->clhash, &this->common);

	if (this->tparent) {
		clp=&this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		WARN_ON(this->sibling != this);
	}
}

static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	qdisc_class_hash_insert(&q->clhash, &this->common);

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}

static unsigned int cbq_drop(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}

static void
cbq_reset(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	int prio;
	unsigned h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}


static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}

static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
};

static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct tc_ratespec *r;
	int err;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
		return -EINVAL;

	r = nla_data(tb[TCA_CBQ_RATE]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		goto put_rtab;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.common.classid = sch->handle;
	q->link.qdisc = sch;
	q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
				      sch->handle);
	if (!q->link.q)
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(qdisc_dev(sch));
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;

	qdisc_watchdog_init(&q->watchdog, sch);
	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT])
		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

	cbq_addprio(q, &q->link);
	return 0;

put_rtab:
	qdisc_put_rtab(q->link.R_tab);
	return err;
}
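
/* Typical userspace setup that exercises this init path (an illustrative
 * tc(8) invocation, not taken from this file):
 *
 *   tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit avpkt 1000
 *
 * tc computes the rate table sent as TCA_CBQ_RTAB and the tc_ratespec
 * sent as TCA_CBQ_RATE; both must be present or cbq_init returns -EINVAL.
 */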

static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->common.classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

#ifdef CONFIG_NET_CLS_ACT
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif
1526
1527static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1528{
1529        if (cbq_dump_lss(skb, cl) < 0 ||
1530            cbq_dump_rate(skb, cl) < 0 ||
1531            cbq_dump_wrr(skb, cl) < 0 ||
1532            cbq_dump_ovl(skb, cl) < 0 ||
1533#ifdef CONFIG_NET_CLS_ACT
1534            cbq_dump_police(skb, cl) < 0 ||
1535#endif
1536            cbq_dump_fopt(skb, cl) < 0)
1537                return -1;
1538        return 0;
1539}
1540
1541static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1542{
1543        struct cbq_sched_data *q = qdisc_priv(sch);
1544        struct nlattr *nest;
1545
1546        nest = nla_nest_start(skb, TCA_OPTIONS);
1547        if (nest == NULL)
1548                goto nla_put_failure;
1549        if (cbq_dump_attr(skb, &q->link) < 0)
1550                goto nla_put_failure;
1551        nla_nest_end(skb, nest);
1552        return skb->len;
1553
1554nla_put_failure:
1555        nla_nest_cancel(skb, nest);
1556        return -1;
1557}
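
/*
 * The TCA_OPTIONS nest above is the standard netlink idiom:
 * nla_nest_start() reserves an attribute header, the cbq_dump_*()
 * helpers append flat attributes inside it, nla_nest_end() patches the
 * header with the final length, and nla_nest_cancel() rolls everything
 * back on error.  A minimal dumper of the same shape, for illustration
 * only (not part of this file; the attribute type is arbitrary):
 */
#if 0
static int example_dump(struct sk_buff *skb, u32 value)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	NLA_PUT(skb, TCA_CBQ_RTAB, sizeof(value), &value);
	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
#endif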
1558
1559static int
1560cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1561{
1562        struct cbq_sched_data *q = qdisc_priv(sch);
1563
1564        q->link.xstats.avgidle = q->link.avgidle;
1565        return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1566}
1567
1568static int
1569cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1570               struct sk_buff *skb, struct tcmsg *tcm)
1571{
1572        struct cbq_class *cl = (struct cbq_class *)arg;
1573        struct nlattr *nest;
1574
1575        if (cl->tparent)
1576                tcm->tcm_parent = cl->tparent->common.classid;
1577        else
1578                tcm->tcm_parent = TC_H_ROOT;
1579        tcm->tcm_handle = cl->common.classid;
1580        tcm->tcm_info = cl->q->handle;
1581
1582        nest = nla_nest_start(skb, TCA_OPTIONS);
1583        if (nest == NULL)
1584                goto nla_put_failure;
1585        if (cbq_dump_attr(skb, cl) < 0)
1586                goto nla_put_failure;
1587        nla_nest_end(skb, nest);
1588        return skb->len;
1589
1590nla_put_failure:
1591        nla_nest_cancel(skb, nest);
1592        return -1;
1593}
1594
1595static int
1596cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1597        struct gnet_dump *d)
1598{
1599        struct cbq_sched_data *q = qdisc_priv(sch);
1600        struct cbq_class *cl = (struct cbq_class *)arg;
1601
1602        cl->qstats.qlen = cl->q->q.qlen;
1603        cl->xstats.avgidle = cl->avgidle;
1604        cl->xstats.undertime = 0;
1605
1606        if (cl->undertime != PSCHED_PASTPERFECT)
1607                cl->xstats.undertime = cl->undertime - q->now;
1608
1609        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1610            gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1611            gnet_stats_copy_queue(d, &cl->qstats) < 0)
1612                return -1;
1613
1614        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1615}
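
/*
 * The three gnet_stats_copy_*() calls above fill the generic TCA_STATS2
 * blocks (basic byte/packet counters, rate estimator, queue stats);
 * gnet_stats_copy_app() then appends the CBQ-specific counters, which
 * userspace tc decodes as struct tc_cbq_xstats.
 */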
1616
1617static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1618                     struct Qdisc **old)
1619{
1620        struct cbq_class *cl = (struct cbq_class *)arg;
1621
1622        if (new == NULL) {
1623                new = qdisc_create_dflt(sch->dev_queue,
1624                                        &pfifo_qdisc_ops, cl->common.classid);
1625                if (new == NULL)
1626                        return -ENOBUFS;
1627        } else {
1628#ifdef CONFIG_NET_CLS_ACT
1629                if (cl->police == TC_POLICE_RECLASSIFY)
1630                        new->reshape_fail = cbq_reshape_fail;
1631#endif
1632        }
1633        sch_tree_lock(sch);
1634        *old = cl->q;
1635        cl->q = new;
1636        qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1637        qdisc_reset(*old);
1638        sch_tree_unlock(sch);
1639
1640        return 0;
1641}
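
/*
 * ->graft() swaps a class's leaf qdisc under the qdisc tree lock and
 * hands the displaced qdisc back through *old so the caller can dispose
 * of it outside the lock.  Passing new == NULL reverts the class to the
 * default pfifo, matching what cbq_change_class() attaches at class
 * creation time.
 */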
1642
1643static struct Qdisc *
1644cbq_leaf(struct Qdisc *sch, unsigned long arg)
1645{
1646        struct cbq_class *cl = (struct cbq_class *)arg;
1647
1648        return cl->q;
1649}
1650
1651static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1652{
1653        struct cbq_class *cl = (struct cbq_class *)arg;
1654
1655        if (cl->q->q.qlen == 0)
1656                cbq_deactivate_class(cl);
1657}
1658
1659static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1660{
1661        struct cbq_sched_data *q = qdisc_priv(sch);
1662        struct cbq_class *cl = cbq_class_lookup(q, classid);
1663
1664        if (cl) {
1665                cl->refcnt++;
1666                return (unsigned long)cl;
1667        }
1668        return 0;
1669}
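
/*
 * ->get()/->put() implement the class reference protocol used by the
 * rtnetlink handlers: tc_ctl_tclass() grabs a reference before touching
 * a class and drops it when done, so a deleted class is only freed in
 * cbq_put() below, once the last reference goes away.
 */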
1670
1671static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1672{
1673        struct cbq_sched_data *q = qdisc_priv(sch);
1674
1675        WARN_ON(cl->filters);
1676
1677        tcf_destroy_chain(&cl->filter_list);
1678        qdisc_destroy(cl->q);
1679        qdisc_put_rtab(cl->R_tab);
1680        gen_kill_estimator(&cl->bstats, &cl->rate_est);
1681        if (cl != &q->link)
1682                kfree(cl);
1683}
1684
1685static void
1686cbq_destroy(struct Qdisc* sch)
1687{
1688        struct cbq_sched_data *q = qdisc_priv(sch);
1689        struct hlist_node *n, *next;
1690        struct cbq_class *cl;
1691        unsigned int h;
1692
1693#ifdef CONFIG_NET_CLS_ACT
1694        q->rx_class = NULL;
1695#endif
1696        /*
1697         * Filters must be destroyed first because the classes are not
1698         * destroyed from root to leaves, so filters may still be bound
1699         * to classes that have already been destroyed. --TGR '04
1700         */
1701        for (h = 0; h < q->clhash.hashsize; h++) {
1702                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1703                        tcf_destroy_chain(&cl->filter_list);
1704        }
1705        for (h = 0; h < q->clhash.hashsize; h++) {
1706                hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1707                                          common.hnode)
1708                        cbq_destroy_class(sch, cl);
1709        }
1710        qdisc_class_hash_destroy(&q->clhash);
1711}
1712
1713static void cbq_put(struct Qdisc *sch, unsigned long arg)
1714{
1715        struct cbq_class *cl = (struct cbq_class *)arg;
1716
1717        if (--cl->refcnt == 0) {
1718#ifdef CONFIG_NET_CLS_ACT
1719                spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1720                struct cbq_sched_data *q = qdisc_priv(sch);
1721
1722                spin_lock_bh(root_lock);
1723                if (q->rx_class == cl)
1724                        q->rx_class = NULL;
1725                spin_unlock_bh(root_lock);
1726#endif
1727
1728                cbq_destroy_class(sch, cl);
1729        }
1730}
1731
1732static int
1733cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1734                 unsigned long *arg)
1735{
1736        int err;
1737        struct cbq_sched_data *q = qdisc_priv(sch);
1738        struct cbq_class *cl = (struct cbq_class *)*arg;
1739        struct nlattr *opt = tca[TCA_OPTIONS];
1740        struct nlattr *tb[TCA_CBQ_MAX + 1];
1741        struct cbq_class *parent;
1742        struct qdisc_rate_table *rtab = NULL;
1743
1744        if (opt == NULL)
1745                return -EINVAL;
1746
1747        err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1748        if (err < 0)
1749                return err;
1750
1751        if (cl) {
1752                /* Check parent */
1753                if (parentid) {
1754                        if (cl->tparent &&
1755                            cl->tparent->common.classid != parentid)
1756                                return -EINVAL;
1757                        if (!cl->tparent && parentid != TC_H_ROOT)
1758                                return -EINVAL;
1759                }
1760
1761                if (tb[TCA_CBQ_RATE]) {
1762                        rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1763                                              tb[TCA_CBQ_RTAB]);
1764                        if (rtab == NULL)
1765                                return -EINVAL;
1766                }
1767
1768                if (tca[TCA_RATE]) {
1769                        err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1770                                                    qdisc_root_sleeping_lock(sch),
1771                                                    tca[TCA_RATE]);
1772                        if (err) {
1773                                if (rtab)
1774                                        qdisc_put_rtab(rtab);
1775                                return err;
1776                        }
1777                }
1778
1779                /* Change class parameters */
1780                sch_tree_lock(sch);
1781
1782                if (cl->next_alive != NULL)
1783                        cbq_deactivate_class(cl);
1784
1785                if (rtab) {
1786                        qdisc_put_rtab(cl->R_tab);
1787                        cl->R_tab = rtab;
1788                }
1789
1790                if (tb[TCA_CBQ_LSSOPT])
1791                        cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1792
1793                if (tb[TCA_CBQ_WRROPT]) {
1794                        cbq_rmprio(q, cl);
1795                        cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1796                }
1797
1798                if (tb[TCA_CBQ_OVL_STRATEGY])
1799                        cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1800
1801#ifdef CONFIG_NET_CLS_ACT
1802                if (tb[TCA_CBQ_POLICE])
1803                        cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1804#endif
1805
1806                if (tb[TCA_CBQ_FOPT])
1807                        cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1808
1809                if (cl->q->q.qlen)
1810                        cbq_activate_class(cl);
1811
1812                sch_tree_unlock(sch);
1813
1814                return 0;
1815        }
1816
1817        if (parentid == TC_H_ROOT)
1818                return -EINVAL;
1819
1820        if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1821            tb[TCA_CBQ_LSSOPT] == NULL)
1822                return -EINVAL;
1823
1824        rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1825        if (rtab == NULL)
1826                return -EINVAL;
1827
1828        if (classid) {
1829                err = -EINVAL;
1830                if (TC_H_MAJ(classid ^ sch->handle) || cbq_class_lookup(q, classid))
1831                        goto failure;
1832        } else {
1833                int i;
1834                classid = TC_H_MAKE(sch->handle, 0x8000);
1835
1836                for (i = 0; i < 0x8000; i++) {
1837                        if (++q->hgenerator >= 0x8000)
1838                                q->hgenerator = 1;
1839                        if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1840                                break;
1841                }
1842                err = -ENOSR;
1843                if (i >= 0x8000)
1844                        goto failure;
1845                classid = classid | q->hgenerator;
1846        }
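        /*
         * Auto-generated IDs: hgenerator cycles through 1..0x7FFF, so the
         * minor numbers produced above lie in 0x8001..0xFFFF; each
         * candidate is checked against the class hash, and after 0x8000
         * fruitless probes the request fails with -ENOSR.
         */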
1847
1848        parent = &q->link;
1849        if (parentid) {
1850                parent = cbq_class_lookup(q, parentid);
1851                err = -EINVAL;
1852                if (parent == NULL)
1853                        goto failure;
1854        }
1855
1856        err = -ENOBUFS;
1857        cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1858        if (cl == NULL)
1859                goto failure;
1860
1861        if (tca[TCA_RATE]) {
1862                err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1863                                        qdisc_root_sleeping_lock(sch),
1864                                        tca[TCA_RATE]);
1865                if (err) {
1866                        kfree(cl);
1867                        goto failure;
1868                }
1869        }
1870
1871        cl->R_tab = rtab;
1872        rtab = NULL;
1873        cl->refcnt = 1;
1874        cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1875        if (!cl->q)
1876                cl->q = &noop_qdisc;
1877        cl->common.classid = classid;
1878        cl->tparent = parent;
1879        cl->qdisc = sch;
1880        cl->allot = parent->allot;
1881        cl->quantum = cl->allot;
1882        cl->weight = cl->R_tab->rate.rate;
1883
1884        sch_tree_lock(sch);
1885        cbq_link_class(cl);
1886        cl->borrow = cl->tparent;
1887        if (cl->tparent != &q->link)
1888                cl->share = cl->tparent;
1889        cbq_adjust_levels(parent);
1890        cl->minidle = -0x7FFFFFFF;
1891        cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1892        cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1893        if (cl->ewma_log == 0)
1894                cl->ewma_log = q->link.ewma_log;
1895        if (cl->maxidle == 0)
1896                cl->maxidle = q->link.maxidle;
1897        if (cl->avpkt == 0)
1898                cl->avpkt = q->link.avpkt;
1899        cl->overlimit = cbq_ovl_classic;
1900        if (tb[TCA_CBQ_OVL_STRATEGY])
1901                cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1902#ifdef CONFIG_NET_CLS_ACT
1903        if (tb[TCA_CBQ_POLICE])
1904                cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1905#endif
1906        if (tb[TCA_CBQ_FOPT])
1907                cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1908        sch_tree_unlock(sch);
1909
1910        qdisc_class_hash_grow(sch, &q->clhash);
1911
1912        *arg = (unsigned long)cl;
1913        return 0;
1914
1915failure:
1916        qdisc_put_rtab(rtab);
1917        return err;
1918}
1919
1920static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1921{
1922        struct cbq_sched_data *q = qdisc_priv(sch);
1923        struct cbq_class *cl = (struct cbq_class *)arg;
1924        unsigned int qlen;
1925
1926        if (cl->filters || cl->children || cl == &q->link)
1927                return -EBUSY;
1928
1929        sch_tree_lock(sch);
1930
1931        qlen = cl->q->q.qlen;
1932        qdisc_reset(cl->q);
1933        qdisc_tree_decrease_qlen(cl->q, qlen);
1934
1935        if (cl->next_alive)
1936                cbq_deactivate_class(cl);
1937
1938        if (q->tx_borrowed == cl)
1939                q->tx_borrowed = q->tx_class;
1940        if (q->tx_class == cl) {
1941                q->tx_class = NULL;
1942                q->tx_borrowed = NULL;
1943        }
1944#ifdef CONFIG_NET_CLS_ACT
1945        if (q->rx_class == cl)
1946                q->rx_class = NULL;
1947#endif
1948
1949        cbq_unlink_class(cl);
1950        cbq_adjust_levels(cl->tparent);
1951        cl->defmap = 0;
1952        cbq_sync_defmap(cl);
1953
1954        cbq_rmprio(q, cl);
1955        sch_tree_unlock(sch);
1956
1957        /*
1958         * This cannot drop the refcount to zero: tc_ctl_tclass() still holds
1959         * the reference from cops->get(); the destroy happens in cops->put().
1960         */
1961        BUG_ON(--cl->refcnt == 0);
1962
1963        return 0;
1964}
1965
1966static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1967{
1968        struct cbq_sched_data *q = qdisc_priv(sch);
1969        struct cbq_class *cl = (struct cbq_class *)arg;
1970
1971        if (cl == NULL)
1972                cl = &q->link;
1973
1974        return &cl->filter_list;
1975}
1976
1977static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1978                                     u32 classid)
1979{
1980        struct cbq_sched_data *q = qdisc_priv(sch);
1981        struct cbq_class *p = (struct cbq_class *)parent;
1982        struct cbq_class *cl = cbq_class_lookup(q, classid);
1983
1984        if (cl) {
1985                if (p && p->level <= cl->level)
1986                        return 0;
1987                cl->filters++;
1988                return (unsigned long)cl;
1989        }
1990        return 0;
1991}
1992
1993static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1994{
1995        struct cbq_class *cl = (struct cbq_class *)arg;
1996
1997        cl->filters--;
1998}
1999
2000static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2001{
2002        struct cbq_sched_data *q = qdisc_priv(sch);
2003        struct cbq_class *cl;
2004        struct hlist_node *n;
2005        unsigned int h;
2006
2007        if (arg->stop)
2008                return;
2009
2010        for (h = 0; h < q->clhash.hashsize; h++) {
2011                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2012                        if (arg->count < arg->skip) {
2013                                arg->count++;
2014                                continue;
2015                        }
2016                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2017                                arg->stop = 1;
2018                                return;
2019                        }
2020                        arg->count++;
2021                }
2022        }
2023}
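
/*
 * cbq_walk() drives the generic qdisc_walker protocol: arg->fn is called
 * once per class, arg->skip/arg->count implement dump resumption, and a
 * negative return from the callback stops the walk.  An illustrative
 * callback (hypothetical, not part of this file):
 */
#if 0
struct count_walker {
	struct qdisc_walker	w;
	unsigned int		classes;
};

static int count_one_class(struct Qdisc *sch, unsigned long cl,
			   struct qdisc_walker *w)
{
	struct count_walker *cw = container_of(w, struct count_walker, w);

	cw->classes++;		/* cl is the (struct cbq_class *) cookie */
	return 0;		/* < 0 would make cbq_walk() set arg->stop */
}
#endif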
2024
2025static const struct Qdisc_class_ops cbq_class_ops = {
2026        .graft          =       cbq_graft,
2027        .leaf           =       cbq_leaf,
2028        .qlen_notify    =       cbq_qlen_notify,
2029        .get            =       cbq_get,
2030        .put            =       cbq_put,
2031        .change         =       cbq_change_class,
2032        .delete         =       cbq_delete,
2033        .walk           =       cbq_walk,
2034        .tcf_chain      =       cbq_find_tcf,
2035        .bind_tcf       =       cbq_bind_filter,
2036        .unbind_tcf     =       cbq_unbind_filter,
2037        .dump           =       cbq_dump_class,
2038        .dump_stats     =       cbq_dump_class_stats,
2039};
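
/*
 * These hooks back the "tc class" and "tc filter" plumbing:
 * change/delete serve RTM_NEWTCLASS/RTM_DELTCLASS, walk/dump/dump_stats
 * fill RTM_GETTCLASS replies, and tcf_chain/bind_tcf/unbind_tcf let
 * classifiers attach to individual classes.
 */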
2040
2041static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2042        .next           =       NULL,
2043        .cl_ops         =       &cbq_class_ops,
2044        .id             =       "cbq",
2045        .priv_size      =       sizeof(struct cbq_sched_data),
2046        .enqueue        =       cbq_enqueue,
2047        .dequeue        =       cbq_dequeue,
2048        .peek           =       qdisc_peek_dequeued,
2049        .drop           =       cbq_drop,
2050        .init           =       cbq_init,
2051        .reset          =       cbq_reset,
2052        .destroy        =       cbq_destroy,
2053        .change         =       NULL,
2054        .dump           =       cbq_dump,
2055        .dump_stats     =       cbq_dump_stats,
2056        .owner          =       THIS_MODULE,
2057};
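
/*
 * Once registered, the discipline is driven entirely from userspace.
 * An illustrative tc session (all parameter values are examples only):
 *
 *	tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit \
 *		avpkt 1000 cell 8
 *	tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 100Mbit \
 *		rate 10Mbit allot 1514 prio 5 avpkt 1000 maxburst 20 bounded
 */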
2058
2059static int __init cbq_module_init(void)
2060{
2061        return register_qdisc(&cbq_qdisc_ops);
2062}
2063static void __exit cbq_module_exit(void)
2064{
2065        unregister_qdisc(&cbq_qdisc_ops);
2066}
2067module_init(cbq_module_init)
2068module_exit(cbq_module_exit)
2069MODULE_LICENSE("GPL");
2070