linux/net/sched/sch_cbq.c
/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	The algorithm skeleton was taken from the NS simulator cbq.cc.
	Anyone checking this code against the LBL version should take into
	account that ONLY the skeleton was borrowed; the implementation is
	different. In particular:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real-time classes
	have small rates. Note that the statement of [3] is incomplete:
	delay may actually be estimated even if the class per-round
	allotment is less than MTU. Namely, if the per-round allotment
	is W*r_i, and r_1+...+r_k = r < 1

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.


	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so we cannot estimate true class
	idle time. The workaround is to treat the next dequeue event
	as a sign that the previous packet has finished. This is wrong
	because of internal device queueing, but on a permanently loaded
	link it is true. Moreover, combined with the clock integrator,
	this scheme looks very close to an ideal solution.  */
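
/* A typical configuration with the tc utility might look like the
 * following (an illustrative sketch only; the device name and the
 * rates are made up):
 *
 *	tc qdisc add dev eth0 root handle 1: cbq \
 *		bandwidth 100Mbit avpkt 1000 cell 8
 *	tc class add dev eth0 parent 1: classid 1:1 cbq \
 *		bandwidth 100Mbit rate 6Mbit weight 0.6Mbit \
 *		prio 5 allot 1514 cell 8 avpkt 1000 bounded
 */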

struct cbq_sched_data;


struct cbq_class {
	struct Qdisc_class_common common;
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_ACT
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	psched_tdiff_t		penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */


/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	psched_time_t		penalized;
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
};

struct cbq_sched_data {
	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO + 1];
	unsigned int		quanta[TC_CBQ_MAXPRIO + 1];

	struct cbq_class	link;

	unsigned int		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_ACT
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;		/* Cached timestamp */
	psched_time_t		now_rt;		/* Cached real time */
	unsigned int		pmask;

	struct hrtimer		delay_timer;
	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
						   started when CBQ has
						   backlog, but cannot
						   transmit just now */
	psched_tdiff_t		wd_expires;
	int			toplevel;
	u32			hgenerator;
};

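/* L2T ("length to time"): transmission time of a packet of the given
 * length at the class rate, looked up in the precomputed rate table.
 */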
#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)

static inline struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, classid);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct cbq_class, common);
}

#ifdef CONFIG_NET_CLS_ACT

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl;

	for (cl = this->tparent; cl; cl = cl->tparent) {
		struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];

		if (new != NULL && new != this)
			return new;
	}
	return NULL;
}

#endif

/* Classify packet. The procedure is pretty complicated, but
 * it allows us to combine link sharing and priority scheduling
 * transparently.
 *
 * Namely, you can put link-sharing rules (e.g. route based) at the root
 * of CBQ, so that it resolves to split nodes. Then packets are classified
 * by logical priority, or a more specific classifier may be attached
 * to the split node.
 */
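
/* For example (an illustrative sketch): a route-based filter at the root
 * may select a split node; if no filter is attached to that node, the
 * packet falls back to the defmap entry for its TC_PRIO_* band, so a
 * best-effort packet ends up in defaults[TC_PRIO_BESTEFFORT].
 */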

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	for (;;) {
		int result = 0;
		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list ||
		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
			goto fallback;

		cl = (void *)res.class;
		if (!cl) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return NULL;
		case TC_ACT_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If the classifier selected a link-sharing class,
		 *	   apply the agency-specific classifier.
		 *	   Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}

/*
 * A packet has just been enqueued on an empty class.
 * cbq_activate_class adds it to the tail of the active class list
 * of its priority band.
 */

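/* Note: q->active[prio] points at the *tail* of the circular list of
 * backlogged classes in a band, so tail->next_alive is the head and
 * appending at the tail is O(1).
 */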
static inline void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}

/*
 * Unlink class from the active chain.
 * Note that the same unlinking is also done inline in cbq_dequeue_prio()
 * during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}

static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
		psched_time_t now;
		psched_tdiff_t incr;

		now = psched_get_time();
		incr = now - q->now_rt;
		now = q->now + incr;

		do {
			if (cl->undertime < now) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
	}
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int uninitialized_var(ret);
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_ACT
	q->rx_class = cl;
#endif
	if (cl == NULL) {
		if (ret & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return ret;
	}

#ifdef CONFIG_NET_CLS_ACT
	cl->q->__parent = sch;
#endif
	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		cbq_mark_toplevel(q, cl);
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return ret;
	}

	if (net_xmit_drop_count(ret)) {
		sch->qstats.drops++;
		cbq_mark_toplevel(q, cl);
		cl->qstats.drops++;
	}
	return ret;
}

/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		 * The class goes to sleep, so that it will have no
		 * chance to work off avgidle. Let's forgive it 8)
		 *
		 * BTW cbq-2.0 has a bug here; apparently they forgot
		 * to shift by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		cl->undertime = q->now + delay;

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on the
	 * real available rate, rather than the leaf rate,
	 * which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = b->undertime - q->now;
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}


/* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
 * when they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = cl->undertime - q->now;

	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(cl->qdisc)->state))
		return;

	if (!cl->delayed) {
		psched_time_t sched = q->now;
		ktime_t expires;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		cl->undertime = q->now + delay;

		if (delay > 0) {
			sched += delay + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);

			expires = ktime_set(0, 0);
			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
			if (hrtimer_try_to_cancel(&q->delay_timer) &&
			    ktime_to_ns(ktime_sub(
					hrtimer_get_expires(&q->delay_timer),
					expires)) > 0)
				hrtimer_set_expires(&q->delay_timer, expires);
			hrtimer_restart(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = q->now + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}

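/* Scan one priority band: return to normal priority any class whose
 * penalty interval has expired, and report the delay until the next
 * still-penalized class in the band becomes eligible (0 if none).
 */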
static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
				       psched_time_t now)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	psched_time_t sched = now;

	if (cl_prev == NULL)
		return 0;

	do {
		cl = cl_prev->next_alive;
		if (now - cl->penalized > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if (sched - cl->penalized > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return sched - now;
}

static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
{
	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
						delay_timer);
	struct Qdisc *sch = q->watchdog.qdisc;
	psched_time_t now;
	psched_tdiff_t delay = 0;
	unsigned int pmask;

	now = psched_get_time();

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		psched_tdiff_t tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio, now);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		ktime_t time;

		time = ktime_set(0, 0);
		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
	}

	qdisc_unthrottled(sch);
	__netif_schedule(qdisc_root(sch));
	return HRTIMER_NORESTART;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
		int ret;

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		ret = qdisc_enqueue(skb, cl->q);
		if (ret == NET_XMIT_SUCCESS) {
			sch->q.qlen++;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return 0;
	}

	sch->qstats.drops++;
	return -1;
}
#endif

/*
 * This is a mission-critical procedure.
 *
 * We "regenerate" the toplevel cutoff if the transmitting class
 * has backlog and is not regulated. It is not part of the original
 * CBQ description, but looks more reasonable.
 * Probably, it is wrong. This question needs further investigation.
 */

static inline void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (borrowed->undertime == PSCHED_PASTPERFECT) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed = borrowed->borrow) != NULL);
		}
#if 0
	/* This is not necessary now. Uncommenting it
	   would save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}

static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->bstats.packets++;
		cl->bstats.bytes += len;

		/*
		 * (now - last) is the total time between packet right edges.
		 * (last_pktlen/rate) is the "virtual" busy time, so that
		 *
		 *	idle = (now - last) - last_pktlen/rate
		 */

		idle = q->now - cl->last;
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
		 * cl->avgidle == true_avgidle/W,
		 * hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate the expected time when this class
			 * will be allowed to send.
			 * That occurs when:
			 * (1-W)*true_avgidle + W*delay = 0, i.e.
			 * idle = (1/W - 1)*(-true_avgidle)
			 * or
			 * idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			 * That is not all.
			 * To maintain the rate allocated to the class,
			 * we also add to undertime the virtual clock time
			 * necessary to complete the transmitted packet.
			 * (len/phys_bandwidth has already elapsed by the
			 * moment cbq_update is called.)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			cl->undertime = q->now + idle;
		} else {
			/* Underlimit */

			cl->undertime = PSCHED_PASTPERFECT;
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
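
/* Numeric sketch of the EWMA above, assuming ewma_log = 5 (W = 1/32):
 * each packet moves the scaled avgidle by idle - avgidle/32, i.e. about
 * 3% of the distance towards its steady-state value of 32*idle. Once
 * avgidle goes negative the class is overlimit, and undertime is pushed
 * (1-W)*(-avgidle) ticks into the future, adjusted by the difference
 * between the class and link service times for the packet.
 */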

static inline struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. Currently the overlimit
		 * action is generated for unbounded classes only if the
		 * link is completely congested.
		 * Though this agrees with the ancestor-only paradigm,
		 * it looks very stupid. In particular, it means that this
		 * chunk of code will either never be called or will
		 * strongly amplify burstiness. Dangerous and silly,
		 * but no other solution exists.
		 */
		cl = cl->borrow;
		if (!cl) {
			this_cl->qstats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);

	cl->delayed = 0;
	return cl;
}

static inline struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment for
				 * this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			 * It can occur even if cl->q->q.qlen != 0,
			 * e.g. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= qdisc_pkt_len(skb);
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += qdisc_pkt_len(skb);
				cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
			}
			q->tx_len = qdisc_pkt_len(skb);

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				 * Unlink it from the active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}

static inline struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned int activemask;

	activemask = q->activemask & 0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	now = psched_get_time();
	incr = now - q->now_rt;

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate the EOS time
		 * by adding the expected packet transmission time.
		 * If real time is greater, we warp the artificial clock,
		 * so that:
		 *
		 * cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		q->now += incr2;
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	q->now += incr;
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			qdisc_bstats_update(sch, skb);
			sch->q.qlen--;
			qdisc_unthrottled(sch);
			return skb;
		}

		/* All the classes are overlimit.
		 *
		 * This is possible if:
		 *
		 * 1. The scheduler is empty.
		 * 2. The toplevel cutoff inhibited borrowing.
		 * 3. The root class is overlimit.
		 *
		 * Reset the 2nd and 3rd conditions and retry.
		 *
		 * Note that NS and cbq-2.0 are buggy here: picking
		 * an arbitrary class is appropriate for ancestor-only
		 * sharing, but not for the toplevel algorithm.
		 *
		 * Our version is better, but slower, because it requires
		 * two passes; that is unavoidable with top-level sharing.
		 */

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    q->link.undertime == PSCHED_PASTPERFECT)
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		q->link.undertime = PSCHED_PASTPERFECT;
	}

	/* No packets in the scheduler, or nobody wants to give them to us :-(
	 * Sigh... start the watchdog timer in the latter case.
	 */

	if (sch->q.qlen) {
		sch->qstats.overlimits++;
		if (q->wd_expires)
			qdisc_watchdog_schedule(&q->watchdog,
						now + q->wd_expires);
	}
	return NULL;
}

/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		cl = this->children;
		if (cl) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level + 1;
	} while ((this = this->tparent) != NULL);
}

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct hlist_node *n;
	unsigned int h;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			/* Beware! This expression suffers from
			 * arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 ||
			    cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
				pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
					   cl->common.classid, cl->quantum);
				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
			}
		}
	}
}

static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned int h;
	int i;

	if (split == NULL)
		return;

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
			split->defaults[i] = NULL;
	}

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h = 0; h < q->clhash.hashsize; h++) {
			struct hlist_node *n;
			struct cbq_class *c;

			hlist_for_each_entry(c, n, &q->clhash.hash[h],
					     common.hnode) {
				if (c->split == split && c->level < level &&
				    c->defmap & (1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		split = cl->split;
		if (!split)
			return;
		splitid = split->common.classid;
	}

	if (split == NULL || split->common.classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->common.classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def & mask;
	} else
		cl->defmap = (cl->defmap & ~mask) | (def & mask);

	cbq_sync_defmap(cl);
}

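/* The children of a node form a circular ring linked through ->sibling;
 * a class with no siblings points at itself. The helpers below insert
 * a class into and remove it from its parent's ring.
 */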
static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	qdisc_class_hash_remove(&q->clhash, &this->common);

	if (this->tparent) {
		clp = &this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		WARN_ON(this->sibling != this);
	}
}

static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	qdisc_class_hash_insert(&q->clhash, &this->common);

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}

static unsigned int cbq_drop(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		cl_head = q->active[prio];
		if (!cl_head)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				if (!cl->q->q.qlen)
					cbq_deactivate_class(cl);
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}

static void
cbq_reset(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	struct hlist_node *n;
	int prio;
	unsigned int h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	qdisc_watchdog_cancel(&q->watchdog);
	hrtimer_cancel(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < q->clhash.hashsize; h++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			cl->undertime = PSCHED_PASTPERFECT;
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}


static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change & TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change & TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change & TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change & TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change & TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority - 1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO - 1;
	}

	cbq_addprio(q, cl);
	return 0;
}

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2 - 1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2 - 1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = ovl->penalty;
	return 0;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}

static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
};

static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CBQ_MAX + 1];
	struct tc_ratespec *r;
	int err;

	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
		return -EINVAL;

	r = nla_data(tb[TCA_CBQ_RATE]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		goto put_rtab;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.common.classid = sch->handle;
	q->link.qdisc = sch;
	q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
				      sch->handle);
	if (!q->link.q)
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO - 1;
	q->link.priority2 = TC_CBQ_MAXPRIO - 1;
	q->link.cpriority = TC_CBQ_MAXPRIO - 1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(qdisc_dev(sch));
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;

	qdisc_watchdog_init(&q->watchdog, sch);
	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	q->now = psched_get_time();
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT])
		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));

	cbq_addprio(q, &q->link);
	return 0;

put_rtab:
	qdisc_put_rtab(q->link.R_tab);
	return err;
}

static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);

	NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority + 1;
	opt.cpriority = cl->cpriority + 1;
	opt.weight = cl->weight;
	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2 + 1;
	opt.pad = 0;
	opt.penalty = cl->penalty;
	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->common.classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

#ifdef CONFIG_NET_CLS_ACT
static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		opt.__res1 = 0;
		opt.__res2 = 0;
		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
#endif
1533
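    /*
     * Emit the full attribute set for one class; a failure in any
     * sub-dump (typically lack of skb space) aborts the whole dump.
     */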
1534static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1535{
1536        if (cbq_dump_lss(skb, cl) < 0 ||
1537            cbq_dump_rate(skb, cl) < 0 ||
1538            cbq_dump_wrr(skb, cl) < 0 ||
1539            cbq_dump_ovl(skb, cl) < 0 ||
1540#ifdef CONFIG_NET_CLS_ACT
1541            cbq_dump_police(skb, cl) < 0 ||
1542#endif
1543            cbq_dump_fopt(skb, cl) < 0)
1544                return -1;
1545        return 0;
1546}
1547
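    /*
     * Dump the qdisc itself: the root ("link") class attributes, nested
     * inside TCA_OPTIONS.
     */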
1548static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1549{
1550        struct cbq_sched_data *q = qdisc_priv(sch);
1551        struct nlattr *nest;
1552
1553        nest = nla_nest_start(skb, TCA_OPTIONS);
1554        if (nest == NULL)
1555                goto nla_put_failure;
1556        if (cbq_dump_attr(skb, &q->link) < 0)
1557                goto nla_put_failure;
1558        nla_nest_end(skb, nest);
1559        return skb->len;
1560
1561nla_put_failure:
1562        nla_nest_cancel(skb, nest);
1563        return -1;
1564}
1565
1566static int
1567cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1568{
1569        struct cbq_sched_data *q = qdisc_priv(sch);
1570
1571        q->link.xstats.avgidle = q->link.avgidle;
1572        return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1573}
1574
1575static int
1576cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1577               struct sk_buff *skb, struct tcmsg *tcm)
1578{
1579        struct cbq_class *cl = (struct cbq_class *)arg;
1580        struct nlattr *nest;
1581
1582        if (cl->tparent)
1583                tcm->tcm_parent = cl->tparent->common.classid;
1584        else
1585                tcm->tcm_parent = TC_H_ROOT;
1586        tcm->tcm_handle = cl->common.classid;
1587        tcm->tcm_info = cl->q->handle;
1588
1589        nest = nla_nest_start(skb, TCA_OPTIONS);
1590        if (nest == NULL)
1591                goto nla_put_failure;
1592        if (cbq_dump_attr(skb, cl) < 0)
1593                goto nla_put_failure;
1594        nla_nest_end(skb, nest);
1595        return skb->len;
1596
1597nla_put_failure:
1598        nla_nest_cancel(skb, nest);
1599        return -1;
1600}
1601
1602static int
1603cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1604        struct gnet_dump *d)
1605{
1606        struct cbq_sched_data *q = qdisc_priv(sch);
1607        struct cbq_class *cl = (struct cbq_class *)arg;
1608
1609        cl->qstats.qlen = cl->q->q.qlen;
1610        cl->xstats.avgidle = cl->avgidle;
1611        cl->xstats.undertime = 0;
1612
1613        if (cl->undertime != PSCHED_PASTPERFECT)
1614                cl->xstats.undertime = cl->undertime - q->now;
1615
1616        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1617            gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1618            gnet_stats_copy_queue(d, &cl->qstats) < 0)
1619                return -1;
1620
1621        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1622}
1623
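    /*
     * Graft a new child qdisc onto a class. Without an explicit
     * replacement a default pfifo is created; the old qdisc is emptied
     * and handed back to the caller under the qdisc tree lock.
     */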
1624static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1625                     struct Qdisc **old)
1626{
1627        struct cbq_class *cl = (struct cbq_class *)arg;
1628
1629        if (new == NULL) {
1630                new = qdisc_create_dflt(sch->dev_queue,
1631                                        &pfifo_qdisc_ops, cl->common.classid);
1632                if (new == NULL)
1633                        return -ENOBUFS;
1634        } else {
1635#ifdef CONFIG_NET_CLS_ACT
1636                if (cl->police == TC_POLICE_RECLASSIFY)
1637                        new->reshape_fail = cbq_reshape_fail;
1638#endif
1639        }
1640        sch_tree_lock(sch);
1641        *old = cl->q;
1642        cl->q = new;
1643        qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1644        qdisc_reset(*old);
1645        sch_tree_unlock(sch);
1646
1647        return 0;
1648}
1649
1650static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1651{
1652        struct cbq_class *cl = (struct cbq_class *)arg;
1653
1654        return cl->q;
1655}
1656
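    /*
     * Called when a child qdisc's queue length changes; if the class's
     * queue just drained, take it off the active round-robin list.
     */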
1657static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1658{
1659        struct cbq_class *cl = (struct cbq_class *)arg;
1660
1661        if (cl->q->q.qlen == 0)
1662                cbq_deactivate_class(cl);
1663}
1664
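    /* Look up a class by classid and, if found, grab a reference for the caller. */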
1665static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1666{
1667        struct cbq_sched_data *q = qdisc_priv(sch);
1668        struct cbq_class *cl = cbq_class_lookup(q, classid);
1669
1670        if (cl) {
1671                cl->refcnt++;
1672                return (unsigned long)cl;
1673        }
1674        return 0;
1675}
1676
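    /*
     * Release everything a class owns: filter chain, child qdisc, rate
     * table and rate estimator. The root class is embedded in the
     * per-qdisc data, so only dynamically allocated classes are freed.
     */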
1677static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1678{
1679        struct cbq_sched_data *q = qdisc_priv(sch);
1680
1681        WARN_ON(cl->filters);
1682
1683        tcf_destroy_chain(&cl->filter_list);
1684        qdisc_destroy(cl->q);
1685        qdisc_put_rtab(cl->R_tab);
1686        gen_kill_estimator(&cl->bstats, &cl->rate_est);
1687        if (cl != &q->link)
1688                kfree(cl);
1689}
1690
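    /*
     * Tear down the whole qdisc: destroy all filters first (see the
     * comment below), then every class, then the class hash itself.
     */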
1691static void cbq_destroy(struct Qdisc *sch)
1692{
1693        struct cbq_sched_data *q = qdisc_priv(sch);
1694        struct hlist_node *n, *next;
1695        struct cbq_class *cl;
1696        unsigned int h;
1697
1698#ifdef CONFIG_NET_CLS_ACT
1699        q->rx_class = NULL;
1700#endif
1701        /*
1702         * Filters must be destroyed first because we do not destroy the
1703         * classes from root to leaves, which means that filters can still
1704         * be bound to classes that have already been destroyed. --TGR '04
1705         */
1706        for (h = 0; h < q->clhash.hashsize; h++) {
1707                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1708                        tcf_destroy_chain(&cl->filter_list);
1709        }
1710        for (h = 0; h < q->clhash.hashsize; h++) {
1711                hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1712                                          common.hnode)
1713                        cbq_destroy_class(sch, cl);
1714        }
1715        qdisc_class_hash_destroy(&q->clhash);
1716}
1717
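    /*
     * Drop one reference to a class; on the final put, make sure the
     * cached ingress classification hint (rx_class) cannot be left
     * pointing at the class we are about to destroy.
     */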
1718static void cbq_put(struct Qdisc *sch, unsigned long arg)
1719{
1720        struct cbq_class *cl = (struct cbq_class *)arg;
1721
1722        if (--cl->refcnt == 0) {
1723#ifdef CONFIG_NET_CLS_ACT
1724                spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1725                struct cbq_sched_data *q = qdisc_priv(sch);
1726
1727                spin_lock_bh(root_lock);
1728                if (q->rx_class == cl)
1729                        q->rx_class = NULL;
1730                spin_unlock_bh(root_lock);
1731#endif
1732
1733                cbq_destroy_class(sch, cl);
1734        }
1735}
1736
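    /*
     * Create a new class, or change an existing one (*arg != 0). For a
     * new class the TCA_CBQ_RATE, TCA_CBQ_WRROPT and TCA_CBQ_LSSOPT
     * attributes are mandatory, and a zero classid makes us auto-allocate
     * a minor number above 0x8000. Illustrative userspace usage (device
     * name, handles and numbers are examples only):
     *
     *   tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 100Mbit \
     *           rate 6Mbit allot 1514 prio 5 avpkt 1000
     */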
1737static int
1738cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1739                 unsigned long *arg)
1740{
1741        int err;
1742        struct cbq_sched_data *q = qdisc_priv(sch);
1743        struct cbq_class *cl = (struct cbq_class *)*arg;
1744        struct nlattr *opt = tca[TCA_OPTIONS];
1745        struct nlattr *tb[TCA_CBQ_MAX + 1];
1746        struct cbq_class *parent;
1747        struct qdisc_rate_table *rtab = NULL;
1748
1749        if (opt == NULL)
1750                return -EINVAL;
1751
1752        err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1753        if (err < 0)
1754                return err;
1755
1756        if (cl) {
1757                /* Check parent */
1758                if (parentid) {
1759                        if (cl->tparent &&
1760                            cl->tparent->common.classid != parentid)
1761                                return -EINVAL;
1762                        if (!cl->tparent && parentid != TC_H_ROOT)
1763                                return -EINVAL;
1764                }
1765
1766                if (tb[TCA_CBQ_RATE]) {
1767                        rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1768                                              tb[TCA_CBQ_RTAB]);
1769                        if (rtab == NULL)
1770                                return -EINVAL;
1771                }
1772
1773                if (tca[TCA_RATE]) {
1774                        err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1775                                                    qdisc_root_sleeping_lock(sch),
1776                                                    tca[TCA_RATE]);
1777                        if (err) {
1778                                if (rtab)
1779                                        qdisc_put_rtab(rtab);
1780                                return err;
1781                        }
1782                }
1783
1784                /* Change class parameters */
1785                sch_tree_lock(sch);
1786
1787                if (cl->next_alive != NULL)
1788                        cbq_deactivate_class(cl);
1789
1790                if (rtab) {
1791                        qdisc_put_rtab(cl->R_tab);
1792                        cl->R_tab = rtab;
1793                }
1794
1795                if (tb[TCA_CBQ_LSSOPT])
1796                        cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1797
1798                if (tb[TCA_CBQ_WRROPT]) {
1799                        cbq_rmprio(q, cl);
1800                        cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1801                }
1802
1803                if (tb[TCA_CBQ_OVL_STRATEGY])
1804                        cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1805
1806#ifdef CONFIG_NET_CLS_ACT
1807                if (tb[TCA_CBQ_POLICE])
1808                        cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1809#endif
1810
1811                if (tb[TCA_CBQ_FOPT])
1812                        cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1813
1814                if (cl->q->q.qlen)
1815                        cbq_activate_class(cl);
1816
1817                sch_tree_unlock(sch);
1818
1819                return 0;
1820        }
1821
1822        if (parentid == TC_H_ROOT)
1823                return -EINVAL;
1824
1825        if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1826            tb[TCA_CBQ_LSSOPT] == NULL)
1827                return -EINVAL;
1828
1829        rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1830        if (rtab == NULL)
1831                return -EINVAL;
1832
1833        if (classid) {
1834                err = -EINVAL;
1835                if (TC_H_MAJ(classid ^ sch->handle) ||
1836                    cbq_class_lookup(q, classid))
1837                        goto failure;
1838        } else {
1839                int i;
1840                classid = TC_H_MAKE(sch->handle, 0x8000);
1841
1842                for (i = 0; i < 0x8000; i++) {
1843                        if (++q->hgenerator >= 0x8000)
1844                                q->hgenerator = 1;
1845                        if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1846                                break;
1847                }
1848                err = -ENOSR;
1849                if (i >= 0x8000)
1850                        goto failure;
1851                classid = classid|q->hgenerator;
1852        }
1853
1854        parent = &q->link;
1855        if (parentid) {
1856                parent = cbq_class_lookup(q, parentid);
1857                err = -EINVAL;
1858                if (parent == NULL)
1859                        goto failure;
1860        }
1861
1862        err = -ENOBUFS;
1863        cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1864        if (cl == NULL)
1865                goto failure;
1866
1867        if (tca[TCA_RATE]) {
1868                err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1869                                        qdisc_root_sleeping_lock(sch),
1870                                        tca[TCA_RATE]);
1871                if (err) {
1872                        kfree(cl);
1873                        goto failure;
1874                }
1875        }
1876
1877        cl->R_tab = rtab;
1878        rtab = NULL;
1879        cl->refcnt = 1;
1880        cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1881        if (!cl->q)
1882                cl->q = &noop_qdisc;
1883        cl->common.classid = classid;
1884        cl->tparent = parent;
1885        cl->qdisc = sch;
1886        cl->allot = parent->allot;
1887        cl->quantum = cl->allot;
1888        cl->weight = cl->R_tab->rate.rate;
1889
1890        sch_tree_lock(sch);
1891        cbq_link_class(cl);
1892        cl->borrow = cl->tparent;
1893        if (cl->tparent != &q->link)
1894                cl->share = cl->tparent;
1895        cbq_adjust_levels(parent);
1896        cl->minidle = -0x7FFFFFFF;
1897        cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1898        cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1899        if (cl->ewma_log == 0)
1900                cl->ewma_log = q->link.ewma_log;
1901        if (cl->maxidle == 0)
1902                cl->maxidle = q->link.maxidle;
1903        if (cl->avpkt == 0)
1904                cl->avpkt = q->link.avpkt;
1905        cl->overlimit = cbq_ovl_classic;
1906        if (tb[TCA_CBQ_OVL_STRATEGY])
1907                cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1908#ifdef CONFIG_NET_CLS_ACT
1909        if (tb[TCA_CBQ_POLICE])
1910                cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1911#endif
1912        if (tb[TCA_CBQ_FOPT])
1913                cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1914        sch_tree_unlock(sch);
1915
1916        qdisc_class_hash_grow(sch, &q->clhash);
1917
1918        *arg = (unsigned long)cl;
1919        return 0;
1920
1921failure:
1922        qdisc_put_rtab(rtab);
1923        return err;
1924}
1925
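    /*
     * Delete a class: refused while filters or child classes still
     * reference it, and always for the built-in root. The class's queue
     * is purged and all scheduler state referring to it is cleared
     * before the caller's reference is dropped.
     */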
1926static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1927{
1928        struct cbq_sched_data *q = qdisc_priv(sch);
1929        struct cbq_class *cl = (struct cbq_class *)arg;
1930        unsigned int qlen;
1931
1932        if (cl->filters || cl->children || cl == &q->link)
1933                return -EBUSY;
1934
1935        sch_tree_lock(sch);
1936
1937        qlen = cl->q->q.qlen;
1938        qdisc_reset(cl->q);
1939        qdisc_tree_decrease_qlen(cl->q, qlen);
1940
1941        if (cl->next_alive)
1942                cbq_deactivate_class(cl);
1943
1944        if (q->tx_borrowed == cl)
1945                q->tx_borrowed = q->tx_class;
1946        if (q->tx_class == cl) {
1947                q->tx_class = NULL;
1948                q->tx_borrowed = NULL;
1949        }
1950#ifdef CONFIG_NET_CLS_ACT
1951        if (q->rx_class == cl)
1952                q->rx_class = NULL;
1953#endif
1954
1955        cbq_unlink_class(cl);
1956        cbq_adjust_levels(cl->tparent);
1957        cl->defmap = 0;
1958        cbq_sync_defmap(cl);
1959
1960        cbq_rmprio(q, cl);
1961        sch_tree_unlock(sch);
1962
1963        /*
1964         * This should never drop the refcount to zero: we "hold" one
1965         * cops->get() reference when called from tc_ctl_tclass, and the
1966         * destroy method is only invoked from cops->put().
1967         */
            cl->refcnt--;
            BUG_ON(cl->refcnt == 0);        /* keep the side effect out of BUG_ON() */
1968
1969        return 0;
1970}
1971
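    /* Return the anchor of a class's filter chain; a NULL arg means the root (link) class. */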
1972static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1973{
1974        struct cbq_sched_data *q = qdisc_priv(sch);
1975        struct cbq_class *cl = (struct cbq_class *)arg;
1976
1977        if (cl == NULL)
1978                cl = &q->link;
1979
1980        return &cl->filter_list;
1981}
1982
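    /*
     * Bind a filter to a class. The target must sit strictly below the
     * filter's parent in the hierarchy; the binding is counted so the
     * class cannot be deleted while filters still point at it.
     */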
1983static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1984                                     u32 classid)
1985{
1986        struct cbq_sched_data *q = qdisc_priv(sch);
1987        struct cbq_class *p = (struct cbq_class *)parent;
1988        struct cbq_class *cl = cbq_class_lookup(q, classid);
1989
1990        if (cl) {
1991                if (p && p->level <= cl->level)
1992                        return 0;
1993                cl->filters++;
1994                return (unsigned long)cl;
1995        }
1996        return 0;
1997}
1998
1999static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2000{
2001        struct cbq_class *cl = (struct cbq_class *)arg;
2002
2003        cl->filters--;
2004}
2005
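    /*
     * Walk all classes in hash order for userspace dumps, honouring the
     * walker's skip/count bookkeeping and stopping early when the
     * callback asks for it.
     */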
2006static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2007{
2008        struct cbq_sched_data *q = qdisc_priv(sch);
2009        struct cbq_class *cl;
2010        struct hlist_node *n;
2011        unsigned int h;
2012
2013        if (arg->stop)
2014                return;
2015
2016        for (h = 0; h < q->clhash.hashsize; h++) {
2017                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2018                        if (arg->count < arg->skip) {
2019                                arg->count++;
2020                                continue;
2021                        }
2022                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2023                                arg->stop = 1;
2024                                return;
2025                        }
2026                        arg->count++;
2027                }
2028        }
2029}
2030
2031static const struct Qdisc_class_ops cbq_class_ops = {
2032        .graft          =       cbq_graft,
2033        .leaf           =       cbq_leaf,
2034        .qlen_notify    =       cbq_qlen_notify,
2035        .get            =       cbq_get,
2036        .put            =       cbq_put,
2037        .change         =       cbq_change_class,
2038        .delete         =       cbq_delete,
2039        .walk           =       cbq_walk,
2040        .tcf_chain      =       cbq_find_tcf,
2041        .bind_tcf       =       cbq_bind_filter,
2042        .unbind_tcf     =       cbq_unbind_filter,
2043        .dump           =       cbq_dump_class,
2044        .dump_stats     =       cbq_dump_class_stats,
2045};
2046
2047static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2048        .next           =       NULL,
2049        .cl_ops         =       &cbq_class_ops,
2050        .id             =       "cbq",
2051        .priv_size      =       sizeof(struct cbq_sched_data),
2052        .enqueue        =       cbq_enqueue,
2053        .dequeue        =       cbq_dequeue,
2054        .peek           =       qdisc_peek_dequeued,
2055        .drop           =       cbq_drop,
2056        .init           =       cbq_init,
2057        .reset          =       cbq_reset,
2058        .destroy        =       cbq_destroy,
2059        .change         =       NULL,
2060        .dump           =       cbq_dump,
2061        .dump_stats     =       cbq_dump_stats,
2062        .owner          =       THIS_MODULE,
2063};
2064
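    /*
     * Module (un)registration. Once registered, the qdisc can be
     * instantiated from userspace; an illustrative invocation (device
     * name and numbers are examples only):
     *
     *   tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit avpkt 1000
     */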
2065static int __init cbq_module_init(void)
2066{
2067        return register_qdisc(&cbq_qdisc_ops);
2068}

2069static void __exit cbq_module_exit(void)
2070{
2071        unregister_qdisc(&cbq_qdisc_ops);
2072}
2073module_init(cbq_module_init)
2074module_exit(cbq_module_exit)
2075MODULE_LICENSE("GPL");
2076