linux/net/sched/sch_cbq.c
   1/*
   2 * net/sched/sch_cbq.c  Class-Based Queueing discipline.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 *
  11 */
  12
  13#include <linux/module.h>
  14#include <linux/slab.h>
  15#include <linux/types.h>
  16#include <linux/kernel.h>
  17#include <linux/string.h>
  18#include <linux/errno.h>
  19#include <linux/skbuff.h>
  20#include <net/netlink.h>
  21#include <net/pkt_sched.h>
  22
  23
  24/*      Class-Based Queueing (CBQ) algorithm.
  25        =======================================
  26
  27        Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
  28                 Management Models for Packet Networks",
  29                 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
  30
  31                 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
  32
  33                 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
  34                 Parameters", 1996
  35
  36                 [4] Sally Floyd and Michael Speer, "Experimental Results
  37                 for Class-Based Queueing", 1998, not published.
  38
  39        -----------------------------------------------------------------------
  40
  41        Algorithm skeleton was taken from NS simulator cbq.cc.
  42        If someone wants to check this code against the LBL version,
   43        they should take into account that ONLY the skeleton was borrowed;
   44        the implementation is different. In particular:
  45
  46        --- The WRR algorithm is different. Our version looks more
  47        reasonable (I hope) and works when quanta are allowed to be
   48        less than the MTU, which is always the case when real-time classes
   49        have small rates. Note that the statement in [3] is incomplete:
   50        the delay can be estimated even if the per-round class allotment
   51        is less than the MTU (see the sketch after this comment block).
   52        Namely, if the per-round allotment is W*r_i, and r_1+...+r_k = r < 1,
  53
  54        delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
  55
  56        In the worst case we have IntServ estimate with D = W*r+k*MTU
  57        and C = MTU*r. The proof (if correct at all) is trivial.
  58
  59
  60        --- It seems that cbq-2.0 is not very accurate. At least, I cannot
  61        interpret some places, which look like wrong translations
  62        from NS. Anyone is advised to find these differences
   63        and explain to me why I am wrong 8).
  64
   65        --- Linux has no EOI event, so we cannot measure true class
   66        idle time. The workaround is to treat the next dequeue event
   67        as a sign that the previous packet has finished. This is wrong due to
   68        internal device queueing, but on a permanently loaded link it holds.
   69        Moreover, combined with the clock integrator, this scheme comes
   70        very close to an ideal solution.  */
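
/*
 * For illustration only: a stand-alone user-space sketch (not part of the
 * kernel build) that evaluates the per-class delay bound quoted above,
 * reading [x] as ceil(x).  The names mtu, W, r_i, r, k and B are the symbols
 * from the comment, not kernel identifiers; see cbq_dequeue_prio() below for
 * the real WRR machinery.
 */
#if 0
#include <math.h>

static double cbq_wrr_delay_bound(double mtu, double W, double r_i,
				  double r, int k, double B)
{
	/* WRR rounds needed to clear one MTU with per-round allotment W*r_i */
	double rounds = ceil(mtu / (W * r_i));

	/* delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B */
	return (rounds * W * r + W * r + k * mtu) / B;
}
#endif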
  71
  72struct cbq_sched_data;
  73
  74
  75struct cbq_class {
  76        struct Qdisc_class_common common;
  77        struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
  78
  79/* Parameters */
  80        unsigned char           priority;       /* class priority */
  81        unsigned char           priority2;      /* priority to be used after overlimit */
  82        unsigned char           ewma_log;       /* time constant for idle time calculation */
  83        unsigned char           ovl_strategy;
  84#ifdef CONFIG_NET_CLS_ACT
  85        unsigned char           police;
  86#endif
  87
  88        u32                     defmap;
  89
  90        /* Link-sharing scheduler parameters */
  91        long                    maxidle;        /* Class parameters: see below. */
  92        long                    offtime;
  93        long                    minidle;
  94        u32                     avpkt;
  95        struct qdisc_rate_table *R_tab;
  96
  97        /* Overlimit strategy parameters */
  98        void                    (*overlimit)(struct cbq_class *cl);
  99        psched_tdiff_t          penalty;
 100
 101        /* General scheduler (WRR) parameters */
 102        long                    allot;
 103        long                    quantum;        /* Allotment per WRR round */
 104        long                    weight;         /* Relative allotment: see below */
 105
 106        struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
 107        struct cbq_class        *split;         /* Ptr to split node */
 108        struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
 109        struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
 110        struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
 111                                                   parent otherwise */
 112        struct cbq_class        *sibling;       /* Sibling chain */
 113        struct cbq_class        *children;      /* Pointer to children chain */
 114
 115        struct Qdisc            *q;             /* Elementary queueing discipline */
 116
 117
 118/* Variables */
 119        unsigned char           cpriority;      /* Effective priority */
 120        unsigned char           delayed;
 121        unsigned char           level;          /* level of the class in hierarchy:
 122                                                   0 for leaf classes, and maximal
 123                                                   level of children + 1 for nodes.
 124                                                 */
 125
 126        psched_time_t           last;           /* Last end of service */
 127        psched_time_t           undertime;
 128        long                    avgidle;
 129        long                    deficit;        /* Saved deficit for WRR */
 130        psched_time_t           penalized;
 131        struct gnet_stats_basic_packed bstats;
 132        struct gnet_stats_queue qstats;
 133        struct gnet_stats_rate_est rate_est;
 134        struct tc_cbq_xstats    xstats;
 135
 136        struct tcf_proto        *filter_list;
 137
 138        int                     refcnt;
 139        int                     filters;
 140
 141        struct cbq_class        *defaults[TC_PRIO_MAX + 1];
 142};
 143
 144struct cbq_sched_data {
 145        struct Qdisc_class_hash clhash;                 /* Hash table of all classes */
 146        int                     nclasses[TC_CBQ_MAXPRIO + 1];
 147        unsigned int            quanta[TC_CBQ_MAXPRIO + 1];
 148
 149        struct cbq_class        link;
 150
 151        unsigned int            activemask;
 152        struct cbq_class        *active[TC_CBQ_MAXPRIO + 1];    /* List of all classes
 153                                                                   with backlog */
 154
 155#ifdef CONFIG_NET_CLS_ACT
 156        struct cbq_class        *rx_class;
 157#endif
 158        struct cbq_class        *tx_class;
 159        struct cbq_class        *tx_borrowed;
 160        int                     tx_len;
 161        psched_time_t           now;            /* Cached timestamp */
 162        psched_time_t           now_rt;         /* Cached real time */
 163        unsigned int            pmask;
 164
 165        struct hrtimer          delay_timer;
 166        struct qdisc_watchdog   watchdog;       /* Watchdog timer,
 167                                                   started when CBQ has
 168                                                   backlog, but cannot
 169                                                   transmit just now */
 170        psched_tdiff_t          wd_expires;
 171        int                     toplevel;
 172        u32                     hgenerator;
 173};
 174
 175
 176#define L2T(cl, len)    qdisc_l2t((cl)->R_tab, len)
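
/*
 * A rough user-space model of what L2T() provides: the time (in scheduler
 * ticks) needed to transmit a packet of the given length at the class rate.
 * The real qdisc_l2t() looks the answer up in a rate table supplied from
 * userspace; the linear scaling below is only an approximation for
 * illustration, and the helper name is made up.
 */
#if 0
static unsigned long l2t_approx(unsigned long len_bytes,
				unsigned long rate_bytes_per_sec,
				unsigned long ticks_per_sec)
{
	if (rate_bytes_per_sec == 0)
		return 0;
	return (unsigned long)(((unsigned long long)len_bytes * ticks_per_sec)
			       / rate_bytes_per_sec);
}
#endif
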
 177
 178static inline struct cbq_class *
 179cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 180{
 181        struct Qdisc_class_common *clc;
 182
 183        clc = qdisc_class_find(&q->clhash, classid);
 184        if (clc == NULL)
 185                return NULL;
 186        return container_of(clc, struct cbq_class, common);
 187}
 188
 189#ifdef CONFIG_NET_CLS_ACT
 190
 191static struct cbq_class *
 192cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 193{
 194        struct cbq_class *cl;
 195
 196        for (cl = this->tparent; cl; cl = cl->tparent) {
 197                struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
 198
 199                if (new != NULL && new != this)
 200                        return new;
 201        }
 202        return NULL;
 203}
 204
 205#endif
 206
 207/* Classify packet. The procedure is pretty complicated, but
 208 * it allows us to combine link sharing and priority scheduling
 209 * transparently.
 210 *
  211 * Namely, you can put link-sharing rules (e.g. route-based) at the root of CBQ,
 212 * so that it resolves to split nodes. Then packets are classified
 213 * by logical priority, or a more specific classifier may be attached
 214 * to the split node.
 215 */
 216
 217static struct cbq_class *
 218cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 219{
 220        struct cbq_sched_data *q = qdisc_priv(sch);
 221        struct cbq_class *head = &q->link;
 222        struct cbq_class **defmap;
 223        struct cbq_class *cl = NULL;
 224        u32 prio = skb->priority;
 225        struct tcf_result res;
 226
 227        /*
 228         *  Step 1. If skb->priority points to one of our classes, use it.
 229         */
 230        if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
 231            (cl = cbq_class_lookup(q, prio)) != NULL)
 232                return cl;
 233
 234        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 235        for (;;) {
 236                int result = 0;
 237                defmap = head->defaults;
 238
 239                /*
 240                 * Step 2+n. Apply classifier.
 241                 */
 242                if (!head->filter_list ||
 243                    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
 244                        goto fallback;
 245
 246                cl = (void *)res.class;
 247                if (!cl) {
 248                        if (TC_H_MAJ(res.classid))
 249                                cl = cbq_class_lookup(q, res.classid);
 250                        else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
 251                                cl = defmap[TC_PRIO_BESTEFFORT];
 252
 253                        if (cl == NULL)
 254                                goto fallback;
 255                }
 256                if (cl->level >= head->level)
 257                        goto fallback;
 258#ifdef CONFIG_NET_CLS_ACT
 259                switch (result) {
 260                case TC_ACT_QUEUED:
 261                case TC_ACT_STOLEN:
 262                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 263                case TC_ACT_SHOT:
 264                        return NULL;
 265                case TC_ACT_RECLASSIFY:
 266                        return cbq_reclassify(skb, cl);
 267                }
 268#endif
 269                if (cl->level == 0)
 270                        return cl;
 271
 272                /*
  273                 * Step 3+n. If the classifier selected a link-sharing class,
  274                 *         apply its agency-specific classifier.
  275                 *         Repeat this procedure until we hit a leaf node.
 276                 */
 277                head = cl;
 278        }
 279
 280fallback:
 281        cl = head;
 282
 283        /*
 284         * Step 4. No success...
 285         */
 286        if (TC_H_MAJ(prio) == 0 &&
 287            !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
 288            !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 289                return head;
 290
 291        return cl;
 292}
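
/*
 * Sketch of the defmap fallback used in steps 2+n and 4 above: when the
 * filters give no verdict, the low bits of skb->priority index the split
 * node's defaults[] table, with TC_PRIO_BESTEFFORT as the final fallback.
 * The helper below is an illustration only (it omits the TC_H_MAJ(prio)
 * check that cbq_classify() performs) and is not part of the original file.
 */
#if 0
static struct cbq_class *cbq_default_for_prio(struct cbq_class *head, u32 prio)
{
	struct cbq_class *cl = head->defaults[prio & TC_PRIO_MAX];

	if (cl == NULL)
		cl = head->defaults[TC_PRIO_BESTEFFORT];
	return cl;	/* may still be NULL if the split has no defaults */
}
#endif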
 293
 294/*
  295 * A packet has just been enqueued on an empty class.
  296 * cbq_activate_class adds the class to the tail of the active class list
  297 * of its priority band.
 298 */
 299
 300static inline void cbq_activate_class(struct cbq_class *cl)
 301{
 302        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 303        int prio = cl->cpriority;
 304        struct cbq_class *cl_tail;
 305
 306        cl_tail = q->active[prio];
 307        q->active[prio] = cl;
 308
 309        if (cl_tail != NULL) {
 310                cl->next_alive = cl_tail->next_alive;
 311                cl_tail->next_alive = cl;
 312        } else {
 313                cl->next_alive = cl;
 314                q->activemask |= (1<<prio);
 315        }
 316}
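
/*
 * The list invariant used above, shown in isolation: q->active[prio] points
 * at the TAIL of a circular singly linked ring threaded through next_alive,
 * so tail->next_alive is the head.  Appending links the new class after the
 * old tail and makes it the new tail.  This helper is an illustration only,
 * not kernel code.
 */
#if 0
static void active_ring_append(struct cbq_class **tailp, struct cbq_class *cl)
{
	struct cbq_class *tail = *tailp;

	if (tail) {
		cl->next_alive = tail->next_alive;	/* new tail -> head */
		tail->next_alive = cl;			/* old tail -> new tail */
	} else {
		cl->next_alive = cl;			/* one-element ring */
	}
	*tailp = cl;
}
#endif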
 317
 318/*
 319 * Unlink class from active chain.
  320 * Note that the same unlinking is also done directly in cbq_dequeue*
  321 * during the round-robin procedure.
 322 */
 323
 324static void cbq_deactivate_class(struct cbq_class *this)
 325{
 326        struct cbq_sched_data *q = qdisc_priv(this->qdisc);
 327        int prio = this->cpriority;
 328        struct cbq_class *cl;
 329        struct cbq_class *cl_prev = q->active[prio];
 330
 331        do {
 332                cl = cl_prev->next_alive;
 333                if (cl == this) {
 334                        cl_prev->next_alive = cl->next_alive;
 335                        cl->next_alive = NULL;
 336
 337                        if (cl == q->active[prio]) {
 338                                q->active[prio] = cl_prev;
 339                                if (cl == q->active[prio]) {
 340                                        q->active[prio] = NULL;
 341                                        q->activemask &= ~(1<<prio);
 342                                        return;
 343                                }
 344                        }
 345                        return;
 346                }
 347        } while ((cl_prev = cl) != q->active[prio]);
 348}
 349
 350static void
 351cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 352{
 353        int toplevel = q->toplevel;
 354
 355        if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
 356                psched_time_t now;
 357                psched_tdiff_t incr;
 358
 359                now = psched_get_time();
 360                incr = now - q->now_rt;
 361                now = q->now + incr;
 362
 363                do {
 364                        if (cl->undertime < now) {
 365                                q->toplevel = cl->level;
 366                                return;
 367                        }
 368                } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
 369        }
 370}
 371
 372static int
 373cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 374{
 375        struct cbq_sched_data *q = qdisc_priv(sch);
 376        int uninitialized_var(ret);
 377        struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 378
 379#ifdef CONFIG_NET_CLS_ACT
 380        q->rx_class = cl;
 381#endif
 382        if (cl == NULL) {
 383                if (ret & __NET_XMIT_BYPASS)
 384                        sch->qstats.drops++;
 385                kfree_skb(skb);
 386                return ret;
 387        }
 388
 389#ifdef CONFIG_NET_CLS_ACT
 390        cl->q->__parent = sch;
 391#endif
 392        ret = qdisc_enqueue(skb, cl->q);
 393        if (ret == NET_XMIT_SUCCESS) {
 394                sch->q.qlen++;
 395                cbq_mark_toplevel(q, cl);
 396                if (!cl->next_alive)
 397                        cbq_activate_class(cl);
 398                return ret;
 399        }
 400
 401        if (net_xmit_drop_count(ret)) {
 402                sch->qstats.drops++;
 403                cbq_mark_toplevel(q, cl);
 404                cl->qstats.drops++;
 405        }
 406        return ret;
 407}
 408
 409/* Overlimit actions */
 410
 411/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
 412
 413static void cbq_ovl_classic(struct cbq_class *cl)
 414{
 415        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 416        psched_tdiff_t delay = cl->undertime - q->now;
 417
 418        if (!cl->delayed) {
 419                delay += cl->offtime;
 420
 421                /*
  422                 * The class goes to sleep, so it will have no
  423                 * chance to accumulate avgidle. Let's forgive it 8)
  424                 *
  425                 * BTW cbq-2.0 has a bug in this
  426                 * place: apparently they forgot to shift by cl->ewma_log.
 427                 */
 428                if (cl->avgidle < 0)
 429                        delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 430                if (cl->avgidle < cl->minidle)
 431                        cl->avgidle = cl->minidle;
 432                if (delay <= 0)
 433                        delay = 1;
 434                cl->undertime = q->now + delay;
 435
 436                cl->xstats.overactions++;
 437                cl->delayed = 1;
 438        }
 439        if (q->wd_expires == 0 || q->wd_expires > delay)
 440                q->wd_expires = delay;
 441
 442        /* Dirty work! We must schedule wakeups based on
 443         * real available rate, rather than leaf rate,
 444         * which may be tiny (even zero).
 445         */
 446        if (q->toplevel == TC_CBQ_MAXLEVEL) {
 447                struct cbq_class *b;
 448                psched_tdiff_t base_delay = q->wd_expires;
 449
 450                for (b = cl->borrow; b; b = b->borrow) {
 451                        delay = b->undertime - q->now;
 452                        if (delay < base_delay) {
 453                                if (delay <= 0)
 454                                        delay = 1;
 455                                base_delay = delay;
 456                        }
 457                }
 458
 459                q->wd_expires = base_delay;
 460        }
 461}
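
/*
 * The arithmetic of the classic overlimit action, pulled out for clarity:
 * on the first overlimit event the class sleeps for the time left until it
 * would go underlimit, plus offtime, minus the "forgiven" part of its
 * negative avgidle, i.e. (-avgidle)*(1 - 2^-ewma_log).  Illustration only;
 * the helper name and the flattened arguments are not kernel identifiers.
 */
#if 0
static long ovl_classic_delay(long undertime_minus_now, long offtime,
			      long avgidle, unsigned char ewma_log)
{
	long delay = undertime_minus_now + offtime;

	if (avgidle < 0)
		delay -= (-avgidle) - ((-avgidle) >> ewma_log);

	return delay > 0 ? delay : 1;	/* never schedule a zero-length sleep */
}
#endif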
 462
  463/* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
  464 * when they go overlimit
 465 */
 466
 467static void cbq_ovl_rclassic(struct cbq_class *cl)
 468{
 469        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 470        struct cbq_class *this = cl;
 471
 472        do {
 473                if (cl->level > q->toplevel) {
 474                        cl = NULL;
 475                        break;
 476                }
 477        } while ((cl = cl->borrow) != NULL);
 478
 479        if (cl == NULL)
 480                cl = this;
 481        cbq_ovl_classic(cl);
 482}
 483
  484/* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
 485
 486static void cbq_ovl_delay(struct cbq_class *cl)
 487{
 488        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 489        psched_tdiff_t delay = cl->undertime - q->now;
 490
 491        if (test_bit(__QDISC_STATE_DEACTIVATED,
 492                     &qdisc_root_sleeping(cl->qdisc)->state))
 493                return;
 494
 495        if (!cl->delayed) {
 496                psched_time_t sched = q->now;
 497                ktime_t expires;
 498
 499                delay += cl->offtime;
 500                if (cl->avgidle < 0)
 501                        delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 502                if (cl->avgidle < cl->minidle)
 503                        cl->avgidle = cl->minidle;
 504                cl->undertime = q->now + delay;
 505
 506                if (delay > 0) {
 507                        sched += delay + cl->penalty;
 508                        cl->penalized = sched;
 509                        cl->cpriority = TC_CBQ_MAXPRIO;
 510                        q->pmask |= (1<<TC_CBQ_MAXPRIO);
 511
 512                        expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
 513                        if (hrtimer_try_to_cancel(&q->delay_timer) &&
 514                            ktime_to_ns(ktime_sub(
 515                                        hrtimer_get_expires(&q->delay_timer),
 516                                        expires)) > 0)
 517                                hrtimer_set_expires(&q->delay_timer, expires);
 518                        hrtimer_restart(&q->delay_timer);
 519                        cl->delayed = 1;
 520                        cl->xstats.overactions++;
 521                        return;
 522                }
 523                delay = 1;
 524        }
 525        if (q->wd_expires == 0 || q->wd_expires > delay)
 526                q->wd_expires = delay;
 527}
 528
 529/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
 530
 531static void cbq_ovl_lowprio(struct cbq_class *cl)
 532{
 533        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 534
 535        cl->penalized = q->now + cl->penalty;
 536
 537        if (cl->cpriority != cl->priority2) {
 538                cl->cpriority = cl->priority2;
 539                q->pmask |= (1<<cl->cpriority);
 540                cl->xstats.overactions++;
 541        }
 542        cbq_ovl_classic(cl);
 543}
 544
 545/* TC_CBQ_OVL_DROP: penalize class by dropping */
 546
 547static void cbq_ovl_drop(struct cbq_class *cl)
 548{
 549        if (cl->q->ops->drop)
 550                if (cl->q->ops->drop(cl->q))
 551                        cl->qdisc->q.qlen--;
 552        cl->xstats.overactions++;
 553        cbq_ovl_classic(cl);
 554}
 555
 556static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
 557                                       psched_time_t now)
 558{
 559        struct cbq_class *cl;
 560        struct cbq_class *cl_prev = q->active[prio];
 561        psched_time_t sched = now;
 562
 563        if (cl_prev == NULL)
 564                return 0;
 565
 566        do {
 567                cl = cl_prev->next_alive;
 568                if (now - cl->penalized > 0) {
 569                        cl_prev->next_alive = cl->next_alive;
 570                        cl->next_alive = NULL;
 571                        cl->cpriority = cl->priority;
 572                        cl->delayed = 0;
 573                        cbq_activate_class(cl);
 574
 575                        if (cl == q->active[prio]) {
 576                                q->active[prio] = cl_prev;
 577                                if (cl == q->active[prio]) {
 578                                        q->active[prio] = NULL;
 579                                        return 0;
 580                                }
 581                        }
 582
 583                        cl = cl_prev->next_alive;
 584                } else if (sched - cl->penalized > 0)
 585                        sched = cl->penalized;
 586        } while ((cl_prev = cl) != q->active[prio]);
 587
 588        return sched - now;
 589}
 590
 591static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 592{
 593        struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
 594                                                delay_timer);
 595        struct Qdisc *sch = q->watchdog.qdisc;
 596        psched_time_t now;
 597        psched_tdiff_t delay = 0;
 598        unsigned int pmask;
 599
 600        now = psched_get_time();
 601
 602        pmask = q->pmask;
 603        q->pmask = 0;
 604
 605        while (pmask) {
 606                int prio = ffz(~pmask);
 607                psched_tdiff_t tmp;
 608
 609                pmask &= ~(1<<prio);
 610
 611                tmp = cbq_undelay_prio(q, prio, now);
 612                if (tmp > 0) {
 613                        q->pmask |= 1<<prio;
 614                        if (tmp < delay || delay == 0)
 615                                delay = tmp;
 616                }
 617        }
 618
 619        if (delay) {
 620                ktime_t time;
 621
 622                time = ktime_set(0, 0);
 623                time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
 624                hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 625        }
 626
 627        qdisc_unthrottled(sch);
 628        __netif_schedule(qdisc_root(sch));
 629        return HRTIMER_NORESTART;
 630}
 631
 632#ifdef CONFIG_NET_CLS_ACT
 633static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 634{
 635        struct Qdisc *sch = child->__parent;
 636        struct cbq_sched_data *q = qdisc_priv(sch);
 637        struct cbq_class *cl = q->rx_class;
 638
 639        q->rx_class = NULL;
 640
 641        if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
 642                int ret;
 643
 644                cbq_mark_toplevel(q, cl);
 645
 646                q->rx_class = cl;
 647                cl->q->__parent = sch;
 648
 649                ret = qdisc_enqueue(skb, cl->q);
 650                if (ret == NET_XMIT_SUCCESS) {
 651                        sch->q.qlen++;
 652                        if (!cl->next_alive)
 653                                cbq_activate_class(cl);
 654                        return 0;
 655                }
 656                if (net_xmit_drop_count(ret))
 657                        sch->qstats.drops++;
 658                return 0;
 659        }
 660
 661        sch->qstats.drops++;
 662        return -1;
 663}
 664#endif
 665
 666/*
  667 * This is a mission-critical procedure.
  668 *
  669 * We "regenerate" the toplevel cutoff if the transmitting class
  670 * has backlog and is not regulated. This is not part of the
  671 * original CBQ description, but it looks more reasonable.
  672 * It may well be wrong; the question needs further investigation.
 673 */
 674
 675static inline void
 676cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 677                    struct cbq_class *borrowed)
 678{
 679        if (cl && q->toplevel >= borrowed->level) {
 680                if (cl->q->q.qlen > 1) {
 681                        do {
 682                                if (borrowed->undertime == PSCHED_PASTPERFECT) {
 683                                        q->toplevel = borrowed->level;
 684                                        return;
 685                                }
 686                        } while ((borrowed = borrowed->borrow) != NULL);
 687                }
 688#if 0
  689        /* This is not necessary now. Uncommenting it
  690           would save CPU cycles but decrease fairness.
 691         */
 692                q->toplevel = TC_CBQ_MAXLEVEL;
 693#endif
 694        }
 695}
 696
 697static void
 698cbq_update(struct cbq_sched_data *q)
 699{
 700        struct cbq_class *this = q->tx_class;
 701        struct cbq_class *cl = this;
 702        int len = q->tx_len;
 703
 704        q->tx_class = NULL;
 705
 706        for ( ; cl; cl = cl->share) {
 707                long avgidle = cl->avgidle;
 708                long idle;
 709
 710                cl->bstats.packets++;
 711                cl->bstats.bytes += len;
 712
 713                /*
 714                 * (now - last) is total time between packet right edges.
 715                 * (last_pktlen/rate) is "virtual" busy time, so that
 716                 *
 717                 *      idle = (now - last) - last_pktlen/rate
 718                 */
 719
 720                idle = q->now - cl->last;
 721                if ((unsigned long)idle > 128*1024*1024) {
 722                        avgidle = cl->maxidle;
 723                } else {
 724                        idle -= L2T(cl, len);
 725
 726                /* true_avgidle := (1-W)*true_avgidle + W*idle,
 727                 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
 728                 * cl->avgidle == true_avgidle/W,
 729                 * hence:
 730                 */
 731                        avgidle += idle - (avgidle>>cl->ewma_log);
 732                }
 733
 734                if (avgidle <= 0) {
 735                        /* Overlimit or at-limit */
 736
 737                        if (avgidle < cl->minidle)
 738                                avgidle = cl->minidle;
 739
 740                        cl->avgidle = avgidle;
 741
  742                        /* Calculate the expected time when this class
  743                         * will be allowed to send again.
  744                         * That happens when:
  745                         * (1-W)*true_avgidle + W*idle = 0, i.e.
  746                         * idle = (1/W - 1)*(-true_avgidle)
  747                         * or, in the scaled units actually stored,
  748                         * idle = (1 - W)*(-cl->avgidle);
 749                         */
 750                        idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 751
 752                        /*
 753                         * That is not all.
  754                         * To maintain the rate allocated to the class,
  755                         * we add to undertime the virtual clock time
  756                         * needed to complete the transmitted packet.
  757                         * (len/phys_bandwidth has already elapsed by
  758                         * the time cbq_update is called.)
 759                         */
 760
 761                        idle -= L2T(&q->link, len);
 762                        idle += L2T(cl, len);
 763
 764                        cl->undertime = q->now + idle;
 765                } else {
 766                        /* Underlimit */
 767
 768                        cl->undertime = PSCHED_PASTPERFECT;
 769                        if (avgidle > cl->maxidle)
 770                                cl->avgidle = cl->maxidle;
 771                        else
 772                                cl->avgidle = avgidle;
 773                }
 774                cl->last = q->now;
 775        }
 776
 777        cbq_update_toplevel(q, this, q->tx_borrowed);
 778}
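
/*
 * The scaled EWMA used by cbq_update(), in isolation.  With W = 2^-ewma_log
 * the true average would be  true_avgidle = (1-W)*true_avgidle + W*idle;
 * the kernel stores avgidle = true_avgidle/W, which turns the update into a
 * shift, and the time a class must then stay quiet before going underlimit
 * again is (1-W)*(-avgidle).  These helpers are an illustration only.
 */
#if 0
static long avgidle_update(long avgidle, long idle, unsigned char ewma_log)
{
	/* avgidle += idle - avgidle*W, in scaled units */
	return avgidle + idle - (avgidle >> ewma_log);
}

static long undertime_offset(long avgidle, unsigned char ewma_log)
{
	/* meaningful only when avgidle <= 0 (class is at or over its limit) */
	return (-avgidle) - ((-avgidle) >> ewma_log);
}
#endif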
 779
 780static inline struct cbq_class *
 781cbq_under_limit(struct cbq_class *cl)
 782{
 783        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 784        struct cbq_class *this_cl = cl;
 785
 786        if (cl->tparent == NULL)
 787                return cl;
 788
 789        if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 790                cl->delayed = 0;
 791                return cl;
 792        }
 793
 794        do {
  795                /* This is a very suspicious place. Currently the overlimit
  796                 * action is generated for non-bounded classes
  797                 * only if the link is completely congested.
  798                 * Though this agrees with the ancestor-only paradigm,
  799                 * it looks very dubious. In particular,
  800                 * it means that this chunk of code will either
  801                 * never be called or will result in strong amplification
  802                 * of burstiness. Dangerous and silly, but
  803                 * no other solution exists.
 804                 */
 805                cl = cl->borrow;
 806                if (!cl) {
 807                        this_cl->qstats.overlimits++;
 808                        this_cl->overlimit(this_cl);
 809                        return NULL;
 810                }
 811                if (cl->level > q->toplevel)
 812                        return NULL;
 813        } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
 814
 815        cl->delayed = 0;
 816        return cl;
 817}
 818
 819static inline struct sk_buff *
 820cbq_dequeue_prio(struct Qdisc *sch, int prio)
 821{
 822        struct cbq_sched_data *q = qdisc_priv(sch);
 823        struct cbq_class *cl_tail, *cl_prev, *cl;
 824        struct sk_buff *skb;
 825        int deficit;
 826
 827        cl_tail = cl_prev = q->active[prio];
 828        cl = cl_prev->next_alive;
 829
 830        do {
 831                deficit = 0;
 832
 833                /* Start round */
 834                do {
 835                        struct cbq_class *borrow = cl;
 836
 837                        if (cl->q->q.qlen &&
 838                            (borrow = cbq_under_limit(cl)) == NULL)
 839                                goto skip_class;
 840
 841                        if (cl->deficit <= 0) {
 842                                /* Class exhausted its allotment per
 843                                 * this round. Switch to the next one.
 844                                 */
 845                                deficit = 1;
 846                                cl->deficit += cl->quantum;
 847                                goto next_class;
 848                        }
 849
 850                        skb = cl->q->dequeue(cl->q);
 851
 852                        /* Class did not give us any skb :-(
  853                         * This can happen even if cl->q->q.qlen != 0,
  854                         * e.g. if cl->q == "tbf"
 855                         */
 856                        if (skb == NULL)
 857                                goto skip_class;
 858
 859                        cl->deficit -= qdisc_pkt_len(skb);
 860                        q->tx_class = cl;
 861                        q->tx_borrowed = borrow;
 862                        if (borrow != cl) {
 863#ifndef CBQ_XSTATS_BORROWS_BYTES
 864                                borrow->xstats.borrows++;
 865                                cl->xstats.borrows++;
 866#else
 867                                borrow->xstats.borrows += qdisc_pkt_len(skb);
 868                                cl->xstats.borrows += qdisc_pkt_len(skb);
 869#endif
 870                        }
 871                        q->tx_len = qdisc_pkt_len(skb);
 872
 873                        if (cl->deficit <= 0) {
 874                                q->active[prio] = cl;
 875                                cl = cl->next_alive;
 876                                cl->deficit += cl->quantum;
 877                        }
 878                        return skb;
 879
 880skip_class:
 881                        if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 882                                /* Class is empty or penalized.
 883                                 * Unlink it from active chain.
 884                                 */
 885                                cl_prev->next_alive = cl->next_alive;
 886                                cl->next_alive = NULL;
 887
 888                                /* Did cl_tail point to it? */
 889                                if (cl == cl_tail) {
 890                                        /* Repair it! */
 891                                        cl_tail = cl_prev;
 892
 893                                        /* Was it the last class in this band? */
 894                                        if (cl == cl_tail) {
 895                                                /* Kill the band! */
 896                                                q->active[prio] = NULL;
 897                                                q->activemask &= ~(1<<prio);
 898                                                if (cl->q->q.qlen)
 899                                                        cbq_activate_class(cl);
 900                                                return NULL;
 901                                        }
 902
 903                                        q->active[prio] = cl_tail;
 904                                }
 905                                if (cl->q->q.qlen)
 906                                        cbq_activate_class(cl);
 907
 908                                cl = cl_prev;
 909                        }
 910
 911next_class:
 912                        cl_prev = cl;
 913                        cl = cl->next_alive;
 914                } while (cl_prev != cl_tail);
 915        } while (deficit);
 916
 917        q->active[prio] = cl_prev;
 918
 919        return NULL;
 920}
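
/*
 * Skeleton of the weighted round robin performed above, with the CBQ
 * specifics (borrowing, penalties, toplevel cutoff, list surgery) stripped
 * away: a class spends its deficit on the packets it sends and receives one
 * quantum whenever the deficit runs out.  Everything below (types, names,
 * the peek_len callback) is invented for the sketch; quanta are assumed
 * positive.
 */
#if 0
struct wrr_class {
	long deficit;
	long quantum;				/* must be > 0 */
	int (*peek_len)(struct wrr_class *cl);	/* head packet length, <= 0 if empty */
};

static struct wrr_class *wrr_pick(struct wrr_class **band, int n)
{
	int i, replenished;

	do {
		replenished = 0;
		for (i = 0; i < n; i++) {
			struct wrr_class *cl = band[i];
			int len = cl->peek_len(cl);

			if (len <= 0)			/* nothing queued */
				continue;
			if (cl->deficit <= 0) {		/* allotment exhausted */
				cl->deficit += cl->quantum;
				replenished = 1;
				continue;
			}
			cl->deficit -= len;
			return cl;	/* caller dequeues from this class */
		}
	} while (replenished);

	return NULL;	/* all classes empty */
}
#endif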
 921
 922static inline struct sk_buff *
 923cbq_dequeue_1(struct Qdisc *sch)
 924{
 925        struct cbq_sched_data *q = qdisc_priv(sch);
 926        struct sk_buff *skb;
 927        unsigned int activemask;
 928
 929        activemask = q->activemask & 0xFF;
 930        while (activemask) {
 931                int prio = ffz(~activemask);
 932                activemask &= ~(1<<prio);
 933                skb = cbq_dequeue_prio(sch, prio);
 934                if (skb)
 935                        return skb;
 936        }
 937        return NULL;
 938}
 939
 940static struct sk_buff *
 941cbq_dequeue(struct Qdisc *sch)
 942{
 943        struct sk_buff *skb;
 944        struct cbq_sched_data *q = qdisc_priv(sch);
 945        psched_time_t now;
 946        psched_tdiff_t incr;
 947
 948        now = psched_get_time();
 949        incr = now - q->now_rt;
 950
 951        if (q->tx_class) {
 952                psched_tdiff_t incr2;
  953                /* Time integrator. We calculate the EOS (end of service)
  954                 * time by adding the expected packet transmission time.
  955                 * If real time is greater, we warp the artificial clock,
 956                 * so that:
 957                 *
 958                 * cbq_time = max(real_time, work);
 959                 */
 960                incr2 = L2T(&q->link, q->tx_len);
 961                q->now += incr2;
 962                cbq_update(q);
 963                if ((incr -= incr2) < 0)
 964                        incr = 0;
 965                q->now += incr;
 966        } else {
 967                if (now > q->now)
 968                        q->now = now;
 969        }
 970        q->now_rt = now;
 971
 972        for (;;) {
 973                q->wd_expires = 0;
 974
 975                skb = cbq_dequeue_1(sch);
 976                if (skb) {
 977                        qdisc_bstats_update(sch, skb);
 978                        sch->q.qlen--;
 979                        qdisc_unthrottled(sch);
 980                        return skb;
 981                }
 982
 983                /* All the classes are overlimit.
 984                 *
  985                 * This is possible if:
  986                 *
  987                 * 1. The scheduler is empty.
  988                 * 2. The toplevel cutoff inhibited borrowing.
  989                 * 3. The root class is overlimit.
  990                 *
  991                 * Reset the 2nd and 3rd conditions and retry.
  992                 *
  993                 * Note that NS and cbq-2.0 are buggy here: peeking
  994                 * an arbitrary class is appropriate for ancestor-only
  995                 * sharing, but not for the toplevel algorithm.
  996                 *
  997                 * Our version is better but slower, because it requires
  998                 * two passes; that is unavoidable with top-level sharing.
 999                 */
1000
1001                if (q->toplevel == TC_CBQ_MAXLEVEL &&
1002                    q->link.undertime == PSCHED_PASTPERFECT)
1003                        break;
1004
1005                q->toplevel = TC_CBQ_MAXLEVEL;
1006                q->link.undertime = PSCHED_PASTPERFECT;
1007        }
1008
 1009        /* No packets in the scheduler, or nobody wants to give them to us :-(
 1010         * Sigh... start the watchdog timer in the latter case.
1011         */
1012
1013        if (sch->q.qlen) {
1014                sch->qstats.overlimits++;
1015                if (q->wd_expires)
1016                        qdisc_watchdog_schedule(&q->watchdog,
1017                                                now + q->wd_expires);
1018        }
1019        return NULL;
1020}
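
/*
 * The clock integrator described in cbq_dequeue() above, in isolation: after
 * a transmission the artificial clock advances by the expected transmission
 * time of that packet, and is warped forward whenever real time has already
 * overtaken it, so that cbq_time = max(real_time, work).  The helper below
 * is an illustration only.
 */
#if 0
static void cbq_clock_step(long *cbq_now, long real_elapsed, long tx_time)
{
	*cbq_now += tx_time;			/* "work" done on the link */
	if (real_elapsed > tx_time)		/* real clock ran ahead */
		*cbq_now += real_elapsed - tx_time;
}
#endif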
1021
 1022/* CBQ class maintenance routines */
1023
1024static void cbq_adjust_levels(struct cbq_class *this)
1025{
1026        if (this == NULL)
1027                return;
1028
1029        do {
1030                int level = 0;
1031                struct cbq_class *cl;
1032
1033                cl = this->children;
1034                if (cl) {
1035                        do {
1036                                if (cl->level > level)
1037                                        level = cl->level;
1038                        } while ((cl = cl->sibling) != this->children);
1039                }
1040                this->level = level + 1;
1041        } while ((this = this->tparent) != NULL);
1042}
1043
1044static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1045{
1046        struct cbq_class *cl;
1047        unsigned int h;
1048
1049        if (q->quanta[prio] == 0)
1050                return;
1051
1052        for (h = 0; h < q->clhash.hashsize; h++) {
1053                hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
 1054                        /* BUGGGG... Beware! This expression suffers from
 1055                         * arithmetic overflow!
1056                         */
1057                        if (cl->priority == prio) {
1058                                cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1059                                        q->quanta[prio];
1060                        }
1061                        if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1062                                pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1063                                           cl->common.classid, cl->quantum);
1064                                cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1065                        }
1066                }
1067        }
1068}
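
/*
 * What cbq_normalize_quanta() computes, written out: with n classes in a
 * priority band whose weights sum to wsum, each class gets
 *
 *	quantum = allot * weight * n / wsum
 *
 * i.e. its allot scaled by its weight relative to the band's average weight.
 * The 64-bit intermediate below sidesteps the overflow the comment above
 * warns about; the helper itself is an illustration, not kernel code.
 */
#if 0
static long quantum_for(long allot, long weight, int nclasses, long wsum)
{
	if (wsum <= 0)
		return allot;
	return (long)(((long long)allot * weight * nclasses) / wsum);
}
#endif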
1069
1070static void cbq_sync_defmap(struct cbq_class *cl)
1071{
1072        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1073        struct cbq_class *split = cl->split;
1074        unsigned int h;
1075        int i;
1076
1077        if (split == NULL)
1078                return;
1079
1080        for (i = 0; i <= TC_PRIO_MAX; i++) {
1081                if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
1082                        split->defaults[i] = NULL;
1083        }
1084
1085        for (i = 0; i <= TC_PRIO_MAX; i++) {
1086                int level = split->level;
1087
1088                if (split->defaults[i])
1089                        continue;
1090
1091                for (h = 0; h < q->clhash.hashsize; h++) {
1092                        struct cbq_class *c;
1093
1094                        hlist_for_each_entry(c, &q->clhash.hash[h],
1095                                             common.hnode) {
1096                                if (c->split == split && c->level < level &&
1097                                    c->defmap & (1<<i)) {
1098                                        split->defaults[i] = c;
1099                                        level = c->level;
1100                                }
1101                        }
1102                }
1103        }
1104}
1105
1106static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1107{
1108        struct cbq_class *split = NULL;
1109
1110        if (splitid == 0) {
1111                split = cl->split;
1112                if (!split)
1113                        return;
1114                splitid = split->common.classid;
1115        }
1116
1117        if (split == NULL || split->common.classid != splitid) {
1118                for (split = cl->tparent; split; split = split->tparent)
1119                        if (split->common.classid == splitid)
1120                                break;
1121        }
1122
1123        if (split == NULL)
1124                return;
1125
1126        if (cl->split != split) {
1127                cl->defmap = 0;
1128                cbq_sync_defmap(cl);
1129                cl->split = split;
1130                cl->defmap = def & mask;
1131        } else
1132                cl->defmap = (cl->defmap & ~mask) | (def & mask);
1133
1134        cbq_sync_defmap(cl);
1135}
1136
1137static void cbq_unlink_class(struct cbq_class *this)
1138{
1139        struct cbq_class *cl, **clp;
1140        struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1141
1142        qdisc_class_hash_remove(&q->clhash, &this->common);
1143
1144        if (this->tparent) {
1145                clp = &this->sibling;
1146                cl = *clp;
1147                do {
1148                        if (cl == this) {
1149                                *clp = cl->sibling;
1150                                break;
1151                        }
1152                        clp = &cl->sibling;
1153                } while ((cl = *clp) != this->sibling);
1154
1155                if (this->tparent->children == this) {
1156                        this->tparent->children = this->sibling;
1157                        if (this->sibling == this)
1158                                this->tparent->children = NULL;
1159                }
1160        } else {
1161                WARN_ON(this->sibling != this);
1162        }
1163}
1164
1165static void cbq_link_class(struct cbq_class *this)
1166{
1167        struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1168        struct cbq_class *parent = this->tparent;
1169
1170        this->sibling = this;
1171        qdisc_class_hash_insert(&q->clhash, &this->common);
1172
1173        if (parent == NULL)
1174                return;
1175
1176        if (parent->children == NULL) {
1177                parent->children = this;
1178        } else {
1179                this->sibling = parent->children->sibling;
1180                parent->children->sibling = this;
1181        }
1182}
1183
1184static unsigned int cbq_drop(struct Qdisc *sch)
1185{
1186        struct cbq_sched_data *q = qdisc_priv(sch);
1187        struct cbq_class *cl, *cl_head;
1188        int prio;
1189        unsigned int len;
1190
1191        for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1192                cl_head = q->active[prio];
1193                if (!cl_head)
1194                        continue;
1195
1196                cl = cl_head;
1197                do {
1198                        if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1199                                sch->q.qlen--;
1200                                if (!cl->q->q.qlen)
1201                                        cbq_deactivate_class(cl);
1202                                return len;
1203                        }
1204                } while ((cl = cl->next_alive) != cl_head);
1205        }
1206        return 0;
1207}
1208
1209static void
1210cbq_reset(struct Qdisc *sch)
1211{
1212        struct cbq_sched_data *q = qdisc_priv(sch);
1213        struct cbq_class *cl;
1214        int prio;
1215        unsigned int h;
1216
1217        q->activemask = 0;
1218        q->pmask = 0;
1219        q->tx_class = NULL;
1220        q->tx_borrowed = NULL;
1221        qdisc_watchdog_cancel(&q->watchdog);
1222        hrtimer_cancel(&q->delay_timer);
1223        q->toplevel = TC_CBQ_MAXLEVEL;
1224        q->now = psched_get_time();
1225        q->now_rt = q->now;
1226
1227        for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1228                q->active[prio] = NULL;
1229
1230        for (h = 0; h < q->clhash.hashsize; h++) {
1231                hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1232                        qdisc_reset(cl->q);
1233
1234                        cl->next_alive = NULL;
1235                        cl->undertime = PSCHED_PASTPERFECT;
1236                        cl->avgidle = cl->maxidle;
1237                        cl->deficit = cl->quantum;
1238                        cl->cpriority = cl->priority;
1239                }
1240        }
1241        sch->q.qlen = 0;
1242}
1243
1244
1245static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1246{
1247        if (lss->change & TCF_CBQ_LSS_FLAGS) {
1248                cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1249                cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1250        }
1251        if (lss->change & TCF_CBQ_LSS_EWMA)
1252                cl->ewma_log = lss->ewma_log;
1253        if (lss->change & TCF_CBQ_LSS_AVPKT)
1254                cl->avpkt = lss->avpkt;
1255        if (lss->change & TCF_CBQ_LSS_MINIDLE)
1256                cl->minidle = -(long)lss->minidle;
1257        if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
1258                cl->maxidle = lss->maxidle;
1259                cl->avgidle = lss->maxidle;
1260        }
1261        if (lss->change & TCF_CBQ_LSS_OFFTIME)
1262                cl->offtime = lss->offtime;
1263        return 0;
1264}
1265
1266static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1267{
1268        q->nclasses[cl->priority]--;
1269        q->quanta[cl->priority] -= cl->weight;
1270        cbq_normalize_quanta(q, cl->priority);
1271}
1272
1273static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1274{
1275        q->nclasses[cl->priority]++;
1276        q->quanta[cl->priority] += cl->weight;
1277        cbq_normalize_quanta(q, cl->priority);
1278}
1279
1280static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1281{
1282        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1283
1284        if (wrr->allot)
1285                cl->allot = wrr->allot;
1286        if (wrr->weight)
1287                cl->weight = wrr->weight;
1288        if (wrr->priority) {
1289                cl->priority = wrr->priority - 1;
1290                cl->cpriority = cl->priority;
1291                if (cl->priority >= cl->priority2)
1292                        cl->priority2 = TC_CBQ_MAXPRIO - 1;
1293        }
1294
1295        cbq_addprio(q, cl);
1296        return 0;
1297}
1298
1299static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1300{
1301        switch (ovl->strategy) {
1302        case TC_CBQ_OVL_CLASSIC:
1303                cl->overlimit = cbq_ovl_classic;
1304                break;
1305        case TC_CBQ_OVL_DELAY:
1306                cl->overlimit = cbq_ovl_delay;
1307                break;
1308        case TC_CBQ_OVL_LOWPRIO:
1309                if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
1310                    ovl->priority2 - 1 <= cl->priority)
1311                        return -EINVAL;
1312                cl->priority2 = ovl->priority2 - 1;
1313                cl->overlimit = cbq_ovl_lowprio;
1314                break;
1315        case TC_CBQ_OVL_DROP:
1316                cl->overlimit = cbq_ovl_drop;
1317                break;
1318        case TC_CBQ_OVL_RCLASSIC:
1319                cl->overlimit = cbq_ovl_rclassic;
1320                break;
1321        default:
1322                return -EINVAL;
1323        }
1324        cl->penalty = ovl->penalty;
1325        return 0;
1326}
1327
1328#ifdef CONFIG_NET_CLS_ACT
1329static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1330{
1331        cl->police = p->police;
1332
1333        if (cl->q->handle) {
1334                if (p->police == TC_POLICE_RECLASSIFY)
1335                        cl->q->reshape_fail = cbq_reshape_fail;
1336                else
1337                        cl->q->reshape_fail = NULL;
1338        }
1339        return 0;
1340}
1341#endif
1342
1343static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1344{
1345        cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1346        return 0;
1347}
1348
1349static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1350        [TCA_CBQ_LSSOPT]        = { .len = sizeof(struct tc_cbq_lssopt) },
1351        [TCA_CBQ_WRROPT]        = { .len = sizeof(struct tc_cbq_wrropt) },
1352        [TCA_CBQ_FOPT]          = { .len = sizeof(struct tc_cbq_fopt) },
1353        [TCA_CBQ_OVL_STRATEGY]  = { .len = sizeof(struct tc_cbq_ovl) },
1354        [TCA_CBQ_RATE]          = { .len = sizeof(struct tc_ratespec) },
1355        [TCA_CBQ_RTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1356        [TCA_CBQ_POLICE]        = { .len = sizeof(struct tc_cbq_police) },
1357};
1358
1359static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1360{
1361        struct cbq_sched_data *q = qdisc_priv(sch);
1362        struct nlattr *tb[TCA_CBQ_MAX + 1];
1363        struct tc_ratespec *r;
1364        int err;
1365
1366        err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1367        if (err < 0)
1368                return err;
1369
1370        if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1371                return -EINVAL;
1372
1373        r = nla_data(tb[TCA_CBQ_RATE]);
1374
1375        if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1376                return -EINVAL;
1377
1378        err = qdisc_class_hash_init(&q->clhash);
1379        if (err < 0)
1380                goto put_rtab;
1381
1382        q->link.refcnt = 1;
1383        q->link.sibling = &q->link;
1384        q->link.common.classid = sch->handle;
1385        q->link.qdisc = sch;
1386        q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1387                                      sch->handle);
1388        if (!q->link.q)
1389                q->link.q = &noop_qdisc;
1390
1391        q->link.priority = TC_CBQ_MAXPRIO - 1;
1392        q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1393        q->link.cpriority = TC_CBQ_MAXPRIO - 1;
1394        q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1395        q->link.overlimit = cbq_ovl_classic;
1396        q->link.allot = psched_mtu(qdisc_dev(sch));
1397        q->link.quantum = q->link.allot;
1398        q->link.weight = q->link.R_tab->rate.rate;
1399
1400        q->link.ewma_log = TC_CBQ_DEF_EWMA;
1401        q->link.avpkt = q->link.allot/2;
1402        q->link.minidle = -0x7FFFFFFF;
1403
1404        qdisc_watchdog_init(&q->watchdog, sch);
1405        hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1406        q->delay_timer.function = cbq_undelay;
1407        q->toplevel = TC_CBQ_MAXLEVEL;
1408        q->now = psched_get_time();
1409        q->now_rt = q->now;
1410
1411        cbq_link_class(&q->link);
1412
1413        if (tb[TCA_CBQ_LSSOPT])
1414                cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1415
1416        cbq_addprio(q, &q->link);
1417        return 0;
1418
1419put_rtab:
1420        qdisc_put_rtab(q->link.R_tab);
1421        return err;
1422}
1423
1424static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1425{
1426        unsigned char *b = skb_tail_pointer(skb);
1427
1428        if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
1429                goto nla_put_failure;
1430        return skb->len;
1431
1432nla_put_failure:
1433        nlmsg_trim(skb, b);
1434        return -1;
1435}
1436
1437static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1438{
1439        unsigned char *b = skb_tail_pointer(skb);
1440        struct tc_cbq_lssopt opt;
1441
1442        opt.flags = 0;
1443        if (cl->borrow == NULL)
1444                opt.flags |= TCF_CBQ_LSS_BOUNDED;
1445        if (cl->share == NULL)
1446                opt.flags |= TCF_CBQ_LSS_ISOLATED;
1447        opt.ewma_log = cl->ewma_log;
1448        opt.level = cl->level;
1449        opt.avpkt = cl->avpkt;
1450        opt.maxidle = cl->maxidle;
1451        opt.minidle = (u32)(-cl->minidle);
1452        opt.offtime = cl->offtime;
1453        opt.change = ~0;
1454        if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
1455                goto nla_put_failure;
1456        return skb->len;
1457
1458nla_put_failure:
1459        nlmsg_trim(skb, b);
1460        return -1;
1461}
1462
1463static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1464{
1465        unsigned char *b = skb_tail_pointer(skb);
1466        struct tc_cbq_wrropt opt;
1467
1468        opt.flags = 0;
1469        opt.allot = cl->allot;
1470        opt.priority = cl->priority + 1;
1471        opt.cpriority = cl->cpriority + 1;
1472        opt.weight = cl->weight;
1473        if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
1474                goto nla_put_failure;
1475        return skb->len;
1476
1477nla_put_failure:
1478        nlmsg_trim(skb, b);
1479        return -1;
1480}
1481
1482static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1483{
1484        unsigned char *b = skb_tail_pointer(skb);
1485        struct tc_cbq_ovl opt;
1486
1487        opt.strategy = cl->ovl_strategy;
1488        opt.priority2 = cl->priority2 + 1;
1489        opt.pad = 0;
1490        opt.penalty = cl->penalty;
1491        if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt))
1492                goto nla_put_failure;
1493        return skb->len;
1494
1495nla_put_failure:
1496        nlmsg_trim(skb, b);
1497        return -1;
1498}
1499
1500static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1501{
1502        unsigned char *b = skb_tail_pointer(skb);
1503        struct tc_cbq_fopt opt;
1504
1505        if (cl->split || cl->defmap) {
1506                opt.split = cl->split ? cl->split->common.classid : 0;
1507                opt.defmap = cl->defmap;
1508                opt.defchange = ~0;
1509                if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
1510                        goto nla_put_failure;
1511        }
1512        return skb->len;
1513
1514nla_put_failure:
1515        nlmsg_trim(skb, b);
1516        return -1;
1517}
1518
1519#ifdef CONFIG_NET_CLS_ACT
1520static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1521{
1522        unsigned char *b = skb_tail_pointer(skb);
1523        struct tc_cbq_police opt;
1524
1525        if (cl->police) {
1526                opt.police = cl->police;
1527                opt.__res1 = 0;
1528                opt.__res2 = 0;
1529                if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt))
1530                        goto nla_put_failure;
1531        }
1532        return skb->len;
1533
1534nla_put_failure:
1535        nlmsg_trim(skb, b);
1536        return -1;
1537}
1538#endif
1539
1540static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1541{
1542        if (cbq_dump_lss(skb, cl) < 0 ||
1543            cbq_dump_rate(skb, cl) < 0 ||
1544            cbq_dump_wrr(skb, cl) < 0 ||
1545            cbq_dump_ovl(skb, cl) < 0 ||
1546#ifdef CONFIG_NET_CLS_ACT
1547            cbq_dump_police(skb, cl) < 0 ||
1548#endif
1549            cbq_dump_fopt(skb, cl) < 0)
1550                return -1;
1551        return 0;
1552}
1553
1554static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1555{
1556        struct cbq_sched_data *q = qdisc_priv(sch);
1557        struct nlattr *nest;
1558
1559        nest = nla_nest_start(skb, TCA_OPTIONS);
1560        if (nest == NULL)
1561                goto nla_put_failure;
1562        if (cbq_dump_attr(skb, &q->link) < 0)
1563                goto nla_put_failure;
1564        nla_nest_end(skb, nest);
1565        return skb->len;
1566
1567nla_put_failure:
1568        nla_nest_cancel(skb, nest);
1569        return -1;
1570}
1571
1572static int
1573cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1574{
1575        struct cbq_sched_data *q = qdisc_priv(sch);
1576
1577        q->link.xstats.avgidle = q->link.avgidle;
1578        return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1579}
1580
1581static int
1582cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1583               struct sk_buff *skb, struct tcmsg *tcm)
1584{
1585        struct cbq_class *cl = (struct cbq_class *)arg;
1586        struct nlattr *nest;
1587
1588        if (cl->tparent)
1589                tcm->tcm_parent = cl->tparent->common.classid;
1590        else
1591                tcm->tcm_parent = TC_H_ROOT;
1592        tcm->tcm_handle = cl->common.classid;
1593        tcm->tcm_info = cl->q->handle;
1594
1595        nest = nla_nest_start(skb, TCA_OPTIONS);
1596        if (nest == NULL)
1597                goto nla_put_failure;
1598        if (cbq_dump_attr(skb, cl) < 0)
1599                goto nla_put_failure;
1600        nla_nest_end(skb, nest);
1601        return skb->len;
1602
1603nla_put_failure:
1604        nla_nest_cancel(skb, nest);
1605        return -1;
1606}
1607
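    /*
     * Per-class statistics: qlen is sampled from the leaf qdisc, while
     * avgidle and undertime go out in the CBQ-specific xstats block;
     * undertime is exported as a delta against the scheduler clock q->now.
     */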
1608static int
1609cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1610        struct gnet_dump *d)
1611{
1612        struct cbq_sched_data *q = qdisc_priv(sch);
1613        struct cbq_class *cl = (struct cbq_class *)arg;
1614
1615        cl->qstats.qlen = cl->q->q.qlen;
1616        cl->xstats.avgidle = cl->avgidle;
1617        cl->xstats.undertime = 0;
1618
1619        if (cl->undertime != PSCHED_PASTPERFECT)
1620                cl->xstats.undertime = cl->undertime - q->now;
1621
1622        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1623            gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1624            gnet_stats_copy_queue(d, &cl->qstats) < 0)
1625                return -1;
1626
1627        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1628}
1629
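    /*
     * Replace the leaf qdisc of a class.  A NULL 'new' reverts the class to
     * a default pfifo.  For a reclassify-policed class the reshape_fail hook
     * is installed so overlimit packets can be reclassified rather than
     * dropped.  The old qdisc is reset and its backlog subtracted from the
     * ancestry under the tree lock before it is handed back for destruction.
     */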
1630static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1631                     struct Qdisc **old)
1632{
1633        struct cbq_class *cl = (struct cbq_class *)arg;
1634
1635        if (new == NULL) {
1636                new = qdisc_create_dflt(sch->dev_queue,
1637                                        &pfifo_qdisc_ops, cl->common.classid);
1638                if (new == NULL)
1639                        return -ENOBUFS;
1640        } else {
1641#ifdef CONFIG_NET_CLS_ACT
1642                if (cl->police == TC_POLICE_RECLASSIFY)
1643                        new->reshape_fail = cbq_reshape_fail;
1644#endif
1645        }
1646        sch_tree_lock(sch);
1647        *old = cl->q;
1648        cl->q = new;
1649        qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1650        qdisc_reset(*old);
1651        sch_tree_unlock(sch);
1652
1653        return 0;
1654}
1655
1656static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1657{
1658        struct cbq_class *cl = (struct cbq_class *)arg;
1659
1660        return cl->q;
1661}
1662
1663static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1664{
1665        struct cbq_class *cl = (struct cbq_class *)arg;
1666
1667        if (cl->q->q.qlen == 0)
1668                cbq_deactivate_class(cl);
1669}
1670
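    /*
     * Class reference counting: ->get() takes a reference on behalf of the
     * core, cbq_put() below drops it and destroys the class (clearing a
     * stale rx_class pointer first) once the count reaches zero.
     */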
1671static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1672{
1673        struct cbq_sched_data *q = qdisc_priv(sch);
1674        struct cbq_class *cl = cbq_class_lookup(q, classid);
1675
1676        if (cl) {
1677                cl->refcnt++;
1678                return (unsigned long)cl;
1679        }
1680        return 0;
1681}
1682
1683static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1684{
1685        struct cbq_sched_data *q = qdisc_priv(sch);
1686
1687        WARN_ON(cl->filters);
1688
1689        tcf_destroy_chain(&cl->filter_list);
1690        qdisc_destroy(cl->q);
1691        qdisc_put_rtab(cl->R_tab);
1692        gen_kill_estimator(&cl->bstats, &cl->rate_est);
1693        if (cl != &q->link)
1694                kfree(cl);
1695}
1696
1697static void cbq_destroy(struct Qdisc *sch)
1698{
1699        struct cbq_sched_data *q = qdisc_priv(sch);
1700        struct hlist_node *next;
1701        struct cbq_class *cl;
1702        unsigned int h;
1703
1704#ifdef CONFIG_NET_CLS_ACT
1705        q->rx_class = NULL;
1706#endif
1707        /*
1708         * Filters must be destroyed first because we do not destroy the
1709         * classes from root to leaves, which means that filters can still
1710         * be bound to classes that have already been destroyed. --TGR '04
1711         */
1712        for (h = 0; h < q->clhash.hashsize; h++) {
1713                hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
1714                        tcf_destroy_chain(&cl->filter_list);
1715        }
1716        for (h = 0; h < q->clhash.hashsize; h++) {
1717                hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
1718                                          common.hnode)
1719                        cbq_destroy_class(sch, cl);
1720        }
1721        qdisc_class_hash_destroy(&q->clhash);
1722}
1723
1724static void cbq_put(struct Qdisc *sch, unsigned long arg)
1725{
1726        struct cbq_class *cl = (struct cbq_class *)arg;
1727
1728        if (--cl->refcnt == 0) {
1729#ifdef CONFIG_NET_CLS_ACT
1730                spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1731                struct cbq_sched_data *q = qdisc_priv(sch);
1732
1733                spin_lock_bh(root_lock);
1734                if (q->rx_class == cl)
1735                        q->rx_class = NULL;
1736                spin_unlock_bh(root_lock);
1737#endif
1738
1739                cbq_destroy_class(sch, cl);
1740        }
1741}
1742
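    /*
     * Create a new class or modify an existing one.  With a non-zero *arg
     * the class already exists and only the supplied attributes are updated
     * under the tree lock; otherwise rate, WRR and LSS options are mandatory,
     * a fresh class is allocated, linked into the hash and initialised from
     * its parent's parameters.
     *
     * Illustrative user-space invocation (exact option syntax depends on the
     * iproute2 version in use):
     *
     *   tc class add dev eth0 parent 1: classid 1:10 cbq \
     *           rate 1mbit allot 1514 prio 5 avpkt 1000 bounded
     */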
1743static int
1744cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1745                 unsigned long *arg)
1746{
1747        int err;
1748        struct cbq_sched_data *q = qdisc_priv(sch);
1749        struct cbq_class *cl = (struct cbq_class *)*arg;
1750        struct nlattr *opt = tca[TCA_OPTIONS];
1751        struct nlattr *tb[TCA_CBQ_MAX + 1];
1752        struct cbq_class *parent;
1753        struct qdisc_rate_table *rtab = NULL;
1754
1755        if (opt == NULL)
1756                return -EINVAL;
1757
1758        err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1759        if (err < 0)
1760                return err;
1761
1762        if (cl) {
1763                /* Check parent */
1764                if (parentid) {
1765                        if (cl->tparent &&
1766                            cl->tparent->common.classid != parentid)
1767                                return -EINVAL;
1768                        if (!cl->tparent && parentid != TC_H_ROOT)
1769                                return -EINVAL;
1770                }
1771
1772                if (tb[TCA_CBQ_RATE]) {
1773                        rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1774                                              tb[TCA_CBQ_RTAB]);
1775                        if (rtab == NULL)
1776                                return -EINVAL;
1777                }
1778
1779                if (tca[TCA_RATE]) {
1780                        err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1781                                                    qdisc_root_sleeping_lock(sch),
1782                                                    tca[TCA_RATE]);
1783                        if (err) {
1784                                if (rtab)
1785                                        qdisc_put_rtab(rtab);
1786                                return err;
1787                        }
1788                }
1789
1790                /* Change class parameters */
1791                sch_tree_lock(sch);
1792
1793                if (cl->next_alive != NULL)
1794                        cbq_deactivate_class(cl);
1795
1796                if (rtab) {
1797                        qdisc_put_rtab(cl->R_tab);
1798                        cl->R_tab = rtab;
1799                }
1800
1801                if (tb[TCA_CBQ_LSSOPT])
1802                        cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1803
1804                if (tb[TCA_CBQ_WRROPT]) {
1805                        cbq_rmprio(q, cl);
1806                        cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1807                }
1808
1809                if (tb[TCA_CBQ_OVL_STRATEGY])
1810                        cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1811
1812#ifdef CONFIG_NET_CLS_ACT
1813                if (tb[TCA_CBQ_POLICE])
1814                        cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1815#endif
1816
1817                if (tb[TCA_CBQ_FOPT])
1818                        cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1819
1820                if (cl->q->q.qlen)
1821                        cbq_activate_class(cl);
1822
1823                sch_tree_unlock(sch);
1824
1825                return 0;
1826        }
1827
1828        if (parentid == TC_H_ROOT)
1829                return -EINVAL;
1830
1831        if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1832            tb[TCA_CBQ_LSSOPT] == NULL)
1833                return -EINVAL;
1834
1835        rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1836        if (rtab == NULL)
1837                return -EINVAL;
1838
1839        if (classid) {
1840                err = -EINVAL;
1841                if (TC_H_MAJ(classid ^ sch->handle) ||
1842                    cbq_class_lookup(q, classid))
1843                        goto failure;
1844        } else {
1845                int i;
1846                classid = TC_H_MAKE(sch->handle, 0x8000);
1847
1848                for (i = 0; i < 0x8000; i++) {
1849                        if (++q->hgenerator >= 0x8000)
1850                                q->hgenerator = 1;
1851                        if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1852                                break;
1853                }
1854                err = -ENOSR;
1855                if (i >= 0x8000)
1856                        goto failure;
1857                classid = classid|q->hgenerator;
1858        }
1859
1860        parent = &q->link;
1861        if (parentid) {
1862                parent = cbq_class_lookup(q, parentid);
1863                err = -EINVAL;
1864                if (parent == NULL)
1865                        goto failure;
1866        }
1867
1868        err = -ENOBUFS;
1869        cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1870        if (cl == NULL)
1871                goto failure;
1872
1873        if (tca[TCA_RATE]) {
1874                err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1875                                        qdisc_root_sleeping_lock(sch),
1876                                        tca[TCA_RATE]);
1877                if (err) {
1878                        kfree(cl);
1879                        goto failure;
1880                }
1881        }
1882
1883        cl->R_tab = rtab;
1884        rtab = NULL;
1885        cl->refcnt = 1;
1886        cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1887        if (!cl->q)
1888                cl->q = &noop_qdisc;
1889        cl->common.classid = classid;
1890        cl->tparent = parent;
1891        cl->qdisc = sch;
1892        cl->allot = parent->allot;
1893        cl->quantum = cl->allot;
1894        cl->weight = cl->R_tab->rate.rate;
1895
1896        sch_tree_lock(sch);
1897        cbq_link_class(cl);
1898        cl->borrow = cl->tparent;
1899        if (cl->tparent != &q->link)
1900                cl->share = cl->tparent;
1901        cbq_adjust_levels(parent);
1902        cl->minidle = -0x7FFFFFFF;
1903        cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1904        cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1905        if (cl->ewma_log == 0)
1906                cl->ewma_log = q->link.ewma_log;
1907        if (cl->maxidle == 0)
1908                cl->maxidle = q->link.maxidle;
1909        if (cl->avpkt == 0)
1910                cl->avpkt = q->link.avpkt;
1911        cl->overlimit = cbq_ovl_classic;
1912        if (tb[TCA_CBQ_OVL_STRATEGY])
1913                cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1914#ifdef CONFIG_NET_CLS_ACT
1915        if (tb[TCA_CBQ_POLICE])
1916                cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1917#endif
1918        if (tb[TCA_CBQ_FOPT])
1919                cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1920        sch_tree_unlock(sch);
1921
1922        qdisc_class_hash_grow(sch, &q->clhash);
1923
1924        *arg = (unsigned long)cl;
1925        return 0;
1926
1927failure:
1928        qdisc_put_rtab(rtab);
1929        return err;
1930}
1931
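    /*
     * A class may only be deleted once no filters and no child classes
     * reference it (and never the root link class).  Its queued packets are
     * dropped, it is deactivated and unlinked from the class hash under the
     * tree lock, and the memory itself is freed when the caller's reference
     * is released via cbq_put().
     */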
1932static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1933{
1934        struct cbq_sched_data *q = qdisc_priv(sch);
1935        struct cbq_class *cl = (struct cbq_class *)arg;
1936        unsigned int qlen;
1937
1938        if (cl->filters || cl->children || cl == &q->link)
1939                return -EBUSY;
1940
1941        sch_tree_lock(sch);
1942
1943        qlen = cl->q->q.qlen;
1944        qdisc_reset(cl->q);
1945        qdisc_tree_decrease_qlen(cl->q, qlen);
1946
1947        if (cl->next_alive)
1948                cbq_deactivate_class(cl);
1949
1950        if (q->tx_borrowed == cl)
1951                q->tx_borrowed = q->tx_class;
1952        if (q->tx_class == cl) {
1953                q->tx_class = NULL;
1954                q->tx_borrowed = NULL;
1955        }
1956#ifdef CONFIG_NET_CLS_ACT
1957        if (q->rx_class == cl)
1958                q->rx_class = NULL;
1959#endif
1960
1961        cbq_unlink_class(cl);
1962        cbq_adjust_levels(cl->tparent);
1963        cl->defmap = 0;
1964        cbq_sync_defmap(cl);
1965
1966        cbq_rmprio(q, cl);
1967        sch_tree_unlock(sch);
1968
1969        BUG_ON(--cl->refcnt == 0);
1970        /*
1971         * This shouldn't happen: tc_ctl_tclass still holds the reference it
1972         * took via cops->get(); the class is destroyed later from cops->put().
1973         */
1974
1975        return 0;
1976}
1977
1978static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1979{
1980        struct cbq_sched_data *q = qdisc_priv(sch);
1981        struct cbq_class *cl = (struct cbq_class *)arg;
1982
1983        if (cl == NULL)
1984                cl = &q->link;
1985
1986        return &cl->filter_list;
1987}
1988
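    /*
     * A filter attached to class 'parent' may only select classes that sit
     * strictly below it in the hierarchy; binding bumps cl->filters, which
     * keeps the class from being deleted while filters still point at it.
     */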
1989static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1990                                     u32 classid)
1991{
1992        struct cbq_sched_data *q = qdisc_priv(sch);
1993        struct cbq_class *p = (struct cbq_class *)parent;
1994        struct cbq_class *cl = cbq_class_lookup(q, classid);
1995
1996        if (cl) {
1997                if (p && p->level <= cl->level)
1998                        return 0;
1999                cl->filters++;
2000                return (unsigned long)cl;
2001        }
2002        return 0;
2003}
2004
2005static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2006{
2007        struct cbq_class *cl = (struct cbq_class *)arg;
2008
2009        cl->filters--;
2010}
2011
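    /*
     * Iterate over every class in the hash for class dumps, honouring the
     * walker's skip/count/stop protocol.
     */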
2012static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2013{
2014        struct cbq_sched_data *q = qdisc_priv(sch);
2015        struct cbq_class *cl;
2016        unsigned int h;
2017
2018        if (arg->stop)
2019                return;
2020
2021        for (h = 0; h < q->clhash.hashsize; h++) {
2022                hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
2023                        if (arg->count < arg->skip) {
2024                                arg->count++;
2025                                continue;
2026                        }
2027                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2028                                arg->stop = 1;
2029                                return;
2030                        }
2031                        arg->count++;
2032                }
2033        }
2034}
2035
2036static const struct Qdisc_class_ops cbq_class_ops = {
2037        .graft          =       cbq_graft,
2038        .leaf           =       cbq_leaf,
2039        .qlen_notify    =       cbq_qlen_notify,
2040        .get            =       cbq_get,
2041        .put            =       cbq_put,
2042        .change         =       cbq_change_class,
2043        .delete         =       cbq_delete,
2044        .walk           =       cbq_walk,
2045        .tcf_chain      =       cbq_find_tcf,
2046        .bind_tcf       =       cbq_bind_filter,
2047        .unbind_tcf     =       cbq_unbind_filter,
2048        .dump           =       cbq_dump_class,
2049        .dump_stats     =       cbq_dump_class_stats,
2050};
2051
2052static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2053        .next           =       NULL,
2054        .cl_ops         =       &cbq_class_ops,
2055        .id             =       "cbq",
2056        .priv_size      =       sizeof(struct cbq_sched_data),
2057        .enqueue        =       cbq_enqueue,
2058        .dequeue        =       cbq_dequeue,
2059        .peek           =       qdisc_peek_dequeued,
2060        .drop           =       cbq_drop,
2061        .init           =       cbq_init,
2062        .reset          =       cbq_reset,
2063        .destroy        =       cbq_destroy,
2064        .change         =       NULL,
2065        .dump           =       cbq_dump,
2066        .dump_stats     =       cbq_dump_stats,
2067        .owner          =       THIS_MODULE,
2068};
2069
2070static int __init cbq_module_init(void)
2071{
2072        return register_qdisc(&cbq_qdisc_ops);
2073}
2074static void __exit cbq_module_exit(void)
2075{
2076        unregister_qdisc(&cbq_qdisc_ops);
2077}
2078module_init(cbq_module_init)
2079module_exit(cbq_module_exit)
2080MODULE_LICENSE("GPL");
2081