linux/net/sched/sch_htb.c
   1/*
   2 * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Martin Devera, <devik@cdi.cz>
  10 *
  11 * Credits (in time order) for older HTB versions:
  12 *              Stef Coene <stef.coene@docum.org>
  13 *                      HTB support at LARTC mailing list
  14 *              Ondrej Kraus, <krauso@barr.cz>
  15 *                      found missing INIT_QDISC(htb)
  16 *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
  17 *                      helped a lot to locate nasty class stall bug
  18 *              Andi Kleen, Jamal Hadi, Bert Hubert
  19 *                      code review and helpful comments on shaping
  20 *              Tomasz Wrona, <tw@eter.tym.pl>
  21 *                      created test case so that I was able to fix nasty bug
  22 *              Wilfried Weissmann
  23 *                      spotted bug in dequeue code and helped with fix
  24 *              Jiri Fojtasek
  25 *                      fixed requeue routine
  26 *              and many others. thanks.
  27 */
  28#include <linux/module.h>
  29#include <linux/moduleparam.h>
  30#include <linux/types.h>
  31#include <linux/kernel.h>
  32#include <linux/string.h>
  33#include <linux/errno.h>
  34#include <linux/skbuff.h>
  35#include <linux/list.h>
  36#include <linux/compiler.h>
  37#include <linux/rbtree.h>
  38#include <linux/workqueue.h>
  39#include <net/netlink.h>
  40#include <net/pkt_sched.h>
  41
  42/* HTB algorithm.
  43    Author: devik@cdi.cz
  44    ========================================================================
  45    HTB is like TBF with multiple classes. It is also similar to CBQ because
  46    it allows assigning a priority to each class in the hierarchy.
  47    In fact it is another implementation of Floyd's formal sharing.
  48
  49    Levels:
  50    Each class is assigned a level. A leaf ALWAYS has level 0 and root
  51    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
  52    one less than their parent.
  53*/
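
/*
 * A worked example of the level rule above, assuming TC_HTB_MAXDEPTH == 8
 * (the value in pkt_sched.h at the time of writing). A new class starts as
 * a leaf (level 0); when it later gains a child, htb_change_class re-levels
 * it to one less than its own parent's level (or to TC_HTB_MAXDEPTH - 1 for
 * a root class). A self-contained sketch of that rule:
 */
#if 0 /* illustrative only, not compiled */
#include <stdio.h>

#define TC_HTB_MAXDEPTH 8

static int htb_new_inner_level(int parent_level, int has_parent)
{
        return (has_parent ? parent_level : TC_HTB_MAXDEPTH) - 1;
}

int main(void)
{
        int root = htb_new_inner_level(0, 0);     /* root class: level 7 */
        int inner = htb_new_inner_level(root, 1); /* its child: level 6 */

        printf("root=%d inner=%d leaf=0\n", root, inner);
        return 0;
}
#endif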
  54
  55static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
  56#define HTB_VER 0x30011         /* major must match the version number supplied by TC */
  57
  58#if HTB_VER >> 16 != TC_HTB_PROTOVER
  59#error "Mismatched sch_htb.c and pkt_sch.h"
  60#endif
  61
  62/* Module parameter and sysfs export */
  63module_param(htb_hysteresis, int, 0640);
  64MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
  65
  66/* used internally to keep the status of a single class */
  67enum htb_cmode {
  68        HTB_CANT_SEND,          /* class can't send and can't borrow */
  69        HTB_MAY_BORROW,         /* class can't send but may borrow */
  70        HTB_CAN_SEND            /* class can send */
  71};
  72
  73/* interior & leaf nodes; props specific to leaves are marked L: */
  74struct htb_class {
  75        struct Qdisc_class_common common;
  76        /* general class parameters */
  77        struct gnet_stats_basic_packed bstats;
  78        struct gnet_stats_queue qstats;
  79        struct gnet_stats_rate_est rate_est;
  80        struct tc_htb_xstats xstats;    /* our special stats */
  81        int refcnt;             /* usage count of this class */
  82
  83        /* topology */
  84        int level;              /* our level (see above) */
  85        unsigned int children;
  86        struct htb_class *parent;       /* parent class */
  87
  88        int prio;               /* these two are used only by leaves... */
  89        int quantum;            /* but stored for parent-to-leaf return */
  90
  91        union {
  92                struct htb_class_leaf {
  93                        struct Qdisc *q;
  94                        int deficit[TC_HTB_MAXDEPTH];
  95                        struct list_head drop_list;
  96                } leaf;
  97                struct htb_class_inner {
  98                        struct rb_root feed[TC_HTB_NUMPRIO];    /* feed trees */
  99                        struct rb_node *ptr[TC_HTB_NUMPRIO];    /* current class ptr */
 100                        /* When a class changes from state 1->2 and disconnects
 101                           from its parent's feed, we lose the ptr value and would
 102                           start from the first child again. So we store here the
 103                           classid of the last valid ptr (used when ptr is NULL). */
 104                        u32 last_ptr_id[TC_HTB_NUMPRIO];
 105                } inner;
 106        } un;
 107        struct rb_node node[TC_HTB_NUMPRIO];    /* node for self or feed tree */
 108        struct rb_node pq_node; /* node for event queue */
 109        psched_time_t pq_key;
 110
 111        int prio_activity;      /* for which prios are we active */
 112        enum htb_cmode cmode;   /* current mode of the class */
 113
 114        /* class attached filters */
 115        struct tcf_proto *filter_list;
 116        int filter_cnt;
 117
 118        /* token bucket parameters */
 119        struct qdisc_rate_table *rate;  /* rate table of the class itself */
 120        struct qdisc_rate_table *ceil;  /* ceiling rate (limits borrows too) */
 121        long buffer, cbuffer;   /* token bucket depth/rate */
 122        psched_tdiff_t mbuffer; /* max wait time */
 123        long tokens, ctokens;   /* current number of tokens */
 124        psched_time_t t_c;      /* checkpoint time */
 125};
 126
 127struct htb_sched {
 128        struct Qdisc_class_hash clhash;
 129        struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 130
 131        /* self list - roots of self generating tree */
 132        struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 133        int row_mask[TC_HTB_MAXDEPTH];
 134        struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 135        u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 136
 137        /* self wait list - roots of wait PQs per row */
 138        struct rb_root wait_pq[TC_HTB_MAXDEPTH];
 139
 140        /* time of nearest event per level (row) */
 141        psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
 142
 143        int defcls;             /* class where unclassified flows go to */
 144
 145        /* filters for qdisc itself */
 146        struct tcf_proto *filter_list;
 147
 148        int rate2quantum;       /* quant = rate / rate2quantum */
 149        psched_time_t now;      /* cached dequeue time */
 150        struct qdisc_watchdog watchdog;
 151
 152        /* non-shaped skbs; let them go directly through */
 153        struct sk_buff_head direct_queue;
 154        int direct_qlen;        /* max qlen of above */
 155
 156        long direct_pkts;
 157
 158#define HTB_WARN_TOOMANYEVENTS  0x1
 159        unsigned int warned;    /* only one warning */
 160        struct work_struct work;
 161};
 162
 163/* find class in global hash table using given handle */
 164static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 165{
 166        struct htb_sched *q = qdisc_priv(sch);
 167        struct Qdisc_class_common *clc;
 168
 169        clc = qdisc_class_find(&q->clhash, handle);
 170        if (clc == NULL)
 171                return NULL;
 172        return container_of(clc, struct htb_class, common);
 173}
 174
 175/**
 176 * htb_classify - classify a packet into a class
 177 *
 178 * It returns NULL if the packet should be dropped, or HTB_DIRECT if the
 179 * packet should be passed directly through. In all other cases a leaf
 180 * class is returned. We allow direct class selection by a classid in
 181 * skb->priority. Then we examine the filters in the qdisc and in inner
 182 * nodes (if a higher filter points to an inner node). If we end up with
 183 * classid MAJOR:0 we enqueue the skb into the special internal fifo
 184 * (direct); these packets then go directly through. If we still have no
 185 * valid leaf we fall back to the MAJOR:default leaf, else the direct queue.
 186 */
 187#define HTB_DIRECT ((struct htb_class *)-1)
 188
 189static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 190                                      int *qerr)
 191{
 192        struct htb_sched *q = qdisc_priv(sch);
 193        struct htb_class *cl;
 194        struct tcf_result res;
 195        struct tcf_proto *tcf;
 196        int result;
 197
 198        /* allow selecting a class by setting skb->priority to a valid
 199           classid; note that nfmark can be used too, by attaching an fw
 200           filter with no rules in it */
 201        if (skb->priority == sch->handle)
 202                return HTB_DIRECT;      /* X:0 (direct flow) selected */
 203        if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
 204                return cl;
 205
 206        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 207        tcf = q->filter_list;
 208        while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
 209#ifdef CONFIG_NET_CLS_ACT
 210                switch (result) {
 211                case TC_ACT_QUEUED:
 212                case TC_ACT_STOLEN:
 213                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
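                        /* fall through */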
 214                case TC_ACT_SHOT:
 215                        return NULL;
 216                }
 217#endif
 218                if ((cl = (void *)res.class) == NULL) {
 219                        if (res.classid == sch->handle)
 220                                return HTB_DIRECT;      /* X:0 (direct flow) */
 221                        if ((cl = htb_find(res.classid, sch)) == NULL)
 222                                break;  /* filter selected invalid classid */
 223                }
 224                if (!cl->level)
 225                        return cl;      /* we hit leaf; return it */
 226
 227                /* we have got inner class; apply inner filter chain */
 228                tcf = cl->filter_list;
 229        }
 230        /* classification failed; try to use default class */
 231        cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
 232        if (!cl || cl->level)
 233                return HTB_DIRECT;      /* bad default; this is a safe bet */
 234        return cl;
 235}
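
/*
 * Note that HTB_DIRECT is the sentinel (struct htb_class *)-1, not a real
 * pointer, so callers have to test for it before anything else. A minimal
 * sketch of the pattern htb_enqueue below actually uses:
 */
#if 0 /* illustrative only, not compiled */
struct htb_class *cl = htb_classify(skb, sch, &ret);

if (cl == HTB_DIRECT) {
        /* shaping is bypassed: queue the skb on the direct queue */
} else if (cl == NULL) {
        /* drop (or, under CONFIG_NET_CLS_ACT, a filter consumed the skb) */
} else {
        /* normal case: enqueue to the leaf class's own qdisc */
}
#endif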
 236
 237/**
 238 * htb_add_to_id_tree - adds a class to the round robin list
 239 *
 240 * The routine adds the class to the list (actually a tree) sorted by classid.
 241 * Make sure that the class is not already on such a list for the given prio.
 242 */
 243static void htb_add_to_id_tree(struct rb_root *root,
 244                               struct htb_class *cl, int prio)
 245{
 246        struct rb_node **p = &root->rb_node, *parent = NULL;
 247
 248        while (*p) {
 249                struct htb_class *c;
 250                parent = *p;
 251                c = rb_entry(parent, struct htb_class, node[prio]);
 252
 253                if (cl->common.classid > c->common.classid)
 254                        p = &parent->rb_right;
 255                else
 256                        p = &parent->rb_left;
 257        }
 258        rb_link_node(&cl->node[prio], parent, p);
 259        rb_insert_color(&cl->node[prio], root);
 260}
 261
 262/**
 263 * htb_add_to_wait_tree - adds class to the event queue with delay
 264 *
 265 * The class is added to the priority event queue to indicate that the
 266 * class will change its mode at time cl->pq_key. Make sure that the class
 267 * is not already in the queue.
 268 */
 269static void htb_add_to_wait_tree(struct htb_sched *q,
 270                                 struct htb_class *cl, long delay)
 271{
 272        struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
 273
 274        cl->pq_key = q->now + delay;
 275        if (cl->pq_key == q->now)
 276                cl->pq_key++;
 277
 278        /* update the nearest event cache */
 279        if (q->near_ev_cache[cl->level] > cl->pq_key)
 280                q->near_ev_cache[cl->level] = cl->pq_key;
 281
 282        while (*p) {
 283                struct htb_class *c;
 284                parent = *p;
 285                c = rb_entry(parent, struct htb_class, pq_node);
 286                if (cl->pq_key >= c->pq_key)
 287                        p = &parent->rb_right;
 288                else
 289                        p = &parent->rb_left;
 290        }
 291        rb_link_node(&cl->pq_node, parent, p);
 292        rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
 293}
 294
 295/**
 296 * htb_next_rb_node - advances *n to the next node in the rbtree
 297 *
 298 * When we are past the last key, *n becomes NULL.
 299 * Average complexity is 2 steps per call.
 300 */
 301static inline void htb_next_rb_node(struct rb_node **n)
 302{
 303        *n = rb_next(*n);
 304}
 305
 306/**
 307 * htb_add_class_to_row - add class to its row
 308 *
 309 * The class is added to the row at the priorities marked in mask.
 310 * It does nothing if mask == 0.
 311 */
 312static inline void htb_add_class_to_row(struct htb_sched *q,
 313                                        struct htb_class *cl, int mask)
 314{
 315        q->row_mask[cl->level] |= mask;
 316        while (mask) {
 317                int prio = ffz(~mask);
 318                mask &= ~(1 << prio);
 319                htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
 320        }
 321}
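
/*
 * The loop above walks the set bits of mask with ffz(~mask) ("find first
 * zero" of the complement), i.e. the index of the lowest set bit, clearing
 * each bit as it goes. A userspace sketch of the same walk, with the
 * GCC/Clang builtin __builtin_ctz standing in for ffz(~mask):
 */
#if 0 /* illustrative only, not compiled */
#include <stdio.h>

int main(void)
{
        unsigned int mask = 0x29;       /* prios 0, 3 and 5 set */

        while (mask) {
                int prio = __builtin_ctz(mask);
                mask &= ~(1U << prio);
                printf("prio %d\n", prio);      /* prints 0, 3, 5 */
        }
        return 0;
}
#endif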
 322
 323/* If this triggers, it is a bug in this code, but it need not be fatal */
 324static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
 325{
 326        if (RB_EMPTY_NODE(rb)) {
 327                WARN_ON(1);
 328        } else {
 329                rb_erase(rb, root);
 330                RB_CLEAR_NODE(rb);
 331        }
 332}
 333
 334
 335/**
 336 * htb_remove_class_from_row - removes class from its row
 337 *
 338 * The class is removed from the row at the priorities marked in mask.
 339 * It does nothing if mask == 0.
 340 */
 341static inline void htb_remove_class_from_row(struct htb_sched *q,
 342                                                 struct htb_class *cl, int mask)
 343{
 344        int m = 0;
 345
 346        while (mask) {
 347                int prio = ffz(~mask);
 348
 349                mask &= ~(1 << prio);
 350                if (q->ptr[cl->level][prio] == cl->node + prio)
 351                        htb_next_rb_node(q->ptr[cl->level] + prio);
 352
 353                htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
 354                if (!q->row[cl->level][prio].rb_node)
 355                        m |= 1 << prio;
 356        }
 357        q->row_mask[cl->level] &= ~m;
 358}
 359
 360/**
 361 * htb_activate_prios - creates the active class's feed chain
 362 *
 363 * The class is connected to its ancestors and/or the appropriate rows
 364 * for the priorities it participates in. cl->cmode must be the new
 365 * (activated) mode. It does nothing if cl->prio_activity == 0.
 366 */
 367static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 368{
 369        struct htb_class *p = cl->parent;
 370        long m, mask = cl->prio_activity;
 371
 372        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 373                m = mask;
 374                while (m) {
 375                        int prio = ffz(~m);
 376                        m &= ~(1 << prio);
 377
 378                        if (p->un.inner.feed[prio].rb_node)
 379                                /* parent already has its feed in use, so
 380                                   reset the bit in mask as parent is already ok */
 381                                mask &= ~(1 << prio);
 382
 383                        htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
 384                }
 385                p->prio_activity |= mask;
 386                cl = p;
 387                p = cl->parent;
 388
 389        }
 390        if (cl->cmode == HTB_CAN_SEND && mask)
 391                htb_add_class_to_row(q, cl, mask);
 392}
 393
 394/**
 395 * htb_deactivate_prios - removes the class from the feed chain
 396 *
 397 * cl->cmode must represent the old mode (before deactivation). It does
 398 * nothing if cl->prio_activity == 0. The class is removed from all feed
 399 * chains and rows.
 400 */
 401static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 402{
 403        struct htb_class *p = cl->parent;
 404        long m, mask = cl->prio_activity;
 405
 406        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 407                m = mask;
 408                mask = 0;
 409                while (m) {
 410                        int prio = ffz(~m);
 411                        m &= ~(1 << prio);
 412
 413                        if (p->un.inner.ptr[prio] == cl->node + prio) {
 414                                /* we are removing a child that the parent
 415                                   feed points to - forget the pointer but
 416                                   remember the classid */
 417                                p->un.inner.last_ptr_id[prio] = cl->common.classid;
 418                                p->un.inner.ptr[prio] = NULL;
 419                        }
 420
 421                        htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
 422
 423                        if (!p->un.inner.feed[prio].rb_node)
 424                                mask |= 1 << prio;
 425                }
 426
 427                p->prio_activity &= ~mask;
 428                cl = p;
 429                p = cl->parent;
 430
 431        }
 432        if (cl->cmode == HTB_CAN_SEND && mask)
 433                htb_remove_class_from_row(q, cl, mask);
 434}
 435
 436static inline long htb_lowater(const struct htb_class *cl)
 437{
 438        if (htb_hysteresis)
 439                return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
 440        else
 441                return 0;
 442}
 443static inline long htb_hiwater(const struct htb_class *cl)
 444{
 445        if (htb_hysteresis)
 446                return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
 447        else
 448                return 0;
 449}
 450
 451
 452/**
 453 * htb_class_mode - computes and returns current class mode
 454 *
 455 * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
 456 * is not HTB_CAN_SEND then *diff is set to the time difference from now
 457 * to the time when cl will change its state.
 458 * It is also worth noting that the class mode doesn't change simply
 459 * at cl->{c,}tokens == 0; rather, there can be hysteresis over the
 460 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 461 * mode transitions per time unit. The speed gain is about 1/6.
 462 */
 463static inline enum htb_cmode
 464htb_class_mode(struct htb_class *cl, long *diff)
 465{
 466        long toks;
 467
 468        if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
 469                *diff = -toks;
 470                return HTB_CANT_SEND;
 471        }
 472
 473        if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
 474                return HTB_CAN_SEND;
 475
 476        *diff = -toks;
 477        return HTB_MAY_BORROW;
 478}
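
/*
 * A worked example of the helper above with the default htb_hysteresis == 0,
 * where both watermarks are 0: if ctokens plus the elapsed diff comes to -5,
 * the class is over its ceil, the mode is HTB_CANT_SEND and *diff is set to
 * 5 (the time until the ceil bucket reaches zero). If the ceil bucket is
 * fine but tokens plus diff comes to -7, the mode is HTB_MAY_BORROW and
 * *diff becomes 7. Otherwise the mode is HTB_CAN_SEND and *diff is left
 * alone. That returned *diff is the delay the callers then feed to
 * htb_add_to_wait_tree.
 */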
 479
 480/**
 481 * htb_change_class_mode - changes the class's mode
 482 *
 483 * This should be the only way to change a class's mode under normal
 484 * circumstances. The routine will update the feed list linkage, change
 485 * the mode and add the class to the wait event queue if appropriate. The
 486 * new mode should differ from the old one, and cl->pq_key has to be valid
 487 * if changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 488 */
 489static void
 490htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
 491{
 492        enum htb_cmode new_mode = htb_class_mode(cl, diff);
 493
 494        if (new_mode == cl->cmode)
 495                return;
 496
 497        if (cl->prio_activity) {        /* not necessary: speed optimization */
 498                if (cl->cmode != HTB_CANT_SEND)
 499                        htb_deactivate_prios(q, cl);
 500                cl->cmode = new_mode;
 501                if (new_mode != HTB_CANT_SEND)
 502                        htb_activate_prios(q, cl);
 503        } else
 504                cl->cmode = new_mode;
 505}
 506
 507/**
 508 * htb_activate - inserts leaf cl into the appropriate active feeds
 509 *
 510 * The routine learns the (new) priority of the leaf and activates the
 511 * feed chain for that prio. It can safely be called on an already
 512 * active leaf. It also adds the leaf to the drop list.
 513 */
 514static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 515{
 516        WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
 517
 518        if (!cl->prio_activity) {
 519                cl->prio_activity = 1 << cl->prio;
 520                htb_activate_prios(q, cl);
 521                list_add_tail(&cl->un.leaf.drop_list,
 522                              q->drops + cl->prio);
 523        }
 524}
 525
 526/**
 527 * htb_deactivate - removes leaf cl from the active feeds
 528 *
 529 * Make sure that the leaf is active. In other words, it can't be called
 530 * with a non-active leaf. It also removes the class from the drop list.
 531 */
 532static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 533{
 534        WARN_ON(!cl->prio_activity);
 535
 536        htb_deactivate_prios(q, cl);
 537        cl->prio_activity = 0;
 538        list_del_init(&cl->un.leaf.drop_list);
 539}
 540
 541static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 542{
 543        int uninitialized_var(ret);
 544        struct htb_sched *q = qdisc_priv(sch);
 545        struct htb_class *cl = htb_classify(skb, sch, &ret);
 546
 547        if (cl == HTB_DIRECT) {
 548                /* enqueue to helper queue */
 549                if (q->direct_queue.qlen < q->direct_qlen) {
 550                        __skb_queue_tail(&q->direct_queue, skb);
 551                        q->direct_pkts++;
 552                } else {
 553                        kfree_skb(skb);
 554                        sch->qstats.drops++;
 555                        return NET_XMIT_DROP;
 556                }
 557#ifdef CONFIG_NET_CLS_ACT
 558        } else if (!cl) {
 559                if (ret & __NET_XMIT_BYPASS)
 560                        sch->qstats.drops++;
 561                kfree_skb(skb);
 562                return ret;
 563#endif
 564        } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
 565                if (net_xmit_drop_count(ret)) {
 566                        sch->qstats.drops++;
 567                        cl->qstats.drops++;
 568                }
 569                return ret;
 570        } else {
 571                cl->bstats.packets +=
 572                        skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
 573                cl->bstats.bytes += qdisc_pkt_len(skb);
 574                htb_activate(q, cl);
 575        }
 576
 577        sch->q.qlen++;
 578        sch->bstats.packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
 579        sch->bstats.bytes += qdisc_pkt_len(skb);
 580        return NET_XMIT_SUCCESS;
 581}
 582
 583static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
 584{
 585        long toks = diff + cl->tokens;
 586
 587        if (toks > cl->buffer)
 588                toks = cl->buffer;
 589        toks -= (long) qdisc_l2t(cl->rate, bytes);
 590        if (toks <= -cl->mbuffer)
 591                toks = 1 - cl->mbuffer;
 592
 593        cl->tokens = toks;
 594}
 595
 596static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
 597{
 598        long toks = diff + cl->ctokens;
 599
 600        if (toks > cl->cbuffer)
 601                toks = cl->cbuffer;
 602        toks -= (long) qdisc_l2t(cl->ceil, bytes);
 603        if (toks <= -cl->mbuffer)
 604                toks = 1 - cl->mbuffer;
 605
 606        cl->ctokens = toks;
 607}
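
/*
 * Both helpers above apply the same clamped token-bucket update: refill by
 * the elapsed time, cap at the bucket depth, pay for the bytes, and never
 * let the debt exceed the maximum wait time. A self-contained sketch with
 * made-up numbers (the real cost comes from the rate table via qdisc_l2t):
 */
#if 0 /* illustrative only, not compiled */
#include <stdio.h>

static long accnt(long tokens, long diff, long buffer, long mbuffer, long cost)
{
        long toks = diff + tokens;      /* refill for elapsed time */

        if (toks > buffer)
                toks = buffer;          /* bucket holds at most "buffer" */
        toks -= cost;                   /* pay for the dequeued bytes */
        if (toks <= -mbuffer)
                toks = 1 - mbuffer;     /* bound the debt */
        return toks;
}

int main(void)
{
        /* 100 tokens, 50 ticks elapsed, depth 120, bound 1000, cost 30 */
        printf("%ld\n", accnt(100, 50, 120, 1000, 30)); /* 120 - 30 = 90 */
        return 0;
}
#endif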
 608
 609/**
 610 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 611 *
 612 * The routine assumes that a packet "bytes" long was dequeued from leaf cl
 613 * borrowing from "level". It accounts the bytes to the ceil leaky bucket
 614 * for the leaf and all ancestors, and to the rate bucket for ancestors at
 615 * levels "level" and higher. It also handles a possible change of mode
 616 * resulting from the update. Note that the mode can also increase here
 617 * (MAY_BORROW to CAN_SEND) because we use a more precise clock than the
 618 * event queue here; in such a case we remove the class from the event queue first.
 619 */
 620static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 621                             int level, struct sk_buff *skb)
 622{
 623        int bytes = qdisc_pkt_len(skb);
 624        enum htb_cmode old_mode;
 625        long diff;
 626
 627        while (cl) {
 628                diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
 629                if (cl->level >= level) {
 630                        if (cl->level == level)
 631                                cl->xstats.lends++;
 632                        htb_accnt_tokens(cl, bytes, diff);
 633                } else {
 634                        cl->xstats.borrows++;
 635                        cl->tokens += diff;     /* we moved t_c; update tokens */
 636                }
 637                htb_accnt_ctokens(cl, bytes, diff);
 638                cl->t_c = q->now;
 639
 640                old_mode = cl->cmode;
 641                diff = 0;
 642                htb_change_class_mode(q, cl, &diff);
 643                if (old_mode != cl->cmode) {
 644                        if (old_mode != HTB_CAN_SEND)
 645                                htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
 646                        if (cl->cmode != HTB_CAN_SEND)
 647                                htb_add_to_wait_tree(q, cl, diff);
 648                }
 649
 650                /* update byte stats except for leaves which are already updated */
 651                if (cl->level) {
 652                        cl->bstats.bytes += bytes;
 653                        cl->bstats.packets += skb_is_gso(skb) ?
 654                                        skb_shinfo(skb)->gso_segs : 1;
 655                }
 656                cl = cl->parent;
 657        }
 658}
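
/*
 * A worked example of the walk above: suppose a leaf at level 0 sent while
 * borrowing from its grandparent at level 2. Walking up from the leaf,
 * levels 0 and 1 lie below "level", so each just counts a borrow and
 * regains rate tokens for the elapsed time; the level 2 ancestor counts a
 * lend and pays rate tokens; ancestors above level 2 pay rate tokens too.
 * Every class on the path pays ceil tokens for the packet.
 */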
 659
 660/**
 661 * htb_do_events - make mode changes to classes at the level
 662 *
 663 * Scans the event queue for pending events and applies them. Returns the
 664 * time of the next pending event (0 for no event in the pq, q->now for too
 665 * many events). Note: only events with cl->pq_key <= q->now are applied.
 666 */
 667static psched_time_t htb_do_events(struct htb_sched *q, int level,
 668                                   unsigned long start)
 669{
 670        /* don't run for longer than 2 jiffies; 2 is used instead of
 671           1 to simplify things when the jiffies counter is about to
 672           be incremented too soon */
 673        unsigned long stop_at = start + 2;
 674        while (time_before(jiffies, stop_at)) {
 675                struct htb_class *cl;
 676                long diff;
 677                struct rb_node *p = rb_first(&q->wait_pq[level]);
 678
 679                if (!p)
 680                        return 0;
 681
 682                cl = rb_entry(p, struct htb_class, pq_node);
 683                if (cl->pq_key > q->now)
 684                        return cl->pq_key;
 685
 686                htb_safe_rb_erase(p, q->wait_pq + level);
 687                diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
 688                htb_change_class_mode(q, cl, &diff);
 689                if (cl->cmode != HTB_CAN_SEND)
 690                        htb_add_to_wait_tree(q, cl, diff);
 691        }
 692
 693        /* too much load - let's continue after a break for scheduling */
 694        if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
 695                printk(KERN_WARNING "htb: too many events!\n");
 696                q->warned |= HTB_WARN_TOOMANYEVENTS;
 697        }
 698
 699        return q->now;
 700}
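
/*
 * The three possible return values above drive htb_dequeue's timer choice:
 * 0 means the level's wait queue is empty (rechecked a second later), a
 * future pq_key becomes the watchdog deadline, and q->now (the 2-jiffy
 * budget ran out) makes htb_dequeue punt to the workqueue instead of
 * rearming the hrtimer for a time that has already passed.
 */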
 701
 702/* Returns class->node+prio from the id-tree where the class's id is >= id,
 703   or NULL if no such one exists. */
 704static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 705                                              u32 id)
 706{
 707        struct rb_node *r = NULL;
 708        while (n) {
 709                struct htb_class *cl =
 710                    rb_entry(n, struct htb_class, node[prio]);
 711
 712                if (id > cl->common.classid) {
 713                        n = n->rb_right;
 714                } else if (id < cl->common.classid) {
 715                        r = n;
 716                        n = n->rb_left;
 717                } else {
 718                        return n;
 719                }
 720        }
 721        return r;
 722}
 723
 724/**
 725 * htb_lookup_leaf - returns next leaf class in DRR order
 726 *
 727 * Find the leaf the current feed pointer points to.
 728 */
 729static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 730                                         struct rb_node **pptr, u32 * pid)
 731{
 732        int i;
 733        struct {
 734                struct rb_node *root;
 735                struct rb_node **pptr;
 736                u32 *pid;
 737        } stk[TC_HTB_MAXDEPTH], *sp = stk;
 738
 739        BUG_ON(!tree->rb_node);
 740        sp->root = tree->rb_node;
 741        sp->pptr = pptr;
 742        sp->pid = pid;
 743
 744        for (i = 0; i < 65535; i++) {
 745                if (!*sp->pptr && *sp->pid) {
 746                        /* ptr was invalidated but id is valid - try to recover
 747                           the original or next ptr */
 748                        *sp->pptr =
 749                            htb_id_find_next_upper(prio, sp->root, *sp->pid);
 750                }
 751                *sp->pid = 0;   /* ptr is valid now, so remove this hint as it
 752                                   can become out of date quickly */
 753                if (!*sp->pptr) {       /* we are at right end; rewind & go up */
 754                        *sp->pptr = sp->root;
 755                        while ((*sp->pptr)->rb_left)
 756                                *sp->pptr = (*sp->pptr)->rb_left;
 757                        if (sp > stk) {
 758                                sp--;
 759                                if (!*sp->pptr) {
 760                                        WARN_ON(1);
 761                                        return NULL;
 762                                }
 763                                htb_next_rb_node(sp->pptr);
 764                        }
 765                } else {
 766                        struct htb_class *cl;
 767                        cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
 768                        if (!cl->level)
 769                                return cl;
 770                        (++sp)->root = cl->un.inner.feed[prio].rb_node;
 771                        sp->pptr = cl->un.inner.ptr + prio;
 772                        sp->pid = cl->un.inner.last_ptr_id + prio;
 773                }
 774        }
 775        WARN_ON(1);
 776        return NULL;
 777}
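
/*
 * Each stack frame above holds (subtree root, saved position, classid hint)
 * for one feed tree on the way down from the row. When a saved position was
 * invalidated (*pptr NULL but *pid set), htb_id_find_next_upper restarts at
 * the first class whose classid is >= the hint, so the DRR walk resumes
 * near where it stopped rather than at the leftmost child.
 */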
 778
 779/* dequeues a packet at the given priority and level; call only if
 780   you are sure that there is an active class at prio/level */
 781static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
 782                                        int level)
 783{
 784        struct sk_buff *skb = NULL;
 785        struct htb_class *cl, *start;
 786        /* look initial class up in the row */
 787        start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
 788                                     q->ptr[level] + prio,
 789                                     q->last_ptr_id[level] + prio);
 790
 791        do {
 792next:
 793                if (unlikely(!cl))
 794                        return NULL;
 795
 796                /* the class can be empty - it is unlikely but can happen if
 797                   the leaf qdisc drops packets in its enqueue routine or if
 798                   someone grafted onto the leaf since the last dequeue;
 799                   simply deactivate and skip such a class */
 800                if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
 801                        struct htb_class *next;
 802                        htb_deactivate(q, cl);
 803
 804                        /* row/level might become empty */
 805                        if ((q->row_mask[level] & (1 << prio)) == 0)
 806                                return NULL;
 807
 808                        next = htb_lookup_leaf(q->row[level] + prio,
 809                                               prio, q->ptr[level] + prio,
 810                                               q->last_ptr_id[level] + prio);
 811
 812                        if (cl == start)        /* fix start if we just deleted it */
 813                                start = next;
 814                        cl = next;
 815                        goto next;
 816                }
 817
 818                skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
 819                if (likely(skb != NULL))
 820                        break;
 821
 822                qdisc_warn_nonwc("htb", cl->un.leaf.q);
 823                htb_next_rb_node((level ? cl->parent->un.inner.ptr :
 824                                  q->ptr[0]) + prio);
 825                cl = htb_lookup_leaf(q->row[level] + prio, prio,
 826                                     q->ptr[level] + prio,
 827                                     q->last_ptr_id[level] + prio);
 828
 829        } while (cl != start);
 830
 831        if (likely(skb != NULL)) {
 832                cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
 833                if (cl->un.leaf.deficit[level] < 0) {
 834                        cl->un.leaf.deficit[level] += cl->quantum;
 835                        htb_next_rb_node((level ? cl->parent->un.inner.ptr :
 836                                          q->ptr[0]) + prio);
 837                }
 838                /* this used to be after charge_class but this arrangement
 839                   gives us slightly better performance */
 840                if (!cl->un.leaf.q->q.qlen)
 841                        htb_deactivate(q, cl);
 842                htb_charge_class(q, cl, level, skb);
 843        }
 844        return skb;
 845}
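
/*
 * A deficit example for the block above, with quantum 1500: a freshly
 * activated class (deficit 0) dequeues a 1000 byte skb, the deficit drops
 * to -1000, one quantum is added (deficit 500) and the round-robin pointer
 * advances. When the rotation returns to the class, a 400 byte skb leaves
 * the deficit at 100 and the pointer stays put, so the class keeps its slot
 * until the quantum is used up.
 */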
 846
 847static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 848{
 849        struct sk_buff *skb = NULL;
 850        struct htb_sched *q = qdisc_priv(sch);
 851        int level;
 852        psched_time_t next_event;
 853        unsigned long start_at;
 854
 855        /* try to dequeue direct packets at high prio (!) to minimize cpu work */
 856        skb = __skb_dequeue(&q->direct_queue);
 857        if (skb != NULL) {
 858                sch->flags &= ~TCQ_F_THROTTLED;
 859                sch->q.qlen--;
 860                return skb;
 861        }
 862
 863        if (!sch->q.qlen)
 864                goto fin;
 865        q->now = psched_get_time();
 866        start_at = jiffies;
 867
 868        next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
 869
 870        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 871                /* common case optimization - skip event handler quickly */
 872                int m;
 873                psched_time_t event;
 874
 875                if (q->now >= q->near_ev_cache[level]) {
 876                        event = htb_do_events(q, level, start_at);
 877                        if (!event)
 878                                event = q->now + PSCHED_TICKS_PER_SEC;
 879                        q->near_ev_cache[level] = event;
 880                } else
 881                        event = q->near_ev_cache[level];
 882
 883                if (next_event > event)
 884                        next_event = event;
 885
 886                m = ~q->row_mask[level];
 887                while (m != (int)(-1)) {
 888                        int prio = ffz(m);
 889                        m |= 1 << prio;
 890                        skb = htb_dequeue_tree(q, prio, level);
 891                        if (likely(skb != NULL)) {
 892                                sch->q.qlen--;
 893                                sch->flags &= ~TCQ_F_THROTTLED;
 894                                goto fin;
 895                        }
 896                }
 897        }
 898        sch->qstats.overlimits++;
 899        if (likely(next_event > q->now))
 900                qdisc_watchdog_schedule(&q->watchdog, next_event);
 901        else
 902                schedule_work(&q->work);
 903fin:
 904        return skb;
 905}
 906
 907/* try to drop from each class (by prio) until one succeeds */
 908static unsigned int htb_drop(struct Qdisc *sch)
 909{
 910        struct htb_sched *q = qdisc_priv(sch);
 911        int prio;
 912
 913        for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
 914                struct list_head *p;
 915                list_for_each(p, q->drops + prio) {
 916                        struct htb_class *cl = list_entry(p, struct htb_class,
 917                                                          un.leaf.drop_list);
 918                        unsigned int len;
 919                        if (cl->un.leaf.q->ops->drop &&
 920                            (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
 921                                sch->q.qlen--;
 922                                if (!cl->un.leaf.q->q.qlen)
 923                                        htb_deactivate(q, cl);
 924                                return len;
 925                        }
 926                }
 927        }
 928        return 0;
 929}
 930
 931/* reset all classes */
 932/* always called under BH & queue lock */
 933static void htb_reset(struct Qdisc *sch)
 934{
 935        struct htb_sched *q = qdisc_priv(sch);
 936        struct htb_class *cl;
 937        struct hlist_node *n;
 938        unsigned int i;
 939
 940        for (i = 0; i < q->clhash.hashsize; i++) {
 941                hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
 942                        if (cl->level)
 943                                memset(&cl->un.inner, 0, sizeof(cl->un.inner));
 944                        else {
 945                                if (cl->un.leaf.q)
 946                                        qdisc_reset(cl->un.leaf.q);
 947                                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 948                        }
 949                        cl->prio_activity = 0;
 950                        cl->cmode = HTB_CAN_SEND;
 951
 952                }
 953        }
 954        qdisc_watchdog_cancel(&q->watchdog);
 955        __skb_queue_purge(&q->direct_queue);
 956        sch->q.qlen = 0;
 957        memset(q->row, 0, sizeof(q->row));
 958        memset(q->row_mask, 0, sizeof(q->row_mask));
 959        memset(q->wait_pq, 0, sizeof(q->wait_pq));
 960        memset(q->ptr, 0, sizeof(q->ptr));
 961        for (i = 0; i < TC_HTB_NUMPRIO; i++)
 962                INIT_LIST_HEAD(q->drops + i);
 963}
 964
 965static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
 966        [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
 967        [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
 968        [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 969        [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 970};
 971
 972static void htb_work_func(struct work_struct *work)
 973{
 974        struct htb_sched *q = container_of(work, struct htb_sched, work);
 975        struct Qdisc *sch = q->watchdog.qdisc;
 976
 977        __netif_schedule(qdisc_root(sch));
 978}
 979
 980static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 981{
 982        struct htb_sched *q = qdisc_priv(sch);
 983        struct nlattr *tb[TCA_HTB_INIT + 1];
 984        struct tc_htb_glob *gopt;
 985        int err;
 986        int i;
 987
 988        if (!opt)
 989                return -EINVAL;
 990
 991        err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
 992        if (err < 0)
 993                return err;
 994
 995        if (tb[TCA_HTB_INIT] == NULL) {
 996                printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
 997                return -EINVAL;
 998        }
 999        gopt = nla_data(tb[TCA_HTB_INIT]);
1000        if (gopt->version != HTB_VER >> 16) {
1001                printk(KERN_ERR
1002                       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
1003                       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
1004                return -EINVAL;
1005        }
1006
1007        err = qdisc_class_hash_init(&q->clhash);
1008        if (err < 0)
1009                return err;
1010        for (i = 0; i < TC_HTB_NUMPRIO; i++)
1011                INIT_LIST_HEAD(q->drops + i);
1012
1013        qdisc_watchdog_init(&q->watchdog, sch);
1014        INIT_WORK(&q->work, htb_work_func);
1015        skb_queue_head_init(&q->direct_queue);
1016
1017        q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1018        if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1019                q->direct_qlen = 2;
1020
1021        if ((q->rate2quantum = gopt->rate2quantum) < 1)
1022                q->rate2quantum = 1;
1023        q->defcls = gopt->defcls;
1024
1025        return 0;
1026}
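
/*
 * The two knobs parsed above correspond to tc's qdisc-level options:
 * "r2q" sets rate2quantum and "default" sets defcls. Typical iproute2
 * usage exercising this init path (values are only an example):
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20 r2q 10
 *   tc class add dev eth0 parent 1: classid 1:1 htb rate 1mbit ceil 2mbit
 *   tc class add dev eth0 parent 1:1 classid 1:20 htb rate 500kbit ceil 2mbit
 */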
1027
1028static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1029{
1030        spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1031        struct htb_sched *q = qdisc_priv(sch);
1032        struct nlattr *nest;
1033        struct tc_htb_glob gopt;
1034
1035        spin_lock_bh(root_lock);
1036
1037        gopt.direct_pkts = q->direct_pkts;
1038        gopt.version = HTB_VER;
1039        gopt.rate2quantum = q->rate2quantum;
1040        gopt.defcls = q->defcls;
1041        gopt.debug = 0;
1042
1043        nest = nla_nest_start(skb, TCA_OPTIONS);
1044        if (nest == NULL)
1045                goto nla_put_failure;
1046        NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1047        nla_nest_end(skb, nest);
1048
1049        spin_unlock_bh(root_lock);
1050        return skb->len;
1051
1052nla_put_failure:
1053        spin_unlock_bh(root_lock);
1054        nla_nest_cancel(skb, nest);
1055        return -1;
1056}
1057
1058static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1059                          struct sk_buff *skb, struct tcmsg *tcm)
1060{
1061        struct htb_class *cl = (struct htb_class *)arg;
1062        spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1063        struct nlattr *nest;
1064        struct tc_htb_opt opt;
1065
1066        spin_lock_bh(root_lock);
1067        tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1068        tcm->tcm_handle = cl->common.classid;
1069        if (!cl->level && cl->un.leaf.q)
1070                tcm->tcm_info = cl->un.leaf.q->handle;
1071
1072        nest = nla_nest_start(skb, TCA_OPTIONS);
1073        if (nest == NULL)
1074                goto nla_put_failure;
1075
1076        memset(&opt, 0, sizeof(opt));
1077
1078        opt.rate = cl->rate->rate;
1079        opt.buffer = cl->buffer;
1080        opt.ceil = cl->ceil->rate;
1081        opt.cbuffer = cl->cbuffer;
1082        opt.quantum = cl->quantum;
1083        opt.prio = cl->prio;
1084        opt.level = cl->level;
1085        NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1086
1087        nla_nest_end(skb, nest);
1088        spin_unlock_bh(root_lock);
1089        return skb->len;
1090
1091nla_put_failure:
1092        spin_unlock_bh(root_lock);
1093        nla_nest_cancel(skb, nest);
1094        return -1;
1095}
1096
1097static int
1098htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1099{
1100        struct htb_class *cl = (struct htb_class *)arg;
1101
1102        if (!cl->level && cl->un.leaf.q)
1103                cl->qstats.qlen = cl->un.leaf.q->q.qlen;
1104        cl->xstats.tokens = cl->tokens;
1105        cl->xstats.ctokens = cl->ctokens;
1106
1107        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1108            gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1109            gnet_stats_copy_queue(d, &cl->qstats) < 0)
1110                return -1;
1111
1112        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1113}
1114
1115static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1116                     struct Qdisc **old)
1117{
1118        struct htb_class *cl = (struct htb_class *)arg;
1119
1120        if (cl->level)
1121                return -EINVAL;
1122        if (new == NULL &&
1123            (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1124                                     &pfifo_qdisc_ops,
1125                                     cl->common.classid)) == NULL)
1126                return -ENOBUFS;
1127
1128        sch_tree_lock(sch);
1129        *old = cl->un.leaf.q;
1130        cl->un.leaf.q = new;
1131        if (*old != NULL) {
1132                qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1133                qdisc_reset(*old);
1134        }
1135        sch_tree_unlock(sch);
1136        return 0;
1137}
1138
1139static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1140{
1141        struct htb_class *cl = (struct htb_class *)arg;
1142        return !cl->level ? cl->un.leaf.q : NULL;
1143}
1144
1145static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1146{
1147        struct htb_class *cl = (struct htb_class *)arg;
1148
1149        if (cl->un.leaf.q->q.qlen == 0)
1150                htb_deactivate(qdisc_priv(sch), cl);
1151}
1152
1153static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1154{
1155        struct htb_class *cl = htb_find(classid, sch);
1156        if (cl)
1157                cl->refcnt++;
1158        return (unsigned long)cl;
1159}
1160
1161static inline int htb_parent_last_child(struct htb_class *cl)
1162{
1163        if (!cl->parent)
1164                /* the root class */
1165                return 0;
1166        if (cl->parent->children > 1)
1167                /* not the last child */
1168                return 0;
1169        return 1;
1170}
1171
1172static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1173                               struct Qdisc *new_q)
1174{
1175        struct htb_class *parent = cl->parent;
1176
1177        WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
1178
1179        if (parent->cmode != HTB_CAN_SEND)
1180                htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
1181
1182        parent->level = 0;
1183        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1184        INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1185        parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
1186        parent->tokens = parent->buffer;
1187        parent->ctokens = parent->cbuffer;
1188        parent->t_c = psched_get_time();
1189        parent->cmode = HTB_CAN_SEND;
1190}
1191
1192static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1193{
1194        if (!cl->level) {
1195                WARN_ON(!cl->un.leaf.q);
1196                qdisc_destroy(cl->un.leaf.q);
1197        }
1198        gen_kill_estimator(&cl->bstats, &cl->rate_est);
1199        qdisc_put_rtab(cl->rate);
1200        qdisc_put_rtab(cl->ceil);
1201
1202        tcf_destroy_chain(&cl->filter_list);
1203        kfree(cl);
1204}
1205
1206static void htb_destroy(struct Qdisc *sch)
1207{
1208        struct htb_sched *q = qdisc_priv(sch);
1209        struct hlist_node *n, *next;
1210        struct htb_class *cl;
1211        unsigned int i;
1212
1213        cancel_work_sync(&q->work);
1214        qdisc_watchdog_cancel(&q->watchdog);
1215        /* This line used to be after the htb_destroy_class call below,
1216           and surprisingly it worked in 2.4. But it must precede it
1217           because filters need their target class alive to be able to
1218           call unbind_filter on it (without an Oops). */
1219        tcf_destroy_chain(&q->filter_list);
1220
1221        for (i = 0; i < q->clhash.hashsize; i++) {
1222                hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
1223                        tcf_destroy_chain(&cl->filter_list);
1224        }
1225        for (i = 0; i < q->clhash.hashsize; i++) {
1226                hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
1227                                          common.hnode)
1228                        htb_destroy_class(sch, cl);
1229        }
1230        qdisc_class_hash_destroy(&q->clhash);
1231        __skb_queue_purge(&q->direct_queue);
1232}
1233
1234static int htb_delete(struct Qdisc *sch, unsigned long arg)
1235{
1236        struct htb_sched *q = qdisc_priv(sch);
1237        struct htb_class *cl = (struct htb_class *)arg;
1238        unsigned int qlen;
1239        struct Qdisc *new_q = NULL;
1240        int last_child = 0;
1241
1242        /* TODO: why not allow deleting a subtree? references? does the
1243           tc subsystem guarantee us that in htb_destroy it holds no class
1244           refs, so that we can remove children safely there? */
1245        if (cl->children || cl->filter_cnt)
1246                return -EBUSY;
1247
1248        if (!cl->level && htb_parent_last_child(cl)) {
1249                new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1250                                          &pfifo_qdisc_ops,
1251                                          cl->parent->common.classid);
1252                last_child = 1;
1253        }
1254
1255        sch_tree_lock(sch);
1256
1257        if (!cl->level) {
1258                qlen = cl->un.leaf.q->q.qlen;
1259                qdisc_reset(cl->un.leaf.q);
1260                qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
1261        }
1262
1263        /* delete from hash and active; remainder in destroy_class */
1264        qdisc_class_hash_remove(&q->clhash, &cl->common);
1265        if (cl->parent)
1266                cl->parent->children--;
1267
1268        if (cl->prio_activity)
1269                htb_deactivate(q, cl);
1270
1271        if (cl->cmode != HTB_CAN_SEND)
1272                htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1273
1274        if (last_child)
1275                htb_parent_to_leaf(q, cl, new_q);
1276
1277        BUG_ON(--cl->refcnt == 0);
1278        /*
1279         * This shouldn't happen: we "hold" one cops->get() when called
1280         * from tc_ctl_tclass; the destroy method is done from cops->put().
1281         */
1282
1283        sch_tree_unlock(sch);
1284        return 0;
1285}
1286
1287static void htb_put(struct Qdisc *sch, unsigned long arg)
1288{
1289        struct htb_class *cl = (struct htb_class *)arg;
1290
1291        if (--cl->refcnt == 0)
1292                htb_destroy_class(sch, cl);
1293}
1294
1295static int htb_change_class(struct Qdisc *sch, u32 classid,
1296                            u32 parentid, struct nlattr **tca,
1297                            unsigned long *arg)
1298{
1299        int err = -EINVAL;
1300        struct htb_sched *q = qdisc_priv(sch);
1301        struct htb_class *cl = (struct htb_class *)*arg, *parent;
1302        struct nlattr *opt = tca[TCA_OPTIONS];
1303        struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1304        struct nlattr *tb[TCA_HTB_RTAB + 1];
1305        struct tc_htb_opt *hopt;
1306
1307        /* extract all subattrs from opt attr */
1308        if (!opt)
1309                goto failure;
1310
1311        err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
1312        if (err < 0)
1313                goto failure;
1314
1315        err = -EINVAL;
1316        if (tb[TCA_HTB_PARMS] == NULL)
1317                goto failure;
1318
1319        parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1320
1321        hopt = nla_data(tb[TCA_HTB_PARMS]);
1322
1323        rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
1324        ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
1325        if (!rtab || !ctab)
1326                goto failure;
1327
1328        if (!cl) {              /* new class */
1329                struct Qdisc *new_q;
1330                int prio;
1331                struct {
1332                        struct nlattr           nla;
1333                        struct gnet_estimator   opt;
1334                } est = {
1335                        .nla = {
1336                                .nla_len        = nla_attr_size(sizeof(est.opt)),
1337                                .nla_type       = TCA_RATE,
1338                        },
1339                        .opt = {
1340                                /* 4s interval, 16s averaging constant */
1341                                .interval       = 2,
1342                                .ewma_log       = 2,
1343                        },
1344                };
1345
1346                /* check for valid classid */
1347                if (!classid || TC_H_MAJ(classid ^ sch->handle)
1348                    || htb_find(classid, sch))
1349                        goto failure;
1350
1351                /* check maximal depth */
1352                if (parent && parent->parent && parent->parent->level < 2) {
1353                        printk(KERN_ERR "htb: tree is too deep\n");
1354                        goto failure;
1355                }
1356                err = -ENOBUFS;
1357                if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
1358                        goto failure;
1359
1360                err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1361                                        qdisc_root_sleeping_lock(sch),
1362                                        tca[TCA_RATE] ? : &est.nla);
1363                if (err) {
1364                        kfree(cl);
1365                        goto failure;
1366                }
1367
1368                cl->refcnt = 1;
1369                cl->children = 0;
1370                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1371                RB_CLEAR_NODE(&cl->pq_node);
1372
1373                for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1374                        RB_CLEAR_NODE(&cl->node[prio]);
1375
1376                /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1377                   so it can't be created inside of sch_tree_lock
1378                   -- thanks to Karlis Peisenieks */
1379                new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1380                                          &pfifo_qdisc_ops, classid);
1381                sch_tree_lock(sch);
1382                if (parent && !parent->level) {
1383                        unsigned int qlen = parent->un.leaf.q->q.qlen;
1384
1385                        /* turn parent into inner node */
1386                        qdisc_reset(parent->un.leaf.q);
1387                        qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
1388                        qdisc_destroy(parent->un.leaf.q);
1389                        if (parent->prio_activity)
1390                                htb_deactivate(q, parent);
1391
1392                        /* remove from evt list because of level change */
1393                        if (parent->cmode != HTB_CAN_SEND) {
1394                                htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
1395                                parent->cmode = HTB_CAN_SEND;
1396                        }
1397                        parent->level = (parent->parent ? parent->parent->level
1398                                         : TC_HTB_MAXDEPTH) - 1;
1399                        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1400                }
1401                /* leaf (we) needs elementary qdisc */
1402                cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1403
1404                cl->common.classid = classid;
1405                cl->parent = parent;
1406
1407                /* set class to be in HTB_CAN_SEND state */
1408                cl->tokens = hopt->buffer;
1409                cl->ctokens = hopt->cbuffer;
1410                cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;        /* 1min */
1411                cl->t_c = psched_get_time();
1412                cl->cmode = HTB_CAN_SEND;
1413
1414                /* attach to the hash list and parent's family */
1415                qdisc_class_hash_insert(&q->clhash, &cl->common);
1416                if (parent)
1417                        parent->children++;
1418        } else {
1419                if (tca[TCA_RATE]) {
1420                        err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1421                                                    qdisc_root_sleeping_lock(sch),
1422                                                    tca[TCA_RATE]);
1423                        if (err)
1424                                return err;
1425                }
1426                sch_tree_lock(sch);
1427        }
1428
1429        /* there used to be a nasty bug here: we have to check that the
1430           node is really a leaf before changing cl->un.leaf ! */
1431        if (!cl->level) {
1432                cl->quantum = rtab->rate.rate / q->rate2quantum;
1433                if (!hopt->quantum && cl->quantum < 1000) {
1434                        printk(KERN_WARNING
1435                               "HTB: quantum of class %X is small. Consider r2q change.\n",
1436                               cl->common.classid);
1437                        cl->quantum = 1000;
1438                }
1439                if (!hopt->quantum && cl->quantum > 200000) {
1440                        printk(KERN_WARNING
1441                               "HTB: quantum of class %X is big. Consider r2q change.\n",
1442                               cl->common.classid);
1443                        cl->quantum = 200000;
1444                }
1445                if (hopt->quantum)
1446                        cl->quantum = hopt->quantum;
1447                if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1448                        cl->prio = TC_HTB_NUMPRIO - 1;
1449        }
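
        /*
         * A worked example of the clamping above, assuming tc's default
         * r2q of 10 and rate.rate in bytes/sec: a 100kbit class
         * (12500 B/s) gets quantum 1250; a 1kbit class (125 B/s) would
         * get 12, so it is raised to 1000 with a warning; a 1gbit class
         * (125000000 B/s) would get 12500000, so it is capped at 200000.
         * An explicit "quantum" in hopt overrides the computation.
         */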
1450
1451        cl->buffer = hopt->buffer;
1452        cl->cbuffer = hopt->cbuffer;
1453        if (cl->rate)
1454                qdisc_put_rtab(cl->rate);
1455        cl->rate = rtab;
1456        if (cl->ceil)
1457                qdisc_put_rtab(cl->ceil);
1458        cl->ceil = ctab;
1459        sch_tree_unlock(sch);
1460
1461        qdisc_class_hash_grow(sch, &q->clhash);
1462
1463        *arg = (unsigned long)cl;
1464        return 0;
1465
1466failure:
1467        if (rtab)
1468                qdisc_put_rtab(rtab);
1469        if (ctab)
1470                qdisc_put_rtab(ctab);
1471        return err;
1472}
1473
1474static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1475{
1476        struct htb_sched *q = qdisc_priv(sch);
1477        struct htb_class *cl = (struct htb_class *)arg;
1478        struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
1479
1480        return fl;
1481}
1482
1483static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1484                                     u32 classid)
1485{
1486        struct htb_class *cl = htb_find(classid, sch);
1487
1488        /*if (cl && !cl->level) return 0;
1489           The line above used to be there to prevent attaching filters to
1490           leaves. But at least the tc_index filter uses this just to get
1491           the class for other reasons, so we have to allow for it.
1492           ----
1493           19.6.2002 As Werner explained it is ok - bind filter is just
1494           another way to "lock" the class - unlike "get" this lock can
1495           be broken by class during destroy IIUC.
1496         */
1497        if (cl)
1498                cl->filter_cnt++;
1499        return (unsigned long)cl;
1500}
1501
1502static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1503{
1504        struct htb_class *cl = (struct htb_class *)arg;
1505
1506        if (cl)
1507                cl->filter_cnt--;
1508}
1509
1510static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1511{
1512        struct htb_sched *q = qdisc_priv(sch);
1513        struct htb_class *cl;
1514        struct hlist_node *n;
1515        unsigned int i;
1516
1517        if (arg->stop)
1518                return;
1519
1520        for (i = 0; i < q->clhash.hashsize; i++) {
1521                hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
1522                        if (arg->count < arg->skip) {
1523                                arg->count++;
1524                                continue;
1525                        }
1526                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1527                                arg->stop = 1;
1528                                return;
1529                        }
1530                        arg->count++;
1531                }
1532        }
1533}
1534
1535static const struct Qdisc_class_ops htb_class_ops = {
1536        .graft          =       htb_graft,
1537        .leaf           =       htb_leaf,
1538        .qlen_notify    =       htb_qlen_notify,
1539        .get            =       htb_get,
1540        .put            =       htb_put,
1541        .change         =       htb_change_class,
1542        .delete         =       htb_delete,
1543        .walk           =       htb_walk,
1544        .tcf_chain      =       htb_find_tcf,
1545        .bind_tcf       =       htb_bind_filter,
1546        .unbind_tcf     =       htb_unbind_filter,
1547        .dump           =       htb_dump_class,
1548        .dump_stats     =       htb_dump_class_stats,
1549};
1550
1551static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1552        .next           =       NULL,
1553        .cl_ops         =       &htb_class_ops,
1554        .id             =       "htb",
1555        .priv_size      =       sizeof(struct htb_sched),
1556        .enqueue        =       htb_enqueue,
1557        .dequeue        =       htb_dequeue,
1558        .peek           =       qdisc_peek_dequeued,
1559        .drop           =       htb_drop,
1560        .init           =       htb_init,
1561        .reset          =       htb_reset,
1562        .destroy        =       htb_destroy,
1563        .change         =       NULL /* htb_change */,
1564        .dump           =       htb_dump,
1565        .owner          =       THIS_MODULE,
1566};
1567
1568static int __init htb_module_init(void)
1569{
1570        return register_qdisc(&htb_qdisc_ops);
1571}
1572static void __exit htb_module_exit(void)
1573{
1574        unregister_qdisc(&htb_qdisc_ops);
1575}
1576
1577module_init(htb_module_init)
1578module_exit(htb_module_exit)
1579MODULE_LICENSE("GPL");
1580