linux/net/sched/sch_htb.c
   1/*
   2 * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Martin Devera, <devik@cdi.cz>
  10 *
  11 * Credits (in time order) for older HTB versions:
  12 *              Stef Coene <stef.coene@docum.org>
  13 *                      HTB support at LARTC mailing list
  14 *              Ondrej Kraus, <krauso@barr.cz>
  15 *                      found missing INIT_QDISC(htb)
  16 *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
  17 *                      helped a lot to locate nasty class stall bug
  18 *              Andi Kleen, Jamal Hadi, Bert Hubert
  19 *                      code review and helpful comments on shaping
  20 *              Tomasz Wrona, <tw@eter.tym.pl>
  21 *                      created test case so that I was able to fix nasty bug
  22 *              Wilfried Weissmann
  23 *                      spotted bug in dequeue code and helped with fix
  24 *              Jiri Fojtasek
  25 *                      fixed requeue routine
  26 *              and many others. thanks.
  27 */
  28#include <linux/module.h>
  29#include <linux/moduleparam.h>
  30#include <linux/types.h>
  31#include <linux/kernel.h>
  32#include <linux/string.h>
  33#include <linux/errno.h>
  34#include <linux/skbuff.h>
  35#include <linux/list.h>
  36#include <linux/compiler.h>
  37#include <linux/rbtree.h>
  38#include <linux/workqueue.h>
  39#include <linux/slab.h>
  40#include <net/netlink.h>
  41#include <net/sch_generic.h>
  42#include <net/pkt_sched.h>
  43
  44/* HTB algorithm.
  45    Author: devik@cdi.cz
  46    ========================================================================
  47    HTB is like TBF with multiple classes. It is also similar to CBQ because
  48    it allows assigning a priority to each class in the hierarchy.
  49    In fact it is another implementation of Floyd's formal sharing.
  50
  51    Levels:
  52    Each class is assigned a level. A leaf ALWAYS has level 0 and root
  53    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
  54    one less than their parent.
  55*/
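
/* Illustrative example (editor's sketch, hypothetical classids): with
 * TC_HTB_MAXDEPTH == 8, a hierarchy such as
 *
 *      1:1    root class with children   -> level 7 (TC_HTB_MAXDEPTH - 1)
 *        1:10   interior class           -> level 6 (parent's level - 1)
 *          1:101  leaf                   -> level 0 (leaves are always 0)
 *
 * could be built with something like:
 *
 *      tc qdisc add dev eth0 root handle 1: htb default 101
 *      tc class add dev eth0 parent 1:   classid 1:1   htb rate 100mbit
 *      tc class add dev eth0 parent 1:1  classid 1:10  htb rate 40mbit ceil 100mbit
 *      tc class add dev eth0 parent 1:10 classid 1:101 htb rate 10mbit ceil 40mbit
 */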
  56
  57static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
  58#define HTB_VER 0x30011         /* major must be matched with number supplied by TC as version */
  59
  60#if HTB_VER >> 16 != TC_HTB_PROTOVER
  61#error "Mismatched sch_htb.c and pkt_sch.h"
  62#endif
  63
  64/* Module parameter and sysfs export */
  65module_param    (htb_hysteresis, int, 0640);
  66MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
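
/* Editor's note (illustrative, not part of the original source): with the
 * 0640 permissions above the parameter is normally exported through sysfs,
 * so the hysteresis speedup can typically be toggled at runtime, e.g.
 *
 *      echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
 *
 * or set at module load time with "modprobe sch_htb htb_hysteresis=1".
 */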
  67
  68/* used internally to keep the status of a single class */
  69enum htb_cmode {
  70        HTB_CANT_SEND,          /* class can't send and can't borrow */
  71        HTB_MAY_BORROW,         /* class can't send but may borrow */
  72        HTB_CAN_SEND            /* class can send */
  73};
  74
  75/* interior & leaf nodes; props specific to leaves are marked L: */
  76struct htb_class {
  77        struct Qdisc_class_common common;
  78        /* general class parameters */
  79        struct gnet_stats_basic_packed bstats;
  80        struct gnet_stats_queue qstats;
  81        struct gnet_stats_rate_est rate_est;
  82        struct tc_htb_xstats xstats;    /* our special stats */
  83        int refcnt;             /* usage count of this class */
  84
  85        /* topology */
  86        int level;              /* our level (see above) */
  87        unsigned int children;
  88        struct htb_class *parent;       /* parent class */
  89
  90        int prio;               /* these two are used only by leaves... */
  91        int quantum;            /* but stored for parent-to-leaf return */
  92
  93        union {
  94                struct htb_class_leaf {
  95                        struct Qdisc *q;
  96                        int deficit[TC_HTB_MAXDEPTH];
  97                        struct list_head drop_list;
  98                } leaf;
  99                struct htb_class_inner {
 100                        struct rb_root feed[TC_HTB_NUMPRIO];    /* feed trees */
 101                        struct rb_node *ptr[TC_HTB_NUMPRIO];    /* current class ptr */
 102                        /* When a class changes from state 1->2 and disconnects
 103                         * from its parent's feed, we lose the ptr value and start
 104                         * from the first child again. Here we store the classid of
 105                         * the last valid ptr (used when ptr is NULL).
 106                         */
 107                        u32 last_ptr_id[TC_HTB_NUMPRIO];
 108                } inner;
 109        } un;
 110        struct rb_node node[TC_HTB_NUMPRIO];    /* node for self or feed tree */
 111        struct rb_node pq_node; /* node for event queue */
 112        s64     pq_key;
 113
 114        int prio_activity;      /* for which prios are we active */
 115        enum htb_cmode cmode;   /* current mode of the class */
 116
 117        /* class attached filters */
 118        struct tcf_proto *filter_list;
 119        int filter_cnt;
 120
 121        /* token bucket parameters */
 122        struct psched_ratecfg rate;
 123        struct psched_ratecfg ceil;
 124        s64     buffer, cbuffer;        /* token bucket depth/rate */
 125        s64     mbuffer;                /* max wait time */
 126        s64     tokens, ctokens;        /* current number of tokens */
 127        s64     t_c;                    /* checkpoint time */
 128};
 129
 130struct htb_sched {
 131        struct Qdisc_class_hash clhash;
 132        struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 133
 134        /* self list - roots of self generating tree */
 135        struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 136        int row_mask[TC_HTB_MAXDEPTH];
 137        struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 138        u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 139
 140        /* self wait list - roots of wait PQs per row */
 141        struct rb_root wait_pq[TC_HTB_MAXDEPTH];
 142
 143        /* time of nearest event per level (row) */
 144        s64     near_ev_cache[TC_HTB_MAXDEPTH];
 145
 146        int defcls;             /* class where unclassified flows go to */
 147
 148        /* filters for qdisc itself */
 149        struct tcf_proto *filter_list;
 150
 151        int     rate2quantum;   /* quant = rate / rate2quantum */
 152        s64     now;    /* cached dequeue time */
 153        struct qdisc_watchdog watchdog;
 154
 155        /* non shaped skbs; let them go directly thru */
 156        struct sk_buff_head direct_queue;
 157        int direct_qlen;        /* max qlen of above */
 158
 159        long direct_pkts;
 160
 161#define HTB_WARN_TOOMANYEVENTS  0x1
 162        unsigned int warned;    /* only one warning */
 163        struct work_struct work;
 164};
 165
 166/* find class in global hash table using given handle */
 167static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 168{
 169        struct htb_sched *q = qdisc_priv(sch);
 170        struct Qdisc_class_common *clc;
 171
 172        clc = qdisc_class_find(&q->clhash, handle);
 173        if (clc == NULL)
 174                return NULL;
 175        return container_of(clc, struct htb_class, common);
 176}
 177
 178/**
 179 * htb_classify - classify a packet into class
 180 *
 181 * It returns NULL if the packet should be dropped, or -1 if the packet
 182 * should be passed directly through. In all other cases a leaf class is
 183 * returned. We allow direct class selection by putting a classid into
 184 * skb->priority. Then we examine filters in the qdisc and in inner nodes
 185 * (if a higher-level filter points to an inner node). If we end up with
 186 * classid MAJOR:0 we enqueue the skb into the special internal fifo (direct);
 187 * these packets then go directly through. If we still have no valid leaf we
 188 * try the MAJOR:default leaf; if that is also unsuccessful, return the direct queue.
 189 */
 190#define HTB_DIRECT ((struct htb_class *)-1L)
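
/* Editor's illustrative example (hypothetical handles): on an HTB qdisc with
 * handle 1:, an application that sets skb->priority (e.g. via SO_PRIORITY)
 * to the classid 1:0 is mapped to HTB_DIRECT below, while a priority equal
 * to the classid of an existing leaf (say 1:10) selects that leaf directly,
 * without consulting any filter.
 */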
 191
 192static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 193                                      int *qerr)
 194{
 195        struct htb_sched *q = qdisc_priv(sch);
 196        struct htb_class *cl;
 197        struct tcf_result res;
 198        struct tcf_proto *tcf;
 199        int result;
 200
 201        /* allow selecting a class by setting skb->priority to a valid classid;
 202         * note that nfmark can be used too by attaching an fw filter with no
 203         * rules in it
 204         */
 205        if (skb->priority == sch->handle)
 206                return HTB_DIRECT;      /* X:0 (direct flow) selected */
 207        cl = htb_find(skb->priority, sch);
 208        if (cl && cl->level == 0)
 209                return cl;
 210
 211        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 212        tcf = q->filter_list;
 213        while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
 214#ifdef CONFIG_NET_CLS_ACT
 215                switch (result) {
 216                case TC_ACT_QUEUED:
 217                case TC_ACT_STOLEN:
 218                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 219                case TC_ACT_SHOT:
 220                        return NULL;
 221                }
 222#endif
 223                cl = (void *)res.class;
 224                if (!cl) {
 225                        if (res.classid == sch->handle)
 226                                return HTB_DIRECT;      /* X:0 (direct flow) */
 227                        cl = htb_find(res.classid, sch);
 228                        if (!cl)
 229                                break;  /* filter selected invalid classid */
 230                }
 231                if (!cl->level)
 232                        return cl;      /* we hit leaf; return it */
 233
 234                /* we have got inner class; apply inner filter chain */
 235                tcf = cl->filter_list;
 236        }
 237        /* classification failed; try to use default class */
 238        cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
 239        if (!cl || cl->level)
 240                return HTB_DIRECT;      /* bad default .. this is safe bet */
 241        return cl;
 242}
 243
 244/**
 245 * htb_add_to_id_tree - adds class to the round robin list
 246 *
 247 * The routine adds the class to the list (actually a tree) sorted by classid.
 248 * Make sure that the class is not already on such a list for the given prio.
 249 */
 250static void htb_add_to_id_tree(struct rb_root *root,
 251                               struct htb_class *cl, int prio)
 252{
 253        struct rb_node **p = &root->rb_node, *parent = NULL;
 254
 255        while (*p) {
 256                struct htb_class *c;
 257                parent = *p;
 258                c = rb_entry(parent, struct htb_class, node[prio]);
 259
 260                if (cl->common.classid > c->common.classid)
 261                        p = &parent->rb_right;
 262                else
 263                        p = &parent->rb_left;
 264        }
 265        rb_link_node(&cl->node[prio], parent, p);
 266        rb_insert_color(&cl->node[prio], root);
 267}
 268
 269/**
 270 * htb_add_to_wait_tree - adds class to the event queue with delay
 271 *
 272 * The class is added to the priority event queue to indicate that the class
 273 * will change its mode at time cl->pq_key (nanoseconds). Make sure that the
 274 * class is not already in the queue.
 275 */
 276static void htb_add_to_wait_tree(struct htb_sched *q,
 277                                 struct htb_class *cl, s64 delay)
 278{
 279        struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
 280
 281        cl->pq_key = q->now + delay;
 282        if (cl->pq_key == q->now)
 283                cl->pq_key++;
 284
 285        /* update the nearest event cache */
 286        if (q->near_ev_cache[cl->level] > cl->pq_key)
 287                q->near_ev_cache[cl->level] = cl->pq_key;
 288
 289        while (*p) {
 290                struct htb_class *c;
 291                parent = *p;
 292                c = rb_entry(parent, struct htb_class, pq_node);
 293                if (cl->pq_key >= c->pq_key)
 294                        p = &parent->rb_right;
 295                else
 296                        p = &parent->rb_left;
 297        }
 298        rb_link_node(&cl->pq_node, parent, p);
 299        rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
 300}
 301
 302/**
 303 * htb_next_rb_node - finds next node in binary tree
 304 *
 305 * When we are past last key we return NULL.
 306 * Average complexity is 2 steps per call.
 307 */
 308static inline void htb_next_rb_node(struct rb_node **n)
 309{
 310        *n = rb_next(*n);
 311}
 312
 313/**
 314 * htb_add_class_to_row - add class to its row
 315 *
 316 * The class is added to row at priorities marked in mask.
 317 * It does nothing if mask == 0.
 318 */
 319static inline void htb_add_class_to_row(struct htb_sched *q,
 320                                        struct htb_class *cl, int mask)
 321{
 322        q->row_mask[cl->level] |= mask;
 323        while (mask) {
 324                int prio = ffz(~mask);
 325                mask &= ~(1 << prio);
 326                htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
 327        }
 328}
 329
 330/* If this triggers, it is a bug in this code, but it need not be fatal */
 331static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
 332{
 333        if (RB_EMPTY_NODE(rb)) {
 334                WARN_ON(1);
 335        } else {
 336                rb_erase(rb, root);
 337                RB_CLEAR_NODE(rb);
 338        }
 339}
 340
 341
 342/**
 343 * htb_remove_class_from_row - removes class from its row
 344 *
 345 * The class is removed from row at priorities marked in mask.
 346 * It does nothing if mask == 0.
 347 */
 348static inline void htb_remove_class_from_row(struct htb_sched *q,
 349                                                 struct htb_class *cl, int mask)
 350{
 351        int m = 0;
 352
 353        while (mask) {
 354                int prio = ffz(~mask);
 355
 356                mask &= ~(1 << prio);
 357                if (q->ptr[cl->level][prio] == cl->node + prio)
 358                        htb_next_rb_node(q->ptr[cl->level] + prio);
 359
 360                htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
 361                if (!q->row[cl->level][prio].rb_node)
 362                        m |= 1 << prio;
 363        }
 364        q->row_mask[cl->level] &= ~m;
 365}
 366
 367/**
 368 * htb_activate_prios - builds an active class's feed chain
 369 *
 370 * The class is connected to ancestors and/or appropriate rows
 371 * for the priorities it participates in. cl->cmode must be the new
 372 * (activated) mode. It does nothing if cl->prio_activity == 0.
 373 */
 374static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 375{
 376        struct htb_class *p = cl->parent;
 377        long m, mask = cl->prio_activity;
 378
 379        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 380                m = mask;
 381                while (m) {
 382                        int prio = ffz(~m);
 383                        m &= ~(1 << prio);
 384
 385                        if (p->un.inner.feed[prio].rb_node)
 386                                /* parent already has its feed in use, so
 387                                 * reset the bit in mask as parent is already ok
 388                                 */
 389                                mask &= ~(1 << prio);
 390
 391                        htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
 392                }
 393                p->prio_activity |= mask;
 394                cl = p;
 395                p = cl->parent;
 396
 397        }
 398        if (cl->cmode == HTB_CAN_SEND && mask)
 399                htb_add_class_to_row(q, cl, mask);
 400}
 401
 402/**
 403 * htb_deactivate_prios - remove class from feed chain
 404 *
 405 * cl->cmode must represent the old mode (before deactivation). It does
 406 * nothing if cl->prio_activity == 0. The class is removed from all feed
 407 * chains and rows.
 408 */
 409static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 410{
 411        struct htb_class *p = cl->parent;
 412        long m, mask = cl->prio_activity;
 413
 414        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 415                m = mask;
 416                mask = 0;
 417                while (m) {
 418                        int prio = ffz(~m);
 419                        m &= ~(1 << prio);
 420
 421                        if (p->un.inner.ptr[prio] == cl->node + prio) {
 422                                /* we are removing a child that the parent's
 423                                 * feed ptr points to - forget the pointer but
 424                                 * remember the classid
 425                                 */
 426                                p->un.inner.last_ptr_id[prio] = cl->common.classid;
 427                                p->un.inner.ptr[prio] = NULL;
 428                        }
 429
 430                        htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
 431
 432                        if (!p->un.inner.feed[prio].rb_node)
 433                                mask |= 1 << prio;
 434                }
 435
 436                p->prio_activity &= ~mask;
 437                cl = p;
 438                p = cl->parent;
 439
 440        }
 441        if (cl->cmode == HTB_CAN_SEND && mask)
 442                htb_remove_class_from_row(q, cl, mask);
 443}
 444
 445static inline s64 htb_lowater(const struct htb_class *cl)
 446{
 447        if (htb_hysteresis)
 448                return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
 449        else
 450                return 0;
 451}
 452static inline s64 htb_hiwater(const struct htb_class *cl)
 453{
 454        if (htb_hysteresis)
 455                return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
 456        else
 457                return 0;
 458}
 459
 460
 461/**
 462 * htb_class_mode - computes and returns current class mode
 463 *
 464 * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
 465 * is not HTB_CAN_SEND then *diff is set to the time difference from now to
 466 * the time when cl will change its state (later used for cl->pq_key).
 467 * It is also worth noting that the class mode doesn't change exactly at
 468 * cl->{c,}tokens == 0; with hysteresis there is a 0 .. -cl->{c,}buffer band
 469 * instead. This is meant to limit the number of mode transitions per time
 470 * unit. The speed gain is about 1/6.
 471 */
 472static inline enum htb_cmode
 473htb_class_mode(struct htb_class *cl, s64 *diff)
 474{
 475        s64 toks;
 476
 477        if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
 478                *diff = -toks;
 479                return HTB_CANT_SEND;
 480        }
 481
 482        if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
 483                return HTB_CAN_SEND;
 484
 485        *diff = -toks;
 486        return HTB_MAY_BORROW;
 487}
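
/* Editor's worked example (hedged, made-up numbers): assume htb_hysteresis
 * is enabled, cl->buffer == cl->cbuffer == 10000 and the class is currently
 * HTB_CAN_SEND.  With cl->ctokens + *diff == 5000 the first test is false
 * (5000 is not below the lowater of -10000); with cl->tokens + *diff ==
 * -3000 the second test still succeeds because the hiwater is -cl->buffer ==
 * -10000, so the class stays HTB_CAN_SEND rather than flapping to
 * HTB_MAY_BORROW the moment its tokens dip below zero.
 */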
 488
 489/**
 490 * htb_change_class_mode - changes class's mode
 491 *
 492 * This should be the only way to change a class's mode under normal
 493 * circumstances. The routine will update feed list linkage, change the mode
 494 * and add class to the wait event queue if appropriate. New mode should
 495 * be different from old one and cl->pq_key has to be valid if changing
 496 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 497 */
 498static void
 499htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
 500{
 501        enum htb_cmode new_mode = htb_class_mode(cl, diff);
 502
 503        if (new_mode == cl->cmode)
 504                return;
 505
 506        if (cl->prio_activity) {        /* not necessary: speed optimization */
 507                if (cl->cmode != HTB_CANT_SEND)
 508                        htb_deactivate_prios(q, cl);
 509                cl->cmode = new_mode;
 510                if (new_mode != HTB_CANT_SEND)
 511                        htb_activate_prios(q, cl);
 512        } else
 513                cl->cmode = new_mode;
 514}
 515
 516/**
 517 * htb_activate - inserts leaf cl into appropriate active feeds
 518 *
 519 * The routine learns the (new) priority of the leaf and activates the feed
 520 * chain for that prio. It can safely be called on an already active leaf.
 521 * It also adds the leaf to the drop list.
 522 */
 523static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 524{
 525        WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
 526
 527        if (!cl->prio_activity) {
 528                cl->prio_activity = 1 << cl->prio;
 529                htb_activate_prios(q, cl);
 530                list_add_tail(&cl->un.leaf.drop_list,
 531                              q->drops + cl->prio);
 532        }
 533}
 534
 535/**
 536 * htb_deactivate - remove leaf cl from active feeds
 537 *
 538 * Make sure that the leaf is active. In other words, it can't be called
 539 * with a non-active leaf. It also removes the class from the drop list.
 540 */
 541static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 542{
 543        WARN_ON(!cl->prio_activity);
 544
 545        htb_deactivate_prios(q, cl);
 546        cl->prio_activity = 0;
 547        list_del_init(&cl->un.leaf.drop_list);
 548}
 549
 550static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 551{
 552        int uninitialized_var(ret);
 553        struct htb_sched *q = qdisc_priv(sch);
 554        struct htb_class *cl = htb_classify(skb, sch, &ret);
 555
 556        if (cl == HTB_DIRECT) {
 557                /* enqueue to helper queue */
 558                if (q->direct_queue.qlen < q->direct_qlen) {
 559                        __skb_queue_tail(&q->direct_queue, skb);
 560                        q->direct_pkts++;
 561                } else {
 562                        return qdisc_drop(skb, sch);
 563                }
 564#ifdef CONFIG_NET_CLS_ACT
 565        } else if (!cl) {
 566                if (ret & __NET_XMIT_BYPASS)
 567                        sch->qstats.drops++;
 568                kfree_skb(skb);
 569                return ret;
 570#endif
 571        } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
 572                if (net_xmit_drop_count(ret)) {
 573                        sch->qstats.drops++;
 574                        cl->qstats.drops++;
 575                }
 576                return ret;
 577        } else {
 578                htb_activate(q, cl);
 579        }
 580
 581        sch->q.qlen++;
 582        return NET_XMIT_SUCCESS;
 583}
 584
 585static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
 586{
 587        s64 toks = diff + cl->tokens;
 588
 589        if (toks > cl->buffer)
 590                toks = cl->buffer;
 591        toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
 592        if (toks <= -cl->mbuffer)
 593                toks = 1 - cl->mbuffer;
 594
 595        cl->tokens = toks;
 596}
 597
 598static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
 599{
 600        s64 toks = diff + cl->ctokens;
 601
 602        if (toks > cl->cbuffer)
 603                toks = cl->cbuffer;
 604        toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
 605        if (toks <= -cl->mbuffer)
 606                toks = 1 - cl->mbuffer;
 607
 608        cl->ctokens = toks;
 609}
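
/* Editor's worked example (hedged, made-up numbers): for a 1000-byte packet
 * on a class whose rate makes psched_l2t_ns() return 8000 for those bytes,
 * with diff == 5000, cl->tokens == 2000 and cl->buffer == 4000,
 * htb_accnt_tokens() first clamps 2000 + 5000 down to the buffer (4000),
 * then subtracts 8000, leaving cl->tokens == -4000; the final clamp to
 * 1 - cl->mbuffer only matters for very large debts.
 */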
 610
 611/**
 612 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 613 *
 614 * The routine assumes that a packet "bytes" long was dequeued from leaf cl,
 615 * borrowing from "level". It charges the bytes to the ceil leaky bucket of
 616 * the leaf and all its ancestors, and to the rate bucket of ancestors at
 617 * levels "level" and higher. It also handles a possible change of mode
 618 * resulting from the update. Note that the mode can also increase here
 619 * (MAY_BORROW to CAN_SEND) because we can use a more precise clock here than
 620 * the event queue does. In such a case we remove the class from the event queue first.
 621 */
 622static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 623                             int level, struct sk_buff *skb)
 624{
 625        int bytes = qdisc_pkt_len(skb);
 626        enum htb_cmode old_mode;
 627        s64 diff;
 628
 629        while (cl) {
 630                diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
 631                if (cl->level >= level) {
 632                        if (cl->level == level)
 633                                cl->xstats.lends++;
 634                        htb_accnt_tokens(cl, bytes, diff);
 635                } else {
 636                        cl->xstats.borrows++;
 637                        cl->tokens += diff;     /* we moved t_c; update tokens */
 638                }
 639                htb_accnt_ctokens(cl, bytes, diff);
 640                cl->t_c = q->now;
 641
 642                old_mode = cl->cmode;
 643                diff = 0;
 644                htb_change_class_mode(q, cl, &diff);
 645                if (old_mode != cl->cmode) {
 646                        if (old_mode != HTB_CAN_SEND)
 647                                htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
 648                        if (cl->cmode != HTB_CAN_SEND)
 649                                htb_add_to_wait_tree(q, cl, diff);
 650                }
 651
 652                /* update basic stats except for leaves which are already updated */
 653                if (cl->level)
 654                        bstats_update(&cl->bstats, skb);
 655
 656                cl = cl->parent;
 657        }
 658}
 659
 660/**
 661 * htb_do_events - make mode changes to classes at the level
 662 *
 663 * Scans event queue for pending events and applies them. Returns time of
 664 * next pending event (0 for no event in pq, q->now for too many events).
 665 * Note: only events with cl->pq_key <= q->now are applied.
 666 */
 667static s64 htb_do_events(struct htb_sched *q, int level,
 668                         unsigned long start)
 669{
 670        /* don't run for longer than 2 jiffies; 2 is used instead of
 671         * 1 to simplify things when jiffy is going to be incremented
 672         * too soon
 673         */
 674        unsigned long stop_at = start + 2;
 675        while (time_before(jiffies, stop_at)) {
 676                struct htb_class *cl;
 677                s64 diff;
 678                struct rb_node *p = rb_first(&q->wait_pq[level]);
 679
 680                if (!p)
 681                        return 0;
 682
 683                cl = rb_entry(p, struct htb_class, pq_node);
 684                if (cl->pq_key > q->now)
 685                        return cl->pq_key;
 686
 687                htb_safe_rb_erase(p, q->wait_pq + level);
 688                diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
 689                htb_change_class_mode(q, cl, &diff);
 690                if (cl->cmode != HTB_CAN_SEND)
 691                        htb_add_to_wait_tree(q, cl, diff);
 692        }
 693
 694        /* too much load - let's continue after a break for scheduling */
 695        if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
 696                pr_warning("htb: too many events!\n");
 697                q->warned |= HTB_WARN_TOOMANYEVENTS;
 698        }
 699
 700        return q->now;
 701}
 702
 703/* Returns class->node+prio from the id-tree where the class's id is >= id.
 704 * Returns NULL if no such node exists.
 705 */
 706static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 707                                              u32 id)
 708{
 709        struct rb_node *r = NULL;
 710        while (n) {
 711                struct htb_class *cl =
 712                    rb_entry(n, struct htb_class, node[prio]);
 713
 714                if (id > cl->common.classid) {
 715                        n = n->rb_right;
 716                } else if (id < cl->common.classid) {
 717                        r = n;
 718                        n = n->rb_left;
 719                } else {
 720                        return n;
 721                }
 722        }
 723        return r;
 724}
 725
 726/**
 727 * htb_lookup_leaf - returns next leaf class in DRR order
 728 *
 729 * Find the leaf that the current feed pointer points to.
 730 */
 731static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 732                                         struct rb_node **pptr, u32 * pid)
 733{
 734        int i;
 735        struct {
 736                struct rb_node *root;
 737                struct rb_node **pptr;
 738                u32 *pid;
 739        } stk[TC_HTB_MAXDEPTH], *sp = stk;
 740
 741        BUG_ON(!tree->rb_node);
 742        sp->root = tree->rb_node;
 743        sp->pptr = pptr;
 744        sp->pid = pid;
 745
 746        for (i = 0; i < 65535; i++) {
 747                if (!*sp->pptr && *sp->pid) {
 748                        /* ptr was invalidated but id is valid - try to recover
 749                         * the original or next ptr
 750                         */
 751                        *sp->pptr =
 752                            htb_id_find_next_upper(prio, sp->root, *sp->pid);
 753                }
 754                *sp->pid = 0;   /* ptr is valid now, so remove this hint as it
 755                                 * can become out of date quickly
 756                                 */
 757                if (!*sp->pptr) {       /* we are at right end; rewind & go up */
 758                        *sp->pptr = sp->root;
 759                        while ((*sp->pptr)->rb_left)
 760                                *sp->pptr = (*sp->pptr)->rb_left;
 761                        if (sp > stk) {
 762                                sp--;
 763                                if (!*sp->pptr) {
 764                                        WARN_ON(1);
 765                                        return NULL;
 766                                }
 767                                htb_next_rb_node(sp->pptr);
 768                        }
 769                } else {
 770                        struct htb_class *cl;
 771                        cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
 772                        if (!cl->level)
 773                                return cl;
 774                        (++sp)->root = cl->un.inner.feed[prio].rb_node;
 775                        sp->pptr = cl->un.inner.ptr + prio;
 776                        sp->pid = cl->un.inner.last_ptr_id + prio;
 777                }
 778        }
 779        WARN_ON(1);
 780        return NULL;
 781}
 782
 783/* dequeues packet at given priority and level; call only if
 784 * you are sure that there is active class at prio/level
 785 */
 786static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
 787                                        int level)
 788{
 789        struct sk_buff *skb = NULL;
 790        struct htb_class *cl, *start;
 791        /* look initial class up in the row */
 792        start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
 793                                     q->ptr[level] + prio,
 794                                     q->last_ptr_id[level] + prio);
 795
 796        do {
 797next:
 798                if (unlikely(!cl))
 799                        return NULL;
 800
 801                /* class can be empty - it is unlikely but can be true if leaf
 802                 * qdisc drops packets in enqueue routine or if someone used
 803                 * graft operation on the leaf since last dequeue;
 804                 * simply deactivate and skip such class
 805                 */
 806                if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
 807                        struct htb_class *next;
 808                        htb_deactivate(q, cl);
 809
 810                        /* row/level might become empty */
 811                        if ((q->row_mask[level] & (1 << prio)) == 0)
 812                                return NULL;
 813
 814                        next = htb_lookup_leaf(q->row[level] + prio,
 815                                               prio, q->ptr[level] + prio,
 816                                               q->last_ptr_id[level] + prio);
 817
 818                        if (cl == start)        /* fix start if we just deleted it */
 819                                start = next;
 820                        cl = next;
 821                        goto next;
 822                }
 823
 824                skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
 825                if (likely(skb != NULL))
 826                        break;
 827
 828                qdisc_warn_nonwc("htb", cl->un.leaf.q);
 829                htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
 830                                  ptr[0]) + prio);
 831                cl = htb_lookup_leaf(q->row[level] + prio, prio,
 832                                     q->ptr[level] + prio,
 833                                     q->last_ptr_id[level] + prio);
 834
 835        } while (cl != start);
 836
 837        if (likely(skb != NULL)) {
 838                bstats_update(&cl->bstats, skb);
 839                cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
 840                if (cl->un.leaf.deficit[level] < 0) {
 841                        cl->un.leaf.deficit[level] += cl->quantum;
 842                        htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
 843                                          ptr[0]) + prio);
 844                }
 845                /* this used to be after charge_class but this ordering
 846                 * gives us slightly better performance
 847                 */
 848                if (!cl->un.leaf.q->q.qlen)
 849                        htb_deactivate(q, cl);
 850                htb_charge_class(q, cl, level, skb);
 851        }
 852        return skb;
 853}
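
/* Editor's worked example (hedged, made-up numbers): with cl->quantum == 1500
 * and cl->un.leaf.deficit[level] == 200, dequeuing a 1400-byte packet drives
 * the deficit to -1200; because it went negative, one quantum is added back
 * (deficit becomes 300) and the round-robin pointer is advanced, so each
 * active class sends roughly one quantum's worth of bytes per DRR round at
 * its prio/level.
 */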
 854
 855static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 856{
 857        struct sk_buff *skb;
 858        struct htb_sched *q = qdisc_priv(sch);
 859        int level;
 860        s64 next_event;
 861        unsigned long start_at;
 862
 863        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
 864        skb = __skb_dequeue(&q->direct_queue);
 865        if (skb != NULL) {
 866ok:
 867                qdisc_bstats_update(sch, skb);
 868                qdisc_unthrottled(sch);
 869                sch->q.qlen--;
 870                return skb;
 871        }
 872
 873        if (!sch->q.qlen)
 874                goto fin;
 875        q->now = ktime_to_ns(ktime_get());
 876        start_at = jiffies;
 877
 878        next_event = q->now + 5LLU * NSEC_PER_SEC;
 879
 880        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 881                /* common case optimization - skip event handler quickly */
 882                int m;
 883                s64 event;
 884
 885                if (q->now >= q->near_ev_cache[level]) {
 886                        event = htb_do_events(q, level, start_at);
 887                        if (!event)
 888                                event = q->now + NSEC_PER_SEC;
 889                        q->near_ev_cache[level] = event;
 890                } else
 891                        event = q->near_ev_cache[level];
 892
 893                if (next_event > event)
 894                        next_event = event;
 895
 896                m = ~q->row_mask[level];
 897                while (m != (int)(-1)) {
 898                        int prio = ffz(m);
 899
 900                        m |= 1 << prio;
 901                        skb = htb_dequeue_tree(q, prio, level);
 902                        if (likely(skb != NULL))
 903                                goto ok;
 904                }
 905        }
 906        sch->qstats.overlimits++;
 907        if (likely(next_event > q->now)) {
 908                if (!test_bit(__QDISC_STATE_DEACTIVATED,
 909                              &qdisc_root_sleeping(q->watchdog.qdisc)->state)) {
 910                        ktime_t time = ns_to_ktime(next_event);
 911                        qdisc_throttled(q->watchdog.qdisc);
 912                        hrtimer_start(&q->watchdog.timer, time,
 913                                      HRTIMER_MODE_ABS);
 914                }
 915        } else {
 916                schedule_work(&q->work);
 917        }
 918fin:
 919        return skb;
 920}
 921
 922/* try to drop from each class (by prio) until one succeeds */
 923static unsigned int htb_drop(struct Qdisc *sch)
 924{
 925        struct htb_sched *q = qdisc_priv(sch);
 926        int prio;
 927
 928        for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
 929                struct list_head *p;
 930                list_for_each(p, q->drops + prio) {
 931                        struct htb_class *cl = list_entry(p, struct htb_class,
 932                                                          un.leaf.drop_list);
 933                        unsigned int len;
 934                        if (cl->un.leaf.q->ops->drop &&
 935                            (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
 936                                sch->q.qlen--;
 937                                if (!cl->un.leaf.q->q.qlen)
 938                                        htb_deactivate(q, cl);
 939                                return len;
 940                        }
 941                }
 942        }
 943        return 0;
 944}
 945
 946/* reset all classes */
 947/* always called under BH & queue lock */
 948static void htb_reset(struct Qdisc *sch)
 949{
 950        struct htb_sched *q = qdisc_priv(sch);
 951        struct htb_class *cl;
 952        unsigned int i;
 953
 954        for (i = 0; i < q->clhash.hashsize; i++) {
 955                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
 956                        if (cl->level)
 957                                memset(&cl->un.inner, 0, sizeof(cl->un.inner));
 958                        else {
 959                                if (cl->un.leaf.q)
 960                                        qdisc_reset(cl->un.leaf.q);
 961                                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 962                        }
 963                        cl->prio_activity = 0;
 964                        cl->cmode = HTB_CAN_SEND;
 965
 966                }
 967        }
 968        qdisc_watchdog_cancel(&q->watchdog);
 969        __skb_queue_purge(&q->direct_queue);
 970        sch->q.qlen = 0;
 971        memset(q->row, 0, sizeof(q->row));
 972        memset(q->row_mask, 0, sizeof(q->row_mask));
 973        memset(q->wait_pq, 0, sizeof(q->wait_pq));
 974        memset(q->ptr, 0, sizeof(q->ptr));
 975        for (i = 0; i < TC_HTB_NUMPRIO; i++)
 976                INIT_LIST_HEAD(q->drops + i);
 977}
 978
 979static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
 980        [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
 981        [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
 982        [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 983        [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 984        [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
 985};
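
/* Editor's note (illustrative, hedged): these attributes carry what the
 * iproute2 "tc" utility sends - TCA_HTB_INIT for the qdisc itself (e.g.
 * "tc qdisc add dev eth0 root handle 1: htb default 20 r2q 10"),
 * TCA_HTB_PARMS plus the legacy TCA_HTB_RTAB/CTAB rate tables for each
 * "tc class add ... htb rate ... ceil ...", and TCA_HTB_DIRECT_QLEN to
 * bound the helper queue used for HTB_DIRECT traffic.
 */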
 986
 987static void htb_work_func(struct work_struct *work)
 988{
 989        struct htb_sched *q = container_of(work, struct htb_sched, work);
 990        struct Qdisc *sch = q->watchdog.qdisc;
 991
 992        __netif_schedule(qdisc_root(sch));
 993}
 994
 995static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 996{
 997        struct htb_sched *q = qdisc_priv(sch);
 998        struct nlattr *tb[TCA_HTB_MAX + 1];
 999        struct tc_htb_glob *gopt;
1000        int err;
1001        int i;
1002
1003        if (!opt)
1004                return -EINVAL;
1005
1006        err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
1007        if (err < 0)
1008                return err;
1009
1010        if (!tb[TCA_HTB_INIT])
1011                return -EINVAL;
1012
1013        gopt = nla_data(tb[TCA_HTB_INIT]);
1014        if (gopt->version != HTB_VER >> 16)
1015                return -EINVAL;
1016
1017        err = qdisc_class_hash_init(&q->clhash);
1018        if (err < 0)
1019                return err;
1020        for (i = 0; i < TC_HTB_NUMPRIO; i++)
1021                INIT_LIST_HEAD(q->drops + i);
1022
1023        qdisc_watchdog_init(&q->watchdog, sch);
1024        INIT_WORK(&q->work, htb_work_func);
1025        skb_queue_head_init(&q->direct_queue);
1026
1027        if (tb[TCA_HTB_DIRECT_QLEN])
1028                q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1029        else {
1030                q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1031                if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1032                        q->direct_qlen = 2;
1033        }
1034        if ((q->rate2quantum = gopt->rate2quantum) < 1)
1035                q->rate2quantum = 1;
1036        q->defcls = gopt->defcls;
1037
1038        return 0;
1039}
1040
1041static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1042{
1043        spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1044        struct htb_sched *q = qdisc_priv(sch);
1045        struct nlattr *nest;
1046        struct tc_htb_glob gopt;
1047
1048        spin_lock_bh(root_lock);
1049
1050        gopt.direct_pkts = q->direct_pkts;
1051        gopt.version = HTB_VER;
1052        gopt.rate2quantum = q->rate2quantum;
1053        gopt.defcls = q->defcls;
1054        gopt.debug = 0;
1055
1056        nest = nla_nest_start(skb, TCA_OPTIONS);
1057        if (nest == NULL)
1058                goto nla_put_failure;
1059        if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1060            nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1061                goto nla_put_failure;
1062        nla_nest_end(skb, nest);
1063
1064        spin_unlock_bh(root_lock);
1065        return skb->len;
1066
1067nla_put_failure:
1068        spin_unlock_bh(root_lock);
1069        nla_nest_cancel(skb, nest);
1070        return -1;
1071}
1072
1073static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1074                          struct sk_buff *skb, struct tcmsg *tcm)
1075{
1076        struct htb_class *cl = (struct htb_class *)arg;
1077        spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1078        struct nlattr *nest;
1079        struct tc_htb_opt opt;
1080
1081        spin_lock_bh(root_lock);
1082        tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1083        tcm->tcm_handle = cl->common.classid;
1084        if (!cl->level && cl->un.leaf.q)
1085                tcm->tcm_info = cl->un.leaf.q->handle;
1086
1087        nest = nla_nest_start(skb, TCA_OPTIONS);
1088        if (nest == NULL)
1089                goto nla_put_failure;
1090
1091        memset(&opt, 0, sizeof(opt));
1092
1093        psched_ratecfg_getrate(&opt.rate, &cl->rate);
1094        opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1095        psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1096        opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1097        opt.quantum = cl->quantum;
1098        opt.prio = cl->prio;
1099        opt.level = cl->level;
1100        if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1101                goto nla_put_failure;
1102
1103        nla_nest_end(skb, nest);
1104        spin_unlock_bh(root_lock);
1105        return skb->len;
1106
1107nla_put_failure:
1108        spin_unlock_bh(root_lock);
1109        nla_nest_cancel(skb, nest);
1110        return -1;
1111}
1112
1113static int
1114htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1115{
1116        struct htb_class *cl = (struct htb_class *)arg;
1117
1118        if (!cl->level && cl->un.leaf.q)
1119                cl->qstats.qlen = cl->un.leaf.q->q.qlen;
1120        cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens);
1121        cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens);
1122
1123        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1124            gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
1125            gnet_stats_copy_queue(d, &cl->qstats) < 0)
1126                return -1;
1127
1128        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1129}
1130
1131static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1132                     struct Qdisc **old)
1133{
1134        struct htb_class *cl = (struct htb_class *)arg;
1135
1136        if (cl->level)
1137                return -EINVAL;
1138        if (new == NULL &&
1139            (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1140                                     cl->common.classid)) == NULL)
1141                return -ENOBUFS;
1142
1143        sch_tree_lock(sch);
1144        *old = cl->un.leaf.q;
1145        cl->un.leaf.q = new;
1146        if (*old != NULL) {
1147                qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1148                qdisc_reset(*old);
1149        }
1150        sch_tree_unlock(sch);
1151        return 0;
1152}
1153
1154static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1155{
1156        struct htb_class *cl = (struct htb_class *)arg;
1157        return !cl->level ? cl->un.leaf.q : NULL;
1158}
1159
1160static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1161{
1162        struct htb_class *cl = (struct htb_class *)arg;
1163
1164        if (cl->un.leaf.q->q.qlen == 0)
1165                htb_deactivate(qdisc_priv(sch), cl);
1166}
1167
1168static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1169{
1170        struct htb_class *cl = htb_find(classid, sch);
1171        if (cl)
1172                cl->refcnt++;
1173        return (unsigned long)cl;
1174}
1175
1176static inline int htb_parent_last_child(struct htb_class *cl)
1177{
1178        if (!cl->parent)
1179                /* the root class */
1180                return 0;
1181        if (cl->parent->children > 1)
1182                /* not the last child */
1183                return 0;
1184        return 1;
1185}
1186
1187static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1188                               struct Qdisc *new_q)
1189{
1190        struct htb_class *parent = cl->parent;
1191
1192        WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
1193
1194        if (parent->cmode != HTB_CAN_SEND)
1195                htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
1196
1197        parent->level = 0;
1198        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1199        INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1200        parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
1201        parent->tokens = parent->buffer;
1202        parent->ctokens = parent->cbuffer;
1203        parent->t_c = ktime_to_ns(ktime_get());
1204        parent->cmode = HTB_CAN_SEND;
1205}
1206
1207static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1208{
1209        if (!cl->level) {
1210                WARN_ON(!cl->un.leaf.q);
1211                qdisc_destroy(cl->un.leaf.q);
1212        }
1213        gen_kill_estimator(&cl->bstats, &cl->rate_est);
1214        tcf_destroy_chain(&cl->filter_list);
1215        kfree(cl);
1216}
1217
1218static void htb_destroy(struct Qdisc *sch)
1219{
1220        struct htb_sched *q = qdisc_priv(sch);
1221        struct hlist_node *next;
1222        struct htb_class *cl;
1223        unsigned int i;
1224
1225        cancel_work_sync(&q->work);
1226        qdisc_watchdog_cancel(&q->watchdog);
1227        /* This line used to be after htb_destroy_class call below
1228         * and surprisingly it worked in 2.4. But it must precede it
1229         * because filters need their target class alive to be able to call
1230         * unbind_filter on them (without an Oops).
1231         */
1232        tcf_destroy_chain(&q->filter_list);
1233
1234        for (i = 0; i < q->clhash.hashsize; i++) {
1235                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode)
1236                        tcf_destroy_chain(&cl->filter_list);
1237        }
1238        for (i = 0; i < q->clhash.hashsize; i++) {
1239                hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1240                                          common.hnode)
1241                        htb_destroy_class(sch, cl);
1242        }
1243        qdisc_class_hash_destroy(&q->clhash);
1244        __skb_queue_purge(&q->direct_queue);
1245}
1246
1247static int htb_delete(struct Qdisc *sch, unsigned long arg)
1248{
1249        struct htb_sched *q = qdisc_priv(sch);
1250        struct htb_class *cl = (struct htb_class *)arg;
1251        unsigned int qlen;
1252        struct Qdisc *new_q = NULL;
1253        int last_child = 0;
1254
1255        // TODO: why don't we allow deleting a subtree? references? does the
1256        // tc subsys guarantee us that in htb_destroy it holds no class
1257        // refs so that we can remove children safely there?
1258        if (cl->children || cl->filter_cnt)
1259                return -EBUSY;
1260
1261        if (!cl->level && htb_parent_last_child(cl)) {
1262                new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1263                                          cl->parent->common.classid);
1264                last_child = 1;
1265        }
1266
1267        sch_tree_lock(sch);
1268
1269        if (!cl->level) {
1270                qlen = cl->un.leaf.q->q.qlen;
1271                qdisc_reset(cl->un.leaf.q);
1272                qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
1273        }
1274
1275        /* delete from hash and active; remainder in destroy_class */
1276        qdisc_class_hash_remove(&q->clhash, &cl->common);
1277        if (cl->parent)
1278                cl->parent->children--;
1279
1280        if (cl->prio_activity)
1281                htb_deactivate(q, cl);
1282
1283        if (cl->cmode != HTB_CAN_SEND)
1284                htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1285
1286        if (last_child)
1287                htb_parent_to_leaf(q, cl, new_q);
1288
1289        BUG_ON(--cl->refcnt == 0);
1290        /*
1291         * This shouldn't happen: we "hold" one cops->get() when called
1292         * from tc_ctl_tclass; the destroy method is done from cops->put().
1293         */
1294
1295        sch_tree_unlock(sch);
1296        return 0;
1297}
1298
1299static void htb_put(struct Qdisc *sch, unsigned long arg)
1300{
1301        struct htb_class *cl = (struct htb_class *)arg;
1302
1303        if (--cl->refcnt == 0)
1304                htb_destroy_class(sch, cl);
1305}
1306
1307static int htb_change_class(struct Qdisc *sch, u32 classid,
1308                            u32 parentid, struct nlattr **tca,
1309                            unsigned long *arg)
1310{
1311        int err = -EINVAL;
1312        struct htb_sched *q = qdisc_priv(sch);
1313        struct htb_class *cl = (struct htb_class *)*arg, *parent;
1314        struct nlattr *opt = tca[TCA_OPTIONS];
1315        struct nlattr *tb[TCA_HTB_MAX + 1];
1316        struct tc_htb_opt *hopt;
1317
1318        /* extract all subattrs from opt attr */
1319        if (!opt)
1320                goto failure;
1321
1322        err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
1323        if (err < 0)
1324                goto failure;
1325
1326        err = -EINVAL;
1327        if (tb[TCA_HTB_PARMS] == NULL)
1328                goto failure;
1329
1330        parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1331
1332        hopt = nla_data(tb[TCA_HTB_PARMS]);
1333        if (!hopt->rate.rate || !hopt->ceil.rate)
1334                goto failure;
1335
1336        if (!cl) {              /* new class */
1337                struct Qdisc *new_q;
1338                int prio;
1339                struct {
1340                        struct nlattr           nla;
1341                        struct gnet_estimator   opt;
1342                } est = {
1343                        .nla = {
1344                                .nla_len        = nla_attr_size(sizeof(est.opt)),
1345                                .nla_type       = TCA_RATE,
1346                        },
1347                        .opt = {
1348                                /* 4s interval, 16s averaging constant */
1349                                .interval       = 2,
1350                                .ewma_log       = 2,
1351                        },
1352                };
1353
1354                /* check for valid classid */
1355                if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1356                    htb_find(classid, sch))
1357                        goto failure;
1358
1359                /* check maximal depth */
1360                if (parent && parent->parent && parent->parent->level < 2) {
1361                        pr_err("htb: tree is too deep\n");
1362                        goto failure;
1363                }
1364                err = -ENOBUFS;
1365                cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1366                if (!cl)
1367                        goto failure;
1368
1369                err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1370                                        qdisc_root_sleeping_lock(sch),
1371                                        tca[TCA_RATE] ? : &est.nla);
1372                if (err) {
1373                        kfree(cl);
1374                        goto failure;
1375                }
1376
1377                cl->refcnt = 1;
1378                cl->children = 0;
1379                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1380                RB_CLEAR_NODE(&cl->pq_node);
1381
1382                for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1383                        RB_CLEAR_NODE(&cl->node[prio]);
1384
1385                /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1386                 * so it can't be used inside of sch_tree_lock
1387                 * -- thanks to Karlis Peisenieks
1388                 */
1389                new_q = qdisc_create_dflt(sch->dev_queue,
1390                                          &pfifo_qdisc_ops, classid);
1391                sch_tree_lock(sch);
1392                if (parent && !parent->level) {
1393                        unsigned int qlen = parent->un.leaf.q->q.qlen;
1394
1395                        /* turn parent into inner node */
1396                        qdisc_reset(parent->un.leaf.q);
1397                        qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
1398                        qdisc_destroy(parent->un.leaf.q);
1399                        if (parent->prio_activity)
1400                                htb_deactivate(q, parent);
1401
1402                        /* remove from evt list because of level change */
1403                        if (parent->cmode != HTB_CAN_SEND) {
1404                                htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
1405                                parent->cmode = HTB_CAN_SEND;
1406                        }
1407                        parent->level = (parent->parent ? parent->parent->level
1408                                         : TC_HTB_MAXDEPTH) - 1;
1409                        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1410                }
1411                /* leaf (we) needs elementary qdisc */
1412                cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1413
1414                cl->common.classid = classid;
1415                cl->parent = parent;
1416
1417                /* set class to be in HTB_CAN_SEND state */
1418                cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1419                cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1420                cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1421                cl->t_c = ktime_to_ns(ktime_get());
1422                cl->cmode = HTB_CAN_SEND;
1423
1424                /* attach to the hash list and parent's family */
1425                qdisc_class_hash_insert(&q->clhash, &cl->common);
1426                if (parent)
1427                        parent->children++;
1428        } else {
1429                if (tca[TCA_RATE]) {
1430                        err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1431                                                    qdisc_root_sleeping_lock(sch),
1432                                                    tca[TCA_RATE]);
1433                        if (err)
1434                                return err;
1435                }
1436                sch_tree_lock(sch);
1437        }
1438
1439        /* there used to be a nasty bug here: we have to check that the node
1440         * is really a leaf before changing cl->un.leaf!
1441         */
1442        if (!cl->level) {
1443                cl->quantum = hopt->rate.rate / q->rate2quantum;
1444                if (!hopt->quantum && cl->quantum < 1000) {
1445                        pr_warning(
1446                               "HTB: quantum of class %X is small. Consider r2q change.\n",
1447                               cl->common.classid);
1448                        cl->quantum = 1000;
1449                }
1450                if (!hopt->quantum && cl->quantum > 200000) {
1451                        pr_warning(
1452                               "HTB: quantum of class %X is big. Consider r2q change.\n",
1453                               cl->common.classid);
1454                        cl->quantum = 200000;
1455                }
1456                if (hopt->quantum)
1457                        cl->quantum = hopt->quantum;
1458                if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1459                        cl->prio = TC_HTB_NUMPRIO - 1;
1460        }
1461
1462        psched_ratecfg_precompute(&cl->rate, &hopt->rate);
1463        psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);
1464
1465        cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
1466        cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
1467
1468        sch_tree_unlock(sch);
1469
1470        qdisc_class_hash_grow(sch, &q->clhash);
1471
1472        *arg = (unsigned long)cl;
1473        return 0;
1474
1475failure:
1476        return err;
1477}
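
/* Editor's worked example (hedged, made-up numbers): with r2q == 10 (a common
 * tc default) and a class rate of 100 Mbit/s, hopt->rate.rate is 12,500,000
 * bytes/s, so the computed quantum of 1,250,000 bytes exceeds the 200,000
 * cap above and is clamped with a warning unless an explicit "quantum" is
 * supplied; the 1000-byte lower clamp applies analogously to very small rates.
 */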
1478
1479static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1480{
1481        struct htb_sched *q = qdisc_priv(sch);
1482        struct htb_class *cl = (struct htb_class *)arg;
1483        struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
1484
1485        return fl;
1486}
1487
1488static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1489                                     u32 classid)
1490{
1491        struct htb_class *cl = htb_find(classid, sch);
1492
1493        /*if (cl && !cl->level) return 0;
1494         * The line above used to be there to prevent attaching filters to
1495         * leaves. But at least tc_index filter uses this just to get class
1496         * for other reasons, so we have to allow it.
1497         * ----
1498         * 19.6.2002 As Werner explained it is ok - bind filter is just
1499         * another way to "lock" the class - unlike "get" this lock can
1500         * be broken by class during destroy IIUC.
1501         */
1502        if (cl)
1503                cl->filter_cnt++;
1504        return (unsigned long)cl;
1505}
1506
1507static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1508{
1509        struct htb_class *cl = (struct htb_class *)arg;
1510
1511        if (cl)
1512                cl->filter_cnt--;
1513}
1514
1515static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1516{
1517        struct htb_sched *q = qdisc_priv(sch);
1518        struct htb_class *cl;
1519        unsigned int i;
1520
1521        if (arg->stop)
1522                return;
1523
1524        for (i = 0; i < q->clhash.hashsize; i++) {
1525                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1526                        if (arg->count < arg->skip) {
1527                                arg->count++;
1528                                continue;
1529                        }
1530                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1531                                arg->stop = 1;
1532                                return;
1533                        }
1534                        arg->count++;
1535                }
1536        }
1537}
1538
1539static const struct Qdisc_class_ops htb_class_ops = {
1540        .graft          =       htb_graft,
1541        .leaf           =       htb_leaf,
1542        .qlen_notify    =       htb_qlen_notify,
1543        .get            =       htb_get,
1544        .put            =       htb_put,
1545        .change         =       htb_change_class,
1546        .delete         =       htb_delete,
1547        .walk           =       htb_walk,
1548        .tcf_chain      =       htb_find_tcf,
1549        .bind_tcf       =       htb_bind_filter,
1550        .unbind_tcf     =       htb_unbind_filter,
1551        .dump           =       htb_dump_class,
1552        .dump_stats     =       htb_dump_class_stats,
1553};
1554
1555static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1556        .cl_ops         =       &htb_class_ops,
1557        .id             =       "htb",
1558        .priv_size      =       sizeof(struct htb_sched),
1559        .enqueue        =       htb_enqueue,
1560        .dequeue        =       htb_dequeue,
1561        .peek           =       qdisc_peek_dequeued,
1562        .drop           =       htb_drop,
1563        .init           =       htb_init,
1564        .reset          =       htb_reset,
1565        .destroy        =       htb_destroy,
1566        .dump           =       htb_dump,
1567        .owner          =       THIS_MODULE,
1568};
1569
1570static int __init htb_module_init(void)
1571{
1572        return register_qdisc(&htb_qdisc_ops);
1573}
1574static void __exit htb_module_exit(void)
1575{
1576        unregister_qdisc(&htb_qdisc_ops);
1577}
1578
1579module_init(htb_module_init)
1580module_exit(htb_module_exit)
1581MODULE_LICENSE("GPL");
1582