linux/net/sched/sch_htb.c
   1/*
   2 * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Martin Devera, <devik@cdi.cz>
  10 *
  11 * Credits (in time order) for older HTB versions:
  12 *              Stef Coene <stef.coene@docum.org>
  13 *                      HTB support at LARTC mailing list
  14 *              Ondrej Kraus, <krauso@barr.cz>
  15 *                      found missing INIT_QDISC(htb)
  16 *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
  17 *                      helped a lot to locate nasty class stall bug
  18 *              Andi Kleen, Jamal Hadi, Bert Hubert
  19 *                      code review and helpful comments on shaping
  20 *              Tomasz Wrona, <tw@eter.tym.pl>
  21 *                      created test case so that I was able to fix nasty bug
  22 *              Wilfried Weissmann
  23 *                      spotted bug in dequeue code and helped with fix
  24 *              Jiri Fojtasek
  25 *                      fixed requeue routine
  26 *              and many others. thanks.
  27 */
  28#include <linux/module.h>
  29#include <linux/moduleparam.h>
  30#include <linux/types.h>
  31#include <linux/kernel.h>
  32#include <linux/string.h>
  33#include <linux/errno.h>
  34#include <linux/skbuff.h>
  35#include <linux/list.h>
  36#include <linux/compiler.h>
  37#include <linux/rbtree.h>
  38#include <linux/workqueue.h>
  39#include <linux/slab.h>
  40#include <net/netlink.h>
  41#include <net/sch_generic.h>
  42#include <net/pkt_sched.h>
  43
  44/* HTB algorithm.
  45    Author: devik@cdi.cz
  46    ========================================================================
   47    HTB is like TBF with multiple classes. It is also similar to CBQ because
   48    it allows assigning a priority to each class in the hierarchy.
   49    In fact it is another implementation of Floyd's formal sharing.
  50
  51    Levels:
   52    Each class is assigned a level. Leaves ALWAYS have level 0 and root
   53    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
   54    one less than their parent.
  55*/
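/* Editor's illustration (not part of the original source): a minimal HTB
 * hierarchy as built from userspace with iproute2's tc; the device name and
 * rates are hypothetical.
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20
 *   tc class add dev eth0 parent 1:  classid 1:1  htb rate 100mbit ceil 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:10 htb rate 30mbit  ceil 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:20 htb rate 70mbit  ceil 100mbit
 *
 * Leaves 1:10 and 1:20 are always level 0 and may borrow from their inner
 * parent 1:1 up to their ceil; unclassified traffic falls into the
 * "default" class 1:20.
 */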
  56
  57static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
  58#define HTB_VER 0x30011         /* major must match the number supplied by TC as version */
  59
  60#if HTB_VER >> 16 != TC_HTB_PROTOVER
  61#error "Mismatched sch_htb.c and pkt_sch.h"
  62#endif
  63
  64/* Module parameter and sysfs export */
  65module_param    (htb_hysteresis, int, 0640);
  66MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
  67
  68static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
  69module_param(htb_rate_est, int, 0640);
  70MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
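/* Editor's note (not part of the original source): both knobs above are
 * ordinary module parameters (mode 0640), so they can be set at load time
 * or changed at runtime through sysfs:
 *
 *   modprobe sch_htb htb_hysteresis=1
 *   echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
 */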
  71
  72/* used internally to keep the status of a single class */
  73enum htb_cmode {
  74        HTB_CANT_SEND,          /* class can't send and can't borrow */
  75        HTB_MAY_BORROW,         /* class can't send but may borrow */
  76        HTB_CAN_SEND            /* class can send */
  77};
  78
  79struct htb_prio {
  80        union {
  81                struct rb_root  row;
  82                struct rb_root  feed;
  83        };
  84        struct rb_node  *ptr;
  85        /* When a class changes from state 1->2 and disconnects from
  86         * its parent's feed, we lose the ptr value and would start from
  87         * the first child again. Here we store the classid of the
  88         * last valid ptr (used when ptr is NULL).
  89         */
  90        u32             last_ptr_id;
  91};
  92
  93/* interior & leaf nodes; props specific to leaves are marked L:
  94 * To reduce false sharing, place mostly read fields at beginning,
  95 * and mostly written ones at the end.
  96 */
  97struct htb_class {
  98        struct Qdisc_class_common common;
  99        struct psched_ratecfg   rate;
 100        struct psched_ratecfg   ceil;
 101        s64                     buffer, cbuffer;/* token bucket depth/rate */
 102        s64                     mbuffer;        /* max wait time */
 103        u32                     prio;           /* these two are used only by leaves... */
 104        int                     quantum;        /* but stored for parent-to-leaf return */
 105
 106        struct tcf_proto        *filter_list;   /* class attached filters */
 107        int                     filter_cnt;
 108        int                     refcnt;         /* usage count of this class */
 109
 110        int                     level;          /* our level (see above) */
 111        unsigned int            children;
 112        struct htb_class        *parent;        /* parent class */
 113
 114        struct gnet_stats_rate_est64 rate_est;
 115
 116        /*
 117         * Written often fields
 118         */
 119        struct gnet_stats_basic_packed bstats;
 120        struct gnet_stats_queue qstats;
 121        struct tc_htb_xstats    xstats; /* our special stats */
 122
 123        /* token bucket parameters */
 124        s64                     tokens, ctokens;/* current number of tokens */
 125        s64                     t_c;            /* checkpoint time */
 126
 127        union {
 128                struct htb_class_leaf {
 129                        struct list_head drop_list;
 130                        int             deficit[TC_HTB_MAXDEPTH];
 131                        struct Qdisc    *q;
 132                } leaf;
 133                struct htb_class_inner {
 134                        struct htb_prio clprio[TC_HTB_NUMPRIO];
 135                } inner;
 136        } un;
 137        s64                     pq_key;
 138
 139        int                     prio_activity;  /* for which prios are we active */
 140        enum htb_cmode          cmode;          /* current mode of the class */
 141        struct rb_node          pq_node;        /* node for event queue */
 142        struct rb_node          node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
 143};
 144
 145struct htb_level {
 146        struct rb_root  wait_pq;
 147        struct htb_prio hprio[TC_HTB_NUMPRIO];
 148};
 149
 150struct htb_sched {
 151        struct Qdisc_class_hash clhash;
 152        int                     defcls;         /* class where unclassified flows go to */
 153        int                     rate2quantum;   /* quant = rate / rate2quantum */
 154
 155        /* filters for qdisc itself */
 156        struct tcf_proto        *filter_list;
 157
 158#define HTB_WARN_TOOMANYEVENTS  0x1
 159        unsigned int            warned; /* only one warning */
 160        int                     direct_qlen;
 161        struct work_struct      work;
 162
 163        /* non-shaped skbs; let them go directly thru */
 164        struct sk_buff_head     direct_queue;
 165        long                    direct_pkts;
 166
 167        struct qdisc_watchdog   watchdog;
 168
 169        s64                     now;    /* cached dequeue time */
 170        struct list_head        drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 171
 172        /* time of nearest event per level (row) */
 173        s64                     near_ev_cache[TC_HTB_MAXDEPTH];
 174
 175        int                     row_mask[TC_HTB_MAXDEPTH];
 176
 177        struct htb_level        hlevel[TC_HTB_MAXDEPTH];
 178};
 179
 180/* find class in global hash table using given handle */
 181static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 182{
 183        struct htb_sched *q = qdisc_priv(sch);
 184        struct Qdisc_class_common *clc;
 185
 186        clc = qdisc_class_find(&q->clhash, handle);
 187        if (clc == NULL)
 188                return NULL;
 189        return container_of(clc, struct htb_class, common);
 190}
 191
 192/**
 193 * htb_classify - classify a packet into a class
 194 *
 195 * It returns NULL if the packet should be dropped or HTB_DIRECT (-1) if the
 196 * packet should be passed directly thru. In all other cases a leaf class is
 197 * returned. We allow direct class selection by classid in skb->priority. Then
 198 * we examine filters in the qdisc and in inner nodes (if a higher filter
 199 * points to an inner node). If we end up with classid MAJOR:0 we enqueue the
 200 * skb into the special internal fifo (direct). These packets then go directly
 201 * thru. If we still have no valid leaf we try to use the MAJOR:default leaf.
 202 * If still unsuccessful, we finish and return the direct queue.
 203 */
 204#define HTB_DIRECT ((struct htb_class *)-1L)
 205
 206static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 207                                      int *qerr)
 208{
 209        struct htb_sched *q = qdisc_priv(sch);
 210        struct htb_class *cl;
 211        struct tcf_result res;
 212        struct tcf_proto *tcf;
 213        int result;
 214
 215        /* allow selecting a class by setting skb->priority to a valid
 216         * classid; note that nfmark can be used too by attaching an fw
 217         * filter with no rules in it
 218         */
 219        if (skb->priority == sch->handle)
 220                return HTB_DIRECT;      /* X:0 (direct flow) selected */
 221        cl = htb_find(skb->priority, sch);
 222        if (cl && cl->level == 0)
 223                return cl;
 224
 225        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 226        tcf = q->filter_list;
 227        while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
 228#ifdef CONFIG_NET_CLS_ACT
 229                switch (result) {
 230                case TC_ACT_QUEUED:
 231                case TC_ACT_STOLEN:
 232                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; /* fall through */
 233                case TC_ACT_SHOT:
 234                        return NULL;
 235                }
 236#endif
 237                cl = (void *)res.class;
 238                if (!cl) {
 239                        if (res.classid == sch->handle)
 240                                return HTB_DIRECT;      /* X:0 (direct flow) */
 241                        cl = htb_find(res.classid, sch);
 242                        if (!cl)
 243                                break;  /* filter selected invalid classid */
 244                }
 245                if (!cl->level)
 246                        return cl;      /* we hit leaf; return it */
 247
 248                /* we have got inner class; apply inner filter chain */
 249                tcf = cl->filter_list;
 250        }
 251        /* classification failed; try to use default class */
 252        cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
 253        if (!cl || cl->level)
 254                return HTB_DIRECT;      /* bad default .. this is safe bet */
 255        return cl;
 256}
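/* Editor's illustration (not part of the original source): the skb->priority
 * shortcut above lets an application pick its leaf without any tc filter,
 * assuming a qdisc with handle 1: and an existing leaf 1:10. classids are
 * hexadecimal, so "1:10" is TC_H_MAKE(0x10000, 0x10):
 *
 *   int prio = 0x00010010;  // classid "1:10"
 *   setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio));
 *
 * A priority of 0x00010000 ("1:0", the qdisc handle itself) selects the
 * direct queue, bypassing shaping entirely.
 */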
 257
 258/**
 259 * htb_add_to_id_tree - adds a class to the round robin list
 260 *
 261 * The routine adds the class to the list (actually a tree) sorted by classid.
 262 * Make sure that the class is not already on such a list for the given prio.
 263 */
 264static void htb_add_to_id_tree(struct rb_root *root,
 265                               struct htb_class *cl, int prio)
 266{
 267        struct rb_node **p = &root->rb_node, *parent = NULL;
 268
 269        while (*p) {
 270                struct htb_class *c;
 271                parent = *p;
 272                c = rb_entry(parent, struct htb_class, node[prio]);
 273
 274                if (cl->common.classid > c->common.classid)
 275                        p = &parent->rb_right;
 276                else
 277                        p = &parent->rb_left;
 278        }
 279        rb_link_node(&cl->node[prio], parent, p);
 280        rb_insert_color(&cl->node[prio], root);
 281}
 282
 283/**
 284 * htb_add_to_wait_tree - adds class to the event queue with delay
 285 *
 286 * The class is added to the priority event queue to indicate that the
 287 * class will change its mode at time cl->pq_key (in nanoseconds). Make
 288 * sure that the class is not already in the queue.
 289 */
 290static void htb_add_to_wait_tree(struct htb_sched *q,
 291                                 struct htb_class *cl, s64 delay)
 292{
 293        struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
 294
 295        cl->pq_key = q->now + delay;
 296        if (cl->pq_key == q->now)
 297                cl->pq_key++;
 298
 299        /* update the nearest event cache */
 300        if (q->near_ev_cache[cl->level] > cl->pq_key)
 301                q->near_ev_cache[cl->level] = cl->pq_key;
 302
 303        while (*p) {
 304                struct htb_class *c;
 305                parent = *p;
 306                c = rb_entry(parent, struct htb_class, pq_node);
 307                if (cl->pq_key >= c->pq_key)
 308                        p = &parent->rb_right;
 309                else
 310                        p = &parent->rb_left;
 311        }
 312        rb_link_node(&cl->pq_node, parent, p);
 313        rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
 314}
 315
 316/**
 317 * htb_next_rb_node - finds next node in binary tree
 318 *
 319 * When we are past the last key, *n is set to NULL.
 320 * Average complexity is 2 steps per call.
 321 */
 322static inline void htb_next_rb_node(struct rb_node **n)
 323{
 324        *n = rb_next(*n);
 325}
 326
 327/**
 328 * htb_add_class_to_row - add class to its row
 329 *
 330 * The class is added to the row at the priorities marked in mask.
 331 * It does nothing if mask == 0.
 332 */
 333static inline void htb_add_class_to_row(struct htb_sched *q,
 334                                        struct htb_class *cl, int mask)
 335{
 336        q->row_mask[cl->level] |= mask;
 337        while (mask) {
 338                int prio = ffz(~mask);
 339                mask &= ~(1 << prio);
 340                htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
 341        }
 342}
 343
 344/* If this triggers, it is a bug in this code, but it need not be fatal */
 345static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
 346{
 347        if (RB_EMPTY_NODE(rb)) {
 348                WARN_ON(1);
 349        } else {
 350                rb_erase(rb, root);
 351                RB_CLEAR_NODE(rb);
 352        }
 353}
 354
 355
 356/**
 357 * htb_remove_class_from_row - removes class from its row
 358 *
 359 * The class is removed from the row at the priorities marked in mask.
 360 * It does nothing if mask == 0.
 361 */
 362static inline void htb_remove_class_from_row(struct htb_sched *q,
 363                                                 struct htb_class *cl, int mask)
 364{
 365        int m = 0;
 366        struct htb_level *hlevel = &q->hlevel[cl->level];
 367
 368        while (mask) {
 369                int prio = ffz(~mask);
 370                struct htb_prio *hprio = &hlevel->hprio[prio];
 371
 372                mask &= ~(1 << prio);
 373                if (hprio->ptr == cl->node + prio)
 374                        htb_next_rb_node(&hprio->ptr);
 375
 376                htb_safe_rb_erase(cl->node + prio, &hprio->row);
 377                if (!hprio->row.rb_node)
 378                        m |= 1 << prio;
 379        }
 380        q->row_mask[cl->level] &= ~m;
 381}
 382
 383/**
 384 * htb_activate_prios - creates the active class's feed chain
 385 *
 386 * The class is connected to its ancestors and/or to the appropriate rows
 387 * for the priorities it participates in. cl->cmode must be the new
 388 * (activated) mode. It does nothing if cl->prio_activity == 0.
 389 */
 390static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 391{
 392        struct htb_class *p = cl->parent;
 393        long m, mask = cl->prio_activity;
 394
 395        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 396                m = mask;
 397                while (m) {
 398                        int prio = ffz(~m);
 399                        m &= ~(1 << prio);
 400
 401                        if (p->un.inner.clprio[prio].feed.rb_node)
 402                                /* parent already has its feed in use so that
 403                                 * reset bit in mask as parent is already ok
 404                                 */
 405                                mask &= ~(1 << prio);
 406
 407                        htb_add_to_id_tree(&p->un.inner.clprio[prio].feed, cl, prio);
 408                }
 409                p->prio_activity |= mask;
 410                cl = p;
 411                p = cl->parent;
 412
 413        }
 414        if (cl->cmode == HTB_CAN_SEND && mask)
 415                htb_add_class_to_row(q, cl, mask);
 416}
 417
 418/**
 419 * htb_deactivate_prios - remove class from feed chain
 420 *
 421 * cl->cmode must represent the old mode (before deactivation). It does
 422 * nothing if cl->prio_activity == 0. The class is removed from all feed
 423 * chains and rows.
 424 */
 425static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 426{
 427        struct htb_class *p = cl->parent;
 428        long m, mask = cl->prio_activity;
 429
 430        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 431                m = mask;
 432                mask = 0;
 433                while (m) {
 434                        int prio = ffz(~m);
 435                        m &= ~(1 << prio);
 436
 437                        if (p->un.inner.clprio[prio].ptr == cl->node + prio) {
 438                                /* we are removing child which is pointed to from
 439                                 * parent feed - forget the pointer but remember
 440                                 * classid
 441                                 */
 442                                p->un.inner.clprio[prio].last_ptr_id = cl->common.classid;
 443                                p->un.inner.clprio[prio].ptr = NULL;
 444                        }
 445
 446                        htb_safe_rb_erase(cl->node + prio,
 447                                          &p->un.inner.clprio[prio].feed);
 448
 449                        if (!p->un.inner.clprio[prio].feed.rb_node)
 450                                mask |= 1 << prio;
 451                }
 452
 453                p->prio_activity &= ~mask;
 454                cl = p;
 455                p = cl->parent;
 456
 457        }
 458        if (cl->cmode == HTB_CAN_SEND && mask)
 459                htb_remove_class_from_row(q, cl, mask);
 460}
 461
 462static inline s64 htb_lowater(const struct htb_class *cl)
 463{
 464        if (htb_hysteresis)
 465                return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
 466        else
 467                return 0;
 468}
 469static inline s64 htb_hiwater(const struct htb_class *cl)
 470{
 471        if (htb_hysteresis)
 472                return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
 473        else
 474                return 0;
 475}
 476
 477
 478/**
 479 * htb_class_mode - computes and returns current class mode
 480 *
 481 * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
 482 * is not HTB_CAN_SEND then *diff is updated to the time difference from
 483 * now to the time when cl will change its state.
 484 * Also it is worth noting that the class mode doesn't change simply
 485 * at cl->{c,}tokens == 0 but rather there is hysteresis over the
 486 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 487 * mode transitions per time unit. The speed gain is about 1/6.
 488 */
 489static inline enum htb_cmode
 490htb_class_mode(struct htb_class *cl, s64 *diff)
 491{
 492        s64 toks;
 493
 494        if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
 495                *diff = -toks;
 496                return HTB_CANT_SEND;
 497        }
 498
 499        if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
 500                return HTB_CAN_SEND;
 501
 502        *diff = -toks;
 503        return HTB_MAY_BORROW;
 504}
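/* Editor's note (not part of the original source): with htb_hysteresis = 1
 * a class currently in HTB_CAN_SEND keeps that mode until its tokens fall
 * below -cl->buffer rather than below 0, and a class in HTB_CANT_SEND only
 * leaves that mode once ctokens climbs back to >= 0. With hysteresis off
 * both boundaries sit at 0, so a class hovering around empty buckets would
 * flip modes (and churn the rb-trees) on nearly every packet; the widened
 * band trades a little shaping accuracy for fewer transitions.
 */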
 505
 506/**
 507 * htb_change_class_mode - changes the class's mode
 508 *
 509 * This should be the only way to change a class's mode under normal
 510 * circumstances. The routine will update the feed list linkage, change the
 511 * mode and add the class to the wait event queue if appropriate. The new
 512 * mode should differ from the old one and cl->pq_key has to be valid if
 513 * changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 514 */
 515static void
 516htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
 517{
 518        enum htb_cmode new_mode = htb_class_mode(cl, diff);
 519
 520        if (new_mode == cl->cmode)
 521                return;
 522
 523        if (cl->prio_activity) {        /* not necessary: speed optimization */
 524                if (cl->cmode != HTB_CANT_SEND)
 525                        htb_deactivate_prios(q, cl);
 526                cl->cmode = new_mode;
 527                if (new_mode != HTB_CANT_SEND)
 528                        htb_activate_prios(q, cl);
 529        } else
 530                cl->cmode = new_mode;
 531}
 532
 533/**
 534 * htb_activate - inserts leaf cl into appropriate active feeds
 535 *
 536 * The routine learns the (new) priority of the leaf and activates the feed
 537 * chain for the prio. It can safely be called on an already active leaf.
 538 * It also adds the leaf to the drop list.
 539 */
 540static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 541{
 542        WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
 543
 544        if (!cl->prio_activity) {
 545                cl->prio_activity = 1 << cl->prio;
 546                htb_activate_prios(q, cl);
 547                list_add_tail(&cl->un.leaf.drop_list,
 548                              q->drops + cl->prio);
 549        }
 550}
 551
 552/**
 553 * htb_deactivate - remove leaf cl from active feeds
 554 *
 555 * Make sure that the leaf is active. In other words, it can't be called
 556 * with a non-active leaf. It also removes the class from the drop list.
 557 */
 558static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 559{
 560        WARN_ON(!cl->prio_activity);
 561
 562        htb_deactivate_prios(q, cl);
 563        cl->prio_activity = 0;
 564        list_del_init(&cl->un.leaf.drop_list);
 565}
 566
 567static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 568{
 569        int uninitialized_var(ret);
 570        struct htb_sched *q = qdisc_priv(sch);
 571        struct htb_class *cl = htb_classify(skb, sch, &ret);
 572
 573        if (cl == HTB_DIRECT) {
 574                /* enqueue to helper queue */
 575                if (q->direct_queue.qlen < q->direct_qlen) {
 576                        __skb_queue_tail(&q->direct_queue, skb);
 577                        q->direct_pkts++;
 578                } else {
 579                        return qdisc_drop(skb, sch);
 580                }
 581#ifdef CONFIG_NET_CLS_ACT
 582        } else if (!cl) {
 583                if (ret & __NET_XMIT_BYPASS)
 584                        sch->qstats.drops++;
 585                kfree_skb(skb);
 586                return ret;
 587#endif
 588        } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
 589                if (net_xmit_drop_count(ret)) {
 590                        sch->qstats.drops++;
 591                        cl->qstats.drops++;
 592                }
 593                return ret;
 594        } else {
 595                htb_activate(q, cl);
 596        }
 597
 598        sch->q.qlen++;
 599        return NET_XMIT_SUCCESS;
 600}
 601
 602static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
 603{
 604        s64 toks = diff + cl->tokens;
 605
 606        if (toks > cl->buffer)
 607                toks = cl->buffer;
 608        toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
 609        if (toks <= -cl->mbuffer)
 610                toks = 1 - cl->mbuffer;
 611
 612        cl->tokens = toks;
 613}
 614
 615static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
 616{
 617        s64 toks = diff + cl->ctokens;
 618
 619        if (toks > cl->cbuffer)
 620                toks = cl->cbuffer;
 621        toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
 622        if (toks <= -cl->mbuffer)
 623                toks = 1 - cl->mbuffer;
 624
 625        cl->ctokens = toks;
 626}
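/* Editor's worked example (not part of the original source): tokens are
 * kept as nanoseconds of transmission time and psched_l2t_ns() converts a
 * byte count into the time the configured rate needs to send it. At
 * rate = 10 Mbit/s a 1500 byte packet costs 1500 * 8 / 10e6 s = 1.2 ms,
 * i.e. 1200000 ns of tokens, so a 10 ms buffer lets the class burst
 * roughly eight such packets after an idle period before it is paced back
 * down to the configured rate.
 */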
 627
 628/**
 629 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 630 *
 631 * The routine assumes that a packet "bytes" long was dequeued from leaf cl,
 632 * borrowing from "level". It accounts bytes to the ceil leaky bucket for
 633 * the leaf and all ancestors and to the rate bucket for ancestors at levels
 634 * "level" and higher. It also handles a possible change of mode resulting
 635 * from the update. Note that the mode can also increase here (MAY_BORROW to
 636 * CAN_SEND) because we can use a more precise clock than the event queue.
 637 * In such a case we remove the class from the event queue first.
 638 */
 639static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 640                             int level, struct sk_buff *skb)
 641{
 642        int bytes = qdisc_pkt_len(skb);
 643        enum htb_cmode old_mode;
 644        s64 diff;
 645
 646        while (cl) {
 647                diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
 648                if (cl->level >= level) {
 649                        if (cl->level == level)
 650                                cl->xstats.lends++;
 651                        htb_accnt_tokens(cl, bytes, diff);
 652                } else {
 653                        cl->xstats.borrows++;
 654                        cl->tokens += diff;     /* we moved t_c; update tokens */
 655                }
 656                htb_accnt_ctokens(cl, bytes, diff);
 657                cl->t_c = q->now;
 658
 659                old_mode = cl->cmode;
 660                diff = 0;
 661                htb_change_class_mode(q, cl, &diff);
 662                if (old_mode != cl->cmode) {
 663                        if (old_mode != HTB_CAN_SEND)
 664                                htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
 665                        if (cl->cmode != HTB_CAN_SEND)
 666                                htb_add_to_wait_tree(q, cl, diff);
 667                }
 668
 669                /* update basic stats except for leaves which are already updated */
 670                if (cl->level)
 671                        bstats_update(&cl->bstats, skb);
 672
 673                cl = cl->parent;
 674        }
 675}
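/* Editor's worked example (not part of the original source): suppose leaf
 * 1:10 borrowed from its parent 1:1, i.e. "level" equals 1:1's level. For
 * the leaf (cl->level < level) only the ceil bucket is charged; its rate
 * tokens are merely resynced by diff and xstats.borrows is bumped. For 1:1
 * (cl->level == level) xstats.lends is bumped and both the rate and ceil
 * buckets are charged. Charging ceil on the borrower is why a leaf can
 * still be capped by its own ceil while borrowing.
 */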
 676
 677/**
 678 * htb_do_events - make mode changes to classes at the level
 679 *
 680 * Scans the event queue for pending events and applies them. Returns the
 681 * time of the next pending event (0 for no event in pq, q->now for too
 682 * many events). Note: only events with cl->pq_key <= q->now are applied.
 683 */
 684static s64 htb_do_events(struct htb_sched *q, const int level,
 685                         unsigned long start)
 686{
 687        /* don't run for longer than 2 jiffies; 2 is used instead of
 688         * 1 to simplify things when jiffy is going to be incremented
 689         * too soon
 690         */
 691        unsigned long stop_at = start + 2;
 692        struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
 693
 694        while (time_before(jiffies, stop_at)) {
 695                struct htb_class *cl;
 696                s64 diff;
 697                struct rb_node *p = rb_first(wait_pq);
 698
 699                if (!p)
 700                        return 0;
 701
 702                cl = rb_entry(p, struct htb_class, pq_node);
 703                if (cl->pq_key > q->now)
 704                        return cl->pq_key;
 705
 706                htb_safe_rb_erase(p, wait_pq);
 707                diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
 708                htb_change_class_mode(q, cl, &diff);
 709                if (cl->cmode != HTB_CAN_SEND)
 710                        htb_add_to_wait_tree(q, cl, diff);
 711        }
 712
 713        /* too much load - let's continue after a break for scheduling */
 714        if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
 715                pr_warning("htb: too many events!\n");
 716                q->warned |= HTB_WARN_TOOMANYEVENTS;
 717        }
 718
 719        return q->now;
 720}
 721
 722/* Returns class->node+prio from the id-tree where the class's id is >= id.
 723 * NULL if no such one exists.
 724 */
 725static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 726                                              u32 id)
 727{
 728        struct rb_node *r = NULL;
 729        while (n) {
 730                struct htb_class *cl =
 731                    rb_entry(n, struct htb_class, node[prio]);
 732
 733                if (id > cl->common.classid) {
 734                        n = n->rb_right;
 735                } else if (id < cl->common.classid) {
 736                        r = n;
 737                        n = n->rb_left;
 738                } else {
 739                        return n;
 740                }
 741        }
 742        return r;
 743}
 744
 745/**
 746 * htb_lookup_leaf - returns next leaf class in DRR order
 747 *
 748 * Find the leaf the current feed pointer points to.
 749 */
 750static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
 751{
 752        int i;
 753        struct {
 754                struct rb_node *root;
 755                struct rb_node **pptr;
 756                u32 *pid;
 757        } stk[TC_HTB_MAXDEPTH], *sp = stk;
 758
 759        BUG_ON(!hprio->row.rb_node);
 760        sp->root = hprio->row.rb_node;
 761        sp->pptr = &hprio->ptr;
 762        sp->pid = &hprio->last_ptr_id;
 763
 764        for (i = 0; i < 65535; i++) {
 765                if (!*sp->pptr && *sp->pid) {
 766                        /* ptr was invalidated but id is valid - try to recover
 767                         * the original or next ptr
 768                         */
 769                        *sp->pptr =
 770                            htb_id_find_next_upper(prio, sp->root, *sp->pid);
 771                }
 772                *sp->pid = 0;   /* ptr is valid now, so remove this hint as
 773                                 * it can become out of date quickly
 774                                 */
 775                if (!*sp->pptr) {       /* we are at right end; rewind & go up */
 776                        *sp->pptr = sp->root;
 777                        while ((*sp->pptr)->rb_left)
 778                                *sp->pptr = (*sp->pptr)->rb_left;
 779                        if (sp > stk) {
 780                                sp--;
 781                                if (!*sp->pptr) {
 782                                        WARN_ON(1);
 783                                        return NULL;
 784                                }
 785                                htb_next_rb_node(sp->pptr);
 786                        }
 787                } else {
 788                        struct htb_class *cl;
 789                        struct htb_prio *clp;
 790
 791                        cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
 792                        if (!cl->level)
 793                                return cl;
 794                        clp = &cl->un.inner.clprio[prio];
 795                        (++sp)->root = clp->feed.rb_node;
 796                        sp->pptr = &clp->ptr;
 797                        sp->pid = &clp->last_ptr_id;
 798                }
 799        }
 800        WARN_ON(1);
 801        return NULL;
 802}
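/* Editor's illustration (not part of the original source): suppose a feed
 * tree holds leaves 1:10, 1:20 and 1:30 and ptr currently rests on 1:20.
 * If 1:20 is disconnected, htb_deactivate_prios() NULLs ptr and records
 * last_ptr_id = 0x00010020. The next lookup calls
 * htb_id_find_next_upper(prio, root, 0x00010020), which lands on 1:30, the
 * first classid >= the remembered one, so the round robin resumes where it
 * left off instead of restarting at 1:10.
 */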
 803
 804/* dequeues a packet at the given priority and level; call only if
 805 * you are sure that there is an active class at prio/level
 806 */
 807static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
 808                                        const int level)
 809{
 810        struct sk_buff *skb = NULL;
 811        struct htb_class *cl, *start;
 812        struct htb_level *hlevel = &q->hlevel[level];
 813        struct htb_prio *hprio = &hlevel->hprio[prio];
 814
 815        /* look initial class up in the row */
 816        start = cl = htb_lookup_leaf(hprio, prio);
 817
 818        do {
 819next:
 820                if (unlikely(!cl))
 821                        return NULL;
 822
 823                /* the class can be empty - it is unlikely but can be true if
 824                 * the leaf qdisc drops packets in its enqueue routine or if
 825                 * someone used a graft operation on the leaf since the last
 826                 * dequeue; simply deactivate and skip such a class
 827                 */
 828                if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
 829                        struct htb_class *next;
 830                        htb_deactivate(q, cl);
 831
 832                        /* row/level might become empty */
 833                        if ((q->row_mask[level] & (1 << prio)) == 0)
 834                                return NULL;
 835
 836                        next = htb_lookup_leaf(hprio, prio);
 837
 838                        if (cl == start)        /* fix start if we just deleted it */
 839                                start = next;
 840                        cl = next;
 841                        goto next;
 842                }
 843
 844                skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
 845                if (likely(skb != NULL))
 846                        break;
 847
 848                qdisc_warn_nonwc("htb", cl->un.leaf.q);
 849                htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr:
 850                                         &q->hlevel[0].hprio[prio].ptr);
 851                cl = htb_lookup_leaf(hprio, prio);
 852
 853        } while (cl != start);
 854
 855        if (likely(skb != NULL)) {
 856                bstats_update(&cl->bstats, skb);
 857                cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
 858                if (cl->un.leaf.deficit[level] < 0) {
 859                        cl->un.leaf.deficit[level] += cl->quantum;
 860                        htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr :
 861                                                 &q->hlevel[0].hprio[prio].ptr);
 862                }
 863                /* this used to be after charge_class but this arrangement
 864                 * gives us slightly better performance
 865                 */
 866                if (!cl->un.leaf.q->q.qlen)
 867                        htb_deactivate(q, cl);
 868                htb_charge_class(q, cl, level, skb);
 869        }
 870        return skb;
 871}
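/* Editor's note (not part of the original source): the deficit/quantum
 * bookkeeping above is classic DRR. With quantum = 1500, dequeuing one
 * 1500 byte packet drives deficit[level] to -1500; it is then recharged by
 * the quantum and the rb pointer advances, giving the next leaf a turn. A
 * leaf configured with quantum = 3000 holds the pointer twice as long and
 * so gets twice the share at equal priority.
 */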
 872
 873static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 874{
 875        struct sk_buff *skb;
 876        struct htb_sched *q = qdisc_priv(sch);
 877        int level;
 878        s64 next_event;
 879        unsigned long start_at;
 880
 881        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
 882        skb = __skb_dequeue(&q->direct_queue);
 883        if (skb != NULL) {
 884ok:
 885                qdisc_bstats_update(sch, skb);
 886                qdisc_unthrottled(sch);
 887                sch->q.qlen--;
 888                return skb;
 889        }
 890
 891        if (!sch->q.qlen)
 892                goto fin;
 893        q->now = ktime_to_ns(ktime_get());
 894        start_at = jiffies;
 895
 896        next_event = q->now + 5LLU * NSEC_PER_SEC;
 897
 898        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 899                /* common case optimization - skip event handler quickly */
 900                int m;
 901                s64 event = q->near_ev_cache[level];
 902
 903                if (q->now >= event) {
 904                        event = htb_do_events(q, level, start_at);
 905                        if (!event)
 906                                event = q->now + NSEC_PER_SEC;
 907                        q->near_ev_cache[level] = event;
 908                }
 909
 910                if (next_event > event)
 911                        next_event = event;
 912
 913                m = ~q->row_mask[level];
 914                while (m != (int)(-1)) {
 915                        int prio = ffz(m);
 916
 917                        m |= 1 << prio;
 918                        skb = htb_dequeue_tree(q, prio, level);
 919                        if (likely(skb != NULL))
 920                                goto ok;
 921                }
 922        }
 923        sch->qstats.overlimits++;
 924        if (likely(next_event > q->now)) {
 925                if (!test_bit(__QDISC_STATE_DEACTIVATED,
 926                              &qdisc_root_sleeping(q->watchdog.qdisc)->state)) {
 927                        ktime_t time = ns_to_ktime(next_event);
 928                        qdisc_throttled(q->watchdog.qdisc);
 929                        hrtimer_start(&q->watchdog.timer, time,
 930                                      HRTIMER_MODE_ABS);
 931                }
 932        } else {
 933                schedule_work(&q->work);
 934        }
 935fin:
 936        return skb;
 937}
 938
 939/* try to drop from each class (by prio) until one succeeds */
 940static unsigned int htb_drop(struct Qdisc *sch)
 941{
 942        struct htb_sched *q = qdisc_priv(sch);
 943        int prio;
 944
 945        for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
 946                struct list_head *p;
 947                list_for_each(p, q->drops + prio) {
 948                        struct htb_class *cl = list_entry(p, struct htb_class,
 949                                                          un.leaf.drop_list);
 950                        unsigned int len;
 951                        if (cl->un.leaf.q->ops->drop &&
 952                            (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
 953                                sch->q.qlen--;
 954                                if (!cl->un.leaf.q->q.qlen)
 955                                        htb_deactivate(q, cl);
 956                                return len;
 957                        }
 958                }
 959        }
 960        return 0;
 961}
 962
 963/* reset all classes */
 964/* always called under BH & queue lock */
 965static void htb_reset(struct Qdisc *sch)
 966{
 967        struct htb_sched *q = qdisc_priv(sch);
 968        struct htb_class *cl;
 969        unsigned int i;
 970
 971        for (i = 0; i < q->clhash.hashsize; i++) {
 972                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
 973                        if (cl->level)
 974                                memset(&cl->un.inner, 0, sizeof(cl->un.inner));
 975                        else {
 976                                if (cl->un.leaf.q)
 977                                        qdisc_reset(cl->un.leaf.q);
 978                                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 979                        }
 980                        cl->prio_activity = 0;
 981                        cl->cmode = HTB_CAN_SEND;
 982
 983                }
 984        }
 985        qdisc_watchdog_cancel(&q->watchdog);
 986        __skb_queue_purge(&q->direct_queue);
 987        sch->q.qlen = 0;
 988        memset(q->hlevel, 0, sizeof(q->hlevel));
 989        memset(q->row_mask, 0, sizeof(q->row_mask));
 990        for (i = 0; i < TC_HTB_NUMPRIO; i++)
 991                INIT_LIST_HEAD(q->drops + i);
 992}
 993
 994static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
 995        [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
 996        [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
 997        [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 998        [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 999        [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1000};
1001
1002static void htb_work_func(struct work_struct *work)
1003{
1004        struct htb_sched *q = container_of(work, struct htb_sched, work);
1005        struct Qdisc *sch = q->watchdog.qdisc;
1006
1007        __netif_schedule(qdisc_root(sch));
1008}
1009
1010static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1011{
1012        struct htb_sched *q = qdisc_priv(sch);
1013        struct nlattr *tb[TCA_HTB_MAX + 1];
1014        struct tc_htb_glob *gopt;
1015        int err;
1016        int i;
1017
1018        if (!opt)
1019                return -EINVAL;
1020
1021        err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
1022        if (err < 0)
1023                return err;
1024
1025        if (!tb[TCA_HTB_INIT])
1026                return -EINVAL;
1027
1028        gopt = nla_data(tb[TCA_HTB_INIT]);
1029        if (gopt->version != HTB_VER >> 16)
1030                return -EINVAL;
1031
1032        err = qdisc_class_hash_init(&q->clhash);
1033        if (err < 0)
1034                return err;
1035        for (i = 0; i < TC_HTB_NUMPRIO; i++)
1036                INIT_LIST_HEAD(q->drops + i);
1037
1038        qdisc_watchdog_init(&q->watchdog, sch);
1039        INIT_WORK(&q->work, htb_work_func);
1040        skb_queue_head_init(&q->direct_queue);
1041
1042        if (tb[TCA_HTB_DIRECT_QLEN])
1043                q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1044        else {
1045                q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1046                if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1047                        q->direct_qlen = 2;
1048        }
1049        if ((q->rate2quantum = gopt->rate2quantum) < 1)
1050                q->rate2quantum = 1;
1051        q->defcls = gopt->defcls;
1052
1053        return 0;
1054}
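/* Editor's note (not part of the original source): the direct queue length
 * defaults to the device's tx_queue_len but can be pinned from userspace,
 * e.g. with iproute2:
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20 direct_qlen 64
 *
 * which arrives here as the TCA_HTB_DIRECT_QLEN u32 attribute.
 */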
1055
1056static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1057{
1058        spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1059        struct htb_sched *q = qdisc_priv(sch);
1060        struct nlattr *nest;
1061        struct tc_htb_glob gopt;
1062
1063        spin_lock_bh(root_lock);
1064
1065        gopt.direct_pkts = q->direct_pkts;
1066        gopt.version = HTB_VER;
1067        gopt.rate2quantum = q->rate2quantum;
1068        gopt.defcls = q->defcls;
1069        gopt.debug = 0;
1070
1071        nest = nla_nest_start(skb, TCA_OPTIONS);
1072        if (nest == NULL)
1073                goto nla_put_failure;
1074        if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1075            nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1076                goto nla_put_failure;
1077        nla_nest_end(skb, nest);
1078
1079        spin_unlock_bh(root_lock);
1080        return skb->len;
1081
1082nla_put_failure:
1083        spin_unlock_bh(root_lock);
1084        nla_nest_cancel(skb, nest);
1085        return -1;
1086}
1087
1088static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1089                          struct sk_buff *skb, struct tcmsg *tcm)
1090{
1091        struct htb_class *cl = (struct htb_class *)arg;
1092        spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1093        struct nlattr *nest;
1094        struct tc_htb_opt opt;
1095
1096        spin_lock_bh(root_lock);
1097        tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1098        tcm->tcm_handle = cl->common.classid;
1099        if (!cl->level && cl->un.leaf.q)
1100                tcm->tcm_info = cl->un.leaf.q->handle;
1101
1102        nest = nla_nest_start(skb, TCA_OPTIONS);
1103        if (nest == NULL)
1104                goto nla_put_failure;
1105
1106        memset(&opt, 0, sizeof(opt));
1107
1108        psched_ratecfg_getrate(&opt.rate, &cl->rate);
1109        opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1110        psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1111        opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1112        opt.quantum = cl->quantum;
1113        opt.prio = cl->prio;
1114        opt.level = cl->level;
1115        if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1116                goto nla_put_failure;
1117
1118        nla_nest_end(skb, nest);
1119        spin_unlock_bh(root_lock);
1120        return skb->len;
1121
1122nla_put_failure:
1123        spin_unlock_bh(root_lock);
1124        nla_nest_cancel(skb, nest);
1125        return -1;
1126}
1127
1128static int
1129htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1130{
1131        struct htb_class *cl = (struct htb_class *)arg;
1132
1133        if (!cl->level && cl->un.leaf.q)
1134                cl->qstats.qlen = cl->un.leaf.q->q.qlen;
1135        cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens);
1136        cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens);
1137
1138        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1139            gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
1140            gnet_stats_copy_queue(d, &cl->qstats) < 0)
1141                return -1;
1142
1143        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1144}
1145
1146static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1147                     struct Qdisc **old)
1148{
1149        struct htb_class *cl = (struct htb_class *)arg;
1150
1151        if (cl->level)
1152                return -EINVAL;
1153        if (new == NULL &&
1154            (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1155                                     cl->common.classid)) == NULL)
1156                return -ENOBUFS;
1157
1158        sch_tree_lock(sch);
1159        *old = cl->un.leaf.q;
1160        cl->un.leaf.q = new;
1161        if (*old != NULL) {
1162                qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1163                qdisc_reset(*old);
1164        }
1165        sch_tree_unlock(sch);
1166        return 0;
1167}
1168
1169static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1170{
1171        struct htb_class *cl = (struct htb_class *)arg;
1172        return !cl->level ? cl->un.leaf.q : NULL;
1173}
1174
1175static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1176{
1177        struct htb_class *cl = (struct htb_class *)arg;
1178
1179        if (cl->un.leaf.q->q.qlen == 0)
1180                htb_deactivate(qdisc_priv(sch), cl);
1181}
1182
1183static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1184{
1185        struct htb_class *cl = htb_find(classid, sch);
1186        if (cl)
1187                cl->refcnt++;
1188        return (unsigned long)cl;
1189}
1190
1191static inline int htb_parent_last_child(struct htb_class *cl)
1192{
1193        if (!cl->parent)
1194                /* the root class */
1195                return 0;
1196        if (cl->parent->children > 1)
1197                /* not the last child */
1198                return 0;
1199        return 1;
1200}
1201
1202static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1203                               struct Qdisc *new_q)
1204{
1205        struct htb_class *parent = cl->parent;
1206
1207        WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
1208
1209        if (parent->cmode != HTB_CAN_SEND)
1210                htb_safe_rb_erase(&parent->pq_node,
1211                                  &q->hlevel[parent->level].wait_pq);
1212
1213        parent->level = 0;
1214        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1215        INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1216        parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
1217        parent->tokens = parent->buffer;
1218        parent->ctokens = parent->cbuffer;
1219        parent->t_c = ktime_to_ns(ktime_get());
1220        parent->cmode = HTB_CAN_SEND;
1221}
1222
1223static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1224{
1225        if (!cl->level) {
1226                WARN_ON(!cl->un.leaf.q);
1227                qdisc_destroy(cl->un.leaf.q);
1228        }
1229        gen_kill_estimator(&cl->bstats, &cl->rate_est);
1230        tcf_destroy_chain(&cl->filter_list);
1231        kfree(cl);
1232}
1233
1234static void htb_destroy(struct Qdisc *sch)
1235{
1236        struct htb_sched *q = qdisc_priv(sch);
1237        struct hlist_node *next;
1238        struct htb_class *cl;
1239        unsigned int i;
1240
1241        cancel_work_sync(&q->work);
1242        qdisc_watchdog_cancel(&q->watchdog);
1243        /* This line used to be after htb_destroy_class call below
1244         * and surprisingly it worked in 2.4. But it must precede it
1245         * because a filter needs its target class alive to be able to call
1246         * unbind_filter on it (without an Oops).
1247         */
1248        tcf_destroy_chain(&q->filter_list);
1249
1250        for (i = 0; i < q->clhash.hashsize; i++) {
1251                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode)
1252                        tcf_destroy_chain(&cl->filter_list);
1253        }
1254        for (i = 0; i < q->clhash.hashsize; i++) {
1255                hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1256                                          common.hnode)
1257                        htb_destroy_class(sch, cl);
1258        }
1259        qdisc_class_hash_destroy(&q->clhash);
1260        __skb_queue_purge(&q->direct_queue);
1261}
1262
1263static int htb_delete(struct Qdisc *sch, unsigned long arg)
1264{
1265        struct htb_sched *q = qdisc_priv(sch);
1266        struct htb_class *cl = (struct htb_class *)arg;
1267        unsigned int qlen;
1268        struct Qdisc *new_q = NULL;
1269        int last_child = 0;
1270
1271        // TODO: why don't we allow deleting a subtree? references? does the
1272        // tc subsys guarantee us that in htb_destroy it holds no class
1273        // refs so that we can remove children safely there?
1274        if (cl->children || cl->filter_cnt)
1275                return -EBUSY;
1276
1277        if (!cl->level && htb_parent_last_child(cl)) {
1278                new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1279                                          cl->parent->common.classid);
1280                last_child = 1;
1281        }
1282
1283        sch_tree_lock(sch);
1284
1285        if (!cl->level) {
1286                qlen = cl->un.leaf.q->q.qlen;
1287                qdisc_reset(cl->un.leaf.q);
1288                qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
1289        }
1290
1291        /* delete from hash and active; remainder in destroy_class */
1292        qdisc_class_hash_remove(&q->clhash, &cl->common);
1293        if (cl->parent)
1294                cl->parent->children--;
1295
1296        if (cl->prio_activity)
1297                htb_deactivate(q, cl);
1298
1299        if (cl->cmode != HTB_CAN_SEND)
1300                htb_safe_rb_erase(&cl->pq_node,
1301                                  &q->hlevel[cl->level].wait_pq);
1302
1303        if (last_child)
1304                htb_parent_to_leaf(q, cl, new_q);
1305
1306        BUG_ON(--cl->refcnt == 0);
1307        /*
1308         * This shouldn't happen: we "hold" one cops->get() when called
1309         * from tc_ctl_tclass; the destroy method is done from cops->put().
1310         */
1311
1312        sch_tree_unlock(sch);
1313        return 0;
1314}
1315
1316static void htb_put(struct Qdisc *sch, unsigned long arg)
1317{
1318        struct htb_class *cl = (struct htb_class *)arg;
1319
1320        if (--cl->refcnt == 0)
1321                htb_destroy_class(sch, cl);
1322}
1323
1324static int htb_change_class(struct Qdisc *sch, u32 classid,
1325                            u32 parentid, struct nlattr **tca,
1326                            unsigned long *arg)
1327{
1328        int err = -EINVAL;
1329        struct htb_sched *q = qdisc_priv(sch);
1330        struct htb_class *cl = (struct htb_class *)*arg, *parent;
1331        struct nlattr *opt = tca[TCA_OPTIONS];
1332        struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1333        struct nlattr *tb[TCA_HTB_MAX + 1];
1334        struct tc_htb_opt *hopt;
1335
1336        /* extract all subattrs from opt attr */
1337        if (!opt)
1338                goto failure;
1339
1340        err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
1341        if (err < 0)
1342                goto failure;
1343
1344        err = -EINVAL;
1345        if (tb[TCA_HTB_PARMS] == NULL)
1346                goto failure;
1347
1348        parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1349
1350        hopt = nla_data(tb[TCA_HTB_PARMS]);
1351        if (!hopt->rate.rate || !hopt->ceil.rate)
1352                goto failure;
1353
1354        /* Keep backward compatibility with rate_table based iproute2 tc */
1355        if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) {
1356                rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
1357                if (rtab)
1358                        qdisc_put_rtab(rtab);
1359        }
1360        if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) {
1361                ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
1362                if (ctab)
1363                        qdisc_put_rtab(ctab);
1364        }
1365
1366        if (!cl) {              /* new class */
1367                struct Qdisc *new_q;
1368                int prio;
1369                struct {
1370                        struct nlattr           nla;
1371                        struct gnet_estimator   opt;
1372                } est = {
1373                        .nla = {
1374                                .nla_len        = nla_attr_size(sizeof(est.opt)),
1375                                .nla_type       = TCA_RATE,
1376                        },
1377                        .opt = {
1378                                /* 4s interval, 16s averaging constant */
1379                                .interval       = 2,
1380                                .ewma_log       = 2,
1381                        },
1382                };
1383
1384                /* check for valid classid */
1385                if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1386                    htb_find(classid, sch))
1387                        goto failure;
1388
1389                /* check maximal depth */
1390                if (parent && parent->parent && parent->parent->level < 2) {
1391                        pr_err("htb: tree is too deep\n");
1392                        goto failure;
1393                }
1394                err = -ENOBUFS;
1395                cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1396                if (!cl)
1397                        goto failure;
1398
1399                if (htb_rate_est || tca[TCA_RATE]) {
1400                        err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1401                                                qdisc_root_sleeping_lock(sch),
1402                                                tca[TCA_RATE] ? : &est.nla);
1403                        if (err) {
1404                                kfree(cl);
1405                                goto failure;
1406                        }
1407                }
1408
1409                cl->refcnt = 1;
1410                cl->children = 0;
1411                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1412                RB_CLEAR_NODE(&cl->pq_node);
1413
1414                for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1415                        RB_CLEAR_NODE(&cl->node[prio]);
1416
1417                /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1418                 * so it can't be used inside of sch_tree_lock
1419                 * -- thanks to Karlis Peisenieks
1420                 */
1421                new_q = qdisc_create_dflt(sch->dev_queue,
1422                                          &pfifo_qdisc_ops, classid);
1423                sch_tree_lock(sch);
1424                if (parent && !parent->level) {
1425                        unsigned int qlen = parent->un.leaf.q->q.qlen;
1426
1427                        /* turn parent into inner node */
1428                        qdisc_reset(parent->un.leaf.q);
1429                        qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
1430                        qdisc_destroy(parent->un.leaf.q);
1431                        if (parent->prio_activity)
1432                                htb_deactivate(q, parent);
1433
1434                        /* remove from evt list because of level change */
1435                        if (parent->cmode != HTB_CAN_SEND) {
1436                                htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1437                                parent->cmode = HTB_CAN_SEND;
1438                        }
1439                        parent->level = (parent->parent ? parent->parent->level
1440                                         : TC_HTB_MAXDEPTH) - 1;
1441                        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1442                }
1443                /* leaf (we) needs elementary qdisc */
1444                cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1445
1446                cl->common.classid = classid;
1447                cl->parent = parent;
1448
1449                /* set class to be in HTB_CAN_SEND state */
1450                cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1451                cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1452                cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1453                cl->t_c = ktime_to_ns(ktime_get());
1454                cl->cmode = HTB_CAN_SEND;
1455
1456                /* attach to the hash list and parent's family */
1457                qdisc_class_hash_insert(&q->clhash, &cl->common);
1458                if (parent)
1459                        parent->children++;
1460        } else {
1461                if (tca[TCA_RATE]) {
1462                        err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1463                                                    qdisc_root_sleeping_lock(sch),
1464                                                    tca[TCA_RATE]);
1465                        if (err)
1466                                return err;
1467                }
1468                sch_tree_lock(sch);
1469        }
1470
1471        /* there used to be a nasty bug here: we have to check that the node
1472         * is really a leaf before changing cl->un.leaf !
1473         */
1474        if (!cl->level) {
1475                cl->quantum = hopt->rate.rate / q->rate2quantum;
1476                if (!hopt->quantum && cl->quantum < 1000) {
1477                        pr_warning(
1478                               "HTB: quantum of class %X is small. Consider r2q change.\n",
1479                               cl->common.classid);
1480                        cl->quantum = 1000;
1481                }
1482                if (!hopt->quantum && cl->quantum > 200000) {
1483                        pr_warning(
1484                               "HTB: quantum of class %X is big. Consider r2q change.\n",
1485                               cl->common.classid);
1486                        cl->quantum = 200000;
1487                }
1488                if (hopt->quantum)
1489                        cl->quantum = hopt->quantum;
1490                if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1491                        cl->prio = TC_HTB_NUMPRIO - 1;
1492        }
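        /* Editor's worked example (not part of the original source): rates
         * are stored in bytes/sec, so with iproute2's default r2q of 10 a
         * 1 Mbit/s leaf gets quantum = 125000 / 10 = 12500 bytes. Rates
         * below ~80 kbit/s hit the 1000 byte floor and rates above
         * ~16 Mbit/s hit the 200000 byte cap, which is what the warnings
         * above suggest fixing via a different r2q or an explicit
         * per-class quantum.
         */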
1493
1494        psched_ratecfg_precompute(&cl->rate, &hopt->rate);
1495        psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);
1496
1497        cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
1498        cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
1499
1500        sch_tree_unlock(sch);
1501
1502        qdisc_class_hash_grow(sch, &q->clhash);
1503
1504        *arg = (unsigned long)cl;
1505        return 0;
1506
1507failure:
1508        return err;
1509}
1510
1511static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1512{
1513        struct htb_sched *q = qdisc_priv(sch);
1514        struct htb_class *cl = (struct htb_class *)arg;
1515        struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
1516
1517        return fl;
1518}
1519
1520static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1521                                     u32 classid)
1522{
1523        struct htb_class *cl = htb_find(classid, sch);
1524
1525        /*if (cl && !cl->level) return 0;
1526         * The line above used to be there to prevent attaching filters to
1527         * leaves. But at least the tc_index filter uses this just to get the
1528         * class for other reasons, so we have to allow for it.
1529         * ----
1530         * 19.6.2002 As Werner explained it is ok - bind filter is just
1531         * another way to "lock" the class - unlike "get" this lock can
1532         * be broken by class during destroy IIUC.
1533         */
1534        if (cl)
1535                cl->filter_cnt++;
1536        return (unsigned long)cl;
1537}
1538
1539static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1540{
1541        struct htb_class *cl = (struct htb_class *)arg;
1542
1543        if (cl)
1544                cl->filter_cnt--;
1545}
1546
1547static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1548{
1549        struct htb_sched *q = qdisc_priv(sch);
1550        struct htb_class *cl;
1551        unsigned int i;
1552
1553        if (arg->stop)
1554                return;
1555
1556        for (i = 0; i < q->clhash.hashsize; i++) {
1557                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1558                        if (arg->count < arg->skip) {
1559                                arg->count++;
1560                                continue;
1561                        }
1562                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1563                                arg->stop = 1;
1564                                return;
1565                        }
1566                        arg->count++;
1567                }
1568        }
1569}
1570
1571static const struct Qdisc_class_ops htb_class_ops = {
1572        .graft          =       htb_graft,
1573        .leaf           =       htb_leaf,
1574        .qlen_notify    =       htb_qlen_notify,
1575        .get            =       htb_get,
1576        .put            =       htb_put,
1577        .change         =       htb_change_class,
1578        .delete         =       htb_delete,
1579        .walk           =       htb_walk,
1580        .tcf_chain      =       htb_find_tcf,
1581        .bind_tcf       =       htb_bind_filter,
1582        .unbind_tcf     =       htb_unbind_filter,
1583        .dump           =       htb_dump_class,
1584        .dump_stats     =       htb_dump_class_stats,
1585};
1586
1587static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1588        .cl_ops         =       &htb_class_ops,
1589        .id             =       "htb",
1590        .priv_size      =       sizeof(struct htb_sched),
1591        .enqueue        =       htb_enqueue,
1592        .dequeue        =       htb_dequeue,
1593        .peek           =       qdisc_peek_dequeued,
1594        .drop           =       htb_drop,
1595        .init           =       htb_init,
1596        .reset          =       htb_reset,
1597        .destroy        =       htb_destroy,
1598        .dump           =       htb_dump,
1599        .owner          =       THIS_MODULE,
1600};
1601
1602static int __init htb_module_init(void)
1603{
1604        return register_qdisc(&htb_qdisc_ops);
1605}
1606static void __exit htb_module_exit(void)
1607{
1608        unregister_qdisc(&htb_qdisc_ops);
1609}
1610
1611module_init(htb_module_init)
1612module_exit(htb_module_exit)
1613MODULE_LICENSE("GPL");
1614