linux/net/sched/sch_htb.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
   4 *
   5 * Authors:     Martin Devera, <devik@cdi.cz>
   6 *
   7 * Credits (in time order) for older HTB versions:
   8 *              Stef Coene <stef.coene@docum.org>
   9 *                      HTB support at LARTC mailing list
  10 *              Ondrej Kraus, <krauso@barr.cz>
  11 *                      found missing INIT_QDISC(htb)
  12 *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
  13 *                      helped a lot to locate nasty class stall bug
  14 *              Andi Kleen, Jamal Hadi, Bert Hubert
  15 *                      code review and helpful comments on shaping
  16 *              Tomasz Wrona, <tw@eter.tym.pl>
  17 *                      created test case so that I was able to fix nasty bug
  18 *              Wilfried Weissmann
  19 *                      spotted bug in dequeue code and helped with fix
  20 *              Jiri Fojtasek
  21 *                      fixed requeue routine
  22 *              and many others. thanks.
  23 */
  24#include <linux/module.h>
  25#include <linux/moduleparam.h>
  26#include <linux/types.h>
  27#include <linux/kernel.h>
  28#include <linux/string.h>
  29#include <linux/errno.h>
  30#include <linux/skbuff.h>
  31#include <linux/list.h>
  32#include <linux/compiler.h>
  33#include <linux/rbtree.h>
  34#include <linux/workqueue.h>
  35#include <linux/slab.h>
  36#include <net/netlink.h>
  37#include <net/sch_generic.h>
  38#include <net/pkt_sched.h>
  39#include <net/pkt_cls.h>
  40
  41/* HTB algorithm.
  42    Author: devik@cdi.cz
  43    ========================================================================
  44    HTB is like TBF with multiple classes. It is also similar to CBQ because
  45    it allows a priority to be assigned to each class in the hierarchy.
  46    In fact it is another implementation of Floyd's formal sharing.
  47
  48    Levels:
  49    Each class is assigned a level. Leaves ALWAYS have level 0 and root
  50    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
  51    one less than their parent.
  52*/
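
/* A purely illustrative sketch of the level numbering above (the interface
 * name, class handles and rates are arbitrary and not part of this file).
 * Assuming TC_HTB_MAXDEPTH == 8, the hierarchy built by:
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 102
 *   tc class add dev eth0 parent 1:   classid 1:1   htb rate 100mbit
 *   tc class add dev eth0 parent 1:1  classid 1:10  htb rate 60mbit ceil 100mbit
 *   tc class add dev eth0 parent 1:10 classid 1:101 htb rate 30mbit ceil 100mbit
 *   tc class add dev eth0 parent 1:10 classid 1:102 htb rate 30mbit ceil 100mbit
 *
 * ends up with levels:
 *
 *   1:1   (inner, directly under the qdisc)  level 7 == TC_HTB_MAXDEPTH - 1
 *   1:10  (inner, child of 1:1)              level 6
 *   1:101 and 1:102 (leaves)                 level 0
 */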
  53
  54static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
  55#define HTB_VER 0x30011         /* major must be matched with the number supplied by TC as version */
  56
  57#if HTB_VER >> 16 != TC_HTB_PROTOVER
  58#error "Mismatched sch_htb.c and pkt_sch.h"
  59#endif
  60
  61/* Module parameter and sysfs export */
  62module_param    (htb_hysteresis, int, 0640);
  63MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
  64
  65static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
  66module_param(htb_rate_est, int, 0640);
  67MODULE_PARM_DESC(htb_rate_est, "set up a default rate estimator (4sec 16sec) for htb classes");
  68
  69/* used internally to keep the status of a single class */
  70enum htb_cmode {
  71        HTB_CANT_SEND,          /* class can't send and can't borrow */
  72        HTB_MAY_BORROW,         /* class can't send but may borrow */
  73        HTB_CAN_SEND            /* class can send */
  74};
  75
  76struct htb_prio {
  77        union {
  78                struct rb_root  row;
  79                struct rb_root  feed;
  80        };
  81        struct rb_node  *ptr;
  82        /* When a class changes from state 1->2 and disconnects from
  83         * its parent's feed, we lose the ptr value and start from the
  84         * first child again. Here we store the classid of the
  85         * last valid ptr (used when ptr is NULL).
  86         */
  87        u32             last_ptr_id;
  88};
  89
  90/* interior & leaf nodes; props specific to leaves are marked L:
  91 * To reduce false sharing, place mostly read fields at beginning,
  92 * and mostly written ones at the end.
  93 */
  94struct htb_class {
  95        struct Qdisc_class_common common;
  96        struct psched_ratecfg   rate;
  97        struct psched_ratecfg   ceil;
  98        s64                     buffer, cbuffer;/* token bucket depth/rate */
  99        s64                     mbuffer;        /* max wait time */
 100        u32                     prio;           /* these two are used only by leaves... */
 101        int                     quantum;        /* but stored for parent-to-leaf return */
 102
 103        struct tcf_proto __rcu  *filter_list;   /* class attached filters */
 104        struct tcf_block        *block;
 105        int                     filter_cnt;
 106
 107        int                     level;          /* our level (see above) */
 108        unsigned int            children;
 109        struct htb_class        *parent;        /* parent class */
 110
 111        struct net_rate_estimator __rcu *rate_est;
 112
 113        /*
 114         * Written often fields
 115         */
 116        struct gnet_stats_basic_packed bstats;
 117        struct tc_htb_xstats    xstats; /* our special stats */
 118
 119        /* token bucket parameters */
 120        s64                     tokens, ctokens;/* current number of tokens */
 121        s64                     t_c;            /* checkpoint time */
 122
 123        union {
 124                struct htb_class_leaf {
 125                        int             deficit[TC_HTB_MAXDEPTH];
 126                        struct Qdisc    *q;
 127                } leaf;
 128                struct htb_class_inner {
 129                        struct htb_prio clprio[TC_HTB_NUMPRIO];
 130                } inner;
 131        };
 132        s64                     pq_key;
 133
 134        int                     prio_activity;  /* for which prios are we active */
 135        enum htb_cmode          cmode;          /* current mode of the class */
 136        struct rb_node          pq_node;        /* node for event queue */
 137        struct rb_node          node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
 138
 139        unsigned int drops ____cacheline_aligned_in_smp;
 140        unsigned int            overlimits;
 141};
 142
 143struct htb_level {
 144        struct rb_root  wait_pq;
 145        struct htb_prio hprio[TC_HTB_NUMPRIO];
 146};
 147
 148struct htb_sched {
 149        struct Qdisc_class_hash clhash;
 150        int                     defcls;         /* class where unclassified flows go to */
 151        int                     rate2quantum;   /* quant = rate / rate2quantum */
 152
 153        /* filters for qdisc itself */
 154        struct tcf_proto __rcu  *filter_list;
 155        struct tcf_block        *block;
 156
 157#define HTB_WARN_TOOMANYEVENTS  0x1
 158        unsigned int            warned; /* only one warning */
 159        int                     direct_qlen;
 160        struct work_struct      work;
 161
 162        /* non shaped skbs; let them go directly thru */
 163        struct qdisc_skb_head   direct_queue;
 164        u32                     direct_pkts;
 165        u32                     overlimits;
 166
 167        struct qdisc_watchdog   watchdog;
 168
 169        s64                     now;    /* cached dequeue time */
 170
 171        /* time of nearest event per level (row) */
 172        s64                     near_ev_cache[TC_HTB_MAXDEPTH];
 173
 174        int                     row_mask[TC_HTB_MAXDEPTH];
 175
 176        struct htb_level        hlevel[TC_HTB_MAXDEPTH];
 177};
 178
 179/* find class in global hash table using given handle */
 180static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 181{
 182        struct htb_sched *q = qdisc_priv(sch);
 183        struct Qdisc_class_common *clc;
 184
 185        clc = qdisc_class_find(&q->clhash, handle);
 186        if (clc == NULL)
 187                return NULL;
 188        return container_of(clc, struct htb_class, common);
 189}
 190
 191static unsigned long htb_search(struct Qdisc *sch, u32 handle)
 192{
 193        return (unsigned long)htb_find(handle, sch);
 194}
 195/**
 196 * htb_classify - classify a packet into a class
 197 *
 198 * It returns NULL if the packet should be dropped or -1 if the packet
 199 * should be passed directly through. In all other cases a leaf class is returned.
 200 * We allow direct class selection by a classid in skb->priority. Then we examine
 201 * filters in the qdisc and in inner nodes (if a higher filter points to an inner
 202 * node). If we end up with classid MAJOR:0 we enqueue the skb into the special
 203 * internal fifo (direct); these packets then go directly through. If we still
 204 * have no valid leaf we try the MAJOR:default leaf. If that is still unsuccessful,
 205 * we finish and return the direct queue.
 206 */
 207#define HTB_DIRECT ((struct htb_class *)-1L)
 208
 209static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 210                                      int *qerr)
 211{
 212        struct htb_sched *q = qdisc_priv(sch);
 213        struct htb_class *cl;
 214        struct tcf_result res;
 215        struct tcf_proto *tcf;
 216        int result;
 217
 218        /* allow class selection by setting skb->priority to a valid classid;
 219         * note that the nfmark can be used too by attaching an fw filter with no
 220         * rules in it
 221         */
 222        if (skb->priority == sch->handle)
 223                return HTB_DIRECT;      /* X:0 (direct flow) selected */
 224        cl = htb_find(skb->priority, sch);
 225        if (cl) {
 226                if (cl->level == 0)
 227                        return cl;
 228                /* Start with inner filter chain if a non-leaf class is selected */
 229                tcf = rcu_dereference_bh(cl->filter_list);
 230        } else {
 231                tcf = rcu_dereference_bh(q->filter_list);
 232        }
 233
 234        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 235        while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
 236#ifdef CONFIG_NET_CLS_ACT
 237                switch (result) {
 238                case TC_ACT_QUEUED:
 239                case TC_ACT_STOLEN:
 240                case TC_ACT_TRAP:
 241                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 242                        fallthrough;
 243                case TC_ACT_SHOT:
 244                        return NULL;
 245                }
 246#endif
 247                cl = (void *)res.class;
 248                if (!cl) {
 249                        if (res.classid == sch->handle)
 250                                return HTB_DIRECT;      /* X:0 (direct flow) */
 251                        cl = htb_find(res.classid, sch);
 252                        if (!cl)
 253                                break;  /* filter selected invalid classid */
 254                }
 255                if (!cl->level)
 256                        return cl;      /* we hit leaf; return it */
 257
 258                /* we have got inner class; apply inner filter chain */
 259                tcf = rcu_dereference_bh(cl->filter_list);
 260        }
 261        /* classification failed; try to use default class */
 262        cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
 263        if (!cl || cl->level)
 264                return HTB_DIRECT;      /* bad default .. this is safe bet */
 265        return cl;
 266}
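
/* An illustrative walk through the precedence above (the classids are
 * arbitrary examples): skb->priority == 1:0 (the qdisc handle) goes straight
 * to HTB_DIRECT; skb->priority == 1:10 where 1:10 is an existing leaf returns
 * that leaf without consulting any filter; if it names an inner class,
 * matching starts on that class's filter list, otherwise on the qdisc's own
 * filters. A filter verdict naming another inner class restarts matching on
 * that class's list, until a leaf, X:0 (direct) or a dead end (which falls
 * back to the MAJOR:default class) is reached.
 */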
 267
 268/**
 269 * htb_add_to_id_tree - adds a class to the round robin list
 270 *
 271 * The routine adds the class to the list (actually a tree) sorted by classid.
 272 * Make sure that the class is not already on such a list for the given prio.
 273 */
 274static void htb_add_to_id_tree(struct rb_root *root,
 275                               struct htb_class *cl, int prio)
 276{
 277        struct rb_node **p = &root->rb_node, *parent = NULL;
 278
 279        while (*p) {
 280                struct htb_class *c;
 281                parent = *p;
 282                c = rb_entry(parent, struct htb_class, node[prio]);
 283
 284                if (cl->common.classid > c->common.classid)
 285                        p = &parent->rb_right;
 286                else
 287                        p = &parent->rb_left;
 288        }
 289        rb_link_node(&cl->node[prio], parent, p);
 290        rb_insert_color(&cl->node[prio], root);
 291}
 292
 293/**
 294 * htb_add_to_wait_tree - adds a class to the event queue with a delay
 295 *
 296 * The class is added to the priority event queue to indicate that the class
 297 * will change its mode at time cl->pq_key (nanoseconds). Make sure that the
 298 * class is not already in the queue.
 299 */
 300static void htb_add_to_wait_tree(struct htb_sched *q,
 301                                 struct htb_class *cl, s64 delay)
 302{
 303        struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
 304
 305        cl->pq_key = q->now + delay;
 306        if (cl->pq_key == q->now)
 307                cl->pq_key++;
 308
 309        /* update the nearest event cache */
 310        if (q->near_ev_cache[cl->level] > cl->pq_key)
 311                q->near_ev_cache[cl->level] = cl->pq_key;
 312
 313        while (*p) {
 314                struct htb_class *c;
 315                parent = *p;
 316                c = rb_entry(parent, struct htb_class, pq_node);
 317                if (cl->pq_key >= c->pq_key)
 318                        p = &parent->rb_right;
 319                else
 320                        p = &parent->rb_left;
 321        }
 322        rb_link_node(&cl->pq_node, parent, p);
 323        rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
 324}
 325
 326/**
 327 * htb_next_rb_node - finds the next node in a binary tree
 328 *
 329 * When we are past the last key, *n is set to NULL.
 330 * Average complexity is 2 steps per call.
 331 */
 332static inline void htb_next_rb_node(struct rb_node **n)
 333{
 334        *n = rb_next(*n);
 335}
 336
 337/**
 338 * htb_add_class_to_row - add a class to its row
 339 *
 340 * The class is added to the row at the priorities marked in mask.
 341 * It does nothing if mask == 0.
 342 */
 343static inline void htb_add_class_to_row(struct htb_sched *q,
 344                                        struct htb_class *cl, int mask)
 345{
 346        q->row_mask[cl->level] |= mask;
 347        while (mask) {
 348                int prio = ffz(~mask);
 349                mask &= ~(1 << prio);
 350                htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
 351        }
 352}
 353
 354/* If this triggers, it is a bug in this code, but it need not be fatal */
 355static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
 356{
 357        if (RB_EMPTY_NODE(rb)) {
 358                WARN_ON(1);
 359        } else {
 360                rb_erase(rb, root);
 361                RB_CLEAR_NODE(rb);
 362        }
 363}
 364
 365
 366/**
 367 * htb_remove_class_from_row - removes a class from its row
 368 *
 369 * The class is removed from the row at the priorities marked in mask.
 370 * It does nothing if mask == 0.
 371 */
 372static inline void htb_remove_class_from_row(struct htb_sched *q,
 373                                                 struct htb_class *cl, int mask)
 374{
 375        int m = 0;
 376        struct htb_level *hlevel = &q->hlevel[cl->level];
 377
 378        while (mask) {
 379                int prio = ffz(~mask);
 380                struct htb_prio *hprio = &hlevel->hprio[prio];
 381
 382                mask &= ~(1 << prio);
 383                if (hprio->ptr == cl->node + prio)
 384                        htb_next_rb_node(&hprio->ptr);
 385
 386                htb_safe_rb_erase(cl->node + prio, &hprio->row);
 387                if (!hprio->row.rb_node)
 388                        m |= 1 << prio;
 389        }
 390        q->row_mask[cl->level] &= ~m;
 391}
 392
 393/**
 394 * htb_activate_prios - creates an active class's feed chain
 395 *
 396 * The class is connected to its ancestors and/or the appropriate rows
 397 * for the priorities it is participating in. cl->cmode must be the new
 398 * (activated) mode. It does nothing if cl->prio_activity == 0.
 399 */
 400static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 401{
 402        struct htb_class *p = cl->parent;
 403        long m, mask = cl->prio_activity;
 404
 405        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 406                m = mask;
 407                while (m) {
 408                        int prio = ffz(~m);
 409                        m &= ~(1 << prio);
 410
 411                        if (p->inner.clprio[prio].feed.rb_node)
  412                                /* parent already has its feed in use, so
  413                                 * reset the bit in mask as the parent is already ok
  414                                 */
 415                                mask &= ~(1 << prio);
 416
 417                        htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
 418                }
 419                p->prio_activity |= mask;
 420                cl = p;
 421                p = cl->parent;
 422
 423        }
 424        if (cl->cmode == HTB_CAN_SEND && mask)
 425                htb_add_class_to_row(q, cl, mask);
 426}
 427
 428/**
 429 * htb_deactivate_prios - removes a class from its feed chain
 430 *
 431 * cl->cmode must represent the old mode (before deactivation). It does
 432 * nothing if cl->prio_activity == 0. The class is removed from all feed
 433 * chains and rows.
 434 */
 435static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 436{
 437        struct htb_class *p = cl->parent;
 438        long m, mask = cl->prio_activity;
 439
 440        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 441                m = mask;
 442                mask = 0;
 443                while (m) {
 444                        int prio = ffz(~m);
 445                        m &= ~(1 << prio);
 446
 447                        if (p->inner.clprio[prio].ptr == cl->node + prio) {
  448                                /* we are removing a child which is pointed to from
  449                                 * the parent's feed - forget the pointer but remember
  450                                 * the classid
  451                                 */
 452                                p->inner.clprio[prio].last_ptr_id = cl->common.classid;
 453                                p->inner.clprio[prio].ptr = NULL;
 454                        }
 455
 456                        htb_safe_rb_erase(cl->node + prio,
 457                                          &p->inner.clprio[prio].feed);
 458
 459                        if (!p->inner.clprio[prio].feed.rb_node)
 460                                mask |= 1 << prio;
 461                }
 462
 463                p->prio_activity &= ~mask;
 464                cl = p;
 465                p = cl->parent;
 466
 467        }
 468        if (cl->cmode == HTB_CAN_SEND && mask)
 469                htb_remove_class_from_row(q, cl, mask);
 470}
 471
 472static inline s64 htb_lowater(const struct htb_class *cl)
 473{
 474        if (htb_hysteresis)
 475                return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
 476        else
 477                return 0;
 478}
 479static inline s64 htb_hiwater(const struct htb_class *cl)
 480{
 481        if (htb_hysteresis)
 482                return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
 483        else
 484                return 0;
 485}
 486
 487
 488/**
 489 * htb_class_mode - computes and returns the current class mode
 490 *
 491 * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
 492 * is not HTB_CAN_SEND then *diff is updated to the time difference
 493 * from now to the time when cl will change its state.
 494 * It is also worth noting that the class mode doesn't change simply
 495 * at cl->{c,}tokens == 0; rather, there can be hysteresis in the
 496 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 497 * mode transitions per time unit. The speed gain is about 1/6.
 498 */
 499static inline enum htb_cmode
 500htb_class_mode(struct htb_class *cl, s64 *diff)
 501{
 502        s64 toks;
 503
 504        if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
 505                *diff = -toks;
 506                return HTB_CANT_SEND;
 507        }
 508
 509        if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
 510                return HTB_CAN_SEND;
 511
 512        *diff = -toks;
 513        return HTB_MAY_BORROW;
 514}
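
/* A sketch of the hysteresis effect (the numbers are arbitrary): take a class
 * currently in HTB_CAN_SEND with cl->buffer == 20000 and cl->cbuffer == 40000.
 * With htb_hysteresis enabled, htb_hiwater() == -20000 and
 * htb_lowater() == -40000, so the class keeps HTB_CAN_SEND until
 * cl->tokens + diff drops below -20000 (rather than below 0) and only enters
 * HTB_CANT_SEND once cl->ctokens + diff drops below -40000.  On the way back
 * up both thresholds revert to 0, which is what suppresses rapid mode
 * flapping around the zero-token point.
 */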
 515
 516/**
 517 * htb_change_class_mode - changes a class's mode
 518 *
 519 * This should be the only way to change a class's mode under normal
 520 * circumstances. The routine will update the feed list linkage, change the
 521 * mode and add the class to the wait event queue if appropriate. The new mode
 522 * should be different from the old one and cl->pq_key has to be valid if
 523 * changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 524 */
 525static void
 526htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
 527{
 528        enum htb_cmode new_mode = htb_class_mode(cl, diff);
 529
 530        if (new_mode == cl->cmode)
 531                return;
 532
 533        if (new_mode == HTB_CANT_SEND) {
 534                cl->overlimits++;
 535                q->overlimits++;
 536        }
 537
 538        if (cl->prio_activity) {        /* not necessary: speed optimization */
 539                if (cl->cmode != HTB_CANT_SEND)
 540                        htb_deactivate_prios(q, cl);
 541                cl->cmode = new_mode;
 542                if (new_mode != HTB_CANT_SEND)
 543                        htb_activate_prios(q, cl);
 544        } else
 545                cl->cmode = new_mode;
 546}
 547
 548/**
 549 * htb_activate - inserts leaf cl into the appropriate active feeds
 550 *
 551 * The routine learns the (new) priority of the leaf and activates the feed
 552 * chain for that prio. It can safely be called on an already active leaf.
 553 */
 555static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 556{
 557        WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
 558
 559        if (!cl->prio_activity) {
 560                cl->prio_activity = 1 << cl->prio;
 561                htb_activate_prios(q, cl);
 562        }
 563}
 564
 565/**
 566 * htb_deactivate - removes leaf cl from the active feeds
 567 *
 568 * Make sure that the leaf is active. In other words, it can't be called
 569 * with a non-active leaf.
 570 */
 571static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 572{
 573        WARN_ON(!cl->prio_activity);
 574
 575        htb_deactivate_prios(q, cl);
 576        cl->prio_activity = 0;
 577}
 578
 579static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 580                       struct sk_buff **to_free)
 581{
 582        int ret;
 583        unsigned int len = qdisc_pkt_len(skb);
 584        struct htb_sched *q = qdisc_priv(sch);
 585        struct htb_class *cl = htb_classify(skb, sch, &ret);
 586
 587        if (cl == HTB_DIRECT) {
 588                /* enqueue to helper queue */
 589                if (q->direct_queue.qlen < q->direct_qlen) {
 590                        __qdisc_enqueue_tail(skb, &q->direct_queue);
 591                        q->direct_pkts++;
 592                } else {
 593                        return qdisc_drop(skb, sch, to_free);
 594                }
 595#ifdef CONFIG_NET_CLS_ACT
 596        } else if (!cl) {
 597                if (ret & __NET_XMIT_BYPASS)
 598                        qdisc_qstats_drop(sch);
 599                __qdisc_drop(skb, to_free);
 600                return ret;
 601#endif
 602        } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
 603                                        to_free)) != NET_XMIT_SUCCESS) {
 604                if (net_xmit_drop_count(ret)) {
 605                        qdisc_qstats_drop(sch);
 606                        cl->drops++;
 607                }
 608                return ret;
 609        } else {
 610                htb_activate(q, cl);
 611        }
 612
 613        sch->qstats.backlog += len;
 614        sch->q.qlen++;
 615        return NET_XMIT_SUCCESS;
 616}
 617
 618static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
 619{
 620        s64 toks = diff + cl->tokens;
 621
 622        if (toks > cl->buffer)
 623                toks = cl->buffer;
 624        toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
 625        if (toks <= -cl->mbuffer)
 626                toks = 1 - cl->mbuffer;
 627
 628        cl->tokens = toks;
 629}
 630
 631static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
 632{
 633        s64 toks = diff + cl->ctokens;
 634
 635        if (toks > cl->cbuffer)
 636                toks = cl->cbuffer;
 637        toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
 638        if (toks <= -cl->mbuffer)
 639                toks = 1 - cl->mbuffer;
 640
 641        cl->ctokens = toks;
 642}
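
/* A small worked example of the two helpers above (the numbers are
 * arbitrary): with diff == 1000000 ns elapsed, cl->tokens == -200000 and
 * cl->buffer == 500000, toks starts at 800000, is capped at the 500000
 * buffer, and then the transmission time of the packet at cl->rate
 * (psched_l2t_ns()) is subtracted; the result is never allowed to fall
 * below 1 - cl->mbuffer.
 */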
 643
 644/**
 645 * htb_charge_class - charges the amount "bytes" to a leaf and its ancestors
 646 *
 647 * The routine assumes that a packet "bytes" long was dequeued from leaf cl
 648 * borrowing from "level". It accounts bytes to the ceil leaky bucket for the
 649 * leaf and all ancestors and to the rate bucket for ancestors at levels
 650 * "level" and higher. It also handles a possible change of mode resulting
 651 * from the update. Note that the mode can also increase here (MAY_BORROW to
 652 * CAN_SEND) because we use a more precise clock than the event queue here.
 653 * In such a case we remove the class from the event queue first.
 654 */
 655static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 656                             int level, struct sk_buff *skb)
 657{
 658        int bytes = qdisc_pkt_len(skb);
 659        enum htb_cmode old_mode;
 660        s64 diff;
 661
 662        while (cl) {
 663                diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
 664                if (cl->level >= level) {
 665                        if (cl->level == level)
 666                                cl->xstats.lends++;
 667                        htb_accnt_tokens(cl, bytes, diff);
 668                } else {
 669                        cl->xstats.borrows++;
 670                        cl->tokens += diff;     /* we moved t_c; update tokens */
 671                }
 672                htb_accnt_ctokens(cl, bytes, diff);
 673                cl->t_c = q->now;
 674
 675                old_mode = cl->cmode;
 676                diff = 0;
 677                htb_change_class_mode(q, cl, &diff);
 678                if (old_mode != cl->cmode) {
 679                        if (old_mode != HTB_CAN_SEND)
 680                                htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
 681                        if (cl->cmode != HTB_CAN_SEND)
 682                                htb_add_to_wait_tree(q, cl, diff);
 683                }
 684
 685                /* update basic stats except for leaves which are already updated */
 686                if (cl->level)
 687                        bstats_update(&cl->bstats, skb);
 688
 689                cl = cl->parent;
 690        }
 691}
 692
 693/**
 694 * htb_do_events - make mode changes to classes at the given level
 695 *
 696 * Scans the event queue for pending events and applies them. Returns the time
 697 * of the next pending event (0 if there is no event in the pq, q->now if there
 698 * are too many events). Note: only events with cl->pq_key <= q->now are applied.
 699 */
 700static s64 htb_do_events(struct htb_sched *q, const int level,
 701                         unsigned long start)
 702{
 703        /* don't run for longer than 2 jiffies; 2 is used instead of
 704         * 1 to simplify things when jiffy is going to be incremented
 705         * too soon
 706         */
 707        unsigned long stop_at = start + 2;
 708        struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
 709
 710        while (time_before(jiffies, stop_at)) {
 711                struct htb_class *cl;
 712                s64 diff;
 713                struct rb_node *p = rb_first(wait_pq);
 714
 715                if (!p)
 716                        return 0;
 717
 718                cl = rb_entry(p, struct htb_class, pq_node);
 719                if (cl->pq_key > q->now)
 720                        return cl->pq_key;
 721
 722                htb_safe_rb_erase(p, wait_pq);
 723                diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
 724                htb_change_class_mode(q, cl, &diff);
 725                if (cl->cmode != HTB_CAN_SEND)
 726                        htb_add_to_wait_tree(q, cl, diff);
 727        }
 728
 729        /* too much load - let's continue after a break for scheduling */
 730        if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
 731                pr_warn("htb: too many events!\n");
 732                q->warned |= HTB_WARN_TOOMANYEVENTS;
 733        }
 734
 735        return q->now;
 736}
 737
 738/* Returns class->node+prio from the id-tree where the class's id is >= id.
 739 * NULL if no such one exists.
 740 */
 741static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 742                                              u32 id)
 743{
 744        struct rb_node *r = NULL;
 745        while (n) {
 746                struct htb_class *cl =
 747                    rb_entry(n, struct htb_class, node[prio]);
 748
 749                if (id > cl->common.classid) {
 750                        n = n->rb_right;
 751                } else if (id < cl->common.classid) {
 752                        r = n;
 753                        n = n->rb_left;
 754                } else {
 755                        return n;
 756                }
 757        }
 758        return r;
 759}
 760
 761/**
 762 * htb_lookup_leaf - returns the next leaf class in DRR order
 763 *
 764 * Find the leaf that the current feed pointer points to.
 765 */
 766static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
 767{
 768        int i;
 769        struct {
 770                struct rb_node *root;
 771                struct rb_node **pptr;
 772                u32 *pid;
 773        } stk[TC_HTB_MAXDEPTH], *sp = stk;
 774
 775        BUG_ON(!hprio->row.rb_node);
 776        sp->root = hprio->row.rb_node;
 777        sp->pptr = &hprio->ptr;
 778        sp->pid = &hprio->last_ptr_id;
 779
 780        for (i = 0; i < 65535; i++) {
 781                if (!*sp->pptr && *sp->pid) {
 782                        /* ptr was invalidated but id is valid - try to recover
 783                         * the original or next ptr
 784                         */
 785                        *sp->pptr =
 786                            htb_id_find_next_upper(prio, sp->root, *sp->pid);
 787                }
 788                *sp->pid = 0;   /* ptr is valid now, so remove this hint as it
 789                                 * can become out of date quickly
 790                                 */
 791                if (!*sp->pptr) {       /* we are at right end; rewind & go up */
 792                        *sp->pptr = sp->root;
 793                        while ((*sp->pptr)->rb_left)
 794                                *sp->pptr = (*sp->pptr)->rb_left;
 795                        if (sp > stk) {
 796                                sp--;
 797                                if (!*sp->pptr) {
 798                                        WARN_ON(1);
 799                                        return NULL;
 800                                }
 801                                htb_next_rb_node(sp->pptr);
 802                        }
 803                } else {
 804                        struct htb_class *cl;
 805                        struct htb_prio *clp;
 806
 807                        cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
 808                        if (!cl->level)
 809                                return cl;
 810                        clp = &cl->inner.clprio[prio];
 811                        (++sp)->root = clp->feed.rb_node;
 812                        sp->pptr = &clp->ptr;
 813                        sp->pid = &clp->last_ptr_id;
 814                }
 815        }
 816        WARN_ON(1);
 817        return NULL;
 818}
 819
 820/* dequeues a packet at the given priority and level; call only if
 821 * you are sure that there is an active class at prio/level
 822 */
 823static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
 824                                        const int level)
 825{
 826        struct sk_buff *skb = NULL;
 827        struct htb_class *cl, *start;
 828        struct htb_level *hlevel = &q->hlevel[level];
 829        struct htb_prio *hprio = &hlevel->hprio[prio];
 830
 831        /* look initial class up in the row */
 832        start = cl = htb_lookup_leaf(hprio, prio);
 833
 834        do {
 835next:
 836                if (unlikely(!cl))
 837                        return NULL;
 838
 839                /* the class can be empty - it is unlikely but can be true if the
 840                 * leaf qdisc drops packets in its enqueue routine or if someone
 841                 * used a graft operation on the leaf since the last dequeue;
 842                 * simply deactivate and skip such a class
 843                 */
 844                if (unlikely(cl->leaf.q->q.qlen == 0)) {
 845                        struct htb_class *next;
 846                        htb_deactivate(q, cl);
 847
 848                        /* row/level might become empty */
 849                        if ((q->row_mask[level] & (1 << prio)) == 0)
 850                                return NULL;
 851
 852                        next = htb_lookup_leaf(hprio, prio);
 853
 854                        if (cl == start)        /* fix start if we just deleted it */
 855                                start = next;
 856                        cl = next;
 857                        goto next;
 858                }
 859
 860                skb = cl->leaf.q->dequeue(cl->leaf.q);
 861                if (likely(skb != NULL))
 862                        break;
 863
 864                qdisc_warn_nonwc("htb", cl->leaf.q);
 865                htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
 866                                         &q->hlevel[0].hprio[prio].ptr);
 867                cl = htb_lookup_leaf(hprio, prio);
 868
 869        } while (cl != start);
 870
 871        if (likely(skb != NULL)) {
 872                bstats_update(&cl->bstats, skb);
 873                cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
 874                if (cl->leaf.deficit[level] < 0) {
 875                        cl->leaf.deficit[level] += cl->quantum;
 876                        htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
 877                                                 &q->hlevel[0].hprio[prio].ptr);
 878                }
 879                /* this used to be after charge_class but this constellation
 880                 * gives us slightly better performance
 881                 */
 882                if (!cl->leaf.q->q.qlen)
 883                        htb_deactivate(q, cl);
 884                htb_charge_class(q, cl, level, skb);
 885        }
 886        return skb;
 887}
 888
 889static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 890{
 891        struct sk_buff *skb;
 892        struct htb_sched *q = qdisc_priv(sch);
 893        int level;
 894        s64 next_event;
 895        unsigned long start_at;
 896
 897        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
 898        skb = __qdisc_dequeue_head(&q->direct_queue);
 899        if (skb != NULL) {
 900ok:
 901                qdisc_bstats_update(sch, skb);
 902                qdisc_qstats_backlog_dec(sch, skb);
 903                sch->q.qlen--;
 904                return skb;
 905        }
 906
 907        if (!sch->q.qlen)
 908                goto fin;
 909        q->now = ktime_get_ns();
 910        start_at = jiffies;
 911
 912        next_event = q->now + 5LLU * NSEC_PER_SEC;
 913
 914        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 915                /* common case optimization - skip event handler quickly */
 916                int m;
 917                s64 event = q->near_ev_cache[level];
 918
 919                if (q->now >= event) {
 920                        event = htb_do_events(q, level, start_at);
 921                        if (!event)
 922                                event = q->now + NSEC_PER_SEC;
 923                        q->near_ev_cache[level] = event;
 924                }
 925
 926                if (next_event > event)
 927                        next_event = event;
 928
 929                m = ~q->row_mask[level];
 930                while (m != (int)(-1)) {
 931                        int prio = ffz(m);
 932
 933                        m |= 1 << prio;
 934                        skb = htb_dequeue_tree(q, prio, level);
 935                        if (likely(skb != NULL))
 936                                goto ok;
 937                }
 938        }
 939        if (likely(next_event > q->now))
 940                qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
 941        else
 942                schedule_work(&q->work);
 943fin:
 944        return skb;
 945}
 946
 947/* reset all classes */
 948/* always called under BH & the queue lock */
 949static void htb_reset(struct Qdisc *sch)
 950{
 951        struct htb_sched *q = qdisc_priv(sch);
 952        struct htb_class *cl;
 953        unsigned int i;
 954
 955        for (i = 0; i < q->clhash.hashsize; i++) {
 956                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
 957                        if (cl->level)
 958                                memset(&cl->inner, 0, sizeof(cl->inner));
 959                        else {
 960                                if (cl->leaf.q)
 961                                        qdisc_reset(cl->leaf.q);
 962                        }
 963                        cl->prio_activity = 0;
 964                        cl->cmode = HTB_CAN_SEND;
 965                }
 966        }
 967        qdisc_watchdog_cancel(&q->watchdog);
 968        __qdisc_reset_queue(&q->direct_queue);
 969        sch->q.qlen = 0;
 970        sch->qstats.backlog = 0;
 971        memset(q->hlevel, 0, sizeof(q->hlevel));
 972        memset(q->row_mask, 0, sizeof(q->row_mask));
 973}
 974
 975static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
 976        [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
 977        [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
 978        [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 979        [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 980        [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
 981        [TCA_HTB_RATE64] = { .type = NLA_U64 },
 982        [TCA_HTB_CEIL64] = { .type = NLA_U64 },
 983};
 984
 985static void htb_work_func(struct work_struct *work)
 986{
 987        struct htb_sched *q = container_of(work, struct htb_sched, work);
 988        struct Qdisc *sch = q->watchdog.qdisc;
 989
 990        rcu_read_lock();
 991        __netif_schedule(qdisc_root(sch));
 992        rcu_read_unlock();
 993}
 994
 995static int htb_init(struct Qdisc *sch, struct nlattr *opt,
 996                    struct netlink_ext_ack *extack)
 997{
 998        struct htb_sched *q = qdisc_priv(sch);
 999        struct nlattr *tb[TCA_HTB_MAX + 1];
1000        struct tc_htb_glob *gopt;
1001        int err;
1002
1003        qdisc_watchdog_init(&q->watchdog, sch);
1004        INIT_WORK(&q->work, htb_work_func);
1005
1006        if (!opt)
1007                return -EINVAL;
1008
1009        err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1010        if (err)
1011                return err;
1012
1013        err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1014                                          NULL);
1015        if (err < 0)
1016                return err;
1017
1018        if (!tb[TCA_HTB_INIT])
1019                return -EINVAL;
1020
1021        gopt = nla_data(tb[TCA_HTB_INIT]);
1022        if (gopt->version != HTB_VER >> 16)
1023                return -EINVAL;
1024
1025        err = qdisc_class_hash_init(&q->clhash);
1026        if (err < 0)
1027                return err;
1028
1029        qdisc_skb_head_init(&q->direct_queue);
1030
1031        if (tb[TCA_HTB_DIRECT_QLEN])
1032                q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1033        else
1034                q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1035
1036        if ((q->rate2quantum = gopt->rate2quantum) < 1)
1037                q->rate2quantum = 1;
1038        q->defcls = gopt->defcls;
1039
1040        return 0;
1041}
1042
1043static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1044{
1045        struct htb_sched *q = qdisc_priv(sch);
1046        struct nlattr *nest;
1047        struct tc_htb_glob gopt;
1048
1049        sch->qstats.overlimits = q->overlimits;
1050        /* It's safe to not acquire the qdisc lock. As we hold RTNL,
1051         * no change can happen to the qdisc parameters.
1052         */
1053
1054        gopt.direct_pkts = q->direct_pkts;
1055        gopt.version = HTB_VER;
1056        gopt.rate2quantum = q->rate2quantum;
1057        gopt.defcls = q->defcls;
1058        gopt.debug = 0;
1059
1060        nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1061        if (nest == NULL)
1062                goto nla_put_failure;
1063        if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1064            nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1065                goto nla_put_failure;
1066
1067        return nla_nest_end(skb, nest);
1068
1069nla_put_failure:
1070        nla_nest_cancel(skb, nest);
1071        return -1;
1072}
1073
1074static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1075                          struct sk_buff *skb, struct tcmsg *tcm)
1076{
1077        struct htb_class *cl = (struct htb_class *)arg;
1078        struct nlattr *nest;
1079        struct tc_htb_opt opt;
1080
1081        /* It's safe to not acquire the qdisc lock. As we hold RTNL,
1082         * no change can happen to the class parameters.
1083         */
1084        tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1085        tcm->tcm_handle = cl->common.classid;
1086        if (!cl->level && cl->leaf.q)
1087                tcm->tcm_info = cl->leaf.q->handle;
1088
1089        nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1090        if (nest == NULL)
1091                goto nla_put_failure;
1092
1093        memset(&opt, 0, sizeof(opt));
1094
1095        psched_ratecfg_getrate(&opt.rate, &cl->rate);
1096        opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1097        psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1098        opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1099        opt.quantum = cl->quantum;
1100        opt.prio = cl->prio;
1101        opt.level = cl->level;
1102        if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1103                goto nla_put_failure;
1104        if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1105            nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1106                              TCA_HTB_PAD))
1107                goto nla_put_failure;
1108        if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1109            nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1110                              TCA_HTB_PAD))
1111                goto nla_put_failure;
1112
1113        return nla_nest_end(skb, nest);
1114
1115nla_put_failure:
1116        nla_nest_cancel(skb, nest);
1117        return -1;
1118}
1119
1120static int
1121htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1122{
1123        struct htb_class *cl = (struct htb_class *)arg;
1124        struct gnet_stats_queue qs = {
1125                .drops = cl->drops,
1126                .overlimits = cl->overlimits,
1127        };
1128        __u32 qlen = 0;
1129
1130        if (!cl->level && cl->leaf.q)
1131                qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1132
1133        cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1134                                    INT_MIN, INT_MAX);
1135        cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1136                                     INT_MIN, INT_MAX);
1137
1138        if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1139                                  d, NULL, &cl->bstats) < 0 ||
1140            gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1141            gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1142                return -1;
1143
1144        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1145}
1146
1147static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1148                     struct Qdisc **old, struct netlink_ext_ack *extack)
1149{
1150        struct htb_class *cl = (struct htb_class *)arg;
1151
1152        if (cl->level)
1153                return -EINVAL;
1154        if (new == NULL &&
1155            (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1156                                     cl->common.classid, extack)) == NULL)
1157                return -ENOBUFS;
1158
1159        *old = qdisc_replace(sch, new, &cl->leaf.q);
1160        return 0;
1161}
1162
1163static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1164{
1165        struct htb_class *cl = (struct htb_class *)arg;
1166        return !cl->level ? cl->leaf.q : NULL;
1167}
1168
1169static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1170{
1171        struct htb_class *cl = (struct htb_class *)arg;
1172
1173        htb_deactivate(qdisc_priv(sch), cl);
1174}
1175
1176static inline int htb_parent_last_child(struct htb_class *cl)
1177{
1178        if (!cl->parent)
1179                /* the root class */
1180                return 0;
1181        if (cl->parent->children > 1)
1182                /* not the last child */
1183                return 0;
1184        return 1;
1185}
1186
1187static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1188                               struct Qdisc *new_q)
1189{
1190        struct htb_class *parent = cl->parent;
1191
1192        WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1193
1194        if (parent->cmode != HTB_CAN_SEND)
1195                htb_safe_rb_erase(&parent->pq_node,
1196                                  &q->hlevel[parent->level].wait_pq);
1197
1198        parent->level = 0;
1199        memset(&parent->inner, 0, sizeof(parent->inner));
1200        parent->leaf.q = new_q ? new_q : &noop_qdisc;
1201        parent->tokens = parent->buffer;
1202        parent->ctokens = parent->cbuffer;
1203        parent->t_c = ktime_get_ns();
1204        parent->cmode = HTB_CAN_SEND;
1205}
1206
1207static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1208{
1209        if (!cl->level) {
1210                WARN_ON(!cl->leaf.q);
1211                qdisc_put(cl->leaf.q);
1212        }
1213        gen_kill_estimator(&cl->rate_est);
1214        tcf_block_put(cl->block);
1215        kfree(cl);
1216}
1217
1218static void htb_destroy(struct Qdisc *sch)
1219{
1220        struct htb_sched *q = qdisc_priv(sch);
1221        struct hlist_node *next;
1222        struct htb_class *cl;
1223        unsigned int i;
1224
1225        cancel_work_sync(&q->work);
1226        qdisc_watchdog_cancel(&q->watchdog);
1227        /* This line used to be after the htb_destroy_class call below
1228         * and surprisingly it worked in 2.4. But it must precede it
1229         * because filters need their target class alive to be able to call
1230         * unbind_filter on it (without an Oops).
1231         */
1232        tcf_block_put(q->block);
1233
1234        for (i = 0; i < q->clhash.hashsize; i++) {
1235                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1236                        tcf_block_put(cl->block);
1237                        cl->block = NULL;
1238                }
1239        }
1240        for (i = 0; i < q->clhash.hashsize; i++) {
1241                hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1242                                          common.hnode)
1243                        htb_destroy_class(sch, cl);
1244        }
1245        qdisc_class_hash_destroy(&q->clhash);
1246        __qdisc_reset_queue(&q->direct_queue);
1247}
1248
1249static int htb_delete(struct Qdisc *sch, unsigned long arg)
1250{
1251        struct htb_sched *q = qdisc_priv(sch);
1252        struct htb_class *cl = (struct htb_class *)arg;
1253        struct Qdisc *new_q = NULL;
1254        int last_child = 0;
1255
1256        /* TODO: why don't we allow deleting a subtree? References? Does the
1257         * tc subsystem guarantee that in htb_destroy it holds no class
1258         * refs, so that we can remove children safely there?
1259         */
1260        if (cl->children || cl->filter_cnt)
1261                return -EBUSY;
1262
1263        if (!cl->level && htb_parent_last_child(cl)) {
1264                new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1265                                          cl->parent->common.classid,
1266                                          NULL);
1267                last_child = 1;
1268        }
1269
1270        sch_tree_lock(sch);
1271
1272        if (!cl->level)
1273                qdisc_purge_queue(cl->leaf.q);
1274
1275        /* delete from hash and active; remainder in destroy_class */
1276        qdisc_class_hash_remove(&q->clhash, &cl->common);
1277        if (cl->parent)
1278                cl->parent->children--;
1279
1280        if (cl->prio_activity)
1281                htb_deactivate(q, cl);
1282
1283        if (cl->cmode != HTB_CAN_SEND)
1284                htb_safe_rb_erase(&cl->pq_node,
1285                                  &q->hlevel[cl->level].wait_pq);
1286
1287        if (last_child)
1288                htb_parent_to_leaf(q, cl, new_q);
1289
1290        sch_tree_unlock(sch);
1291
1292        htb_destroy_class(sch, cl);
1293        return 0;
1294}
1295
1296static int htb_change_class(struct Qdisc *sch, u32 classid,
1297                            u32 parentid, struct nlattr **tca,
1298                            unsigned long *arg, struct netlink_ext_ack *extack)
1299{
1300        int err = -EINVAL;
1301        struct htb_sched *q = qdisc_priv(sch);
1302        struct htb_class *cl = (struct htb_class *)*arg, *parent;
1303        struct nlattr *opt = tca[TCA_OPTIONS];
1304        struct nlattr *tb[TCA_HTB_MAX + 1];
1305        struct Qdisc *parent_qdisc = NULL;
1306        struct tc_htb_opt *hopt;
1307        u64 rate64, ceil64;
1308        int warn = 0;
1309
1310        /* extract all subattrs from opt attr */
1311        if (!opt)
1312                goto failure;
1313
1314        err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1315                                          NULL);
1316        if (err < 0)
1317                goto failure;
1318
1319        err = -EINVAL;
1320        if (tb[TCA_HTB_PARMS] == NULL)
1321                goto failure;
1322
1323        parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1324
1325        hopt = nla_data(tb[TCA_HTB_PARMS]);
1326        if (!hopt->rate.rate || !hopt->ceil.rate)
1327                goto failure;
1328
1329        /* Keep backward compatibility with rate_table-based iproute2 tc */
1330        if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1331                qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1332                                              NULL));
1333
1334        if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1335                qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1336                                              NULL));
1337
1338        if (!cl) {              /* new class */
1339                struct Qdisc *new_q;
1340                int prio;
1341                struct {
1342                        struct nlattr           nla;
1343                        struct gnet_estimator   opt;
1344                } est = {
1345                        .nla = {
1346                                .nla_len        = nla_attr_size(sizeof(est.opt)),
1347                                .nla_type       = TCA_RATE,
1348                        },
1349                        .opt = {
1350                                /* 4s interval, 16s averaging constant */
1351                                .interval       = 2,
1352                                .ewma_log       = 2,
1353                        },
1354                };
1355
1356                /* check for valid classid */
1357                if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1358                    htb_find(classid, sch))
1359                        goto failure;
1360
1361                /* check maximal depth */
1362                if (parent && parent->parent && parent->parent->level < 2) {
1363                        pr_err("htb: tree is too deep\n");
1364                        goto failure;
1365                }
1366                err = -ENOBUFS;
1367                cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1368                if (!cl)
1369                        goto failure;
1370
1371                err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1372                if (err) {
1373                        kfree(cl);
1374                        goto failure;
1375                }
1376                if (htb_rate_est || tca[TCA_RATE]) {
1377                        err = gen_new_estimator(&cl->bstats, NULL,
1378                                                &cl->rate_est,
1379                                                NULL,
1380                                                qdisc_root_sleeping_running(sch),
1381                                                tca[TCA_RATE] ? : &est.nla);
1382                        if (err) {
1383                                tcf_block_put(cl->block);
1384                                kfree(cl);
1385                                goto failure;
1386                        }
1387                }
1388
1389                cl->children = 0;
1390                RB_CLEAR_NODE(&cl->pq_node);
1391
1392                for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1393                        RB_CLEAR_NODE(&cl->node[prio]);
1394
1395                /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1396                 * which can't be used inside of sch_tree_lock
1397                 * -- thanks to Karlis Peisenieks
1398                 */
1399                new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1400                                          classid, NULL);
1401                sch_tree_lock(sch);
1402                if (parent && !parent->level) {
1403                        /* turn parent into inner node */
1404                        qdisc_purge_queue(parent->leaf.q);
1405                        parent_qdisc = parent->leaf.q;
1406                        if (parent->prio_activity)
1407                                htb_deactivate(q, parent);
1408
1409                        /* remove from evt list because of level change */
1410                        if (parent->cmode != HTB_CAN_SEND) {
1411                                htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1412                                parent->cmode = HTB_CAN_SEND;
1413                        }
1414                        parent->level = (parent->parent ? parent->parent->level
1415                                         : TC_HTB_MAXDEPTH) - 1;
1416                        memset(&parent->inner, 0, sizeof(parent->inner));
1417                }
1418                /* leaf (we) needs elementary qdisc */
1419                cl->leaf.q = new_q ? new_q : &noop_qdisc;
1420
1421                cl->common.classid = classid;
1422                cl->parent = parent;
1423
1424                /* set class to be in HTB_CAN_SEND state */
1425                cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1426                cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1427                cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1428                cl->t_c = ktime_get_ns();
1429                cl->cmode = HTB_CAN_SEND;
1430
1431                /* attach to the hash list and parent's family */
1432                qdisc_class_hash_insert(&q->clhash, &cl->common);
1433                if (parent)
1434                        parent->children++;
1435                if (cl->leaf.q != &noop_qdisc)
1436                        qdisc_hash_add(cl->leaf.q, true);
1437        } else {
1438                if (tca[TCA_RATE]) {
1439                        err = gen_replace_estimator(&cl->bstats, NULL,
1440                                                    &cl->rate_est,
1441                                                    NULL,
1442                                                    qdisc_root_sleeping_running(sch),
1443                                                    tca[TCA_RATE]);
1444                        if (err)
1445                                return err;
1446                }
1447                sch_tree_lock(sch);
1448        }
1449
1450        rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1451
1452        ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1453
1454        psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
1455        psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
1456
1457        /* there used to be a nasty bug here: we have to check that the node
1458         * is really a leaf before changing cl->leaf !
1459         */
1460        if (!cl->level) {
1461                u64 quantum = cl->rate.rate_bytes_ps;
1462
1463                do_div(quantum, q->rate2quantum);
1464                cl->quantum = min_t(u64, quantum, INT_MAX);
1465
1466                if (!hopt->quantum && cl->quantum < 1000) {
1467                        warn = -1;
1468                        cl->quantum = 1000;
1469                }
1470                if (!hopt->quantum && cl->quantum > 200000) {
1471                        warn = 1;
1472                        cl->quantum = 200000;
1473                }
1474                if (hopt->quantum)
1475                        cl->quantum = hopt->quantum;
1476                if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1477                        cl->prio = TC_HTB_NUMPRIO - 1;
1478        }
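        /* A sketch of the quantum derivation above: with tc's usual default
         * r2q of 10 (not part of this file), a 100 Mbit/s class
         * (12,500,000 bytes/s) would get a raw quantum of 1,250,000, which is
         * clamped to 200000 with a warning; an explicit hopt->quantum
         * overrides the derived value entirely.
         */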
1479
1480        cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
1481        cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
1482
1483        sch_tree_unlock(sch);
1484        qdisc_put(parent_qdisc);
1485
1486        if (warn)
1487                pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
1488                            cl->common.classid, (warn == -1 ? "small" : "big"));
1489
1490        qdisc_class_hash_grow(sch, &q->clhash);
1491
1492        *arg = (unsigned long)cl;
1493        return 0;
1494
1495failure:
1496        return err;
1497}
1498
1499static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
1500                                       struct netlink_ext_ack *extack)
1501{
1502        struct htb_sched *q = qdisc_priv(sch);
1503        struct htb_class *cl = (struct htb_class *)arg;
1504
1505        return cl ? cl->block : q->block;
1506}
1507
1508static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1509                                     u32 classid)
1510{
1511        struct htb_class *cl = htb_find(classid, sch);
1512
1513        /*if (cl && !cl->level) return 0;
1514         * The line above used to be there to prevent attaching filters to
1515         * leaves. But at least the tc_index filter uses this just to get the
1516         * class for other reasons, so we have to allow it.
1517         * ----
1518         * 19.6.2002 As Werner explained it is ok - bind filter is just
1519         * another way to "lock" the class - unlike "get", this lock can
1520         * be broken by the class during destroy, IIUC.
1521         */
1522        if (cl)
1523                cl->filter_cnt++;
1524        return (unsigned long)cl;
1525}
1526
1527static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1528{
1529        struct htb_class *cl = (struct htb_class *)arg;
1530
1531        if (cl)
1532                cl->filter_cnt--;
1533}
1534
1535static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1536{
1537        struct htb_sched *q = qdisc_priv(sch);
1538        struct htb_class *cl;
1539        unsigned int i;
1540
1541        if (arg->stop)
1542                return;
1543
1544        for (i = 0; i < q->clhash.hashsize; i++) {
1545                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1546                        if (arg->count < arg->skip) {
1547                                arg->count++;
1548                                continue;
1549                        }
1550                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1551                                arg->stop = 1;
1552                                return;
1553                        }
1554                        arg->count++;
1555                }
1556        }
1557}
1558
1559static const struct Qdisc_class_ops htb_class_ops = {
1560        .graft          =       htb_graft,
1561        .leaf           =       htb_leaf,
1562        .qlen_notify    =       htb_qlen_notify,
1563        .find           =       htb_search,
1564        .change         =       htb_change_class,
1565        .delete         =       htb_delete,
1566        .walk           =       htb_walk,
1567        .tcf_block      =       htb_tcf_block,
1568        .bind_tcf       =       htb_bind_filter,
1569        .unbind_tcf     =       htb_unbind_filter,
1570        .dump           =       htb_dump_class,
1571        .dump_stats     =       htb_dump_class_stats,
1572};
1573
1574static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1575        .cl_ops         =       &htb_class_ops,
1576        .id             =       "htb",
1577        .priv_size      =       sizeof(struct htb_sched),
1578        .enqueue        =       htb_enqueue,
1579        .dequeue        =       htb_dequeue,
1580        .peek           =       qdisc_peek_dequeued,
1581        .init           =       htb_init,
1582        .reset          =       htb_reset,
1583        .destroy        =       htb_destroy,
1584        .dump           =       htb_dump,
1585        .owner          =       THIS_MODULE,
1586};
1587
1588static int __init htb_module_init(void)
1589{
1590        return register_qdisc(&htb_qdisc_ops);
1591}
1592static void __exit htb_module_exit(void)
1593{
1594        unregister_qdisc(&htb_qdisc_ops);
1595}
1596
1597module_init(htb_module_init)
1598module_exit(htb_module_exit)
1599MODULE_LICENSE("GPL");
1600