linux/net/sched/sch_htb.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
   4 *
   5 * Authors:     Martin Devera, <devik@cdi.cz>
   6 *
   7 * Credits (in time order) for older HTB versions:
   8 *              Stef Coene <stef.coene@docum.org>
   9 *                      HTB support at LARTC mailing list
  10 *              Ondrej Kraus, <krauso@barr.cz>
  11 *                      found missing INIT_QDISC(htb)
  12 *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
  13 *                      helped a lot to locate nasty class stall bug
  14 *              Andi Kleen, Jamal Hadi, Bert Hubert
  15 *                      code review and helpful comments on shaping
  16 *              Tomasz Wrona, <tw@eter.tym.pl>
  17 *                      created test case so that I was able to fix nasty bug
  18 *              Wilfried Weissmann
  19 *                      spotted bug in dequeue code and helped with fix
  20 *              Jiri Fojtasek
  21 *                      fixed requeue routine
  22 *              and many others. thanks.
  23 */
  24#include <linux/module.h>
  25#include <linux/moduleparam.h>
  26#include <linux/types.h>
  27#include <linux/kernel.h>
  28#include <linux/string.h>
  29#include <linux/errno.h>
  30#include <linux/skbuff.h>
  31#include <linux/list.h>
  32#include <linux/compiler.h>
  33#include <linux/rbtree.h>
  34#include <linux/workqueue.h>
  35#include <linux/slab.h>
  36#include <net/netlink.h>
  37#include <net/sch_generic.h>
  38#include <net/pkt_sched.h>
  39#include <net/pkt_cls.h>
  40
  41/* HTB algorithm.
  42    Author: devik@cdi.cz
  43    ========================================================================
  44    HTB is like TBF with multiple classes. It is also similar to CBQ because
  45    it allows assigning a priority to each class in the hierarchy.
  46    In fact it is another implementation of Floyd's formal sharing.
  47
  48    Levels:
  49    Each class is assigned a level. A leaf ALWAYS has level 0 and root
  50    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
  51    one less than their parent.
  52*/
  53
  54static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
  55#define HTB_VER 0x30011         /* major must match the version number supplied by TC */
  56
  57#if HTB_VER >> 16 != TC_HTB_PROTOVER
  58#error "Mismatched sch_htb.c and pkt_sch.h"
  59#endif
  60
  61/* Module parameter and sysfs export */
  62module_param    (htb_hysteresis, int, 0640);
  63MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
  64
  65static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
  66module_param(htb_rate_est, int, 0640);
  67MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
  68
  69/* used internally to keep the status of a single class */
  70enum htb_cmode {
  71        HTB_CANT_SEND,          /* class can't send and can't borrow */
  72        HTB_MAY_BORROW,         /* class can't send but may borrow */
  73        HTB_CAN_SEND            /* class can send */
  74};
  75
  76struct htb_prio {
  77        union {
  78                struct rb_root  row;
  79                struct rb_root  feed;
  80        };
  81        struct rb_node  *ptr;
  82        /* When a class changes from state 1->2 and disconnects from
  83         * its parent's feed, we lose the ptr value and start from the
  84         * first child again. Here we store the classid of the
  85         * last valid ptr (used when ptr is NULL).
  86         */
  87        u32             last_ptr_id;
  88};
  89
  90/* interior & leaf nodes; props specific to leaves are marked L:
  91 * To reduce false sharing, place mostly read fields at beginning,
  92 * and mostly written ones at the end.
  93 */
  94struct htb_class {
  95        struct Qdisc_class_common common;
  96        struct psched_ratecfg   rate;
  97        struct psched_ratecfg   ceil;
  98        s64                     buffer, cbuffer;/* token bucket depth/rate */
  99        s64                     mbuffer;        /* max wait time */
 100        u32                     prio;           /* these two are used only by leaves... */
 101        int                     quantum;        /* but stored for parent-to-leaf return */
 102
 103        struct tcf_proto __rcu  *filter_list;   /* class attached filters */
 104        struct tcf_block        *block;
 105        int                     filter_cnt;
 106
 107        int                     level;          /* our level (see above) */
 108        unsigned int            children;
 109        struct htb_class        *parent;        /* parent class */
 110
 111        struct net_rate_estimator __rcu *rate_est;
 112
 113        /*
 114         * Frequently written fields
 115         */
 116        struct gnet_stats_basic_packed bstats;
 117        struct gnet_stats_basic_packed bstats_bias;
 118        struct tc_htb_xstats    xstats; /* our special stats */
 119
 120        /* token bucket parameters */
 121        s64                     tokens, ctokens;/* current number of tokens */
 122        s64                     t_c;            /* checkpoint time */
 123
 124        union {
 125                struct htb_class_leaf {
 126                        int             deficit[TC_HTB_MAXDEPTH];
 127                        struct Qdisc    *q;
 128                        struct netdev_queue *offload_queue;
 129                } leaf;
 130                struct htb_class_inner {
 131                        struct htb_prio clprio[TC_HTB_NUMPRIO];
 132                } inner;
 133        };
 134        s64                     pq_key;
 135
 136        int                     prio_activity;  /* for which prios are we active */
 137        enum htb_cmode          cmode;          /* current mode of the class */
 138        struct rb_node          pq_node;        /* node for event queue */
 139        struct rb_node          node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
 140
 141        unsigned int drops ____cacheline_aligned_in_smp;
 142        unsigned int            overlimits;
 143};
 144
 145struct htb_level {
 146        struct rb_root  wait_pq;
 147        struct htb_prio hprio[TC_HTB_NUMPRIO];
 148};
 149
 150struct htb_sched {
 151        struct Qdisc_class_hash clhash;
 152        int                     defcls;         /* class where unclassified flows go to */
 153        int                     rate2quantum;   /* quant = rate / rate2quantum */
 154
 155        /* filters for qdisc itself */
 156        struct tcf_proto __rcu  *filter_list;
 157        struct tcf_block        *block;
 158
 159#define HTB_WARN_TOOMANYEVENTS  0x1
 160        unsigned int            warned; /* only one warning */
 161        int                     direct_qlen;
 162        struct work_struct      work;
 163
 164        /* non-shaped skbs; let them go directly through */
 165        struct qdisc_skb_head   direct_queue;
 166        u32                     direct_pkts;
 167        u32                     overlimits;
 168
 169        struct qdisc_watchdog   watchdog;
 170
 171        s64                     now;    /* cached dequeue time */
 172
 173        /* time of nearest event per level (row) */
 174        s64                     near_ev_cache[TC_HTB_MAXDEPTH];
 175
 176        int                     row_mask[TC_HTB_MAXDEPTH];
 177
 178        struct htb_level        hlevel[TC_HTB_MAXDEPTH];
 179
 180        struct Qdisc            **direct_qdiscs;
 181        unsigned int            num_direct_qdiscs;
 182
 183        bool                    offload;
 184};
 185
 186/* find class in global hash table using given handle */
 187static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 188{
 189        struct htb_sched *q = qdisc_priv(sch);
 190        struct Qdisc_class_common *clc;
 191
 192        clc = qdisc_class_find(&q->clhash, handle);
 193        if (clc == NULL)
 194                return NULL;
 195        return container_of(clc, struct htb_class, common);
 196}
 197
 198static unsigned long htb_search(struct Qdisc *sch, u32 handle)
 199{
 200        return (unsigned long)htb_find(handle, sch);
 201}
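
/*
 * Illustrative sketch (not part of the original file): class handles are
 * 32-bit "major:minor" values, so a lookup by minor number on this qdisc is
 * built with TC_H_MAKE()/TC_H_MAJ(), exactly as the default-class lookup in
 * htb_classify() below does. The htb_example_* name is hypothetical.
 */
static inline struct htb_class *htb_example_find_minor(struct Qdisc *sch,
							u32 minor)
{
	return htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), minor), sch);
}
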
 202/**
 203 * htb_classify - classify a packet into a class
 204 *
 205 * It returns NULL if the packet should be dropped or -1 (HTB_DIRECT) if it
 206 * should be passed directly through. In all other cases a leaf class is
 207 * returned. We allow direct class selection by classid in skb->priority. Then
 208 * we examine filters in the qdisc and in inner nodes (if a higher filter
 209 * points to an inner node). If we end up with classid MAJOR:0 we enqueue the
 210 * skb into the special internal fifo (direct). These packets then go directly
 211 * through. If we still have no valid leaf we try to use the MAJOR:default
 212 * leaf. If that is still unsuccessful, we finish and return the direct queue.
 213 */
 214#define HTB_DIRECT ((struct htb_class *)-1L)
 215
 216static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 217                                      int *qerr)
 218{
 219        struct htb_sched *q = qdisc_priv(sch);
 220        struct htb_class *cl;
 221        struct tcf_result res;
 222        struct tcf_proto *tcf;
 223        int result;
 224
 225        /* allow selecting the class by setting skb->priority to a valid classid;
 226         * note that nfmark can be used too by attaching filter fw with no
 227         * rules in it
 228         */
 229        if (skb->priority == sch->handle)
 230                return HTB_DIRECT;      /* X:0 (direct flow) selected */
 231        cl = htb_find(skb->priority, sch);
 232        if (cl) {
 233                if (cl->level == 0)
 234                        return cl;
 235                /* Start with inner filter chain if a non-leaf class is selected */
 236                tcf = rcu_dereference_bh(cl->filter_list);
 237        } else {
 238                tcf = rcu_dereference_bh(q->filter_list);
 239        }
 240
 241        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 242        while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
 243#ifdef CONFIG_NET_CLS_ACT
 244                switch (result) {
 245                case TC_ACT_QUEUED:
 246                case TC_ACT_STOLEN:
 247                case TC_ACT_TRAP:
 248                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 249                        fallthrough;
 250                case TC_ACT_SHOT:
 251                        return NULL;
 252                }
 253#endif
 254                cl = (void *)res.class;
 255                if (!cl) {
 256                        if (res.classid == sch->handle)
 257                                return HTB_DIRECT;      /* X:0 (direct flow) */
 258                        cl = htb_find(res.classid, sch);
 259                        if (!cl)
 260                                break;  /* filter selected invalid classid */
 261                }
 262                if (!cl->level)
 263                        return cl;      /* we hit leaf; return it */
 264
 265                /* we have got inner class; apply inner filter chain */
 266                tcf = rcu_dereference_bh(cl->filter_list);
 267        }
 268        /* classification failed; try to use default class */
 269        cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
 270        if (!cl || cl->level)
 271                return HTB_DIRECT;      /* bad default .. this is a safe bet */
 272        return cl;
 273}
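
/*
 * Illustrative sketch (not part of the original file): how a caller is
 * expected to interpret the three possible results of htb_classify(); the
 * real caller is htb_enqueue() below. The htb_example_* name is hypothetical.
 */
static inline const char *htb_example_classify_outcome(const struct htb_class *cl)
{
	if (!cl)
		return "drop";		/* consumed or shot by an action */
	if (cl == HTB_DIRECT)
		return "direct";	/* X:0 or unusable default: bypass shaping */
	return "leaf";			/* enqueue to cl->leaf.q and activate the class */
}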
 274
 275/**
 276 * htb_add_to_id_tree - adds class to the round robin list
 277 * @root: the root of the tree
 278 * @cl: the class to add
 279 * @prio: the given prio in class
 280 *
 281 * Routine adds the class to the list (actually a tree) sorted by classid.
 282 * Make sure that the class is not already on such a list for the given prio.
 283 */
 284static void htb_add_to_id_tree(struct rb_root *root,
 285                               struct htb_class *cl, int prio)
 286{
 287        struct rb_node **p = &root->rb_node, *parent = NULL;
 288
 289        while (*p) {
 290                struct htb_class *c;
 291                parent = *p;
 292                c = rb_entry(parent, struct htb_class, node[prio]);
 293
 294                if (cl->common.classid > c->common.classid)
 295                        p = &parent->rb_right;
 296                else
 297                        p = &parent->rb_left;
 298        }
 299        rb_link_node(&cl->node[prio], parent, p);
 300        rb_insert_color(&cl->node[prio], root);
 301}
 302
 303/**
 304 * htb_add_to_wait_tree - adds class to the event queue with delay
 305 * @q: the priority event queue
 306 * @cl: the class to add
 307 * @delay: delay in nanoseconds
 308 *
 309 * The class is added to the priority event queue to indicate that the class
 310 * will change its mode at time cl->pq_key (in nanoseconds). Make sure that
 311 * the class is not already in the queue.
 312 */
 313static void htb_add_to_wait_tree(struct htb_sched *q,
 314                                 struct htb_class *cl, s64 delay)
 315{
 316        struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
 317
 318        cl->pq_key = q->now + delay;
 319        if (cl->pq_key == q->now)
 320                cl->pq_key++;
 321
 322        /* update the nearest event cache */
 323        if (q->near_ev_cache[cl->level] > cl->pq_key)
 324                q->near_ev_cache[cl->level] = cl->pq_key;
 325
 326        while (*p) {
 327                struct htb_class *c;
 328                parent = *p;
 329                c = rb_entry(parent, struct htb_class, pq_node);
 330                if (cl->pq_key >= c->pq_key)
 331                        p = &parent->rb_right;
 332                else
 333                        p = &parent->rb_left;
 334        }
 335        rb_link_node(&cl->pq_node, parent, p);
 336        rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
 337}
 338
 339/**
 340 * htb_next_rb_node - finds next node in binary tree
 341 * @n: the current node in binary tree
 342 *
 343 * When we are past the last key, *n is set to NULL.
 344 * Average complexity is 2 steps per call.
 345 */
 346static inline void htb_next_rb_node(struct rb_node **n)
 347{
 348        *n = rb_next(*n);
 349}
 350
 351/**
 352 * htb_add_class_to_row - add class to its row
 353 * @q: the priority event queue
 354 * @cl: the class to add
 355 * @mask: the priorities of the class, as a bitmap
 356 *
 357 * The class is added to the row at the priorities marked in mask.
 358 * It does nothing if mask == 0.
 359 */
 360static inline void htb_add_class_to_row(struct htb_sched *q,
 361                                        struct htb_class *cl, int mask)
 362{
 363        q->row_mask[cl->level] |= mask;
 364        while (mask) {
 365                int prio = ffz(~mask);
 366                mask &= ~(1 << prio);
 367                htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
 368        }
 369}
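
/*
 * Illustrative sketch (not part of the original file): the
 * "prio = ffz(~mask); mask &= ~(1 << prio);" idiom used above (and in several
 * places below) simply walks the set bits of a priority bitmap from the
 * lowest to the highest bit. The htb_example_* name is hypothetical.
 */
static inline int htb_example_count_prios(int mask)
{
	int n = 0;

	while (mask) {
		int prio = ffz(~mask);	/* index of the lowest set bit */

		mask &= ~(1 << prio);	/* clear it and account for it */
		n++;
	}
	return n;
}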
 370
 371/* If this triggers, it is a bug in this code, but it need not be fatal */
 372static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
 373{
 374        if (RB_EMPTY_NODE(rb)) {
 375                WARN_ON(1);
 376        } else {
 377                rb_erase(rb, root);
 378                RB_CLEAR_NODE(rb);
 379        }
 380}
 381
 382
 383/**
 384 * htb_remove_class_from_row - removes class from its row
 385 * @q: the priority event queue
 386 * @cl: the class to remove
 387 * @mask: the priorities of the class, as a bitmap
 388 *
 389 * The class is removed from the row at the priorities marked in mask.
 390 * It does nothing if mask == 0.
 391 */
 392static inline void htb_remove_class_from_row(struct htb_sched *q,
 393                                                 struct htb_class *cl, int mask)
 394{
 395        int m = 0;
 396        struct htb_level *hlevel = &q->hlevel[cl->level];
 397
 398        while (mask) {
 399                int prio = ffz(~mask);
 400                struct htb_prio *hprio = &hlevel->hprio[prio];
 401
 402                mask &= ~(1 << prio);
 403                if (hprio->ptr == cl->node + prio)
 404                        htb_next_rb_node(&hprio->ptr);
 405
 406                htb_safe_rb_erase(cl->node + prio, &hprio->row);
 407                if (!hprio->row.rb_node)
 408                        m |= 1 << prio;
 409        }
 410        q->row_mask[cl->level] &= ~m;
 411}
 412
 413/**
 414 * htb_activate_prios - creates the active class's feed chain
 415 * @q: the priority event queue
 416 * @cl: the class to activate
 417 *
 418 * The class is connected to its ancestors and/or the appropriate rows
 419 * for the priorities it participates in. cl->cmode must be the new
 420 * (activated) mode. It does nothing if cl->prio_activity == 0.
 421 */
 422static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 423{
 424        struct htb_class *p = cl->parent;
 425        long m, mask = cl->prio_activity;
 426
 427        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 428                m = mask;
 429                while (m) {
 430                        int prio = ffz(~m);
 431                        m &= ~(1 << prio);
 432
 433                        if (p->inner.clprio[prio].feed.rb_node)
 434                                /* parent already has its feed in use, so
 435                                 * reset the bit in mask as parent is already ok
 436                                 */
 437                                mask &= ~(1 << prio);
 438
 439                        htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
 440                }
 441                p->prio_activity |= mask;
 442                cl = p;
 443                p = cl->parent;
 444
 445        }
 446        if (cl->cmode == HTB_CAN_SEND && mask)
 447                htb_add_class_to_row(q, cl, mask);
 448}
 449
 450/**
 451 * htb_deactivate_prios - remove class from feed chain
 452 * @q: the priority event queue
 453 * @cl: the class to deactivate
 454 *
 455 * cl->cmode must represent the old mode (before deactivation). It does
 456 * nothing if cl->prio_activity == 0. The class is removed from all feed
 457 * chains and rows.
 458 */
 459static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 460{
 461        struct htb_class *p = cl->parent;
 462        long m, mask = cl->prio_activity;
 463
 464        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 465                m = mask;
 466                mask = 0;
 467                while (m) {
 468                        int prio = ffz(~m);
 469                        m &= ~(1 << prio);
 470
 471                        if (p->inner.clprio[prio].ptr == cl->node + prio) {
 472                                /* we are removing child which is pointed to from
 473                                 * parent feed - forget the pointer but remember
 474                                 * classid
 475                                 */
 476                                p->inner.clprio[prio].last_ptr_id = cl->common.classid;
 477                                p->inner.clprio[prio].ptr = NULL;
 478                        }
 479
 480                        htb_safe_rb_erase(cl->node + prio,
 481                                          &p->inner.clprio[prio].feed);
 482
 483                        if (!p->inner.clprio[prio].feed.rb_node)
 484                                mask |= 1 << prio;
 485                }
 486
 487                p->prio_activity &= ~mask;
 488                cl = p;
 489                p = cl->parent;
 490
 491        }
 492        if (cl->cmode == HTB_CAN_SEND && mask)
 493                htb_remove_class_from_row(q, cl, mask);
 494}
 495
 496static inline s64 htb_lowater(const struct htb_class *cl)
 497{
 498        if (htb_hysteresis)
 499                return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
 500        else
 501                return 0;
 502}
 503static inline s64 htb_hiwater(const struct htb_class *cl)
 504{
 505        if (htb_hysteresis)
 506                return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
 507        else
 508                return 0;
 509}
 510
 511
 512/**
 513 * htb_class_mode - computes and returns current class mode
 514 * @cl: the target class
 515 * @diff: diff time in nanoseconds
 516 *
 517 * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
 518 * is not HTB_CAN_SEND then *diff is updated to the time difference from
 519 * now to the time when cl will change its state (later used to set
 520 * cl->pq_key). Also it is worth noting that the class mode doesn't change
 521 * simply when cl->{c,}tokens == 0; rather, there can be hysteresis over
 522 * the 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 523 * mode transitions per time unit. The speed gain is about 1/6.
 524 */
 525static inline enum htb_cmode
 526htb_class_mode(struct htb_class *cl, s64 *diff)
 527{
 528        s64 toks;
 529
 530        if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
 531                *diff = -toks;
 532                return HTB_CANT_SEND;
 533        }
 534
 535        if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
 536                return HTB_CAN_SEND;
 537
 538        *diff = -toks;
 539        return HTB_MAY_BORROW;
 540}
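
/*
 * Illustrative sketch (not part of the original file): with htb_hysteresis
 * enabled, a class that is currently in HTB_CAN_SEND mode keeps that mode
 * while tokens + diff stays above -cl->buffer instead of above 0, which is
 * what suppresses rapid mode flapping. The hypothetical htb_example_* helper
 * below is true exactly for the token values where hysteresis makes the
 * difference, assuming ctokens is not the limiting factor.
 */
static inline bool htb_example_hysteresis_saves(const struct htb_class *cl,
						s64 diff)
{
	s64 toks = cl->tokens + diff;

	/* inside the 0 .. -buffer band: MAY_BORROW without hysteresis,
	 * still CAN_SEND with it (for a class currently in CAN_SEND mode)
	 */
	return toks < 0 && toks >= -cl->buffer;
}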
 541
 542/**
 543 * htb_change_class_mode - changes the class's mode
 544 * @q: the priority event queue
 545 * @cl: the target class
 546 * @diff: diff time in nanoseconds
 547 *
 548 * This should be the only way to change a class's mode under normal
 549 * circumstances. The routine will update the feed lists' linkage, change the
 550 * mode and add the class to the wait event queue if appropriate. The new mode
 551 * should be different from the old one and cl->pq_key has to be valid if
 552 * changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 553 */
 554static void
 555htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
 556{
 557        enum htb_cmode new_mode = htb_class_mode(cl, diff);
 558
 559        if (new_mode == cl->cmode)
 560                return;
 561
 562        if (new_mode == HTB_CANT_SEND) {
 563                cl->overlimits++;
 564                q->overlimits++;
 565        }
 566
 567        if (cl->prio_activity) {        /* not necessary: speed optimization */
 568                if (cl->cmode != HTB_CANT_SEND)
 569                        htb_deactivate_prios(q, cl);
 570                cl->cmode = new_mode;
 571                if (new_mode != HTB_CANT_SEND)
 572                        htb_activate_prios(q, cl);
 573        } else
 574                cl->cmode = new_mode;
 575}
 576
 577/**
 578 * htb_activate - inserts leaf cl into appropriate active feeds
 579 * @q: the priority event queue
 580 * @cl: the target class
 581 *
 582 * Routine learns the (new) priority of the leaf and activates the feed
 583 * chain for that prio. It can safely be called on an already active
 584 * leaf.
 585 */
 586static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 587{
 588        WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
 589
 590        if (!cl->prio_activity) {
 591                cl->prio_activity = 1 << cl->prio;
 592                htb_activate_prios(q, cl);
 593        }
 594}
 595
 596/**
 597 * htb_deactivate - remove leaf cl from active feeds
 598 * @q: the priority event queue
 599 * @cl: the target class
 600 *
 601 * Make sure that the leaf is active. In other words, it can't be
 602 * called with a non-active leaf.
 603 */
 604static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 605{
 606        WARN_ON(!cl->prio_activity);
 607
 608        htb_deactivate_prios(q, cl);
 609        cl->prio_activity = 0;
 610}
 611
 612static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 613                       struct sk_buff **to_free)
 614{
 615        int ret;
 616        unsigned int len = qdisc_pkt_len(skb);
 617        struct htb_sched *q = qdisc_priv(sch);
 618        struct htb_class *cl = htb_classify(skb, sch, &ret);
 619
 620        if (cl == HTB_DIRECT) {
 621                /* enqueue to helper queue */
 622                if (q->direct_queue.qlen < q->direct_qlen) {
 623                        __qdisc_enqueue_tail(skb, &q->direct_queue);
 624                        q->direct_pkts++;
 625                } else {
 626                        return qdisc_drop(skb, sch, to_free);
 627                }
 628#ifdef CONFIG_NET_CLS_ACT
 629        } else if (!cl) {
 630                if (ret & __NET_XMIT_BYPASS)
 631                        qdisc_qstats_drop(sch);
 632                __qdisc_drop(skb, to_free);
 633                return ret;
 634#endif
 635        } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
 636                                        to_free)) != NET_XMIT_SUCCESS) {
 637                if (net_xmit_drop_count(ret)) {
 638                        qdisc_qstats_drop(sch);
 639                        cl->drops++;
 640                }
 641                return ret;
 642        } else {
 643                htb_activate(q, cl);
 644        }
 645
 646        sch->qstats.backlog += len;
 647        sch->q.qlen++;
 648        return NET_XMIT_SUCCESS;
 649}
 650
 651static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
 652{
 653        s64 toks = diff + cl->tokens;
 654
 655        if (toks > cl->buffer)
 656                toks = cl->buffer;
 657        toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
 658        if (toks <= -cl->mbuffer)
 659                toks = 1 - cl->mbuffer;
 660
 661        cl->tokens = toks;
 662}
 663
 664static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
 665{
 666        s64 toks = diff + cl->ctokens;
 667
 668        if (toks > cl->cbuffer)
 669                toks = cl->cbuffer;
 670        toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
 671        if (toks <= -cl->mbuffer)
 672                toks = 1 - cl->mbuffer;
 673
 674        cl->ctokens = toks;
 675}
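
/*
 * Illustrative sketch (not part of the original file): tokens here are kept
 * in nanoseconds of transmission time, and psched_l2t_ns() converts a packet
 * length into that unit using a precomputed mult/shift pair. A plain,
 * division-based equivalent looks like the helper below (hypothetical
 * htb_example_* name; assumes <linux/math64.h> is reachable through the
 * includes above, as it is in current trees).
 */
static inline s64 htb_example_len_to_ns(u64 rate_bytes_per_sec,
					unsigned int len)
{
	if (!rate_bytes_per_sec)
		return 0;
	return div64_u64((u64)len * NSEC_PER_SEC, rate_bytes_per_sec);
}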
 676
 677/**
 678 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 679 * @q: the priority event queue
 680 * @cl: the class to start iterating from
 681 * @level: the minimum level to account
 682 * @skb: the socket buffer
 683 *
 684 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 685 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 686 * leaf and all ancestors and to rate bucket for ancestors at levels
 687 * "level" and higher. It also handles possible change of mode resulting
 688 * from the update. Note that the mode can also increase here (MAY_BORROW to
 689 * CAN_SEND) because we can use a more precise clock than the event queue here.
 690 * In such a case we remove the class from the event queue first.
 691 */
 692static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 693                             int level, struct sk_buff *skb)
 694{
 695        int bytes = qdisc_pkt_len(skb);
 696        enum htb_cmode old_mode;
 697        s64 diff;
 698
 699        while (cl) {
 700                diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
 701                if (cl->level >= level) {
 702                        if (cl->level == level)
 703                                cl->xstats.lends++;
 704                        htb_accnt_tokens(cl, bytes, diff);
 705                } else {
 706                        cl->xstats.borrows++;
 707                        cl->tokens += diff;     /* we moved t_c; update tokens */
 708                }
 709                htb_accnt_ctokens(cl, bytes, diff);
 710                cl->t_c = q->now;
 711
 712                old_mode = cl->cmode;
 713                diff = 0;
 714                htb_change_class_mode(q, cl, &diff);
 715                if (old_mode != cl->cmode) {
 716                        if (old_mode != HTB_CAN_SEND)
 717                                htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
 718                        if (cl->cmode != HTB_CAN_SEND)
 719                                htb_add_to_wait_tree(q, cl, diff);
 720                }
 721
 722                /* update basic stats except for leaves which are already updated */
 723                if (cl->level)
 724                        bstats_update(&cl->bstats, skb);
 725
 726                cl = cl->parent;
 727        }
 728}
 729
 730/**
 731 * htb_do_events - make mode changes to classes at the level
 732 * @q: the priority event queue
 733 * @level: which wait_pq in 'q->hlevel'
 734 * @start: start jiffies
 735 *
 736 * Scans event queue for pending events and applies them. Returns time of
 737 * next pending event (0 for no event in pq, q->now for too many events).
 738 * Note: Only events with cl->pq_key <= q->now are applied.
 739 */
 740static s64 htb_do_events(struct htb_sched *q, const int level,
 741                         unsigned long start)
 742{
 743        /* don't run for longer than 2 jiffies; 2 is used instead of
 744         * 1 to simplify things when jiffy is going to be incremented
 745         * too soon
 746         */
 747        unsigned long stop_at = start + 2;
 748        struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
 749
 750        while (time_before(jiffies, stop_at)) {
 751                struct htb_class *cl;
 752                s64 diff;
 753                struct rb_node *p = rb_first(wait_pq);
 754
 755                if (!p)
 756                        return 0;
 757
 758                cl = rb_entry(p, struct htb_class, pq_node);
 759                if (cl->pq_key > q->now)
 760                        return cl->pq_key;
 761
 762                htb_safe_rb_erase(p, wait_pq);
 763                diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
 764                htb_change_class_mode(q, cl, &diff);
 765                if (cl->cmode != HTB_CAN_SEND)
 766                        htb_add_to_wait_tree(q, cl, diff);
 767        }
 768
 769        /* too much load - let's continue after a break for scheduling */
 770        if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
 771                pr_warn("htb: too many events!\n");
 772                q->warned |= HTB_WARN_TOOMANYEVENTS;
 773        }
 774
 775        return q->now;
 776}
 777
 778/* Returns class->node+prio from the id-tree where the class's id is >= id,
 779 * or NULL if no such class exists.
 780 */
 781static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 782                                              u32 id)
 783{
 784        struct rb_node *r = NULL;
 785        while (n) {
 786                struct htb_class *cl =
 787                    rb_entry(n, struct htb_class, node[prio]);
 788
 789                if (id > cl->common.classid) {
 790                        n = n->rb_right;
 791                } else if (id < cl->common.classid) {
 792                        r = n;
 793                        n = n->rb_left;
 794                } else {
 795                        return n;
 796                }
 797        }
 798        return r;
 799}
 800
 801/**
 802 * htb_lookup_leaf - returns next leaf class in DRR order
 803 * @hprio: the htb_prio (row) to start the search from
 804 * @prio: which prio in class
 805 *
 806 * Find the leaf that the current feed pointer points to.
 807 */
 808static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
 809{
 810        int i;
 811        struct {
 812                struct rb_node *root;
 813                struct rb_node **pptr;
 814                u32 *pid;
 815        } stk[TC_HTB_MAXDEPTH], *sp = stk;
 816
 817        BUG_ON(!hprio->row.rb_node);
 818        sp->root = hprio->row.rb_node;
 819        sp->pptr = &hprio->ptr;
 820        sp->pid = &hprio->last_ptr_id;
 821
 822        for (i = 0; i < 65535; i++) {
 823                if (!*sp->pptr && *sp->pid) {
 824                        /* ptr was invalidated but id is valid - try to recover
 825                         * the original or next ptr
 826                         */
 827                        *sp->pptr =
 828                            htb_id_find_next_upper(prio, sp->root, *sp->pid);
 829                }
 830                *sp->pid = 0;   /* ptr is valid now, so remove this hint as it
 831                                 * can become out of date quickly
 832                                 */
 833                if (!*sp->pptr) {       /* we are at right end; rewind & go up */
 834                        *sp->pptr = sp->root;
 835                        while ((*sp->pptr)->rb_left)
 836                                *sp->pptr = (*sp->pptr)->rb_left;
 837                        if (sp > stk) {
 838                                sp--;
 839                                if (!*sp->pptr) {
 840                                        WARN_ON(1);
 841                                        return NULL;
 842                                }
 843                                htb_next_rb_node(sp->pptr);
 844                        }
 845                } else {
 846                        struct htb_class *cl;
 847                        struct htb_prio *clp;
 848
 849                        cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
 850                        if (!cl->level)
 851                                return cl;
 852                        clp = &cl->inner.clprio[prio];
 853                        (++sp)->root = clp->feed.rb_node;
 854                        sp->pptr = &clp->ptr;
 855                        sp->pid = &clp->last_ptr_id;
 856                }
 857        }
 858        WARN_ON(1);
 859        return NULL;
 860}
 861
 862/* dequeues a packet at the given priority and level; call only if
 863 * you are sure that there is an active class at prio/level
 864 */
 865static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
 866                                        const int level)
 867{
 868        struct sk_buff *skb = NULL;
 869        struct htb_class *cl, *start;
 870        struct htb_level *hlevel = &q->hlevel[level];
 871        struct htb_prio *hprio = &hlevel->hprio[prio];
 872
 873        /* look up the initial class in the row */
 874        start = cl = htb_lookup_leaf(hprio, prio);
 875
 876        do {
 877next:
 878                if (unlikely(!cl))
 879                        return NULL;
 880
 881                /* class can be empty - it is unlikely but can be true if leaf
 882                 * qdisc drops packets in enqueue routine or if someone used
 883                 * graft operation on the leaf since last dequeue;
 884                 * simply deactivate and skip such class
 885                 */
 886                if (unlikely(cl->leaf.q->q.qlen == 0)) {
 887                        struct htb_class *next;
 888                        htb_deactivate(q, cl);
 889
 890                        /* row/level might become empty */
 891                        if ((q->row_mask[level] & (1 << prio)) == 0)
 892                                return NULL;
 893
 894                        next = htb_lookup_leaf(hprio, prio);
 895
 896                        if (cl == start)        /* fix start if we just deleted it */
 897                                start = next;
 898                        cl = next;
 899                        goto next;
 900                }
 901
 902                skb = cl->leaf.q->dequeue(cl->leaf.q);
 903                if (likely(skb != NULL))
 904                        break;
 905
 906                qdisc_warn_nonwc("htb", cl->leaf.q);
 907                htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
 908                                         &q->hlevel[0].hprio[prio].ptr);
 909                cl = htb_lookup_leaf(hprio, prio);
 910
 911        } while (cl != start);
 912
 913        if (likely(skb != NULL)) {
 914                bstats_update(&cl->bstats, skb);
 915                cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
 916                if (cl->leaf.deficit[level] < 0) {
 917                        cl->leaf.deficit[level] += cl->quantum;
 918                        htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
 919                                                 &q->hlevel[0].hprio[prio].ptr);
 920                }
 921                /* this used to be after charge_class but this constellation
 922                 * gives us slightly better performance
 923                 */
 924                if (!cl->leaf.q->q.qlen)
 925                        htb_deactivate(q, cl);
 926                htb_charge_class(q, cl, level, skb);
 927        }
 928        return skb;
 929}
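
/*
 * Illustrative sketch (not part of the original file): the deficit round
 * robin step used above, in isolation. The dequeued packet is paid for from
 * the per-level deficit; once the deficit goes negative the class receives a
 * fresh quantum and the round robin pointer is advanced to the next class.
 * The htb_example_* name is hypothetical; it returns true when the caller
 * should advance the pointer.
 */
static inline bool htb_example_drr_step(int *deficit, int quantum, int pkt_len)
{
	*deficit -= pkt_len;
	if (*deficit < 0) {
		*deficit += quantum;
		return true;	/* this class has used up its turn */
	}
	return false;
}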
 930
 931static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 932{
 933        struct sk_buff *skb;
 934        struct htb_sched *q = qdisc_priv(sch);
 935        int level;
 936        s64 next_event;
 937        unsigned long start_at;
 938
 939        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
 940        skb = __qdisc_dequeue_head(&q->direct_queue);
 941        if (skb != NULL) {
 942ok:
 943                qdisc_bstats_update(sch, skb);
 944                qdisc_qstats_backlog_dec(sch, skb);
 945                sch->q.qlen--;
 946                return skb;
 947        }
 948
 949        if (!sch->q.qlen)
 950                goto fin;
 951        q->now = ktime_get_ns();
 952        start_at = jiffies;
 953
 954        next_event = q->now + 5LLU * NSEC_PER_SEC;
 955
 956        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 957                /* common case optimization - skip event handler quickly */
 958                int m;
 959                s64 event = q->near_ev_cache[level];
 960
 961                if (q->now >= event) {
 962                        event = htb_do_events(q, level, start_at);
 963                        if (!event)
 964                                event = q->now + NSEC_PER_SEC;
 965                        q->near_ev_cache[level] = event;
 966                }
 967
 968                if (next_event > event)
 969                        next_event = event;
 970
 971                m = ~q->row_mask[level];
 972                while (m != (int)(-1)) {
 973                        int prio = ffz(m);
 974
 975                        m |= 1 << prio;
 976                        skb = htb_dequeue_tree(q, prio, level);
 977                        if (likely(skb != NULL))
 978                                goto ok;
 979                }
 980        }
 981        if (likely(next_event > q->now))
 982                qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
 983        else
 984                schedule_work(&q->work);
 985fin:
 986        return skb;
 987}
 988
 989/* reset all classes */
 990/* always called under BH & queue lock */
 991static void htb_reset(struct Qdisc *sch)
 992{
 993        struct htb_sched *q = qdisc_priv(sch);
 994        struct htb_class *cl;
 995        unsigned int i;
 996
 997        for (i = 0; i < q->clhash.hashsize; i++) {
 998                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
 999                        if (cl->level)
1000                                memset(&cl->inner, 0, sizeof(cl->inner));
1001                        else {
1002                                if (cl->leaf.q && !q->offload)
1003                                        qdisc_reset(cl->leaf.q);
1004                        }
1005                        cl->prio_activity = 0;
1006                        cl->cmode = HTB_CAN_SEND;
1007                }
1008        }
1009        qdisc_watchdog_cancel(&q->watchdog);
1010        __qdisc_reset_queue(&q->direct_queue);
1011        sch->q.qlen = 0;
1012        sch->qstats.backlog = 0;
1013        memset(q->hlevel, 0, sizeof(q->hlevel));
1014        memset(q->row_mask, 0, sizeof(q->row_mask));
1015}
1016
1017static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1018        [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1019        [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
1020        [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1021        [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1022        [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1023        [TCA_HTB_RATE64] = { .type = NLA_U64 },
1024        [TCA_HTB_CEIL64] = { .type = NLA_U64 },
1025        [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1026};
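
/*
 * Illustrative sketch (not part of the original file): TCA_HTB_RATE64 and
 * TCA_HTB_CEIL64 exist because the rate fields inside struct tc_htb_opt
 * (struct tc_ratespec) are only 32 bits wide. The dump path below
 * (htb_dump_class) emits the 64-bit attribute only when the configured rate
 * does not fit; the hypothetical htb_example_* helper spells out that check.
 */
static inline bool htb_example_needs_rate64(u64 rate_bytes_per_sec)
{
	return rate_bytes_per_sec >= (1ULL << 32);
}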
1027
1028static void htb_work_func(struct work_struct *work)
1029{
1030        struct htb_sched *q = container_of(work, struct htb_sched, work);
1031        struct Qdisc *sch = q->watchdog.qdisc;
1032
1033        rcu_read_lock();
1034        __netif_schedule(qdisc_root(sch));
1035        rcu_read_unlock();
1036}
1037
1038static void htb_set_lockdep_class_child(struct Qdisc *q)
1039{
1040        static struct lock_class_key child_key;
1041
1042        lockdep_set_class(qdisc_lock(q), &child_key);
1043}
1044
1045static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1046{
1047        return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1048}
1049
1050static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1051                    struct netlink_ext_ack *extack)
1052{
1053        struct net_device *dev = qdisc_dev(sch);
1054        struct tc_htb_qopt_offload offload_opt;
1055        struct htb_sched *q = qdisc_priv(sch);
1056        struct nlattr *tb[TCA_HTB_MAX + 1];
1057        struct tc_htb_glob *gopt;
1058        unsigned int ntx;
1059        bool offload;
1060        int err;
1061
1062        qdisc_watchdog_init(&q->watchdog, sch);
1063        INIT_WORK(&q->work, htb_work_func);
1064
1065        if (!opt)
1066                return -EINVAL;
1067
1068        err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1069        if (err)
1070                return err;
1071
1072        err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1073                                          NULL);
1074        if (err < 0)
1075                return err;
1076
1077        if (!tb[TCA_HTB_INIT])
1078                return -EINVAL;
1079
1080        gopt = nla_data(tb[TCA_HTB_INIT]);
1081        if (gopt->version != HTB_VER >> 16)
1082                return -EINVAL;
1083
1084        offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1085
1086        if (offload) {
1087                if (sch->parent != TC_H_ROOT)
1088                        return -EOPNOTSUPP;
1089
1090                if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1091                        return -EOPNOTSUPP;
1092
1093                q->num_direct_qdiscs = dev->real_num_tx_queues;
1094                q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1095                                           sizeof(*q->direct_qdiscs),
1096                                           GFP_KERNEL);
1097                if (!q->direct_qdiscs)
1098                        return -ENOMEM;
1099        }
1100
1101        err = qdisc_class_hash_init(&q->clhash);
1102        if (err < 0)
1103                goto err_free_direct_qdiscs;
1104
1105        qdisc_skb_head_init(&q->direct_queue);
1106
1107        if (tb[TCA_HTB_DIRECT_QLEN])
1108                q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1109        else
1110                q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1111
1112        if ((q->rate2quantum = gopt->rate2quantum) < 1)
1113                q->rate2quantum = 1;
1114        q->defcls = gopt->defcls;
1115
1116        if (!offload)
1117                return 0;
1118
1119        for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1120                struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1121                struct Qdisc *qdisc;
1122
1123                qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1124                                          TC_H_MAKE(sch->handle, 0), extack);
1125                if (!qdisc) {
1126                        err = -ENOMEM;
1127                        goto err_free_qdiscs;
1128                }
1129
1130                htb_set_lockdep_class_child(qdisc);
1131                q->direct_qdiscs[ntx] = qdisc;
1132                qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1133        }
1134
1135        sch->flags |= TCQ_F_MQROOT;
1136
1137        offload_opt = (struct tc_htb_qopt_offload) {
1138                .command = TC_HTB_CREATE,
1139                .parent_classid = TC_H_MAJ(sch->handle) >> 16,
1140                .classid = TC_H_MIN(q->defcls),
1141                .extack = extack,
1142        };
1143        err = htb_offload(dev, &offload_opt);
1144        if (err)
1145                goto err_free_qdiscs;
1146
1147        /* Defer this assignment, so that htb_destroy skips offload-related
1148         * parts (especially calling ndo_setup_tc) on errors.
1149         */
1150        q->offload = true;
1151
1152        return 0;
1153
1154err_free_qdiscs:
1155        for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
1156             ntx++)
1157                qdisc_put(q->direct_qdiscs[ntx]);
1158
1159        qdisc_class_hash_destroy(&q->clhash);
1160        /* Prevent use-after-free and double-free when htb_destroy gets called.
1161         */
1162        q->clhash.hash = NULL;
1163        q->clhash.hashsize = 0;
1164
1165err_free_direct_qdiscs:
1166        kfree(q->direct_qdiscs);
1167        q->direct_qdiscs = NULL;
1168        return err;
1169}
1170
1171static void htb_attach_offload(struct Qdisc *sch)
1172{
1173        struct net_device *dev = qdisc_dev(sch);
1174        struct htb_sched *q = qdisc_priv(sch);
1175        unsigned int ntx;
1176
1177        for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1178                struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1179
1180                old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1181                qdisc_put(old);
1182                qdisc_hash_add(qdisc, false);
1183        }
1184        for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1185                struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1186                struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1187
1188                qdisc_put(old);
1189        }
1190
1191        kfree(q->direct_qdiscs);
1192        q->direct_qdiscs = NULL;
1193}
1194
1195static void htb_attach_software(struct Qdisc *sch)
1196{
1197        struct net_device *dev = qdisc_dev(sch);
1198        unsigned int ntx;
1199
1200        /* Resemble qdisc_graft behavior. */
1201        for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1202                struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1203                struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1204
1205                qdisc_refcount_inc(sch);
1206
1207                qdisc_put(old);
1208        }
1209}
1210
1211static void htb_attach(struct Qdisc *sch)
1212{
1213        struct htb_sched *q = qdisc_priv(sch);
1214
1215        if (q->offload)
1216                htb_attach_offload(sch);
1217        else
1218                htb_attach_software(sch);
1219}
1220
1221static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1222{
1223        struct htb_sched *q = qdisc_priv(sch);
1224        struct nlattr *nest;
1225        struct tc_htb_glob gopt;
1226
1227        if (q->offload)
1228                sch->flags |= TCQ_F_OFFLOADED;
1229        else
1230                sch->flags &= ~TCQ_F_OFFLOADED;
1231
1232        sch->qstats.overlimits = q->overlimits;
1233        /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1234         * no change can happen to the qdisc parameters.
1235         */
1236
1237        gopt.direct_pkts = q->direct_pkts;
1238        gopt.version = HTB_VER;
1239        gopt.rate2quantum = q->rate2quantum;
1240        gopt.defcls = q->defcls;
1241        gopt.debug = 0;
1242
1243        nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1244        if (nest == NULL)
1245                goto nla_put_failure;
1246        if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1247            nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1248                goto nla_put_failure;
1249        if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1250                goto nla_put_failure;
1251
1252        return nla_nest_end(skb, nest);
1253
1254nla_put_failure:
1255        nla_nest_cancel(skb, nest);
1256        return -1;
1257}
1258
1259static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1260                          struct sk_buff *skb, struct tcmsg *tcm)
1261{
1262        struct htb_class *cl = (struct htb_class *)arg;
1263        struct htb_sched *q = qdisc_priv(sch);
1264        struct nlattr *nest;
1265        struct tc_htb_opt opt;
1266
1267        /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1268         * no change can happen to the class parameters.
1269         */
1270        tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1271        tcm->tcm_handle = cl->common.classid;
1272        if (!cl->level && cl->leaf.q)
1273                tcm->tcm_info = cl->leaf.q->handle;
1274
1275        nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1276        if (nest == NULL)
1277                goto nla_put_failure;
1278
1279        memset(&opt, 0, sizeof(opt));
1280
1281        psched_ratecfg_getrate(&opt.rate, &cl->rate);
1282        opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1283        psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1284        opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1285        opt.quantum = cl->quantum;
1286        opt.prio = cl->prio;
1287        opt.level = cl->level;
1288        if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1289                goto nla_put_failure;
1290        if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1291                goto nla_put_failure;
1292        if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1293            nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1294                              TCA_HTB_PAD))
1295                goto nla_put_failure;
1296        if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1297            nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1298                              TCA_HTB_PAD))
1299                goto nla_put_failure;
1300
1301        return nla_nest_end(skb, nest);
1302
1303nla_put_failure:
1304        nla_nest_cancel(skb, nest);
1305        return -1;
1306}
1307
1308static void htb_offload_aggregate_stats(struct htb_sched *q,
1309                                        struct htb_class *cl)
1310{
1311        struct htb_class *c;
1312        unsigned int i;
1313
1314        memset(&cl->bstats, 0, sizeof(cl->bstats));
1315
1316        for (i = 0; i < q->clhash.hashsize; i++) {
1317                hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1318                        struct htb_class *p = c;
1319
1320                        while (p && p->level < cl->level)
1321                                p = p->parent;
1322
1323                        if (p != cl)
1324                                continue;
1325
1326                        cl->bstats.bytes += c->bstats_bias.bytes;
1327                        cl->bstats.packets += c->bstats_bias.packets;
1328                        if (c->level == 0) {
1329                                cl->bstats.bytes += c->leaf.q->bstats.bytes;
1330                                cl->bstats.packets += c->leaf.q->bstats.packets;
1331                        }
1332                }
1333        }
1334}
1335
1336static int
1337htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1338{
1339        struct htb_class *cl = (struct htb_class *)arg;
1340        struct htb_sched *q = qdisc_priv(sch);
1341        struct gnet_stats_queue qs = {
1342                .drops = cl->drops,
1343                .overlimits = cl->overlimits,
1344        };
1345        __u32 qlen = 0;
1346
1347        if (!cl->level && cl->leaf.q)
1348                qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1349
1350        cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1351                                    INT_MIN, INT_MAX);
1352        cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1353                                     INT_MIN, INT_MAX);
1354
1355        if (q->offload) {
1356                if (!cl->level) {
1357                        if (cl->leaf.q)
1358                                cl->bstats = cl->leaf.q->bstats;
1359                        else
1360                                memset(&cl->bstats, 0, sizeof(cl->bstats));
1361                        cl->bstats.bytes += cl->bstats_bias.bytes;
1362                        cl->bstats.packets += cl->bstats_bias.packets;
1363                } else {
1364                        htb_offload_aggregate_stats(q, cl);
1365                }
1366        }
1367
1368        if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1369                                  d, NULL, &cl->bstats) < 0 ||
1370            gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1371            gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1372                return -1;
1373
1374        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1375}
1376
1377static struct netdev_queue *
1378htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1379{
1380        struct net_device *dev = qdisc_dev(sch);
1381        struct tc_htb_qopt_offload offload_opt;
1382        struct htb_sched *q = qdisc_priv(sch);
1383        int err;
1384
1385        if (!q->offload)
1386                return sch->dev_queue;
1387
1388        offload_opt = (struct tc_htb_qopt_offload) {
1389                .command = TC_HTB_LEAF_QUERY_QUEUE,
1390                .classid = TC_H_MIN(tcm->tcm_parent),
1391        };
1392        err = htb_offload(dev, &offload_opt);
1393        if (err || offload_opt.qid >= dev->num_tx_queues)
1394                return NULL;
1395        return netdev_get_tx_queue(dev, offload_opt.qid);
1396}
1397
1398static struct Qdisc *
1399htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1400{
1401        struct net_device *dev = dev_queue->dev;
1402        struct Qdisc *old_q;
1403
1404        if (dev->flags & IFF_UP)
1405                dev_deactivate(dev);
1406        old_q = dev_graft_qdisc(dev_queue, new_q);
1407        if (new_q)
1408                new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1409        if (dev->flags & IFF_UP)
1410                dev_activate(dev);
1411
1412        return old_q;
1413}
1414
1415static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
1416{
1417        struct netdev_queue *queue;
1418
1419        queue = cl->leaf.offload_queue;
1420        if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
1421                WARN_ON(cl->leaf.q->dev_queue != queue);
1422
1423        return queue;
1424}
1425
1426static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
1427                                   struct htb_class *cl_new, bool destroying)
1428{
1429        struct netdev_queue *queue_old, *queue_new;
1430        struct net_device *dev = qdisc_dev(sch);
1431
1432        queue_old = htb_offload_get_queue(cl_old);
1433        queue_new = htb_offload_get_queue(cl_new);
1434
1435        if (!destroying) {
1436                struct Qdisc *qdisc;
1437
1438                if (dev->flags & IFF_UP)
1439                        dev_deactivate(dev);
1440                qdisc = dev_graft_qdisc(queue_old, NULL);
1441                WARN_ON(qdisc != cl_old->leaf.q);
1442        }
1443
1444        if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
1445                cl_old->leaf.q->dev_queue = queue_new;
1446        cl_old->leaf.offload_queue = queue_new;
1447
1448        if (!destroying) {
1449                struct Qdisc *qdisc;
1450
1451                qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
1452                if (dev->flags & IFF_UP)
1453                        dev_activate(dev);
1454                WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1455        }
1456}
1457
1458static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1459                     struct Qdisc **old, struct netlink_ext_ack *extack)
1460{
1461        struct netdev_queue *dev_queue = sch->dev_queue;
1462        struct htb_class *cl = (struct htb_class *)arg;
1463        struct htb_sched *q = qdisc_priv(sch);
1464        struct Qdisc *old_q;
1465
1466        if (cl->level)
1467                return -EINVAL;
1468
1469        if (q->offload)
1470                dev_queue = htb_offload_get_queue(cl);
1471
1472        if (!new) {
1473                new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1474                                        cl->common.classid, extack);
1475                if (!new)
1476                        return -ENOBUFS;
1477        }
1478
1479        if (q->offload) {
1480                htb_set_lockdep_class_child(new);
1481                /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1482                qdisc_refcount_inc(new);
1483                old_q = htb_graft_helper(dev_queue, new);
1484        }
1485
1486        *old = qdisc_replace(sch, new, &cl->leaf.q);
1487
1488        if (q->offload) {
1489                WARN_ON(old_q != *old);
1490                qdisc_put(old_q);
1491        }
1492
1493        return 0;
1494}
1495
1496static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1497{
1498        struct htb_class *cl = (struct htb_class *)arg;
1499        return !cl->level ? cl->leaf.q : NULL;
1500}
1501
1502static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1503{
1504        struct htb_class *cl = (struct htb_class *)arg;
1505
1506        htb_deactivate(qdisc_priv(sch), cl);
1507}
1508
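    /* Return 1 if cl is the only child of a non-root parent, i.e. deleting
     * it will turn the parent back into a leaf.
     */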
1509static inline int htb_parent_last_child(struct htb_class *cl)
1510{
1511        if (!cl->parent)
1512                /* the root class */
1513                return 0;
1514        if (cl->parent->children > 1)
1515                /* not the last child */
1516                return 0;
1517        return 1;
1518}
1519
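    /* Turn cl's parent back into a leaf class as cl (its last child) goes
     * away: drop it from the wait queue if needed, reset level and tokens,
     * and install new_q (or noop_qdisc) as its leaf qdisc.
     */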
1520static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1521                               struct Qdisc *new_q)
1522{
1523        struct htb_sched *q = qdisc_priv(sch);
1524        struct htb_class *parent = cl->parent;
1525
1526        WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1527
1528        if (parent->cmode != HTB_CAN_SEND)
1529                htb_safe_rb_erase(&parent->pq_node,
1530                                  &q->hlevel[parent->level].wait_pq);
1531
1532        parent->level = 0;
1533        memset(&parent->inner, 0, sizeof(parent->inner));
1534        parent->leaf.q = new_q ? new_q : &noop_qdisc;
1535        parent->tokens = parent->buffer;
1536        parent->ctokens = parent->cbuffer;
1537        parent->t_c = ktime_get_ns();
1538        parent->cmode = HTB_CAN_SEND;
1539        if (q->offload)
1540                parent->leaf.offload_queue = cl->leaf.offload_queue;
1541}
1542
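    /* Offload counterpart of htb_parent_to_leaf: graft the parent's new
     * leaf qdisc onto the TX queue it inherits. Only a builtin qdisc is
     * expected to be attached there at this point.
     */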
1543static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1544                                       struct netdev_queue *dev_queue,
1545                                       struct Qdisc *new_q)
1546{
1547        struct Qdisc *old_q;
1548
1549        /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1550        if (new_q)
1551                qdisc_refcount_inc(new_q);
1552        old_q = htb_graft_helper(dev_queue, new_q);
1553        WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1554}
1555
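    /* Tear down the driver state for a leaf class: detach its qdisc, fold
     * its counters into the parent's bstats_bias, and issue the matching
     * TC_HTB_LEAF_DEL* command. If the driver reports back a different
     * classid, that class's leaf qdisc is re-attached to this class's queue.
     */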
1556static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1557                                     bool last_child, bool destroying,
1558                                     struct netlink_ext_ack *extack)
1559{
1560        struct tc_htb_qopt_offload offload_opt;
1561        struct netdev_queue *dev_queue;
1562        struct Qdisc *q = cl->leaf.q;
1563        struct Qdisc *old = NULL;
1564        int err;
1565
1566        if (cl->level)
1567                return -EINVAL;
1568
1569        WARN_ON(!q);
1570        dev_queue = htb_offload_get_queue(cl);
1571        old = htb_graft_helper(dev_queue, NULL);
1572        if (destroying)
1573                /* Before HTB is destroyed, the kernel grafts noop_qdisc to
1574                 * all queues.
1575                 */
1576                WARN_ON(!(old->flags & TCQ_F_BUILTIN));
1577        else
1578                WARN_ON(old != q);
1579
1580        if (cl->parent) {
1581                cl->parent->bstats_bias.bytes += q->bstats.bytes;
1582                cl->parent->bstats_bias.packets += q->bstats.packets;
1583        }
1584
1585        offload_opt = (struct tc_htb_qopt_offload) {
1586                .command = !last_child ? TC_HTB_LEAF_DEL :
1587                           destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1588                           TC_HTB_LEAF_DEL_LAST,
1589                .classid = cl->common.classid,
1590                .extack = extack,
1591        };
1592        err = htb_offload(qdisc_dev(sch), &offload_opt);
1593
1594        if (!err || destroying)
1595                qdisc_put(old);
1596        else
1597                htb_graft_helper(dev_queue, old);
1598
1599        if (last_child)
1600                return err;
1601
1602        if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
1603                u32 classid = TC_H_MAJ(sch->handle) |
1604                              TC_H_MIN(offload_opt.classid);
1605                struct htb_class *moved_cl = htb_find(classid, sch);
1606
1607                htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
1608        }
1609
1610        return err;
1611}
1612
1613static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1614{
1615        if (!cl->level) {
1616                WARN_ON(!cl->leaf.q);
1617                qdisc_put(cl->leaf.q);
1618        }
1619        gen_kill_estimator(&cl->rate_est);
1620        tcf_block_put(cl->block);
1621        kfree(cl);
1622}
1623
1624static void htb_destroy(struct Qdisc *sch)
1625{
1626        struct net_device *dev = qdisc_dev(sch);
1627        struct tc_htb_qopt_offload offload_opt;
1628        struct htb_sched *q = qdisc_priv(sch);
1629        struct hlist_node *next;
1630        bool nonempty, changed;
1631        struct htb_class *cl;
1632        unsigned int i;
1633
1634        cancel_work_sync(&q->work);
1635        qdisc_watchdog_cancel(&q->watchdog);
1636        /* This line used to be after the htb_destroy_class call below,
1637         * and surprisingly it worked in 2.4. But it must precede it,
1638         * because filters need their target class alive to be able to
1639         * call unbind_filter on it (without an Oops).
1640         */
1641        tcf_block_put(q->block);
1642
1643        for (i = 0; i < q->clhash.hashsize; i++) {
1644                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1645                        tcf_block_put(cl->block);
1646                        cl->block = NULL;
1647                }
1648        }
1649
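            /* With offload, classes must be destroyed from the leaves up:
             * each pass removes the current leaves (turning last-child
             * parents into leaves) and repeats until nothing is left.
             */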
1650        do {
1651                nonempty = false;
1652                changed = false;
1653                for (i = 0; i < q->clhash.hashsize; i++) {
1654                        hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1655                                                  common.hnode) {
1656                                bool last_child;
1657
1658                                if (!q->offload) {
1659                                        htb_destroy_class(sch, cl);
1660                                        continue;
1661                                }
1662
1663                                nonempty = true;
1664
1665                                if (cl->level)
1666                                        continue;
1667
1668                                changed = true;
1669
1670                                last_child = htb_parent_last_child(cl);
1671                                htb_destroy_class_offload(sch, cl, last_child,
1672                                                          true, NULL);
1673                                qdisc_class_hash_remove(&q->clhash,
1674                                                        &cl->common);
1675                                if (cl->parent)
1676                                        cl->parent->children--;
1677                                if (last_child)
1678                                        htb_parent_to_leaf(sch, cl, NULL);
1679                                htb_destroy_class(sch, cl);
1680                        }
1681                }
1682        } while (changed);
1683        WARN_ON(nonempty);
1684
1685        qdisc_class_hash_destroy(&q->clhash);
1686        __qdisc_reset_queue(&q->direct_queue);
1687
1688        if (!q->offload)
1689                return;
1690
1691        offload_opt = (struct tc_htb_qopt_offload) {
1692                .command = TC_HTB_DESTROY,
1693        };
1694        htb_offload(dev, &offload_opt);
1695
1696        if (!q->direct_qdiscs)
1697                return;
1698        for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1699                qdisc_put(q->direct_qdiscs[i]);
1700        kfree(q->direct_qdiscs);
1701}
1702
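    /* Delete a class that has no children and no filters bound: tear down
     * any offload state, purge its queue, unlink it from the hash and
     * active lists and, if it was its parent's last child, turn the parent
     * back into a leaf.
     */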
1703static int htb_delete(struct Qdisc *sch, unsigned long arg,
1704                      struct netlink_ext_ack *extack)
1705{
1706        struct htb_sched *q = qdisc_priv(sch);
1707        struct htb_class *cl = (struct htb_class *)arg;
1708        struct Qdisc *new_q = NULL;
1709        int last_child = 0;
1710        int err;
1711
1712        /* TODO: why don't we allow deleting a subtree? References? Does the
1713         * tc subsys guarantee us that in htb_destroy it holds no class
1714         * refs, so that we can remove children safely there?
1715         */
1716        if (cl->children || cl->filter_cnt)
1717                return -EBUSY;
1718
1719        if (!cl->level && htb_parent_last_child(cl))
1720                last_child = 1;
1721
1722        if (q->offload) {
1723                err = htb_destroy_class_offload(sch, cl, last_child, false,
1724                                                extack);
1725                if (err)
1726                        return err;
1727        }
1728
1729        if (last_child) {
1730                struct netdev_queue *dev_queue = sch->dev_queue;
1731
1732                if (q->offload)
1733                        dev_queue = htb_offload_get_queue(cl);
1734
1735                new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1736                                          cl->parent->common.classid,
1737                                          NULL);
1738                if (q->offload) {
1739                        if (new_q)
1740                                htb_set_lockdep_class_child(new_q);
1741                        htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1742                }
1743        }
1744
1745        sch_tree_lock(sch);
1746
1747        if (!cl->level)
1748                qdisc_purge_queue(cl->leaf.q);
1749
1750        /* delete from hash and active lists; the rest is done in destroy_class */
1751        qdisc_class_hash_remove(&q->clhash, &cl->common);
1752        if (cl->parent)
1753                cl->parent->children--;
1754
1755        if (cl->prio_activity)
1756                htb_deactivate(q, cl);
1757
1758        if (cl->cmode != HTB_CAN_SEND)
1759                htb_safe_rb_erase(&cl->pq_node,
1760                                  &q->hlevel[cl->level].wait_pq);
1761
1762        if (last_child)
1763                htb_parent_to_leaf(sch, cl, new_q);
1764
1765        sch_tree_unlock(sch);
1766
1767        htb_destroy_class(sch, cl);
1768        return 0;
1769}
1770
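    /* Create a new class or modify an existing one. For a new class the
     * default pfifo leaf qdisc (and, with offload, the backing TX queue) is
     * set up before sch_tree_lock is taken, because those steps can sleep.
     */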
1771static int htb_change_class(struct Qdisc *sch, u32 classid,
1772                            u32 parentid, struct nlattr **tca,
1773                            unsigned long *arg, struct netlink_ext_ack *extack)
1774{
1775        int err = -EINVAL;
1776        struct htb_sched *q = qdisc_priv(sch);
1777        struct htb_class *cl = (struct htb_class *)*arg, *parent;
1778        struct tc_htb_qopt_offload offload_opt;
1779        struct nlattr *opt = tca[TCA_OPTIONS];
1780        struct nlattr *tb[TCA_HTB_MAX + 1];
1781        struct Qdisc *parent_qdisc = NULL;
1782        struct netdev_queue *dev_queue;
1783        struct tc_htb_opt *hopt;
1784        u64 rate64, ceil64;
1785        int warn = 0;
1786
1787        /* extract all subattrs from opt attr */
1788        if (!opt)
1789                goto failure;
1790
1791        err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1792                                          NULL);
1793        if (err < 0)
1794                goto failure;
1795
1796        err = -EINVAL;
1797        if (tb[TCA_HTB_PARMS] == NULL)
1798                goto failure;
1799
1800        parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1801
1802        hopt = nla_data(tb[TCA_HTB_PARMS]);
1803        if (!hopt->rate.rate || !hopt->ceil.rate)
1804                goto failure;
1805
1806        /* Keeping backward compatible with rate_table based iproute2 tc */
1807        if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1808                qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1809                                              NULL));
1810
1811        if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1812                qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1813                                              NULL));
1814
1815        rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1816        ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1817
1818        if (!cl) {              /* new class */
1819                struct net_device *dev = qdisc_dev(sch);
1820                struct Qdisc *new_q, *old_q;
1821                int prio;
1822                struct {
1823                        struct nlattr           nla;
1824                        struct gnet_estimator   opt;
1825                } est = {
1826                        .nla = {
1827                                .nla_len        = nla_attr_size(sizeof(est.opt)),
1828                                .nla_type       = TCA_RATE,
1829                        },
1830                        .opt = {
1831                                /* 4s interval, 16s averaging constant */
1832                                .interval       = 2,
1833                                .ewma_log       = 2,
1834                        },
1835                };
1836
1837                /* check for valid classid */
1838                if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1839                    htb_find(classid, sch))
1840                        goto failure;
1841
1842                /* check maximal depth */
1843                if (parent && parent->parent && parent->parent->level < 2) {
1844                        pr_err("htb: tree is too deep\n");
1845                        goto failure;
1846                }
1847                err = -ENOBUFS;
1848                cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1849                if (!cl)
1850                        goto failure;
1851
1852                err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1853                if (err) {
1854                        kfree(cl);
1855                        goto failure;
1856                }
1857                if (htb_rate_est || tca[TCA_RATE]) {
1858                        err = gen_new_estimator(&cl->bstats, NULL,
1859                                                &cl->rate_est,
1860                                                NULL,
1861                                                qdisc_root_sleeping_running(sch),
1862                                                tca[TCA_RATE] ? : &est.nla);
1863                        if (err)
1864                                goto err_block_put;
1865                }
1866
1867                cl->children = 0;
1868                RB_CLEAR_NODE(&cl->pq_node);
1869
1870                for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1871                        RB_CLEAR_NODE(&cl->node[prio]);
1872
1873                cl->common.classid = classid;
1874
1875                /* Make sure nothing interrupts us in between of two
1876                 * ndo_setup_tc calls.
1877                 */
1878                ASSERT_RTNL();
1879
1880                /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1881                 * which can't be used inside of sch_tree_lock
1882                 * -- thanks to Karlis Peisenieks
1883                 */
1884                if (!q->offload) {
1885                        dev_queue = sch->dev_queue;
1886                } else if (!(parent && !parent->level)) {
1887                        /* Assign a dev_queue to this classid. */
1888                        offload_opt = (struct tc_htb_qopt_offload) {
1889                                .command = TC_HTB_LEAF_ALLOC_QUEUE,
1890                                .classid = cl->common.classid,
1891                                .parent_classid = parent ?
1892                                        TC_H_MIN(parent->common.classid) :
1893                                        TC_HTB_CLASSID_ROOT,
1894                                .rate = max_t(u64, hopt->rate.rate, rate64),
1895                                .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1896                                .extack = extack,
1897                        };
1898                        err = htb_offload(dev, &offload_opt);
1899                        if (err) {
1900                                pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1901                                       err);
1902                                goto err_kill_estimator;
1903                        }
1904                        dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1905                } else { /* First child. */
1906                        dev_queue = htb_offload_get_queue(parent);
1907                        old_q = htb_graft_helper(dev_queue, NULL);
1908                        WARN_ON(old_q != parent->leaf.q);
1909                        offload_opt = (struct tc_htb_qopt_offload) {
1910                                .command = TC_HTB_LEAF_TO_INNER,
1911                                .classid = cl->common.classid,
1912                                .parent_classid =
1913                                        TC_H_MIN(parent->common.classid),
1914                                .rate = max_t(u64, hopt->rate.rate, rate64),
1915                                .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1916                                .extack = extack,
1917                        };
1918                        err = htb_offload(dev, &offload_opt);
1919                        if (err) {
1920                                pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1921                                       err);
1922                                htb_graft_helper(dev_queue, old_q);
1923                                goto err_kill_estimator;
1924                        }
1925                        parent->bstats_bias.bytes += old_q->bstats.bytes;
1926                        parent->bstats_bias.packets += old_q->bstats.packets;
1927                        qdisc_put(old_q);
1928                }
1929                new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1930                                          classid, NULL);
1931                if (q->offload) {
1932                        if (new_q) {
1933                                htb_set_lockdep_class_child(new_q);
1934                                /* One ref for cl->leaf.q, the other for
1935                                 * dev_queue->qdisc.
1936                                 */
1937                                qdisc_refcount_inc(new_q);
1938                        }
1939                        old_q = htb_graft_helper(dev_queue, new_q);
1940                        /* No qdisc_put needed. */
1941                        WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1942                }
1943                sch_tree_lock(sch);
1944                if (parent && !parent->level) {
1945                        /* turn parent into inner node */
1946                        qdisc_purge_queue(parent->leaf.q);
1947                        parent_qdisc = parent->leaf.q;
1948                        if (parent->prio_activity)
1949                                htb_deactivate(q, parent);
1950
1951                        /* remove from evt list because of level change */
1952                        if (parent->cmode != HTB_CAN_SEND) {
1953                                htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1954                                parent->cmode = HTB_CAN_SEND;
1955                        }
1956                        parent->level = (parent->parent ? parent->parent->level
1957                                         : TC_HTB_MAXDEPTH) - 1;
1958                        memset(&parent->inner, 0, sizeof(parent->inner));
1959                }
1960
1961                /* leaf (we) needs elementary qdisc */
1962                cl->leaf.q = new_q ? new_q : &noop_qdisc;
1963                if (q->offload)
1964                        cl->leaf.offload_queue = dev_queue;
1965
1966                cl->parent = parent;
1967
1968                /* set class to be in HTB_CAN_SEND state */
1969                cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1970                cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1971                cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1972                cl->t_c = ktime_get_ns();
1973                cl->cmode = HTB_CAN_SEND;
1974
1975                /* attach to the hash list and parent's family */
1976                qdisc_class_hash_insert(&q->clhash, &cl->common);
1977                if (parent)
1978                        parent->children++;
1979                if (cl->leaf.q != &noop_qdisc)
1980                        qdisc_hash_add(cl->leaf.q, true);
1981        } else {
1982                if (tca[TCA_RATE]) {
1983                        err = gen_replace_estimator(&cl->bstats, NULL,
1984                                                    &cl->rate_est,
1985                                                    NULL,
1986                                                    qdisc_root_sleeping_running(sch),
1987                                                    tca[TCA_RATE]);
1988                        if (err)
1989                                return err;
1990                }
1991
1992                if (q->offload) {
1993                        struct net_device *dev = qdisc_dev(sch);
1994
1995                        offload_opt = (struct tc_htb_qopt_offload) {
1996                                .command = TC_HTB_NODE_MODIFY,
1997                                .classid = cl->common.classid,
1998                                .rate = max_t(u64, hopt->rate.rate, rate64),
1999                                .ceil = max_t(u64, hopt->ceil.rate, ceil64),
2000                                .extack = extack,
2001                        };
2002                        err = htb_offload(dev, &offload_opt);
2003                        if (err)
2004                                /* Estimator was replaced, and rollback may fail
2005                                 * as well, so we don't try to recover it, and
2006                                 * the estimator won't work properly with the
2007                                 * offload anyway, because bstats are updated
2008                                 * only when the stats are queried.
2009                                 */
2010                                return err;
2011                }
2012
2013                sch_tree_lock(sch);
2014        }
2015
2016        psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
2017        psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
2018
2019        /* There used to be a nasty bug here: we have to check that the
2020         * node is really a leaf before changing cl->leaf!
2021         */
2022        if (!cl->level) {
2023                u64 quantum = cl->rate.rate_bytes_ps;
2024
2025                do_div(quantum, q->rate2quantum);
2026                cl->quantum = min_t(u64, quantum, INT_MAX);
2027
2028                if (!hopt->quantum && cl->quantum < 1000) {
2029                        warn = -1;
2030                        cl->quantum = 1000;
2031                }
2032                if (!hopt->quantum && cl->quantum > 200000) {
2033                        warn = 1;
2034                        cl->quantum = 200000;
2035                }
2036                if (hopt->quantum)
2037                        cl->quantum = hopt->quantum;
2038                if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2039                        cl->prio = TC_HTB_NUMPRIO - 1;
2040        }
2041
2042        cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2043        cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2044
2045        sch_tree_unlock(sch);
2046        qdisc_put(parent_qdisc);
2047
2048        if (warn)
2049                pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
2050                            cl->common.classid, (warn == -1 ? "small" : "big"));
2051
2052        qdisc_class_hash_grow(sch, &q->clhash);
2053
2054        *arg = (unsigned long)cl;
2055        return 0;
2056
2057err_kill_estimator:
2058        gen_kill_estimator(&cl->rate_est);
2059err_block_put:
2060        tcf_block_put(cl->block);
2061        kfree(cl);
2062failure:
2063        return err;
2064}
2065
2066static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2067                                       struct netlink_ext_ack *extack)
2068{
2069        struct htb_sched *q = qdisc_priv(sch);
2070        struct htb_class *cl = (struct htb_class *)arg;
2071
2072        return cl ? cl->block : q->block;
2073}
2074
2075static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2076                                     u32 classid)
2077{
2078        struct htb_class *cl = htb_find(classid, sch);
2079
2080        /*if (cl && !cl->level) return 0;
2081         * The line above used to be there to prevent attaching filters to
2082         * leaves. But at least the tc_index filter uses this just to get
2083         * the class for other reasons, so we have to allow it.
2084         * ----
2085         * 19.6.2002 As Werner explained, it is OK - bind filter is just
2086         * another way to "lock" the class - unlike "get", this lock can
2087         * be broken by the class during destroy, IIUC.
2088         */
2089        if (cl)
2090                cl->filter_cnt++;
2091        return (unsigned long)cl;
2092}
2093
2094static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2095{
2096        struct htb_class *cl = (struct htb_class *)arg;
2097
2098        if (cl)
2099                cl->filter_cnt--;
2100}
2101
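    /* Walk all classes, honouring the walker's skip/count/stop protocol. */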
2102static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2103{
2104        struct htb_sched *q = qdisc_priv(sch);
2105        struct htb_class *cl;
2106        unsigned int i;
2107
2108        if (arg->stop)
2109                return;
2110
2111        for (i = 0; i < q->clhash.hashsize; i++) {
2112                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2113                        if (arg->count < arg->skip) {
2114                                arg->count++;
2115                                continue;
2116                        }
2117                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2118                                arg->stop = 1;
2119                                return;
2120                        }
2121                        arg->count++;
2122                }
2123        }
2124}
2125
2126static const struct Qdisc_class_ops htb_class_ops = {
2127        .select_queue   =       htb_select_queue,
2128        .graft          =       htb_graft,
2129        .leaf           =       htb_leaf,
2130        .qlen_notify    =       htb_qlen_notify,
2131        .find           =       htb_search,
2132        .change         =       htb_change_class,
2133        .delete         =       htb_delete,
2134        .walk           =       htb_walk,
2135        .tcf_block      =       htb_tcf_block,
2136        .bind_tcf       =       htb_bind_filter,
2137        .unbind_tcf     =       htb_unbind_filter,
2138        .dump           =       htb_dump_class,
2139        .dump_stats     =       htb_dump_class_stats,
2140};
2141
2142static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2143        .cl_ops         =       &htb_class_ops,
2144        .id             =       "htb",
2145        .priv_size      =       sizeof(struct htb_sched),
2146        .enqueue        =       htb_enqueue,
2147        .dequeue        =       htb_dequeue,
2148        .peek           =       qdisc_peek_dequeued,
2149        .init           =       htb_init,
2150        .attach         =       htb_attach,
2151        .reset          =       htb_reset,
2152        .destroy        =       htb_destroy,
2153        .dump           =       htb_dump,
2154        .owner          =       THIS_MODULE,
2155};
2156
2157static int __init htb_module_init(void)
2158{
2159        return register_qdisc(&htb_qdisc_ops);
2160}
2161static void __exit htb_module_exit(void)
2162{
2163        unregister_qdisc(&htb_qdisc_ops);
2164}
2165
2166module_init(htb_module_init)
2167module_exit(htb_module_exit)
2168MODULE_LICENSE("GPL");
2169