linux/net/sched/sch_api.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * net/sched/sch_api.c  Packet scheduler API.
   4 *
   5 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   6 *
   7 * Fixes:
   8 *
   9 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
  10 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
  11 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
  12 */
  13
  14#include <linux/module.h>
  15#include <linux/types.h>
  16#include <linux/kernel.h>
  17#include <linux/string.h>
  18#include <linux/errno.h>
  19#include <linux/skbuff.h>
  20#include <linux/init.h>
  21#include <linux/proc_fs.h>
  22#include <linux/seq_file.h>
  23#include <linux/kmod.h>
  24#include <linux/list.h>
  25#include <linux/hrtimer.h>
  26#include <linux/slab.h>
  27#include <linux/hashtable.h>
  28
  29#include <net/net_namespace.h>
  30#include <net/sock.h>
  31#include <net/netlink.h>
  32#include <net/pkt_sched.h>
  33#include <net/pkt_cls.h>
  34
  35/*
  36
  37   Short review.
  38   -------------
  39
  40   This file consists of two interrelated parts:
  41
  42   1. queueing disciplines manager frontend.
  43   2. traffic classes manager frontend.
  44
  45   Generally, queueing discipline ("qdisc") is a black box,
  46   which is able to enqueue packets and to dequeue them (when
  47   device is ready to send something) in order and at times
  48   determined by algorithm hidden in it.
  49
   50   qdiscs are divided into two categories:
  51   - "queues", which have no internal structure visible from outside.
  52   - "schedulers", which split all the packets to "traffic classes",
  53     using "packet classifiers" (look at cls_api.c)
  54
  55   In turn, classes may have child qdiscs (as rule, queues)
  56   attached to them etc. etc. etc.
  57
   58   The goal of the routines in this file is to translate
   59   information supplied by the user in the form of handles
   60   into a form more intelligible to the kernel, to perform sanity
   61   checks and the part of the work that is common to all qdiscs,
   62   and to provide rtnetlink notifications.
  63
  64   All real intelligent work is done inside qdisc modules.
  65
  66
  67
  68   Every discipline has two major routines: enqueue and dequeue.
  69
  70   ---dequeue
  71
  72   dequeue usually returns a skb to send. It is allowed to return NULL,
  73   but it does not mean that queue is empty, it just means that
  74   discipline does not want to send anything this time.
  75   Queue is really empty if q->q.qlen == 0.
  76   For complicated disciplines with multiple queues q->q is not
  77   real packet queue, but however q->q.qlen must be valid.
  78
  79   ---enqueue
  80
   81   enqueue returns 0 if the packet was enqueued successfully.
   82   If a packet (this one or another one) was dropped, it returns
   83   a non-zero error code.
  84   NET_XMIT_DROP        - this packet dropped
  85     Expected action: do not backoff, but wait until queue will clear.
  86   NET_XMIT_CN          - probably this packet enqueued, but another one dropped.
  87     Expected action: backoff or ignore
  88
  89   Auxiliary routines:
  90
  91   ---peek
  92
  93   like dequeue but without removing a packet from the queue
  94
  95   ---reset
  96
  97   returns qdisc to initial state: purge all buffers, clear all
  98   timers, counters (except for statistics) etc.
  99
 100   ---init
 101
 102   initializes newly created qdisc.
 103
 104   ---destroy
 105
 106   destroys resources allocated by init and during lifetime of qdisc.
 107
 108   ---change
 109
 110   changes qdisc parameters.
 111 */
 112
 113/* Protects list of registered TC modules. It is pure SMP lock. */
 114static DEFINE_RWLOCK(qdisc_mod_lock);
 115
 116
 117/************************************************
 118 *      Queueing disciplines manipulation.      *
 119 ************************************************/
 120
 121
 122/* The list of all installed queueing disciplines. */
 123
 124static struct Qdisc_ops *qdisc_base;
 125
 126/* Register/unregister queueing discipline */
 127
 128int register_qdisc(struct Qdisc_ops *qops)
 129{
 130        struct Qdisc_ops *q, **qp;
 131        int rc = -EEXIST;
 132
 133        write_lock(&qdisc_mod_lock);
 134        for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
 135                if (!strcmp(qops->id, q->id))
 136                        goto out;
 137
 138        if (qops->enqueue == NULL)
 139                qops->enqueue = noop_qdisc_ops.enqueue;
 140        if (qops->peek == NULL) {
 141                if (qops->dequeue == NULL)
 142                        qops->peek = noop_qdisc_ops.peek;
 143                else
 144                        goto out_einval;
 145        }
 146        if (qops->dequeue == NULL)
 147                qops->dequeue = noop_qdisc_ops.dequeue;
 148
 149        if (qops->cl_ops) {
 150                const struct Qdisc_class_ops *cops = qops->cl_ops;
 151
 152                if (!(cops->find && cops->walk && cops->leaf))
 153                        goto out_einval;
 154
 155                if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
 156                        goto out_einval;
 157        }
 158
 159        qops->next = NULL;
 160        *qp = qops;
 161        rc = 0;
 162out:
 163        write_unlock(&qdisc_mod_lock);
 164        return rc;
 165
 166out_einval:
 167        rc = -EINVAL;
 168        goto out;
 169}
 170EXPORT_SYMBOL(register_qdisc);
 171
 172int unregister_qdisc(struct Qdisc_ops *qops)
 173{
 174        struct Qdisc_ops *q, **qp;
 175        int err = -ENOENT;
 176
 177        write_lock(&qdisc_mod_lock);
 178        for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
 179                if (q == qops)
 180                        break;
 181        if (q) {
 182                *qp = q->next;
 183                q->next = NULL;
 184                err = 0;
 185        }
 186        write_unlock(&qdisc_mod_lock);
 187        return err;
 188}
 189EXPORT_SYMBOL(unregister_qdisc);
 190
 191/* Get default qdisc if not otherwise specified */
 192void qdisc_get_default(char *name, size_t len)
 193{
 194        read_lock(&qdisc_mod_lock);
 195        strlcpy(name, default_qdisc_ops->id, len);
 196        read_unlock(&qdisc_mod_lock);
 197}
 198
 199static struct Qdisc_ops *qdisc_lookup_default(const char *name)
 200{
 201        struct Qdisc_ops *q = NULL;
 202
 203        for (q = qdisc_base; q; q = q->next) {
 204                if (!strcmp(name, q->id)) {
 205                        if (!try_module_get(q->owner))
 206                                q = NULL;
 207                        break;
 208                }
 209        }
 210
 211        return q;
 212}
 213
 214/* Set new default qdisc to use */
 215int qdisc_set_default(const char *name)
 216{
 217        const struct Qdisc_ops *ops;
 218
 219        if (!capable(CAP_NET_ADMIN))
 220                return -EPERM;
 221
 222        write_lock(&qdisc_mod_lock);
 223        ops = qdisc_lookup_default(name);
 224        if (!ops) {
 225                /* Not found, drop lock and try to load module */
 226                write_unlock(&qdisc_mod_lock);
 227                request_module("sch_%s", name);
 228                write_lock(&qdisc_mod_lock);
 229
 230                ops = qdisc_lookup_default(name);
 231        }
 232
 233        if (ops) {
 234                /* Set new default */
 235                module_put(default_qdisc_ops->owner);
 236                default_qdisc_ops = ops;
 237        }
 238        write_unlock(&qdisc_mod_lock);
 239
 240        return ops ? 0 : -ENOENT;
 241}
 242
 243#ifdef CONFIG_NET_SCH_DEFAULT
 244/* Set default value from kernel config */
 245static int __init sch_default_qdisc(void)
 246{
 247        return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
 248}
 249late_initcall(sch_default_qdisc);
 250#endif
 251
 252/* We know handle. Find qdisc among all qdisc's attached to device
 253 * (root qdisc, all its children, children of children etc.)
 254 * Note: caller either uses rtnl or rcu_read_lock()
 255 */
 256
 257static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
 258{
 259        struct Qdisc *q;
 260
 261        if (!qdisc_dev(root))
 262                return (root->handle == handle ? root : NULL);
 263
 264        if (!(root->flags & TCQ_F_BUILTIN) &&
 265            root->handle == handle)
 266                return root;
 267
 268        hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
 269                if (q->handle == handle)
 270                        return q;
 271        }
 272        return NULL;
 273}
 274
 275void qdisc_hash_add(struct Qdisc *q, bool invisible)
 276{
 277        if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
 278                ASSERT_RTNL();
 279                hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
 280                if (invisible)
 281                        q->flags |= TCQ_F_INVISIBLE;
 282        }
 283}
 284EXPORT_SYMBOL(qdisc_hash_add);
 285
 286void qdisc_hash_del(struct Qdisc *q)
 287{
 288        if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
 289                ASSERT_RTNL();
 290                hash_del_rcu(&q->hash);
 291        }
 292}
 293EXPORT_SYMBOL(qdisc_hash_del);
 294
 295struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 296{
 297        struct Qdisc *q;
 298
 299        if (!handle)
 300                return NULL;
 301        q = qdisc_match_from_root(dev->qdisc, handle);
 302        if (q)
 303                goto out;
 304
 305        if (dev_ingress_queue(dev))
 306                q = qdisc_match_from_root(
 307                        dev_ingress_queue(dev)->qdisc_sleeping,
 308                        handle);
 309out:
 310        return q;
 311}
 312
 313struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
 314{
 315        struct netdev_queue *nq;
 316        struct Qdisc *q;
 317
 318        if (!handle)
 319                return NULL;
 320        q = qdisc_match_from_root(dev->qdisc, handle);
 321        if (q)
 322                goto out;
 323
 324        nq = dev_ingress_queue_rcu(dev);
 325        if (nq)
 326                q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
 327out:
 328        return q;
 329}
 330
 331static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
 332{
 333        unsigned long cl;
 334        const struct Qdisc_class_ops *cops = p->ops->cl_ops;
 335
 336        if (cops == NULL)
 337                return NULL;
 338        cl = cops->find(p, classid);
 339
 340        if (cl == 0)
 341                return NULL;
 342        return cops->leaf(p, cl);
 343}
 344
 345/* Find queueing discipline by name */
 346
 347static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
 348{
 349        struct Qdisc_ops *q = NULL;
 350
 351        if (kind) {
 352                read_lock(&qdisc_mod_lock);
 353                for (q = qdisc_base; q; q = q->next) {
 354                        if (nla_strcmp(kind, q->id) == 0) {
 355                                if (!try_module_get(q->owner))
 356                                        q = NULL;
 357                                break;
 358                        }
 359                }
 360                read_unlock(&qdisc_mod_lock);
 361        }
 362        return q;
 363}
 364
 365/* The linklayer setting were not transferred from iproute2, in older
 366 * versions, and the rate tables lookup systems have been dropped in
 367 * the kernel. To keep backward compatible with older iproute2 tc
 368 * utils, we detect the linklayer setting by detecting if the rate
 369 * table were modified.
 370 *
 371 * For linklayer ATM table entries, the rate table will be aligned to
 372 * 48 bytes, thus some table entries will contain the same value.  The
 373 * mpu (min packet unit) is also encoded into the old rate table, thus
 374 * starting from the mpu, we find low and high table entries for
 375 * mapping this cell.  If these entries contain the same value, when
 376 * the rate tables have been modified for linklayer ATM.
 377 *
 378 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
 379 * and then roundup to the next cell, calc the table entry one below,
 380 * and compare.
 381 */
 382static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
 383{
 384        int low       = roundup(r->mpu, 48);
 385        int high      = roundup(low+1, 48);
 386        int cell_low  = low >> r->cell_log;
 387        int cell_high = (high >> r->cell_log) - 1;
 388
 389        /* rtab is too inaccurate at rates > 100Mbit/s */
 390        if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
 391                pr_debug("TC linklayer: Giving up ATM detection\n");
 392                return TC_LINKLAYER_ETHERNET;
 393        }
 394
 395        if ((cell_high > cell_low) && (cell_high < 256)
 396            && (rtab[cell_low] == rtab[cell_high])) {
 397                pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
 398                         cell_low, cell_high, rtab[cell_high]);
 399                return TC_LINKLAYER_ATM;
 400        }
 401        return TC_LINKLAYER_ETHERNET;
 402}
 403
 404static struct qdisc_rate_table *qdisc_rtab_list;
 405
 406struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 407                                        struct nlattr *tab,
 408                                        struct netlink_ext_ack *extack)
 409{
 410        struct qdisc_rate_table *rtab;
 411
 412        if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
 413            nla_len(tab) != TC_RTAB_SIZE) {
 414                NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
 415                return NULL;
 416        }
 417
 418        for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
 419                if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
 420                    !memcmp(&rtab->data, nla_data(tab), 1024)) {
 421                        rtab->refcnt++;
 422                        return rtab;
 423                }
 424        }
 425
 426        rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
 427        if (rtab) {
 428                rtab->rate = *r;
 429                rtab->refcnt = 1;
 430                memcpy(rtab->data, nla_data(tab), 1024);
 431                if (r->linklayer == TC_LINKLAYER_UNAWARE)
 432                        r->linklayer = __detect_linklayer(r, rtab->data);
 433                rtab->next = qdisc_rtab_list;
 434                qdisc_rtab_list = rtab;
 435        } else {
 436                NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
 437        }
 438        return rtab;
 439}
 440EXPORT_SYMBOL(qdisc_get_rtab);
 441
 442void qdisc_put_rtab(struct qdisc_rate_table *tab)
 443{
 444        struct qdisc_rate_table *rtab, **rtabp;
 445
 446        if (!tab || --tab->refcnt)
 447                return;
 448
 449        for (rtabp = &qdisc_rtab_list;
 450             (rtab = *rtabp) != NULL;
 451             rtabp = &rtab->next) {
 452                if (rtab == tab) {
 453                        *rtabp = rtab->next;
 454                        kfree(rtab);
 455                        return;
 456                }
 457        }
 458}
 459EXPORT_SYMBOL(qdisc_put_rtab);
 460
 461static LIST_HEAD(qdisc_stab_list);
 462
 463static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
 464        [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
 465        [TCA_STAB_DATA] = { .type = NLA_BINARY },
 466};
 467
 468static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
 469                                               struct netlink_ext_ack *extack)
 470{
 471        struct nlattr *tb[TCA_STAB_MAX + 1];
 472        struct qdisc_size_table *stab;
 473        struct tc_sizespec *s;
 474        unsigned int tsize = 0;
 475        u16 *tab = NULL;
 476        int err;
 477
 478        err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
 479                                          extack);
 480        if (err < 0)
 481                return ERR_PTR(err);
 482        if (!tb[TCA_STAB_BASE]) {
 483                NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
 484                return ERR_PTR(-EINVAL);
 485        }
 486
 487        s = nla_data(tb[TCA_STAB_BASE]);
 488
 489        if (s->tsize > 0) {
 490                if (!tb[TCA_STAB_DATA]) {
 491                        NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
 492                        return ERR_PTR(-EINVAL);
 493                }
 494                tab = nla_data(tb[TCA_STAB_DATA]);
 495                tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
 496        }
 497
 498        if (tsize != s->tsize || (!tab && tsize > 0)) {
 499                NL_SET_ERR_MSG(extack, "Invalid size of size table");
 500                return ERR_PTR(-EINVAL);
 501        }
 502
 503        list_for_each_entry(stab, &qdisc_stab_list, list) {
 504                if (memcmp(&stab->szopts, s, sizeof(*s)))
 505                        continue;
 506                if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
 507                        continue;
 508                stab->refcnt++;
 509                return stab;
 510        }
 511
 512        stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
 513        if (!stab)
 514                return ERR_PTR(-ENOMEM);
 515
 516        stab->refcnt = 1;
 517        stab->szopts = *s;
 518        if (tsize > 0)
 519                memcpy(stab->data, tab, tsize * sizeof(u16));
 520
 521        list_add_tail(&stab->list, &qdisc_stab_list);
 522
 523        return stab;
 524}
 525
 526void qdisc_put_stab(struct qdisc_size_table *tab)
 527{
 528        if (!tab)
 529                return;
 530
 531        if (--tab->refcnt == 0) {
 532                list_del(&tab->list);
 533                kfree_rcu(tab, rcu);
 534        }
 535}
 536EXPORT_SYMBOL(qdisc_put_stab);
 537
 538static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
 539{
 540        struct nlattr *nest;
 541
 542        nest = nla_nest_start_noflag(skb, TCA_STAB);
 543        if (nest == NULL)
 544                goto nla_put_failure;
 545        if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
 546                goto nla_put_failure;
 547        nla_nest_end(skb, nest);
 548
 549        return skb->len;
 550
 551nla_put_failure:
 552        return -1;
 553}
 554
 555void __qdisc_calculate_pkt_len(struct sk_buff *skb,
 556                               const struct qdisc_size_table *stab)
 557{
 558        int pkt_len, slot;
 559
 560        pkt_len = skb->len + stab->szopts.overhead;
 561        if (unlikely(!stab->szopts.tsize))
 562                goto out;
 563
 564        slot = pkt_len + stab->szopts.cell_align;
 565        if (unlikely(slot < 0))
 566                slot = 0;
 567
 568        slot >>= stab->szopts.cell_log;
 569        if (likely(slot < stab->szopts.tsize))
 570                pkt_len = stab->data[slot];
 571        else
 572                pkt_len = stab->data[stab->szopts.tsize - 1] *
 573                                (slot / stab->szopts.tsize) +
 574                                stab->data[slot % stab->szopts.tsize];
 575
 576        pkt_len <<= stab->szopts.size_log;
 577out:
 578        if (unlikely(pkt_len < 1))
 579                pkt_len = 1;
 580        qdisc_skb_cb(skb)->pkt_len = pkt_len;
 581}
 582EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
 583
 584void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
 585{
 586        if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
 587                pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
 588                        txt, qdisc->ops->id, qdisc->handle >> 16);
 589                qdisc->flags |= TCQ_F_WARN_NONWC;
 590        }
 591}
 592EXPORT_SYMBOL(qdisc_warn_nonwc);
 593
 594static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 595{
 596        struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
 597                                                 timer);
 598
 599        rcu_read_lock();
 600        __netif_schedule(qdisc_root(wd->qdisc));
 601        rcu_read_unlock();
 602
 603        return HRTIMER_NORESTART;
 604}
 605
 606void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
 607                                 clockid_t clockid)
 608{
 609        hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
 610        wd->timer.function = qdisc_watchdog;
 611        wd->qdisc = qdisc;
 612}
 613EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
 614
 615void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
 616{
 617        qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
 618}
 619EXPORT_SYMBOL(qdisc_watchdog_init);
 620
 621void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
 622{
 623        if (test_bit(__QDISC_STATE_DEACTIVATED,
 624                     &qdisc_root_sleeping(wd->qdisc)->state))
 625                return;
 626
 627        if (wd->last_expires == expires)
 628                return;
 629
 630        wd->last_expires = expires;
 631        hrtimer_start(&wd->timer,
 632                      ns_to_ktime(expires),
 633                      HRTIMER_MODE_ABS_PINNED);
 634}
 635EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
 636
 637void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 638{
 639        hrtimer_cancel(&wd->timer);
 640}
 641EXPORT_SYMBOL(qdisc_watchdog_cancel);
 642
 643static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
 644{
 645        struct hlist_head *h;
 646        unsigned int i;
 647
 648        h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
 649
 650        if (h != NULL) {
 651                for (i = 0; i < n; i++)
 652                        INIT_HLIST_HEAD(&h[i]);
 653        }
 654        return h;
 655}
 656
 657void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
 658{
 659        struct Qdisc_class_common *cl;
 660        struct hlist_node *next;
 661        struct hlist_head *nhash, *ohash;
 662        unsigned int nsize, nmask, osize;
 663        unsigned int i, h;
 664
 665        /* Rehash when load factor exceeds 0.75 */
 666        if (clhash->hashelems * 4 <= clhash->hashsize * 3)
 667                return;
 668        nsize = clhash->hashsize * 2;
 669        nmask = nsize - 1;
 670        nhash = qdisc_class_hash_alloc(nsize);
 671        if (nhash == NULL)
 672                return;
 673
 674        ohash = clhash->hash;
 675        osize = clhash->hashsize;
 676
 677        sch_tree_lock(sch);
 678        for (i = 0; i < osize; i++) {
 679                hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
 680                        h = qdisc_class_hash(cl->classid, nmask);
 681                        hlist_add_head(&cl->hnode, &nhash[h]);
 682                }
 683        }
 684        clhash->hash     = nhash;
 685        clhash->hashsize = nsize;
 686        clhash->hashmask = nmask;
 687        sch_tree_unlock(sch);
 688
 689        kvfree(ohash);
 690}
 691EXPORT_SYMBOL(qdisc_class_hash_grow);
 692
 693int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
 694{
 695        unsigned int size = 4;
 696
 697        clhash->hash = qdisc_class_hash_alloc(size);
 698        if (!clhash->hash)
 699                return -ENOMEM;
 700        clhash->hashsize  = size;
 701        clhash->hashmask  = size - 1;
 702        clhash->hashelems = 0;
 703        return 0;
 704}
 705EXPORT_SYMBOL(qdisc_class_hash_init);
 706
 707void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
 708{
 709        kvfree(clhash->hash);
 710}
 711EXPORT_SYMBOL(qdisc_class_hash_destroy);
 712
 713void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
 714                             struct Qdisc_class_common *cl)
 715{
 716        unsigned int h;
 717
 718        INIT_HLIST_NODE(&cl->hnode);
 719        h = qdisc_class_hash(cl->classid, clhash->hashmask);
 720        hlist_add_head(&cl->hnode, &clhash->hash[h]);
 721        clhash->hashelems++;
 722}
 723EXPORT_SYMBOL(qdisc_class_hash_insert);
 724
 725void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
 726                             struct Qdisc_class_common *cl)
 727{
 728        hlist_del(&cl->hnode);
 729        clhash->hashelems--;
 730}
 731EXPORT_SYMBOL(qdisc_class_hash_remove);
 732
 733/* Allocate an unique handle from space managed by kernel
 734 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 735 */
 736static u32 qdisc_alloc_handle(struct net_device *dev)
 737{
 738        int i = 0x8000;
 739        static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
 740
 741        do {
 742                autohandle += TC_H_MAKE(0x10000U, 0);
 743                if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
 744                        autohandle = TC_H_MAKE(0x80000000U, 0);
 745                if (!qdisc_lookup(dev, autohandle))
 746                        return autohandle;
 747                cond_resched();
 748        } while (--i > 0);
 749
 750        return 0;
 751}
 752
 753void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
 754{
 755        bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
 756        const struct Qdisc_class_ops *cops;
 757        unsigned long cl;
 758        u32 parentid;
 759        bool notify;
 760        int drops;
 761
 762        if (n == 0 && len == 0)
 763                return;
 764        drops = max_t(int, n, 0);
 765        rcu_read_lock();
 766        while ((parentid = sch->parent)) {
 767                if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
 768                        break;
 769
 770                if (sch->flags & TCQ_F_NOPARENT)
 771                        break;
 772                /* Notify parent qdisc only if child qdisc becomes empty.
 773                 *
 774                 * If child was empty even before update then backlog
 775                 * counter is screwed and we skip notification because
 776                 * parent class is already passive.
 777                 *
 778                 * If the original child was offloaded then it is allowed
 779                 * to be seem as empty, so the parent is notified anyway.
 780                 */
 781                notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
 782                                                       !qdisc_is_offloaded);
 783                /* TODO: perform the search on a per txq basis */
 784                sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
 785                if (sch == NULL) {
 786                        WARN_ON_ONCE(parentid != TC_H_ROOT);
 787                        break;
 788                }
 789                cops = sch->ops->cl_ops;
 790                if (notify && cops->qlen_notify) {
 791                        cl = cops->find(sch, parentid);
 792                        cops->qlen_notify(sch, cl);
 793                }
 794                sch->q.qlen -= n;
 795                sch->qstats.backlog -= len;
 796                __qdisc_qstats_drop(sch, drops);
 797        }
 798        rcu_read_unlock();
 799}
 800EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
 801
 802int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
 803                              void *type_data)
 804{
 805        struct net_device *dev = qdisc_dev(sch);
 806        int err;
 807
 808        sch->flags &= ~TCQ_F_OFFLOADED;
 809        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
 810                return 0;
 811
 812        err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
 813        if (err == -EOPNOTSUPP)
 814                return 0;
 815
 816        if (!err)
 817                sch->flags |= TCQ_F_OFFLOADED;
 818
 819        return err;
 820}
 821EXPORT_SYMBOL(qdisc_offload_dump_helper);
 822
 823void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
 824                                struct Qdisc *new, struct Qdisc *old,
 825                                enum tc_setup_type type, void *type_data,
 826                                struct netlink_ext_ack *extack)
 827{
 828        bool any_qdisc_is_offloaded;
 829        int err;
 830
 831        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
 832                return;
 833
 834        err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
 835
 836        /* Don't report error if the graft is part of destroy operation. */
 837        if (!err || !new || new == &noop_qdisc)
 838                return;
 839
 840        /* Don't report error if the parent, the old child and the new
 841         * one are not offloaded.
 842         */
 843        any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
 844        any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
 845        any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
 846
 847        if (any_qdisc_is_offloaded)
 848                NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
 849}
 850EXPORT_SYMBOL(qdisc_offload_graft_helper);
 851
 852static void qdisc_offload_graft_root(struct net_device *dev,
 853                                     struct Qdisc *new, struct Qdisc *old,
 854                                     struct netlink_ext_ack *extack)
 855{
 856        struct tc_root_qopt_offload graft_offload = {
 857                .command        = TC_ROOT_GRAFT,
 858                .handle         = new ? new->handle : 0,
 859                .ingress        = (new && new->flags & TCQ_F_INGRESS) ||
 860                                  (old && old->flags & TCQ_F_INGRESS),
 861        };
 862
 863        qdisc_offload_graft_helper(dev, NULL, new, old,
 864                                   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
 865}
 866
 /*
  * Append one rtnetlink message describing qdisc @q to @skb.
  *
  * The message is a struct tcmsg header followed, in this order, by
  * TCA_KIND, the optional ingress/egress block indices, whatever the
  * qdisc's own ->dump() adds, TCA_HW_OFFLOAD, the optional size table and
  * the statistics block.
  *
  * @clid is reported as tcm_parent; @portid, @seq, @flags and @event are
  * handed to nlmsg_put() for the netlink header.
  *
  * Returns skb->len on success.  On any failure the skb is trimmed back to
  * the tail position recorded on entry and -1 is returned.
  */
 867static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 868                         u32 portid, u32 seq, u16 flags, int event)
 869{
 870        struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
 871        struct gnet_stats_queue __percpu *cpu_qstats = NULL;
 872        struct tcmsg *tcm;
 873        struct nlmsghdr  *nlh;
 874        unsigned char *b = skb_tail_pointer(skb);
 875        struct gnet_dump d;
 876        struct qdisc_size_table *stab;
 877        u32 block_index;
 878        __u32 qlen;
 879
 880        cond_resched();
 881        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
 882        if (!nlh)
 883                goto out_nlmsg_trim;
 884        tcm = nlmsg_data(nlh);
 885        tcm->tcm_family = AF_UNSPEC;
 886        tcm->tcm__pad1 = 0;
 887        tcm->tcm__pad2 = 0;
 888        tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
 889        tcm->tcm_parent = clid;
 890        tcm->tcm_handle = q->handle;
            /* tcm_info carries the qdisc's current reference count. */
 891        tcm->tcm_info = refcount_read(&q->refcnt);
 892        if (nla_put_string(skb, TCA_KIND, q->ops->id))
 893                goto nla_put_failure;
            /*
             * A block index is emitted only when the qdisc implements the
             * getter and the getter returns a non-zero index.
             */
 894        if (q->ops->ingress_block_get) {
 895                block_index = q->ops->ingress_block_get(q);
 896                if (block_index &&
 897                    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
 898                        goto nla_put_failure;
 899        }
 900        if (q->ops->egress_block_get) {
 901                block_index = q->ops->egress_block_get(q);
 902                if (block_index &&
 903                    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
 904                        goto nla_put_failure;
 905        }
 906        if (q->ops->dump && q->ops->dump(q, skb) < 0)
 907                goto nla_put_failure;
 908        if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
 909                goto nla_put_failure;
            /* Sampled here, reported through gnet_stats_copy_queue() below. */
 910        qlen = qdisc_qlen_sum(q);
 911
 912        stab = rtnl_dereference(q->stab);
 913        if (stab && qdisc_dump_stab(skb, stab) < 0)
 914                goto nla_put_failure;
 915
 916        if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
 917                                         NULL, &d, TCA_PAD) < 0)
 918                goto nla_put_failure;
 919
 920        if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
 921                goto nla_put_failure;
 922
            /*
             * The per-CPU counter pointers stay NULL unless the qdisc is
             * flagged as using per-CPU statistics.
             */
 923        if (qdisc_is_percpu_stats(q)) {
 924                cpu_bstats = q->cpu_bstats;
 925                cpu_qstats = q->cpu_qstats;
 926        }
 927
 928        if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
 929                                  &d, cpu_bstats, &q->bstats) < 0 ||
 930            gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
 931            gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
 932                goto nla_put_failure;
 933
 934        if (gnet_stats_finish_copy(&d) < 0)
 935                goto nla_put_failure;
 936
            /* Final length: everything written since the tail seen on entry. */
 937        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 938        return skb->len;
 939
            /* Both labels unwind the same way: drop all data added by this call. */
 940out_nlmsg_trim:
 941nla_put_failure:
 942        nlmsg_trim(skb, b);
 943        return -1;
 944}
 945
 946static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
 947{
 948        if (q->flags & TCQ_F_BUILTIN)
 949                return true;
 950        if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
 951                return true;
 952
 953        return false;
 954}
 955
 956static int qdisc_notify(struct net *net, struct sk_buff *oskb,
 957                        struct nlmsghdr *n, u32 clid,
 958                        struct Qdisc *old, struct Qdisc *new)
 959{
 960        struct sk_buff *skb;
 961        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 962
 963        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 964        if (!skb)
 965                return -ENOBUFS;
 966
 967        if (old && !tc_qdisc_dump_ignore(old, false)) {
 968                if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
 969                                  0, RTM_DELQDISC) < 0)
 970                        goto err_out;
 971        }
 972        if (new && !tc_qdisc_dump_ignore(new, false)) {
 973                if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
 974                                  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
 975                        goto err_out;
 976        }
 977
 978        if (skb->len)
 979                return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 980                                      n->nlmsg_flags & NLM_F_ECHO);
 981
 982err_out:
 983        kfree_skb(skb);
 984        return -EINVAL;
 985}
 986
 987static void notify_and_destroy(struct net *net, struct sk_buff *skb,
 988                               struct nlmsghdr *n, u32 clid,
 989                               struct Qdisc *old, struct Qdisc *new)
 990{
 991        if (new || old)
 992                qdisc_notify(net, skb, n, clid, old, new);
 993
 994        if (old)
 995                qdisc_put(old);
 996}
 997
 998static void qdisc_clear_nolock(struct Qdisc *sch)
 999{
1000        sch->flags &= ~TCQ_F_NOLOCK;
1001        if (!(sch->flags & TCQ_F_CPUSTATS))
1002                return;
1003
1004        free_percpu(sch->cpu_bstats);
1005        free_percpu(sch->cpu_qstats);
1006        sch->cpu_bstats = NULL;
1007        sch->cpu_qstats = NULL;
1008        sch->flags &= ~TCQ_F_CPUSTATS;
1009}
1010
1011/* Graft qdisc "new" to class "classid" of qdisc "parent" or
1012 * to device "dev".
1013 *
1014 * When appropriate send a netlink notification using 'skb'
1015 * and "n".
1016 *
1017 * On success, destroy old qdisc.
1018 */
1019
1020static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
1021                       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
1022                       struct Qdisc *new, struct Qdisc *old,
1023                       struct netlink_ext_ack *extack)
1024{
1025        struct Qdisc *q = old;
1026        struct net *net = dev_net(dev);
1027
1028        if (parent == NULL) {
1029                unsigned int i, num_q, ingress;
1030
1031                ingress = 0;
1032                num_q = dev->num_tx_queues;
1033                if ((q && q->flags & TCQ_F_INGRESS) ||
1034                    (new && new->flags & TCQ_F_INGRESS)) {
1035                        num_q = 1;
1036                        ingress = 1;
1037                        if (!dev_ingress_queue(dev)) {
1038                                NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
1039                                return -ENOENT;
1040                        }
1041                }
1042
1043                if (dev->flags & IFF_UP)
1044                        dev_deactivate(dev);
1045
1046                qdisc_offload_graft_root(dev, new, old, extack);
1047
1048                if (new && new->ops->attach)
1049                        goto skip;
1050
1051                for (i = 0; i < num_q; i++) {
1052                        struct netdev_queue *dev_queue = dev_ingress_queue(dev);
1053
1054                        if (!ingress)
1055                                dev_queue = netdev_get_tx_queue(dev, i);
1056
1057                        old = dev_graft_qdisc(dev_queue, new);
1058                        if (new && i > 0)
1059                                qdisc_refcount_inc(new);
1060
1061                        if (!ingress)
1062                                qdisc_put(old);
1063                }
1064
1065skip:
1066                if (!ingress) {
1067                        notify_and_destroy(net, skb, n, classid,
1068                                           dev->qdisc, new);
1069                        if (new && !new->ops->attach)
1070                                qdisc_refcount_inc(new);
1071                        dev->qdisc = new ? : &noop_qdisc;
1072
1073                        if (new && new->ops->attach)
1074                                new->ops->attach(new);
1075                } else {
1076                        notify_and_destroy(net, skb, n, classid, old, new);
1077                }
1078
1079                if (dev->flags & IFF_UP)
1080                        dev_activate(dev);
1081        } else {
1082                const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1083                unsigned long cl;
1084                int err;
1085
1086                /* Only support running class lockless if parent is lockless */
1087                if (new && (new->flags & TCQ_F_NOLOCK) &&
1088                    parent && !(parent->flags & TCQ_F_NOLOCK))
1089                        qdisc_clear_nolock(new);
1090
1091                if (!cops || !cops->graft)
1092                        return -EOPNOTSUPP;
1093
1094                cl = cops->find(parent, classid);
1095                if (!cl) {
1096                        NL_SET_ERR_MSG(extack, "Specified class not found");
1097                        return -ENOENT;
1098                }
1099
1100                err = cops->graft(parent, cl, new, &old, extack);
1101                if (err)
1102                        return err;
1103                notify_and_destroy(net, skb, n, classid, old, new);
1104        }
1105        return 0;
1106}
1107
1108static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1109                                   struct netlink_ext_ack *extack)
1110{
1111        u32 block_index;
1112
1113        if (tca[TCA_INGRESS_BLOCK]) {
1114                block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1115
1116                if (!block_index) {
1117                        NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1118                        return -EINVAL;
1119                }
1120                if (!sch->ops->ingress_block_set) {
1121                        NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1122                        return -EOPNOTSUPP;
1123                }
1124                sch->ops->ingress_block_set(sch, block_index);
1125        }
1126        if (tca[TCA_EGRESS_BLOCK]) {
1127                block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1128
1129                if (!block_index) {
1130                        NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1131                        return -EINVAL;
1132                }
1133                if (!sch->ops->egress_block_set) {
1134                        NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1135                        return -EOPNOTSUPP;
1136                }
1137                sch->ops->egress_block_set(sch, block_index);
1138        }
1139        return 0;
1140}
1141
1142/*
1143   Allocate and initialize new qdisc.
1144
1145   Parameters are passed via opt.
1146 */
1147
/*
 * Create a qdisc of the kind named by tca[TCA_KIND] for @dev/@dev_queue.
 *
 * Steps, in order: look up the ops (with CONFIG_MODULES, try to load
 * "sch_<kind>" once if unknown), allocate the qdisc, record @parent and
 * the handle, apply the block index attributes, run ops->init() with
 * tca[TCA_OPTIONS], attach the optional size table (TCA_STAB) and rate
 * estimator (TCA_RATE), and finally add the qdisc to the hash.
 *
 * @p is only consulted when choosing the seqcount for the rate estimator.
 *
 * Returns the new qdisc, or NULL with the error code stored in *@errp.
 * -EAGAIN means the module was loaded with RTNL dropped and the caller
 * has to replay the request.
 */
1148static struct Qdisc *qdisc_create(struct net_device *dev,
1149                                  struct netdev_queue *dev_queue,
1150                                  struct Qdisc *p, u32 parent, u32 handle,
1151                                  struct nlattr **tca, int *errp,
1152                                  struct netlink_ext_ack *extack)
1153{
1154        int err;
1155        struct nlattr *kind = tca[TCA_KIND];
1156        struct Qdisc *sch;
1157        struct Qdisc_ops *ops;
1158        struct qdisc_size_table *stab;
1159
1160        ops = qdisc_lookup_ops(kind);
1161#ifdef CONFIG_MODULES
1162        if (ops == NULL && kind != NULL) {
1163                char name[IFNAMSIZ];
1164                if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
1165                        /* We dropped the RTNL semaphore in order to
1166                         * perform the module load.  So, even if we
1167                         * succeeded in loading the module we have to
1168                         * tell the caller to replay the request.  We
1169                         * indicate this using -EAGAIN.
1170                         * We replay the request because the device may
1171                         * go away in the mean time.
1172                         */
1173                        rtnl_unlock();
1174                        request_module("sch_%s", name);
1175                        rtnl_lock();
1176                        ops = qdisc_lookup_ops(kind);
1177                        if (ops != NULL) {
1178                                /* We will try again qdisc_lookup_ops,
1179                                 * so don't keep a reference.
1180                                 */
1181                                module_put(ops->owner);
1182                                err = -EAGAIN;
1183                                goto err_out;
1184                        }
1185                }
1186        }
1187#endif
1188
1189        err = -ENOENT;
1190        if (!ops) {
1191                NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1192                goto err_out;
1193        }
1194
1195        sch = qdisc_alloc(dev_queue, ops, extack);
1196        if (IS_ERR(sch)) {
1197                err = PTR_ERR(sch);
1198                goto err_out2;
1199        }
1200
1201        sch->parent = parent;
1202
            /*
             * An ingress request gets the fixed handle TC_H_INGRESS:0.
             * Anything else keeps the caller's handle, or has one allocated
             * when the caller passed 0.
             */
1203        if (handle == TC_H_INGRESS) {
1204                sch->flags |= TCQ_F_INGRESS;
1205                handle = TC_H_MAKE(TC_H_INGRESS, 0);
1206        } else {
1207                if (handle == 0) {
1208                        handle = qdisc_alloc_handle(dev);
1209                        if (handle == 0) {
1210                                NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
1211                                err = -ENOSPC;
1212                                goto err_out3;
1213                        }
1214                }
1215                if (!netif_is_multiqueue(dev))
1216                        sch->flags |= TCQ_F_ONETXQUEUE;
1217        }
1218
1219        sch->handle = handle;
1220
1221        /* This exist to keep backward compatible with a userspace
1222         * loophole, what allowed userspace to get IFF_NO_QUEUE
1223         * facility on older kernels by setting tx_queue_len=0 (prior
1224         * to qdisc init), and then forgot to reinit tx_queue_len
1225         * before again attaching a qdisc.
1226         */
1227        if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1228                dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1229                netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1230        }
1231
1232        err = qdisc_block_indexes_set(sch, tca, extack);
1233        if (err)
1234                goto err_out3;
1235
1236        if (ops->init) {
1237                err = ops->init(sch, tca[TCA_OPTIONS], extack);
1238                if (err != 0)
1239                        goto err_out5;
1240        }
1241
1242        if (tca[TCA_STAB]) {
1243                stab = qdisc_get_stab(tca[TCA_STAB], extack);
1244                if (IS_ERR(stab)) {
1245                        err = PTR_ERR(stab);
1246                        goto err_out4;
1247                }
1248                rcu_assign_pointer(sch->stab, stab);
1249        }
1250        if (tca[TCA_RATE]) {
1251                seqcount_t *running;
1252
1253                err = -EOPNOTSUPP;
1254                if (sch->flags & TCQ_F_MQROOT) {
1255                        NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
1256                        goto err_out4;
1257                }
1258
                    /*
                     * The estimator uses the root's seqcount for a qdisc that
                     * is not the root, not ingress, and has a parent @p that
                     * is not flagged TCQ_F_MQROOT; in every other case it
                     * uses the qdisc's own.
                     */
1259                if (sch->parent != TC_H_ROOT &&
1260                    !(sch->flags & TCQ_F_INGRESS) &&
1261                    (!p || !(p->flags & TCQ_F_MQROOT)))
1262                        running = qdisc_root_sleeping_running(sch);
1263                else
1264                        running = &sch->running;
1265
1266                err = gen_new_estimator(&sch->bstats,
1267                                        sch->cpu_bstats,
1268                                        &sch->rate_est,
1269                                        NULL,
1270                                        running,
1271                                        tca[TCA_RATE]);
1272                if (err) {
1273                        NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
1274                        goto err_out4;
1275                }
1276        }
1277
1278        qdisc_hash_add(sch, false);
1279
1280        return sch;
1281
            /*
             * Unwind ladder.  err_out5 (init failed) and err_out4 (failure
             * after init) both call ops->destroy() and then join err_out3,
             * which falls through err_out2 and err_out.
             */
1282err_out5:
1283        /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1284        if (ops->destroy)
1285                ops->destroy(sch);
1286err_out3:
            /* NOTE(review): presumably balances a device reference taken in
             * qdisc_alloc() - confirm against that function.
             */
1287        dev_put(dev);
1288        qdisc_free(sch);
1289err_out2:
1290        module_put(ops->owner);
1291err_out:
1292        *errp = err;
1293        return NULL;
1294
1295err_out4:
1296        /*
1297         * Any broken qdiscs that would require a ops->reset() here?
1298         * The qdisc was never in action so it shouldn't be necessary.
1299         */
1300        qdisc_put_stab(rtnl_dereference(sch->stab));
1301        if (ops->destroy)
1302                ops->destroy(sch);
1303        goto err_out3;
1304}
1305
1306static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1307                        struct netlink_ext_ack *extack)
1308{
1309        struct qdisc_size_table *ostab, *stab = NULL;
1310        int err = 0;
1311
1312        if (tca[TCA_OPTIONS]) {
1313                if (!sch->ops->change) {
1314                        NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1315                        return -EINVAL;
1316                }
1317                if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1318                        NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1319                        return -EOPNOTSUPP;
1320                }
1321                err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1322                if (err)
1323                        return err;
1324        }
1325
1326        if (tca[TCA_STAB]) {
1327                stab = qdisc_get_stab(tca[TCA_STAB], extack);
1328                if (IS_ERR(stab))
1329                        return PTR_ERR(stab);
1330        }
1331
1332        ostab = rtnl_dereference(sch->stab);
1333        rcu_assign_pointer(sch->stab, stab);
1334        qdisc_put_stab(ostab);
1335
1336        if (tca[TCA_RATE]) {
1337                /* NB: ignores errors from replace_estimator
1338                   because change can't be undone. */
1339                if (sch->flags & TCQ_F_MQROOT)
1340                        goto out;
1341                gen_replace_estimator(&sch->bstats,
1342                                      sch->cpu_bstats,
1343                                      &sch->rate_est,
1344                                      NULL,
1345                                      qdisc_root_sleeping_running(sch),
1346                                      tca[TCA_RATE]);
1347        }
1348out:
1349        return 0;
1350}
1351
/*
 * Walker state used by check_loop() / check_loop_fn() while searching the
 * class hierarchy below a qdisc.
 */
1352struct check_loop_arg {
            /* Embedded walker handed to the class ops ->walk();
             * check_loop_fn() recovers this structure from the walker
             * pointer it receives.
             */
1353        struct qdisc_walker     w;
            /* Qdisc that must not be found among the leaves. */
1354        struct Qdisc            *p;
            /* Current nesting level; check_loop_fn() gives up above 7. */
1355        int                     depth;
1356};
1357
/* Declared ahead of check_loop(), which installs it as the walker callback. */
1358static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1359                         struct qdisc_walker *w);
1360
1361static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1362{
1363        struct check_loop_arg   arg;
1364
1365        if (q->ops->cl_ops == NULL)
1366                return 0;
1367
1368        arg.w.stop = arg.w.skip = arg.w.count = 0;
1369        arg.w.fn = check_loop_fn;
1370        arg.depth = depth;
1371        arg.p = p;
1372        q->ops->cl_ops->walk(q, &arg.w);
1373        return arg.w.stop ? -ELOOP : 0;
1374}
1375
1376static int
1377check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1378{
1379        struct Qdisc *leaf;
1380        const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1381        struct check_loop_arg *arg = (struct check_loop_arg *)w;
1382
1383        leaf = cops->leaf(q, cl);
1384        if (leaf) {
1385                if (leaf == arg->p || arg->depth > 7)
1386                        return -ELOOP;
1387                return check_loop(leaf, arg->p, arg->depth + 1);
1388        }
1389        return 0;
1390}
1391
/*
 * Netlink attribute policy for TCA_* attributes; the qdisc handlers in
 * this file pass it to nlmsg_parse_deprecated().
 * NOTE(review): not static, so presumably shared with other files -
 * confirm where else it is referenced.
 */
1392const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
1393        [TCA_KIND]              = { .type = NLA_STRING },
1394        [TCA_RATE]              = { .type = NLA_BINARY,
1395                                    .len = sizeof(struct tc_estimator) },
1396        [TCA_STAB]              = { .type = NLA_NESTED },
1397        [TCA_DUMP_INVISIBLE]    = { .type = NLA_FLAG },
1398        [TCA_CHAIN]             = { .type = NLA_U32 },
1399        [TCA_INGRESS_BLOCK]     = { .type = NLA_U32 },
1400        [TCA_EGRESS_BLOCK]      = { .type = NLA_U32 },
1401};
1402
1403/*
1404 * Delete/get qdisc.
1405 */
1406
1407static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1408                        struct netlink_ext_ack *extack)
1409{
1410        struct net *net = sock_net(skb->sk);
1411        struct tcmsg *tcm = nlmsg_data(n);
1412        struct nlattr *tca[TCA_MAX + 1];
1413        struct net_device *dev;
1414        u32 clid;
1415        struct Qdisc *q = NULL;
1416        struct Qdisc *p = NULL;
1417        int err;
1418
1419        if ((n->nlmsg_type != RTM_GETQDISC) &&
1420            !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1421                return -EPERM;
1422
1423        err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1424                                     rtm_tca_policy, extack);
1425        if (err < 0)
1426                return err;
1427
1428        dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1429        if (!dev)
1430                return -ENODEV;
1431
1432        clid = tcm->tcm_parent;
1433        if (clid) {
1434                if (clid != TC_H_ROOT) {
1435                        if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1436                                p = qdisc_lookup(dev, TC_H_MAJ(clid));
1437                                if (!p) {
1438                                        NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1439                                        return -ENOENT;
1440                                }
1441                                q = qdisc_leaf(p, clid);
1442                        } else if (dev_ingress_queue(dev)) {
1443                                q = dev_ingress_queue(dev)->qdisc_sleeping;
1444                        }
1445                } else {
1446                        q = dev->qdisc;
1447                }
1448                if (!q) {
1449                        NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1450                        return -ENOENT;
1451                }
1452
1453                if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1454                        NL_SET_ERR_MSG(extack, "Invalid handle");
1455                        return -EINVAL;
1456                }
1457        } else {
1458                q = qdisc_lookup(dev, tcm->tcm_handle);
1459                if (!q) {
1460                        NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1461                        return -ENOENT;
1462                }
1463        }
1464
1465        if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1466                NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1467                return -EINVAL;
1468        }
1469
1470        if (n->nlmsg_type == RTM_DELQDISC) {
1471                if (!clid) {
1472                        NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1473                        return -EINVAL;
1474                }
1475                if (q->handle == 0) {
1476                        NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1477                        return -ENOENT;
1478                }
1479                err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1480                if (err != 0)
1481                        return err;
1482        } else {
1483                qdisc_notify(net, skb, n, clid, NULL, q);
1484        }
1485        return 0;
1486}
1487
1488/*
1489 * Create/change qdisc.
1490 */
1491
/*
 * Handler for qdisc create/change requests; requires CAP_NET_ADMIN.
 *
 * Depending on tcm_parent, tcm_handle and the NLM_F_CREATE / NLM_F_REPLACE
 * / NLM_F_EXCL flags, the request ends in one of three ways:
 *  - the parameters of an existing qdisc are changed (qdisc_change()),
 *  - an existing qdisc found by handle is grafted under the given parent
 *    (label "graft", after taking a reference on it),
 *  - a new qdisc is created and then grafted (label "create_n_graft").
 *
 * When qdisc_create() reports -EAGAIN the whole request is re-parsed and
 * re-run from the "replay" label.
 *
 * Returns 0 on success or a negative errno.
 */
1492static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1493                           struct netlink_ext_ack *extack)
1494{
1495        struct net *net = sock_net(skb->sk);
1496        struct tcmsg *tcm;
1497        struct nlattr *tca[TCA_MAX + 1];
1498        struct net_device *dev;
1499        u32 clid;
1500        struct Qdisc *q, *p;
1501        int err;
1502
1503        if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1504                return -EPERM;
1505
1506replay:
1507        /* Reinit, just in case something touches this. */
1508        err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1509                                     rtm_tca_policy, extack);
1510        if (err < 0)
1511                return err;
1512
1513        tcm = nlmsg_data(n);
1514        clid = tcm->tcm_parent;
1515        q = p = NULL;
1516
1517        dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1518        if (!dev)
1519                return -ENODEV;
1520
1521
            /*
             * With a parent given, q becomes the qdisc currently attached
             * at that position (root, ingress, or leaf of a class of p).
             */
1522        if (clid) {
1523                if (clid != TC_H_ROOT) {
1524                        if (clid != TC_H_INGRESS) {
1525                                p = qdisc_lookup(dev, TC_H_MAJ(clid));
1526                                if (!p) {
1527                                        NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1528                                        return -ENOENT;
1529                                }
1530                                q = qdisc_leaf(p, clid);
1531                        } else if (dev_ingress_queue_create(dev)) {
1532                                q = dev_ingress_queue(dev)->qdisc_sleeping;
1533                        }
1534                } else {
1535                        q = dev->qdisc;
1536                }
1537
1538                /* It may be default qdisc, ignore it */
1539                if (q && q->handle == 0)
1540                        q = NULL;
1541
1542                if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1543                        if (tcm->tcm_handle) {
1544                                if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1545                                        NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1546                                        return -EEXIST;
1547                                }
1548                                if (TC_H_MIN(tcm->tcm_handle)) {
1549                                        NL_SET_ERR_MSG(extack, "Invalid minor handle");
1550                                        return -EINVAL;
1551                                }
1552                                q = qdisc_lookup(dev, tcm->tcm_handle);
1553                                if (!q)
1554                                        goto create_n_graft;
1555                                if (n->nlmsg_flags & NLM_F_EXCL) {
1556                                        NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1557                                        return -EEXIST;
1558                                }
1559                                if (tca[TCA_KIND] &&
1560                                    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1561                                        NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1562                                        return -EINVAL;
1563                                }
1564                                if (q == p ||
1565                                    (p && check_loop(q, p, 0))) {
1566                                        NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1567                                        return -ELOOP;
1568                                }
                                    /* Existing qdisc gets grafted at a new
                                     * place: take a reference for it.
                                     */
1569                                qdisc_refcount_inc(q);
1570                                goto graft;
1571                        } else {
1572                                if (!q)
1573                                        goto create_n_graft;
1574
1575                                /* This magic test requires explanation.
1576                                 *
1577                                 *   We know, that some child q is already
1578                                 *   attached to this parent and have choice:
1579                                 *   either to change it or to create/graft new one.
1580                                 *
1581                                 *   1. We are allowed to create/graft only
1582                                 *   if CREATE and REPLACE flags are set.
1583                                 *
1584                                 *   2. If EXCL is set, requestor wanted to say,
1585                                 *   that qdisc tcm_handle is not expected
1586                                 *   to exist, so that we choose create/graft too.
1587                                 *
1588                                 *   3. The last case is when no flags are set.
1589                                 *   Alas, it is sort of hole in API, we
1590                                 *   cannot decide what to do unambiguously.
1591                                 *   For now we select create/graft, if
1592                                 *   user gave KIND, which does not match existing.
1593                                 */
1594                                if ((n->nlmsg_flags & NLM_F_CREATE) &&
1595                                    (n->nlmsg_flags & NLM_F_REPLACE) &&
1596                                    ((n->nlmsg_flags & NLM_F_EXCL) ||
1597                                     (tca[TCA_KIND] &&
1598                                      nla_strcmp(tca[TCA_KIND], q->ops->id))))
1599                                        goto create_n_graft;
1600                        }
1601                }
1602        } else {
1603                if (!tcm->tcm_handle) {
1604                        NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1605                        return -EINVAL;
1606                }
1607                q = qdisc_lookup(dev, tcm->tcm_handle);
1608        }
1609
1610        /* Change qdisc parameters */
1611        if (!q) {
1612                NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1613                return -ENOENT;
1614        }
1615        if (n->nlmsg_flags & NLM_F_EXCL) {
1616                NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1617                return -EEXIST;
1618        }
1619        if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1620                NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1621                return -EINVAL;
1622        }
1623        err = qdisc_change(q, tca, extack);
1624        if (err == 0)
1625                qdisc_notify(net, skb, n, clid, NULL, q);
1626        return err;
1627
1628create_n_graft:
1629        if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1630                NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1631                return -ENOENT;
1632        }
1633        if (clid == TC_H_INGRESS) {
1634                if (dev_ingress_queue(dev)) {
1635                        q = qdisc_create(dev, dev_ingress_queue(dev), p,
1636                                         tcm->tcm_parent, tcm->tcm_parent,
1637                                         tca, &err, extack);
1638                } else {
1639                        NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1640                        err = -ENOENT;
1641                }
1642        } else {
1643                struct netdev_queue *dev_queue;
1644
                    /*
                     * Queue for the new qdisc: the parent's select_queue()
                     * choice if it has one, else the parent's own queue,
                     * else tx queue 0 of the device.
                     */
1645                if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1646                        dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1647                else if (p)
1648                        dev_queue = p->dev_queue;
1649                else
1650                        dev_queue = netdev_get_tx_queue(dev, 0);
1651
1652                q = qdisc_create(dev, dev_queue, p,
1653                                 tcm->tcm_parent, tcm->tcm_handle,
1654                                 tca, &err, extack);
1655        }
1656        if (q == NULL) {
                    /* -EAGAIN: start over from the top of the request. */
1657                if (err == -EAGAIN)
1658                        goto replay;
1659                return err;
1660        }
1661
1662graft:
            /*
             * q is either the qdisc just created or an existing one whose
             * refcount was raised above; it is released again if grafting
             * fails.
             */
1663        err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1664        if (err) {
1665                if (q)
1666                        qdisc_put(q);
1667                return err;
1668        }
1669
1670        return 0;
1671}
1672
1673static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1674                              struct netlink_callback *cb,
1675                              int *q_idx_p, int s_q_idx, bool recur,
1676                              bool dump_invisible)
1677{
1678        int ret = 0, q_idx = *q_idx_p;
1679        struct Qdisc *q;
1680        int b;
1681
1682        if (!root)
1683                return 0;
1684
1685        q = root;
1686        if (q_idx < s_q_idx) {
1687                q_idx++;
1688        } else {
1689                if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1690                    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1691                                  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1692                                  RTM_NEWQDISC) <= 0)
1693                        goto done;
1694                q_idx++;
1695        }
1696
1697        /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1698         * itself has already been dumped.
1699         *
1700         * If we've already dumped the top-level (ingress) qdisc above and the global
1701         * qdisc hashtable, we don't want to hit it again
1702         */
1703        if (!qdisc_dev(root) || !recur)
1704                goto out;
1705
1706        hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1707                if (q_idx < s_q_idx) {
1708                        q_idx++;
1709                        continue;
1710                }
1711                if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1712                    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1713                                  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1714                                  RTM_NEWQDISC) <= 0)
1715                        goto done;
1716                q_idx++;
1717        }
1718
1719out:
1720        *q_idx_p = q_idx;
1721        return ret;
1722done:
1723        ret = -1;
1724        goto out;
1725}
1726
1727static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1728{
1729        struct net *net = sock_net(skb->sk);
1730        int idx, q_idx;
1731        int s_idx, s_q_idx;
1732        struct net_device *dev;
1733        const struct nlmsghdr *nlh = cb->nlh;
1734        struct nlattr *tca[TCA_MAX + 1];
1735        int err;
1736
1737        s_idx = cb->args[0];
1738        s_q_idx = q_idx = cb->args[1];
1739
1740        idx = 0;
1741        ASSERT_RTNL();
1742
1743        err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1744                                     rtm_tca_policy, cb->extack);
1745        if (err < 0)
1746                return err;
1747
1748        for_each_netdev(net, dev) {
1749                struct netdev_queue *dev_queue;
1750
1751                if (idx < s_idx)
1752                        goto cont;
1753                if (idx > s_idx)
1754                        s_q_idx = 0;
1755                q_idx = 0;
1756
1757                if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1758                                       true, tca[TCA_DUMP_INVISIBLE]) < 0)
1759                        goto done;
1760
1761                dev_queue = dev_ingress_queue(dev);
1762                if (dev_queue &&
1763                    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1764                                       &q_idx, s_q_idx, false,
1765                                       tca[TCA_DUMP_INVISIBLE]) < 0)
1766                        goto done;
1767
1768cont:
1769                idx++;
1770        }
1771
1772done:
1773        cb->args[0] = idx;
1774        cb->args[1] = q_idx;
1775
1776        return skb->len;
1777}
1778
1779
1780
/************************************************
 *      Traffic classes manipulation.           *
 ************************************************/
1784
1785static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1786                          unsigned long cl,
1787                          u32 portid, u32 seq, u16 flags, int event)
1788{
1789        struct tcmsg *tcm;
1790        struct nlmsghdr  *nlh;
1791        unsigned char *b = skb_tail_pointer(skb);
1792        struct gnet_dump d;
1793        const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1794
1795        cond_resched();
1796        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1797        if (!nlh)
1798                goto out_nlmsg_trim;
1799        tcm = nlmsg_data(nlh);
1800        tcm->tcm_family = AF_UNSPEC;
1801        tcm->tcm__pad1 = 0;
1802        tcm->tcm__pad2 = 0;
1803        tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1804        tcm->tcm_parent = q->handle;
1805        tcm->tcm_handle = q->handle;
1806        tcm->tcm_info = 0;
1807        if (nla_put_string(skb, TCA_KIND, q->ops->id))
1808                goto nla_put_failure;
1809        if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1810                goto nla_put_failure;
1811
1812        if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1813                                         NULL, &d, TCA_PAD) < 0)
1814                goto nla_put_failure;
1815
1816        if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1817                goto nla_put_failure;
1818
1819        if (gnet_stats_finish_copy(&d) < 0)
1820                goto nla_put_failure;
1821
1822        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1823        return skb->len;
1824
1825out_nlmsg_trim:
1826nla_put_failure:
1827        nlmsg_trim(skb, b);
1828        return -1;
1829}
1830
1831static int tclass_notify(struct net *net, struct sk_buff *oskb,
1832                         struct nlmsghdr *n, struct Qdisc *q,
1833                         unsigned long cl, int event)
1834{
1835        struct sk_buff *skb;
1836        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1837        int err = 0;
1838
1839        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1840        if (!skb)
1841                return -ENOBUFS;
1842
1843        if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1844                kfree_skb(skb);
1845                return -EINVAL;
1846        }
1847
1848        err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1849                             n->nlmsg_flags & NLM_F_ECHO);
1850        if (err > 0)
1851                err = 0;
1852        return err;
1853}
1854
1855static int tclass_del_notify(struct net *net,
1856                             const struct Qdisc_class_ops *cops,
1857                             struct sk_buff *oskb, struct nlmsghdr *n,
1858                             struct Qdisc *q, unsigned long cl)
1859{
1860        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1861        struct sk_buff *skb;
1862        int err = 0;
1863
1864        if (!cops->delete)
1865                return -EOPNOTSUPP;
1866
1867        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1868        if (!skb)
1869                return -ENOBUFS;
1870
1871        if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1872                           RTM_DELTCLASS) < 0) {
1873                kfree_skb(skb);
1874                return -EINVAL;
1875        }
1876
1877        err = cops->delete(q, cl);
1878        if (err) {
1879                kfree_skb(skb);
1880                return err;
1881        }
1882
1883        err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1884                             n->nlmsg_flags & NLM_F_ECHO);
1885        if (err > 0)
1886                err = 0;
1887        return err;
1888}
1889
1890#ifdef CONFIG_NET_CLS
1891
1892struct tcf_bind_args {
1893        struct tcf_walker w;
1894        u32 classid;
1895        unsigned long cl;
1896};
1897
1898static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1899{
1900        struct tcf_bind_args *a = (void *)arg;
1901
1902        if (tp->ops->bind_class) {
1903                struct Qdisc *q = tcf_block_q(tp->chain->block);
1904
1905                sch_tree_lock(q);
1906                tp->ops->bind_class(n, a->classid, a->cl);
1907                sch_tree_unlock(q);
1908        }
1909        return 0;
1910}
1911
1912static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1913                           unsigned long new_cl)
1914{
1915        const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1916        struct tcf_block *block;
1917        struct tcf_chain *chain;
1918        unsigned long cl;
1919
1920        cl = cops->find(q, portid);
1921        if (!cl)
1922                return;
1923        if (!cops->tcf_block)
1924                return;
1925        block = cops->tcf_block(q, cl, NULL);
1926        if (!block)
1927                return;
1928        for (chain = tcf_get_next_chain(block, NULL);
1929             chain;
1930             chain = tcf_get_next_chain(block, chain)) {
1931                struct tcf_proto *tp;
1932
1933                for (tp = tcf_get_next_proto(chain, NULL, true);
1934                     tp; tp = tcf_get_next_proto(chain, tp, true)) {
1935                        struct tcf_bind_args arg = {};
1936
1937                        arg.w.fn = tcf_node_bind;
1938                        arg.classid = clid;
1939                        arg.cl = new_cl;
1940                        tp->ops->walk(tp, &arg.w, true);
1941                }
1942        }
1943}
1944
1945#else
1946
1947static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1948                           unsigned long new_cl)
1949{
1950}
1951
1952#endif
1953
1954static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1955                         struct netlink_ext_ack *extack)
1956{
1957        struct net *net = sock_net(skb->sk);
1958        struct tcmsg *tcm = nlmsg_data(n);
1959        struct nlattr *tca[TCA_MAX + 1];
1960        struct net_device *dev;
1961        struct Qdisc *q = NULL;
1962        const struct Qdisc_class_ops *cops;
1963        unsigned long cl = 0;
1964        unsigned long new_cl;
1965        u32 portid;
1966        u32 clid;
1967        u32 qid;
1968        int err;
1969
1970        if ((n->nlmsg_type != RTM_GETTCLASS) &&
1971            !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1972                return -EPERM;
1973
1974        err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1975                                     rtm_tca_policy, extack);
1976        if (err < 0)
1977                return err;
1978
1979        dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1980        if (!dev)
1981                return -ENODEV;
1982
1983        /*
1984           parent == TC_H_UNSPEC - unspecified parent.
1985           parent == TC_H_ROOT   - class is root, which has no parent.
1986           parent == X:0         - parent is root class.
1987           parent == X:Y         - parent is a node in hierarchy.
1988           parent == 0:Y         - parent is X:Y, where X:0 is qdisc.
1989
1990           handle == 0:0         - generate handle from kernel pool.
1991           handle == 0:Y         - class is X:Y, where X:0 is qdisc.
1992           handle == X:Y         - clear.
1993           handle == X:0         - root class.
1994         */
1995
1996        /* Step 1. Determine qdisc handle X:0 */
1997
1998        portid = tcm->tcm_parent;
1999        clid = tcm->tcm_handle;
2000        qid = TC_H_MAJ(clid);
2001
2002        if (portid != TC_H_ROOT) {
2003                u32 qid1 = TC_H_MAJ(portid);
2004
2005                if (qid && qid1) {
2006                        /* If both majors are known, they must be identical. */
2007                        if (qid != qid1)
2008                                return -EINVAL;
2009                } else if (qid1) {
2010                        qid = qid1;
2011                } else if (qid == 0)
2012                        qid = dev->qdisc->handle;
2013
2014                /* Now qid is genuine qdisc handle consistent
2015                 * both with parent and child.
2016                 *
2017                 * TC_H_MAJ(portid) still may be unspecified, complete it now.
2018                 */
2019                if (portid)
2020                        portid = TC_H_MAKE(qid, portid);
2021        } else {
2022                if (qid == 0)
2023                        qid = dev->qdisc->handle;
2024        }
2025
2026        /* OK. Locate qdisc */
2027        q = qdisc_lookup(dev, qid);
2028        if (!q)
2029                return -ENOENT;
2030
2031        /* An check that it supports classes */
2032        cops = q->ops->cl_ops;
2033        if (cops == NULL)
2034                return -EINVAL;
2035
2036        /* Now try to get class */
2037        if (clid == 0) {
2038                if (portid == TC_H_ROOT)
2039                        clid = qid;
2040        } else
2041                clid = TC_H_MAKE(qid, clid);
2042
2043        if (clid)
2044                cl = cops->find(q, clid);
2045
2046        if (cl == 0) {
2047                err = -ENOENT;
2048                if (n->nlmsg_type != RTM_NEWTCLASS ||
2049                    !(n->nlmsg_flags & NLM_F_CREATE))
2050                        goto out;
2051        } else {
2052                switch (n->nlmsg_type) {
2053                case RTM_NEWTCLASS:
2054                        err = -EEXIST;
2055                        if (n->nlmsg_flags & NLM_F_EXCL)
2056                                goto out;
2057                        break;
2058                case RTM_DELTCLASS:
2059                        err = tclass_del_notify(net, cops, skb, n, q, cl);
2060                        /* Unbind the class with flilters with 0 */
2061                        tc_bind_tclass(q, portid, clid, 0);
2062                        goto out;
2063                case RTM_GETTCLASS:
2064                        err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
2065                        goto out;
2066                default:
2067                        err = -EINVAL;
2068                        goto out;
2069                }
2070        }
2071
2072        if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2073                NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2074                return -EOPNOTSUPP;
2075        }
2076
2077        new_cl = cl;
2078        err = -EOPNOTSUPP;
2079        if (cops->change)
2080                err = cops->change(q, clid, portid, tca, &new_cl, extack);
2081        if (err == 0) {
2082                tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
2083                /* We just create a new class, need to do reverse binding. */
2084                if (cl != new_cl)
2085                        tc_bind_tclass(q, portid, clid, new_cl);
2086        }
2087out:
2088        return err;
2089}
2090
2091struct qdisc_dump_args {
2092        struct qdisc_walker     w;
2093        struct sk_buff          *skb;
2094        struct netlink_callback *cb;
2095};
2096
2097static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2098                            struct qdisc_walker *arg)
2099{
2100        struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2101
2102        return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2103                              a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2104                              RTM_NEWTCLASS);
2105}
2106
2107static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2108                                struct tcmsg *tcm, struct netlink_callback *cb,
2109                                int *t_p, int s_t)
2110{
2111        struct qdisc_dump_args arg;
2112
2113        if (tc_qdisc_dump_ignore(q, false) ||
2114            *t_p < s_t || !q->ops->cl_ops ||
2115            (tcm->tcm_parent &&
2116             TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2117                (*t_p)++;
2118                return 0;
2119        }
2120        if (*t_p > s_t)
2121                memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2122        arg.w.fn = qdisc_class_dump;
2123        arg.skb = skb;
2124        arg.cb = cb;
2125        arg.w.stop  = 0;
2126        arg.w.skip = cb->args[1];
2127        arg.w.count = 0;
2128        q->ops->cl_ops->walk(q, &arg.w);
2129        cb->args[1] = arg.w.count;
2130        if (arg.w.stop)
2131                return -1;
2132        (*t_p)++;
2133        return 0;
2134}
2135
2136static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2137                               struct tcmsg *tcm, struct netlink_callback *cb,
2138                               int *t_p, int s_t)
2139{
2140        struct Qdisc *q;
2141        int b;
2142
2143        if (!root)
2144                return 0;
2145
2146        if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2147                return -1;
2148
2149        if (!qdisc_dev(root))
2150                return 0;
2151
2152        if (tcm->tcm_parent) {
2153                q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2154                if (q && q != root &&
2155                    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2156                        return -1;
2157                return 0;
2158        }
2159        hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2160                if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2161                        return -1;
2162        }
2163
2164        return 0;
2165}
2166
2167static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2168{
2169        struct tcmsg *tcm = nlmsg_data(cb->nlh);
2170        struct net *net = sock_net(skb->sk);
2171        struct netdev_queue *dev_queue;
2172        struct net_device *dev;
2173        int t, s_t;
2174
2175        if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2176                return 0;
2177        dev = dev_get_by_index(net, tcm->tcm_ifindex);
2178        if (!dev)
2179                return 0;
2180
2181        s_t = cb->args[0];
2182        t = 0;
2183
2184        if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
2185                goto done;
2186
2187        dev_queue = dev_ingress_queue(dev);
2188        if (dev_queue &&
2189            tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
2190                                &t, s_t) < 0)
2191                goto done;
2192
2193done:
2194        cb->args[0] = t;
2195
2196        dev_put(dev);
2197        return skb->len;
2198}
2199
2200#ifdef CONFIG_PROC_FS
2201static int psched_show(struct seq_file *seq, void *v)
2202{
2203        seq_printf(seq, "%08x %08x %08x %08x\n",
2204                   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2205                   1000000,
2206                   (u32)NSEC_PER_SEC / hrtimer_resolution);
2207
2208        return 0;
2209}
2210
2211static int __net_init psched_net_init(struct net *net)
2212{
2213        struct proc_dir_entry *e;
2214
2215        e = proc_create_single("psched", 0, net->proc_net, psched_show);
2216        if (e == NULL)
2217                return -ENOMEM;
2218
2219        return 0;
2220}
2221
2222static void __net_exit psched_net_exit(struct net *net)
2223{
2224        remove_proc_entry("psched", net->proc_net);
2225}
2226#else
2227static int __net_init psched_net_init(struct net *net)
2228{
2229        return 0;
2230}
2231
2232static void __net_exit psched_net_exit(struct net *net)
2233{
2234}
2235#endif
2236
2237static struct pernet_operations psched_net_ops = {
2238        .init = psched_net_init,
2239        .exit = psched_net_exit,
2240};
2241
2242static int __init pktsched_init(void)
2243{
2244        int err;
2245
2246        err = register_pernet_subsys(&psched_net_ops);
2247        if (err) {
2248                pr_err("pktsched_init: "
2249                       "cannot initialize per netns operations\n");
2250                return err;
2251        }
2252
2253        register_qdisc(&pfifo_fast_ops);
2254        register_qdisc(&pfifo_qdisc_ops);
2255        register_qdisc(&bfifo_qdisc_ops);
2256        register_qdisc(&pfifo_head_drop_qdisc_ops);
2257        register_qdisc(&mq_qdisc_ops);
2258        register_qdisc(&noqueue_qdisc_ops);
2259
2260        rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2261        rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2262        rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2263                      0);
2264        rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2265        rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2266        rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2267                      0);
2268
2269        return 0;
2270}
2271
2272subsys_initcall(pktsched_init);
2273