linux/net/sched/sch_generic.c
   1/*
   2 * net/sched/sch_generic.c      Generic packet scheduler routines.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
  11 *              - Ingress support
  12 */
  13
  14#include <linux/bitops.h>
  15#include <linux/module.h>
  16#include <linux/types.h>
  17#include <linux/kernel.h>
  18#include <linux/sched.h>
  19#include <linux/string.h>
  20#include <linux/errno.h>
  21#include <linux/netdevice.h>
  22#include <linux/skbuff.h>
  23#include <linux/rtnetlink.h>
  24#include <linux/init.h>
  25#include <linux/rcupdate.h>
  26#include <linux/list.h>
  27#include <linux/slab.h>
  28#include <linux/if_vlan.h>
  29#include <linux/skb_array.h>
  30#include <linux/if_macvlan.h>
  31#include <net/sch_generic.h>
  32#include <net/pkt_sched.h>
  33#include <net/dst.h>
  34#include <trace/events/qdisc.h>
  35#include <trace/events/net.h>
  36#include <net/xfrm.h>
  37
  38/* Qdisc to use by default */
  39const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
  40EXPORT_SYMBOL(default_qdisc_ops);
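/*
 * For illustration: pfifo_fast is only the built-in default; it can
 * typically be overridden at runtime through the net.core.default_qdisc
 * sysctl, which ends up calling qdisc_set_default() in sch_api.c so that
 * default_qdisc_ops points at another scheduler for newly attached root
 * qdiscs.
 */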
  41
  42/* Main transmission queue. */
  43
  44/* Modifications to data participating in scheduling must be protected with
  45 * qdisc_lock(qdisc) spinlock.
  46 *
  47 * The idea is the following:
  48 * - enqueue, dequeue are serialized via qdisc root lock
  49 * - ingress filtering is also serialized via qdisc root lock
  50 * - updates to tree and tree walking are only done under the rtnl mutex.
  51 */
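/*
 * For lockless qdiscs (TCQ_F_NOLOCK) the root lock is not taken on the
 * transmit path; the helpers below instead take qdisc_lock(q) only around
 * their own accesses to q->gso_skb and q->skb_bad_txq, roughly:
 *
 *	if (q->flags & TCQ_F_NOLOCK) {
 *		lock = qdisc_lock(q);
 *		spin_lock(lock);
 *	}
 *	... touch q->skb_bad_txq / q->gso_skb ...
 *	if (lock)
 *		spin_unlock(lock);
 */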
  52
  53static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
  54{
  55        const struct netdev_queue *txq = q->dev_queue;
  56        spinlock_t *lock = NULL;
  57        struct sk_buff *skb;
  58
  59        if (q->flags & TCQ_F_NOLOCK) {
  60                lock = qdisc_lock(q);
  61                spin_lock(lock);
  62        }
  63
  64        skb = skb_peek(&q->skb_bad_txq);
  65        if (skb) {
  66                /* check the reason for requeuing, without taking the tx lock first */
  67                txq = skb_get_tx_queue(txq->dev, skb);
  68                if (!netif_xmit_frozen_or_stopped(txq)) {
  69                        skb = __skb_dequeue(&q->skb_bad_txq);
  70                        if (qdisc_is_percpu_stats(q)) {
  71                                qdisc_qstats_cpu_backlog_dec(q, skb);
  72                                qdisc_qstats_cpu_qlen_dec(q);
  73                        } else {
  74                                qdisc_qstats_backlog_dec(q, skb);
  75                                q->q.qlen--;
  76                        }
  77                } else {
  78                        skb = NULL;
  79                }
  80        }
  81
  82        if (lock)
  83                spin_unlock(lock);
  84
  85        return skb;
  86}
  87
  88static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
  89{
  90        struct sk_buff *skb = skb_peek(&q->skb_bad_txq);
  91
  92        if (unlikely(skb))
  93                skb = __skb_dequeue_bad_txq(q);
  94
  95        return skb;
  96}
  97
  98static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
  99                                             struct sk_buff *skb)
 100{
 101        spinlock_t *lock = NULL;
 102
 103        if (q->flags & TCQ_F_NOLOCK) {
 104                lock = qdisc_lock(q);
 105                spin_lock(lock);
 106        }
 107
 108        __skb_queue_tail(&q->skb_bad_txq, skb);
 109
 110        if (qdisc_is_percpu_stats(q)) {
 111                qdisc_qstats_cpu_backlog_inc(q, skb);
 112                qdisc_qstats_cpu_qlen_inc(q);
 113        } else {
 114                qdisc_qstats_backlog_inc(q, skb);
 115                q->q.qlen++;
 116        }
 117
 118        if (lock)
 119                spin_unlock(lock);
 120}
 121
 122static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 123{
 124        spinlock_t *lock = NULL;
 125
 126        if (q->flags & TCQ_F_NOLOCK) {
 127                lock = qdisc_lock(q);
 128                spin_lock(lock);
 129        }
 130
 131        while (skb) {
 132                struct sk_buff *next = skb->next;
 133
 134                __skb_queue_tail(&q->gso_skb, skb);
 135
 136                /* it's still part of the queue */
 137                if (qdisc_is_percpu_stats(q)) {
 138                        qdisc_qstats_cpu_requeues_inc(q);
 139                        qdisc_qstats_cpu_backlog_inc(q, skb);
 140                        qdisc_qstats_cpu_qlen_inc(q);
 141                } else {
 142                        q->qstats.requeues++;
 143                        qdisc_qstats_backlog_inc(q, skb);
 144                        q->q.qlen++;
 145                }
 146
 147                skb = next;
 148        }
 149        if (lock)
 150                spin_unlock(lock);
 151        __netif_schedule(q);
 152}
 153
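/* Bulk dequeue: once a first skb has been pulled, keep dequeuing and
 * chaining skbs via skb->next while the BQL budget reported by
 * qdisc_avail_bulklimit(txq) is not exhausted.  As a rough example, a
 * remaining budget of ~64KB lets a few dozen 1500-byte frames (or a couple
 * of large GSO skbs) be handed to the driver in a single xmit call.
 */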
 154static void try_bulk_dequeue_skb(struct Qdisc *q,
 155                                 struct sk_buff *skb,
 156                                 const struct netdev_queue *txq,
 157                                 int *packets)
 158{
 159        int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
 160
 161        while (bytelimit > 0) {
 162                struct sk_buff *nskb = q->dequeue(q);
 163
 164                if (!nskb)
 165                        break;
 166
 167                bytelimit -= nskb->len; /* covers GSO len */
 168                skb->next = nskb;
 169                skb = nskb;
 170                (*packets)++; /* GSO counts as one pkt */
 171        }
 172        skb_mark_not_on_list(skb);
 173}
 174
 175/* This variant of try_bulk_dequeue_skb() makes sure
 176 * all skbs in the chain are for the same txq
 177 */
 178static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
 179                                      struct sk_buff *skb,
 180                                      int *packets)
 181{
 182        int mapping = skb_get_queue_mapping(skb);
 183        struct sk_buff *nskb;
 184        int cnt = 0;
 185
 186        do {
 187                nskb = q->dequeue(q);
 188                if (!nskb)
 189                        break;
 190                if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
 191                        qdisc_enqueue_skb_bad_txq(q, nskb);
 192                        break;
 193                }
 194                skb->next = nskb;
 195                skb = nskb;
 196        } while (++cnt < 8);
 197        (*packets) += cnt;
 198        skb_mark_not_on_list(skb);
 199}
 200
 201/* Note that dequeue_skb can possibly return a SKB list (via skb->next).
 202 * A requeued skb (via q->gso_skb) can also be a SKB list.
 203 */
 204static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 205                                   int *packets)
 206{
 207        const struct netdev_queue *txq = q->dev_queue;
 208        struct sk_buff *skb = NULL;
 209
 210        *packets = 1;
 211        if (unlikely(!skb_queue_empty(&q->gso_skb))) {
 212                spinlock_t *lock = NULL;
 213
 214                if (q->flags & TCQ_F_NOLOCK) {
 215                        lock = qdisc_lock(q);
 216                        spin_lock(lock);
 217                }
 218
 219                skb = skb_peek(&q->gso_skb);
 220
 221                /* skb may be NULL if another CPU pulls gso_skb off between
 222                 * the empty check and taking the lock.
 223                 */
 224                if (!skb) {
 225                        if (lock)
 226                                spin_unlock(lock);
 227                        goto validate;
 228                }
 229
 230                /* skbs in gso_skb have already been validated */
 231                *validate = false;
 232                if (xfrm_offload(skb))
 233                        *validate = true;
 234                /* check the reason for requeuing, without taking the tx lock first */
 235                txq = skb_get_tx_queue(txq->dev, skb);
 236                if (!netif_xmit_frozen_or_stopped(txq)) {
 237                        skb = __skb_dequeue(&q->gso_skb);
 238                        if (qdisc_is_percpu_stats(q)) {
 239                                qdisc_qstats_cpu_backlog_dec(q, skb);
 240                                qdisc_qstats_cpu_qlen_dec(q);
 241                        } else {
 242                                qdisc_qstats_backlog_dec(q, skb);
 243                                q->q.qlen--;
 244                        }
 245                } else {
 246                        skb = NULL;
 247                }
 248                if (lock)
 249                        spin_unlock(lock);
 250                goto trace;
 251        }
 252validate:
 253        *validate = true;
 254
 255        if ((q->flags & TCQ_F_ONETXQUEUE) &&
 256            netif_xmit_frozen_or_stopped(txq))
 257                return skb;
 258
 259        skb = qdisc_dequeue_skb_bad_txq(q);
 260        if (unlikely(skb))
 261                goto bulk;
 262        skb = q->dequeue(q);
 263        if (skb) {
 264bulk:
 265                if (qdisc_may_bulk(q))
 266                        try_bulk_dequeue_skb(q, skb, txq, packets);
 267                else
 268                        try_bulk_dequeue_skb_slow(q, skb, packets);
 269        }
 270trace:
 271        trace_qdisc_dequeue(q, txq, *packets, skb);
 272        return skb;
 273}
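/* In short, dequeue_skb() above tries the requeued packets in q->gso_skb
 * first, then anything parked in q->skb_bad_txq, and only then calls the
 * qdisc's own ->dequeue(), applying bulking on top when the txq allows it.
 */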
 274
 275/*
 276 * Transmit possibly several skbs, and handle the return status as
 277 * required. Holding the running seqcount guarantees that
 278 * only one CPU can execute this function.
 279 *
 280 * Returns to the caller:
 281 *                              false  - hardware queue frozen, back off
 282 *                              true   - feel free to send more pkts
 283 */
 284bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 285                     struct net_device *dev, struct netdev_queue *txq,
 286                     spinlock_t *root_lock, bool validate)
 287{
 288        int ret = NETDEV_TX_BUSY;
 289        bool again = false;
 290
 291        /* And release qdisc */
 292        if (root_lock)
 293                spin_unlock(root_lock);
 294
 295        /* Note that we validate skb (GSO, checksum, ...) outside of locks */
 296        if (validate)
 297                skb = validate_xmit_skb_list(skb, dev, &again);
 298
 299#ifdef CONFIG_XFRM_OFFLOAD
 300        if (unlikely(again)) {
 301                if (root_lock)
 302                        spin_lock(root_lock);
 303
 304                dev_requeue_skb(skb, q);
 305                return false;
 306        }
 307#endif
 308
 309        if (likely(skb)) {
 310                HARD_TX_LOCK(dev, txq, smp_processor_id());
 311                if (!netif_xmit_frozen_or_stopped(txq))
 312                        skb = dev_hard_start_xmit(skb, dev, txq, &ret);
 313
 314                HARD_TX_UNLOCK(dev, txq);
 315        } else {
 316                if (root_lock)
 317                        spin_lock(root_lock);
 318                return true;
 319        }
 320
 321        if (root_lock)
 322                spin_lock(root_lock);
 323
 324        if (!dev_xmit_complete(ret)) {
 325                /* Driver returned NETDEV_TX_BUSY - requeue skb */
 326                if (unlikely(ret != NETDEV_TX_BUSY))
 327                        net_warn_ratelimited("BUG %s code %d qlen %d\n",
 328                                             dev->name, ret, q->q.qlen);
 329
 330                dev_requeue_skb(skb, q);
 331                return false;
 332        }
 333
 334        return true;
 335}
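/* For reference, the usual callers are __dev_xmit_skb() (the
 * TCQ_F_CAN_BYPASS path) and qdisc_restart() below: they pull packets off
 * the qdisc and hand them here, and this function passes them to the driver
 * via dev_hard_start_xmit() under HARD_TX_LOCK(); a NETDEV_TX_BUSY result
 * gets the list requeued through dev_requeue_skb().
 */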
 336
 337/*
 338 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 339 *
 340 * The running seqcount guarantees that only one CPU can process
 341 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 342 * this queue.
 343 *
 344 *  netif_tx_lock serializes accesses to the device driver.
 345 *
 346 *  qdisc_lock(q) and netif_tx_lock are mutually exclusive:
 347 *  if one is grabbed, the other must be free.
 348 *
 349 * Note that this procedure can be called by a watchdog timer.
 350 *
 351 * Returns to the caller:
 352 *                              false - queue is empty or throttled.
 353 *                              true  - queue is not empty.
 354 *
 355 */
 356static inline bool qdisc_restart(struct Qdisc *q, int *packets)
 357{
 358        spinlock_t *root_lock = NULL;
 359        struct netdev_queue *txq;
 360        struct net_device *dev;
 361        struct sk_buff *skb;
 362        bool validate;
 363
 364        /* Dequeue packet */
 365        skb = dequeue_skb(q, &validate, packets);
 366        if (unlikely(!skb))
 367                return false;
 368
 369        if (!(q->flags & TCQ_F_NOLOCK))
 370                root_lock = qdisc_lock(q);
 371
 372        dev = qdisc_dev(q);
 373        txq = skb_get_tx_queue(dev, skb);
 374
 375        return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
 376}
 377
 378void __qdisc_run(struct Qdisc *q)
 379{
 380        int quota = dev_tx_weight;
 381        int packets;
 382
 383        while (qdisc_restart(q, &packets)) {
 384                /*
 385                 * Ordered by possible occurrence: postpone processing if
 386                 * 1. we've exceeded the packet quota, or
 387                 * 2. another process needs the CPU.
 388                 */
 389                quota -= packets;
 390                if (quota <= 0 || need_resched()) {
 391                        __netif_schedule(q);
 392                        break;
 393                }
 394        }
 395}
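/* Quota example: dev_tx_weight is normally 64, and *packets reports how
 * many skbs each qdisc_restart() moved (a bulked run can report several,
 * while a GSO chain still counts as one).  So one __qdisc_run() invocation
 * sends roughly at most 64 packets before handing back to the scheduler
 * via __netif_schedule().
 */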
 396
 397unsigned long dev_trans_start(struct net_device *dev)
 398{
 399        unsigned long val, res;
 400        unsigned int i;
 401
 402        if (is_vlan_dev(dev))
 403                dev = vlan_dev_real_dev(dev);
 404        else if (netif_is_macvlan(dev))
 405                dev = macvlan_dev_real_dev(dev);
 406        res = netdev_get_tx_queue(dev, 0)->trans_start;
 407        for (i = 1; i < dev->num_tx_queues; i++) {
 408                val = netdev_get_tx_queue(dev, i)->trans_start;
 409                if (val && time_after(val, res))
 410                        res = val;
 411        }
 412
 413        return res;
 414}
 415EXPORT_SYMBOL(dev_trans_start);
 416
 417static void dev_watchdog(struct timer_list *t)
 418{
 419        struct net_device *dev = from_timer(dev, t, watchdog_timer);
 420
 421        netif_tx_lock(dev);
 422        if (!qdisc_tx_is_noop(dev)) {
 423                if (netif_device_present(dev) &&
 424                    netif_running(dev) &&
 425                    netif_carrier_ok(dev)) {
 426                        int some_queue_timedout = 0;
 427                        unsigned int i;
 428                        unsigned long trans_start;
 429
 430                        for (i = 0; i < dev->num_tx_queues; i++) {
 431                                struct netdev_queue *txq;
 432
 433                                txq = netdev_get_tx_queue(dev, i);
 434                                trans_start = txq->trans_start;
 435                                if (netif_xmit_stopped(txq) &&
 436                                    time_after(jiffies, (trans_start +
 437                                                         dev->watchdog_timeo))) {
 438                                        some_queue_timedout = 1;
 439                                        txq->trans_timeout++;
 440                                        break;
 441                                }
 442                        }
 443
 444                        if (some_queue_timedout) {
 445                                trace_net_dev_xmit_timeout(dev, i);
 446                                WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
 447                                       dev->name, netdev_drivername(dev), i);
 448                                dev->netdev_ops->ndo_tx_timeout(dev);
 449                        }
 450                        if (!mod_timer(&dev->watchdog_timer,
 451                                       round_jiffies(jiffies +
 452                                                     dev->watchdog_timeo)))
 453                                dev_hold(dev);
 454                }
 455        }
 456        netif_tx_unlock(dev);
 457
 458        dev_put(dev);
 459}
 460
 461void __netdev_watchdog_up(struct net_device *dev)
 462{
 463        if (dev->netdev_ops->ndo_tx_timeout) {
 464                if (dev->watchdog_timeo <= 0)
 465                        dev->watchdog_timeo = 5*HZ;
 466                if (!mod_timer(&dev->watchdog_timer,
 467                               round_jiffies(jiffies + dev->watchdog_timeo)))
 468                        dev_hold(dev);
 469        }
 470}
 471
 472static void dev_watchdog_up(struct net_device *dev)
 473{
 474        __netdev_watchdog_up(dev);
 475}
 476
 477static void dev_watchdog_down(struct net_device *dev)
 478{
 479        netif_tx_lock_bh(dev);
 480        if (del_timer(&dev->watchdog_timer))
 481                dev_put(dev);
 482        netif_tx_unlock_bh(dev);
 483}
 484
 485/**
 486 *      netif_carrier_on - set carrier
 487 *      @dev: network device
 488 *
 489 * Device has detected acquisition of carrier.
 490 */
 491void netif_carrier_on(struct net_device *dev)
 492{
 493        if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
 494                if (dev->reg_state == NETREG_UNINITIALIZED)
 495                        return;
 496                atomic_inc(&dev->carrier_up_count);
 497                linkwatch_fire_event(dev);
 498                if (netif_running(dev))
 499                        __netdev_watchdog_up(dev);
 500        }
 501}
 502EXPORT_SYMBOL(netif_carrier_on);
 503
 504/**
 505 *      netif_carrier_off - clear carrier
 506 *      @dev: network device
 507 *
 508 * Device has detected loss of carrier.
 509 */
 510void netif_carrier_off(struct net_device *dev)
 511{
 512        if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
 513                if (dev->reg_state == NETREG_UNINITIALIZED)
 514                        return;
 515                atomic_inc(&dev->carrier_down_count);
 516                linkwatch_fire_event(dev);
 517        }
 518}
 519EXPORT_SYMBOL(netif_carrier_off);
 520
 521/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
 522   under all circumstances. It is difficult to invent anything faster or
 523   cheaper.
 524 */
 525
 526static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
 527                        struct sk_buff **to_free)
 528{
 529        __qdisc_drop(skb, to_free);
 530        return NET_XMIT_CN;
 531}
 532
 533static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
 534{
 535        return NULL;
 536}
 537
 538struct Qdisc_ops noop_qdisc_ops __read_mostly = {
 539        .id             =       "noop",
 540        .priv_size      =       0,
 541        .enqueue        =       noop_enqueue,
 542        .dequeue        =       noop_dequeue,
 543        .peek           =       noop_dequeue,
 544        .owner          =       THIS_MODULE,
 545};
 546
 547static struct netdev_queue noop_netdev_queue = {
 548        RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc),
 549        .qdisc_sleeping =       &noop_qdisc,
 550};
 551
 552struct Qdisc noop_qdisc = {
 553        .enqueue        =       noop_enqueue,
 554        .dequeue        =       noop_dequeue,
 555        .flags          =       TCQ_F_BUILTIN,
 556        .ops            =       &noop_qdisc_ops,
 557        .q.lock         =       __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 558        .dev_queue      =       &noop_netdev_queue,
 559        .running        =       SEQCNT_ZERO(noop_qdisc.running),
 560        .busylock       =       __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
 561        .gso_skb = {
 562                .next = (struct sk_buff *)&noop_qdisc.gso_skb,
 563                .prev = (struct sk_buff *)&noop_qdisc.gso_skb,
 564                .qlen = 0,
 565                .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.gso_skb.lock),
 566        },
 567        .skb_bad_txq = {
 568                .next = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
 569                .prev = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
 570                .qlen = 0,
 571                .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock),
 572        },
 573};
 574EXPORT_SYMBOL(noop_qdisc);
 575
 576static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt,
 577                        struct netlink_ext_ack *extack)
 578{
 579        /* register_qdisc() assigns a default of noop_enqueue if unset,
 580         * but __dev_queue_xmit() treats noqueue only as such
 581         * if this is NULL - so clear it here. */
 582        qdisc->enqueue = NULL;
 583        return 0;
 584}
 585
 586struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
 587        .id             =       "noqueue",
 588        .priv_size      =       0,
 589        .init           =       noqueue_init,
 590        .enqueue        =       noop_enqueue,
 591        .dequeue        =       noop_dequeue,
 592        .peek           =       noop_dequeue,
 593        .owner          =       THIS_MODULE,
 594};
 595
 596static const u8 prio2band[TC_PRIO_MAX + 1] = {
 597        1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
 598};
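/* Worked example of the map above: skb->priority is masked with TC_PRIO_MAX
 * and used as an index, so TC_PRIO_INTERACTIVE (6) and TC_PRIO_CONTROL (7)
 * fall into band 0 (served first by pfifo_fast), TC_PRIO_BESTEFFORT (0)
 * into band 1, and TC_PRIO_BULK (2) into band 2 (served last).
 */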
 599
 600/* 3-band FIFO queue: old style, but should be a bit faster than
 601   generic prio+fifo combination.
 602 */
 603
 604#define PFIFO_FAST_BANDS 3
 605
 606/*
 607 * Private data for a pfifo_fast scheduler containing:
 608 *      - rings for priority bands
 609 */
 610struct pfifo_fast_priv {
 611        struct skb_array q[PFIFO_FAST_BANDS];
 612};
 613
 614static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
 615                                          int band)
 616{
 617        return &priv->q[band];
 618}
 619
 620static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
 621                              struct sk_buff **to_free)
 622{
 623        int band = prio2band[skb->priority & TC_PRIO_MAX];
 624        struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 625        struct skb_array *q = band2list(priv, band);
 626        unsigned int pkt_len = qdisc_pkt_len(skb);
 627        int err;
 628
 629        err = skb_array_produce(q, skb);
 630
 631        if (unlikely(err)) {
 632                if (qdisc_is_percpu_stats(qdisc))
 633                        return qdisc_drop_cpu(skb, qdisc, to_free);
 634                else
 635                        return qdisc_drop(skb, qdisc, to_free);
 636        }
 637
 638        qdisc_update_stats_at_enqueue(qdisc, pkt_len);
 639        return NET_XMIT_SUCCESS;
 640}
 641
 642static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
 643{
 644        struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 645        struct sk_buff *skb = NULL;
 646        int band;
 647
 648        for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
 649                struct skb_array *q = band2list(priv, band);
 650
 651                if (__skb_array_empty(q))
 652                        continue;
 653
 654                skb = __skb_array_consume(q);
 655        }
 656        if (likely(skb)) {
 657                qdisc_update_stats_at_dequeue(qdisc, skb);
 658        } else {
 659                qdisc->empty = true;
 660        }
 661
 662        return skb;
 663}
 664
 665static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
 666{
 667        struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 668        struct sk_buff *skb = NULL;
 669        int band;
 670
 671        for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
 672                struct skb_array *q = band2list(priv, band);
 673
 674                skb = __skb_array_peek(q);
 675        }
 676
 677        return skb;
 678}
 679
 680static void pfifo_fast_reset(struct Qdisc *qdisc)
 681{
 682        int i, band;
 683        struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 684
 685        for (band = 0; band < PFIFO_FAST_BANDS; band++) {
 686                struct skb_array *q = band2list(priv, band);
 687                struct sk_buff *skb;
 688
 689                /* A NULL ring is possible if the destroy path runs because
 690                 * skb_array_init() failed in pfifo_fast_init().
 691                 */
 692                if (!q->ring.queue)
 693                        continue;
 694
 695                while ((skb = __skb_array_consume(q)) != NULL)
 696                        kfree_skb(skb);
 697        }
 698
 699        if (qdisc_is_percpu_stats(qdisc)) {
 700                for_each_possible_cpu(i) {
 701                        struct gnet_stats_queue *q;
 702
 703                        q = per_cpu_ptr(qdisc->cpu_qstats, i);
 704                        q->backlog = 0;
 705                        q->qlen = 0;
 706                }
 707        }
 708}
 709
 710static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
 711{
 712        struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
 713
 714        memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
 715        if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
 716                goto nla_put_failure;
 717        return skb->len;
 718
 719nla_put_failure:
 720        return -1;
 721}
 722
 723static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt,
 724                           struct netlink_ext_ack *extack)
 725{
 726        unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len;
 727        struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 728        int prio;
 729
 730        /* guard against zero length rings */
 731        if (!qlen)
 732                return -EINVAL;
 733
 734        for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
 735                struct skb_array *q = band2list(priv, prio);
 736                int err;
 737
 738                err = skb_array_init(q, qlen, GFP_KERNEL);
 739                if (err)
 740                        return -ENOMEM;
 741        }
 742
 743        /* Can bypass the queue discipline */
 744        qdisc->flags |= TCQ_F_CAN_BYPASS;
 745        return 0;
 746}
 747
 748static void pfifo_fast_destroy(struct Qdisc *sch)
 749{
 750        struct pfifo_fast_priv *priv = qdisc_priv(sch);
 751        int prio;
 752
 753        for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
 754                struct skb_array *q = band2list(priv, prio);
 755
 756                /* A NULL ring is possible if the destroy path runs because
 757                 * skb_array_init() failed in pfifo_fast_init().
 758                 */
 759                if (!q->ring.queue)
 760                        continue;
 761                /* Destroy ring but no need to kfree_skb because a call to
 762                 * pfifo_fast_reset() has already done that work.
 763                 */
 764                ptr_ring_cleanup(&q->ring, NULL);
 765        }
 766}
 767
 768static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch,
 769                                          unsigned int new_len)
 770{
 771        struct pfifo_fast_priv *priv = qdisc_priv(sch);
 772        struct skb_array *bands[PFIFO_FAST_BANDS];
 773        int prio;
 774
 775        for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
 776                struct skb_array *q = band2list(priv, prio);
 777
 778                bands[prio] = q;
 779        }
 780
 781        return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len,
 782                                         GFP_KERNEL);
 783}
 784
 785struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 786        .id             =       "pfifo_fast",
 787        .priv_size      =       sizeof(struct pfifo_fast_priv),
 788        .enqueue        =       pfifo_fast_enqueue,
 789        .dequeue        =       pfifo_fast_dequeue,
 790        .peek           =       pfifo_fast_peek,
 791        .init           =       pfifo_fast_init,
 792        .destroy        =       pfifo_fast_destroy,
 793        .reset          =       pfifo_fast_reset,
 794        .dump           =       pfifo_fast_dump,
 795        .change_tx_queue_len =  pfifo_fast_change_tx_queue_len,
 796        .owner          =       THIS_MODULE,
 797        .static_flags   =       TCQ_F_NOLOCK | TCQ_F_CPUSTATS,
 798};
 799EXPORT_SYMBOL(pfifo_fast_ops);
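/* Note on the static_flags above: TCQ_F_NOLOCK means the root qdisc lock is
 * not taken in the fast path (the per-band skb_array rings provide the
 * producer/consumer ordering themselves), and TCQ_F_CPUSTATS switches the
 * byte/packet and qlen/backlog counters to the per-CPU variants updated via
 * qdisc_update_stats_at_enqueue()/_dequeue() above.
 */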
 800
 801static struct lock_class_key qdisc_tx_busylock;
 802static struct lock_class_key qdisc_running_key;
 803
 804struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 805                          const struct Qdisc_ops *ops,
 806                          struct netlink_ext_ack *extack)
 807{
 808        void *p;
 809        struct Qdisc *sch;
 810        unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
 811        int err = -ENOBUFS;
 812        struct net_device *dev;
 813
 814        if (!dev_queue) {
 815                NL_SET_ERR_MSG(extack, "No device queue given");
 816                err = -EINVAL;
 817                goto errout;
 818        }
 819
 820        dev = dev_queue->dev;
 821        p = kzalloc_node(size, GFP_KERNEL,
 822                         netdev_queue_numa_node_read(dev_queue));
 823
 824        if (!p)
 825                goto errout;
 826        sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
 827        /* if we got unaligned memory, ask for more and do the alignment ourselves */
 828        if (sch != p) {
 829                kfree(p);
 830                p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL,
 831                                 netdev_queue_numa_node_read(dev_queue));
 832                if (!p)
 833                        goto errout;
 834                sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
 835                sch->padded = (char *) sch - (char *) p;
 836        }
 837        __skb_queue_head_init(&sch->gso_skb);
 838        __skb_queue_head_init(&sch->skb_bad_txq);
 839        qdisc_skb_head_init(&sch->q);
 840        spin_lock_init(&sch->q.lock);
 841
 842        if (ops->static_flags & TCQ_F_CPUSTATS) {
 843                sch->cpu_bstats =
 844                        netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
 845                if (!sch->cpu_bstats)
 846                        goto errout1;
 847
 848                sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
 849                if (!sch->cpu_qstats) {
 850                        free_percpu(sch->cpu_bstats);
 851                        goto errout1;
 852                }
 853        }
 854
 855        spin_lock_init(&sch->busylock);
 856        lockdep_set_class(&sch->busylock,
 857                          dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
 858
 859        /* seqlock has the same scope as busylock, for NOLOCK qdiscs */
 860        spin_lock_init(&sch->seqlock);
 861        lockdep_set_class(&sch->seqlock,
 862                          dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
 863
 864        seqcount_init(&sch->running);
 865        lockdep_set_class(&sch->running,
 866                          dev->qdisc_running_key ?: &qdisc_running_key);
 867
 868        sch->ops = ops;
 869        sch->flags = ops->static_flags;
 870        sch->enqueue = ops->enqueue;
 871        sch->dequeue = ops->dequeue;
 872        sch->dev_queue = dev_queue;
 873        sch->empty = true;
 874        dev_hold(dev);
 875        refcount_set(&sch->refcnt, 1);
 876
 877        return sch;
 878errout1:
 879        kfree(p);
 880errout:
 881        return ERR_PTR(err);
 882}
 883
 884struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
 885                                const struct Qdisc_ops *ops,
 886                                unsigned int parentid,
 887                                struct netlink_ext_ack *extack)
 888{
 889        struct Qdisc *sch;
 890
 891        if (!try_module_get(ops->owner)) {
 892                NL_SET_ERR_MSG(extack, "Failed to increase module reference counter");
 893                return NULL;
 894        }
 895
 896        sch = qdisc_alloc(dev_queue, ops, extack);
 897        if (IS_ERR(sch)) {
 898                module_put(ops->owner);
 899                return NULL;
 900        }
 901        sch->parent = parentid;
 902
 903        if (!ops->init || ops->init(sch, NULL, extack) == 0)
 904                return sch;
 905
 906        qdisc_put(sch);
 907        return NULL;
 908}
 909EXPORT_SYMBOL(qdisc_create_dflt);
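/* Usage sketch: attach_one_default_qdisc() below is a typical caller, e.g.
 * qdisc_create_dflt(dev_queue, &pfifo_fast_ops, TC_H_ROOT, NULL).  It takes
 * a module reference, allocates the qdisc and runs ->init(); on any failure
 * the reference is dropped again and NULL is returned.
 */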
 910
 911/* Under qdisc_lock(qdisc) and BH! */
 912
 913void qdisc_reset(struct Qdisc *qdisc)
 914{
 915        const struct Qdisc_ops *ops = qdisc->ops;
 916        struct sk_buff *skb, *tmp;
 917
 918        if (ops->reset)
 919                ops->reset(qdisc);
 920
 921        skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
 922                __skb_unlink(skb, &qdisc->gso_skb);
 923                kfree_skb_list(skb);
 924        }
 925
 926        skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
 927                __skb_unlink(skb, &qdisc->skb_bad_txq);
 928                kfree_skb_list(skb);
 929        }
 930
 931        qdisc->q.qlen = 0;
 932        qdisc->qstats.backlog = 0;
 933}
 934EXPORT_SYMBOL(qdisc_reset);
 935
 936void qdisc_free(struct Qdisc *qdisc)
 937{
 938        if (qdisc_is_percpu_stats(qdisc)) {
 939                free_percpu(qdisc->cpu_bstats);
 940                free_percpu(qdisc->cpu_qstats);
 941        }
 942
 943        kfree((char *) qdisc - qdisc->padded);
 944}
 945
 946static void qdisc_free_cb(struct rcu_head *head)
 947{
 948        struct Qdisc *q = container_of(head, struct Qdisc, rcu);
 949
 950        qdisc_free(q);
 951}
 952
 953static void qdisc_destroy(struct Qdisc *qdisc)
 954{
 955        const struct Qdisc_ops  *ops = qdisc->ops;
 956        struct sk_buff *skb, *tmp;
 957
 958#ifdef CONFIG_NET_SCHED
 959        qdisc_hash_del(qdisc);
 960
 961        qdisc_put_stab(rtnl_dereference(qdisc->stab));
 962#endif
 963        gen_kill_estimator(&qdisc->rate_est);
 964        if (ops->reset)
 965                ops->reset(qdisc);
 966        if (ops->destroy)
 967                ops->destroy(qdisc);
 968
 969        module_put(ops->owner);
 970        dev_put(qdisc_dev(qdisc));
 971
 972        skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
 973                __skb_unlink(skb, &qdisc->gso_skb);
 974                kfree_skb_list(skb);
 975        }
 976
 977        skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
 978                __skb_unlink(skb, &qdisc->skb_bad_txq);
 979                kfree_skb_list(skb);
 980        }
 981
 982        call_rcu(&qdisc->rcu, qdisc_free_cb);
 983}
 984
 985void qdisc_put(struct Qdisc *qdisc)
 986{
 987        if (qdisc->flags & TCQ_F_BUILTIN ||
 988            !refcount_dec_and_test(&qdisc->refcnt))
 989                return;
 990
 991        qdisc_destroy(qdisc);
 992}
 993EXPORT_SYMBOL(qdisc_put);
 994
 995/* Version of qdisc_put() that is called with the rtnl mutex unlocked.
 996 * Intended as an optimization, this function only takes the rtnl lock if
 997 * the qdisc reference counter has reached zero.
 998 */
 999
1000void qdisc_put_unlocked(struct Qdisc *qdisc)
1001{
1002        if (qdisc->flags & TCQ_F_BUILTIN ||
1003            !refcount_dec_and_rtnl_lock(&qdisc->refcnt))
1004                return;
1005
1006        qdisc_destroy(qdisc);
1007        rtnl_unlock();
1008}
1009EXPORT_SYMBOL(qdisc_put_unlocked);
1010
1011/* Attach toplevel qdisc to device queue. */
1012struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
1013                              struct Qdisc *qdisc)
1014{
1015        struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
1016        spinlock_t *root_lock;
1017
1018        root_lock = qdisc_lock(oqdisc);
1019        spin_lock_bh(root_lock);
1020
1021        /* ... and graft new one */
1022        if (qdisc == NULL)
1023                qdisc = &noop_qdisc;
1024        dev_queue->qdisc_sleeping = qdisc;
1025        rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
1026
1027        spin_unlock_bh(root_lock);
1028
1029        return oqdisc;
1030}
1031EXPORT_SYMBOL(dev_graft_qdisc);
1032
1033static void attach_one_default_qdisc(struct net_device *dev,
1034                                     struct netdev_queue *dev_queue,
1035                                     void *_unused)
1036{
1037        struct Qdisc *qdisc;
1038        const struct Qdisc_ops *ops = default_qdisc_ops;
1039
1040        if (dev->priv_flags & IFF_NO_QUEUE)
1041                ops = &noqueue_qdisc_ops;
1042
1043        qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
1044        if (!qdisc) {
1045                netdev_info(dev, "activation failed\n");
1046                return;
1047        }
1048        if (!netif_is_multiqueue(dev))
1049                qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1050        dev_queue->qdisc_sleeping = qdisc;
1051}
1052
1053static void attach_default_qdiscs(struct net_device *dev)
1054{
1055        struct netdev_queue *txq;
1056        struct Qdisc *qdisc;
1057
1058        txq = netdev_get_tx_queue(dev, 0);
1059
1060        if (!netif_is_multiqueue(dev) ||
1061            dev->priv_flags & IFF_NO_QUEUE) {
1062                netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
1063                dev->qdisc = txq->qdisc_sleeping;
1064                qdisc_refcount_inc(dev->qdisc);
1065        } else {
1066                qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
1067                if (qdisc) {
1068                        dev->qdisc = qdisc;
1069                        qdisc->ops->attach(qdisc);
1070                }
1071        }
1072#ifdef CONFIG_NET_SCHED
1073        if (dev->qdisc != &noop_qdisc)
1074                qdisc_hash_add(dev->qdisc, false);
1075#endif
1076}
1077
1078static void transition_one_qdisc(struct net_device *dev,
1079                                 struct netdev_queue *dev_queue,
1080                                 void *_need_watchdog)
1081{
1082        struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
1083        int *need_watchdog_p = _need_watchdog;
1084
1085        if (!(new_qdisc->flags & TCQ_F_BUILTIN))
1086                clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
1087
1088        rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
1089        if (need_watchdog_p) {
1090                dev_queue->trans_start = 0;
1091                *need_watchdog_p = 1;
1092        }
1093}
1094
1095void dev_activate(struct net_device *dev)
1096{
1097        int need_watchdog;
1098
1099        /* No queueing discipline is attached to the device;
1100         * create a default one for devices that need queueing,
1101         * and noqueue_qdisc for virtual interfaces.
1102         */
1103
1104        if (dev->qdisc == &noop_qdisc)
1105                attach_default_qdiscs(dev);
1106
1107        if (!netif_carrier_ok(dev))
1108                /* Delay activation until next carrier-on event */
1109                return;
1110
1111        need_watchdog = 0;
1112        netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
1113        if (dev_ingress_queue(dev))
1114                transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
1115
1116        if (need_watchdog) {
1117                netif_trans_update(dev);
1118                dev_watchdog_up(dev);
1119        }
1120}
1121EXPORT_SYMBOL(dev_activate);
1122
1123static void dev_deactivate_queue(struct net_device *dev,
1124                                 struct netdev_queue *dev_queue,
1125                                 void *_qdisc_default)
1126{
1127        struct Qdisc *qdisc_default = _qdisc_default;
1128        struct Qdisc *qdisc;
1129
1130        qdisc = rtnl_dereference(dev_queue->qdisc);
1131        if (qdisc) {
1132                bool nolock = qdisc->flags & TCQ_F_NOLOCK;
1133
1134                if (nolock)
1135                        spin_lock_bh(&qdisc->seqlock);
1136                spin_lock_bh(qdisc_lock(qdisc));
1137
1138                if (!(qdisc->flags & TCQ_F_BUILTIN))
1139                        set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
1140
1141                rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
1142                qdisc_reset(qdisc);
1143
1144                spin_unlock_bh(qdisc_lock(qdisc));
1145                if (nolock)
1146                        spin_unlock_bh(&qdisc->seqlock);
1147        }
1148}
1149
1150static bool some_qdisc_is_busy(struct net_device *dev)
1151{
1152        unsigned int i;
1153
1154        for (i = 0; i < dev->num_tx_queues; i++) {
1155                struct netdev_queue *dev_queue;
1156                spinlock_t *root_lock;
1157                struct Qdisc *q;
1158                int val;
1159
1160                dev_queue = netdev_get_tx_queue(dev, i);
1161                q = dev_queue->qdisc_sleeping;
1162
1163                root_lock = qdisc_lock(q);
1164                spin_lock_bh(root_lock);
1165
1166                val = (qdisc_is_running(q) ||
1167                       test_bit(__QDISC_STATE_SCHED, &q->state));
1168
1169                spin_unlock_bh(root_lock);
1170
1171                if (val)
1172                        return true;
1173        }
1174        return false;
1175}
1176
1177static void dev_qdisc_reset(struct net_device *dev,
1178                            struct netdev_queue *dev_queue,
1179                            void *none)
1180{
1181        struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
1182
1183        if (qdisc)
1184                qdisc_reset(qdisc);
1185}
1186
1187/**
1188 *      dev_deactivate_many - deactivate transmissions on several devices
1189 *      @head: list of devices to deactivate
1190 *
1191 *      This function returns only when all outstanding transmissions
1192 *      have completed, unless all devices are in dismantle phase.
1193 */
1194void dev_deactivate_many(struct list_head *head)
1195{
1196        struct net_device *dev;
1197
1198        list_for_each_entry(dev, head, close_list) {
1199                netdev_for_each_tx_queue(dev, dev_deactivate_queue,
1200                                         &noop_qdisc);
1201                if (dev_ingress_queue(dev))
1202                        dev_deactivate_queue(dev, dev_ingress_queue(dev),
1203                                             &noop_qdisc);
1204
1205                dev_watchdog_down(dev);
1206        }
1207
1208        /* Wait for outstanding qdisc-less dev_queue_xmit calls.
1209         * This is avoided if all devices are in dismantle phase:
1210         * the caller will call synchronize_net() for us.
1211         */
1212        synchronize_net();
1213
1214        /* Wait for outstanding qdisc_run calls. */
1215        list_for_each_entry(dev, head, close_list) {
1216                while (some_qdisc_is_busy(dev))
1217                        yield();
1218                /* The new qdisc is assigned at this point so we can safely
1219                 * unwind stale skb lists and qdisc statistics
1220                 */
1221                netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
1222                if (dev_ingress_queue(dev))
1223                        dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
1224        }
1225}
1226
1227void dev_deactivate(struct net_device *dev)
1228{
1229        LIST_HEAD(single);
1230
1231        list_add(&dev->close_list, &single);
1232        dev_deactivate_many(&single);
1233        list_del(&single);
1234}
1235EXPORT_SYMBOL(dev_deactivate);
1236
1237static int qdisc_change_tx_queue_len(struct net_device *dev,
1238                                     struct netdev_queue *dev_queue)
1239{
1240        struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
1241        const struct Qdisc_ops *ops = qdisc->ops;
1242
1243        if (ops->change_tx_queue_len)
1244                return ops->change_tx_queue_len(qdisc, dev->tx_queue_len);
1245        return 0;
1246}
1247
1248int dev_qdisc_change_tx_queue_len(struct net_device *dev)
1249{
1250        bool up = dev->flags & IFF_UP;
1251        unsigned int i;
1252        int ret = 0;
1253
1254        if (up)
1255                dev_deactivate(dev);
1256
1257        for (i = 0; i < dev->num_tx_queues; i++) {
1258                ret = qdisc_change_tx_queue_len(dev, &dev->_tx[i]);
1259
1260                /* TODO: revert changes on a partial failure */
1261                if (ret)
1262                        break;
1263        }
1264
1265        if (up)
1266                dev_activate(dev);
1267        return ret;
1268}
1269
1270static void dev_init_scheduler_queue(struct net_device *dev,
1271                                     struct netdev_queue *dev_queue,
1272                                     void *_qdisc)
1273{
1274        struct Qdisc *qdisc = _qdisc;
1275
1276        rcu_assign_pointer(dev_queue->qdisc, qdisc);
1277        dev_queue->qdisc_sleeping = qdisc;
1278}
1279
1280void dev_init_scheduler(struct net_device *dev)
1281{
1282        dev->qdisc = &noop_qdisc;
1283        netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
1284        if (dev_ingress_queue(dev))
1285                dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
1286
1287        timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
1288}
1289
1290static void shutdown_scheduler_queue(struct net_device *dev,
1291                                     struct netdev_queue *dev_queue,
1292                                     void *_qdisc_default)
1293{
1294        struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
1295        struct Qdisc *qdisc_default = _qdisc_default;
1296
1297        if (qdisc) {
1298                rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
1299                dev_queue->qdisc_sleeping = qdisc_default;
1300
1301                qdisc_put(qdisc);
1302        }
1303}
1304
1305void dev_shutdown(struct net_device *dev)
1306{
1307        netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
1308        if (dev_ingress_queue(dev))
1309                shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
1310        qdisc_put(dev->qdisc);
1311        dev->qdisc = &noop_qdisc;
1312
1313        WARN_ON(timer_pending(&dev->watchdog_timer));
1314}
1315
1316void psched_ratecfg_precompute(struct psched_ratecfg *r,
1317                               const struct tc_ratespec *conf,
1318                               u64 rate64)
1319{
1320        memset(r, 0, sizeof(*r));
1321        r->overhead = conf->overhead;
1322        r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
1323        r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
1324        r->mult = 1;
1325        /*
1326         * The deal here is to replace a divide by a reciprocal one
1327         * in fast path (a reciprocal divide is a multiply and a shift)
1328         *
1329         * Normal formula would be :
1330         *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
1331         *
1332         * We compute mult/shift to use instead :
1333         *  time_in_ns = (len * mult) >> shift;
1334         *
1335         * We try to get the highest possible mult value for accuracy,
1336         * but have to make sure no overflows will ever happen.
1337         */
1338        if (r->rate_bytes_ps > 0) {
1339                u64 factor = NSEC_PER_SEC;
1340
1341                for (;;) {
1342                        r->mult = div64_u64(factor, r->rate_bytes_ps);
1343                        if (r->mult & (1U << 31) || factor & (1ULL << 63))
1344                                break;
1345                        factor <<= 1;
1346                        r->shift++;
1347                }
1348        }
1349}
1350EXPORT_SYMBOL(psched_ratecfg_precompute);
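/* Worked example (numbers approximate): for rate64 = 125000000 bytes/s
 * (1 Gbit/s) the loop above ends up with mult/shift such that
 * (len * r->mult) >> r->shift is close to len * 8 ns, so a 1500-byte packet
 * is charged roughly 12000 ns of transmit time without any division in the
 * fast path (see psched_l2t_ns()).
 */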
1351
1352static void mini_qdisc_rcu_func(struct rcu_head *head)
1353{
1354}
1355
1356void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
1357                          struct tcf_proto *tp_head)
1358{
1359        /* Protected with chain0->filter_chain_lock.
1360         * Can't access chain directly because tp_head can be NULL.
1361         */
1362        struct mini_Qdisc *miniq_old =
1363                rcu_dereference_protected(*miniqp->p_miniq, 1);
1364        struct mini_Qdisc *miniq;
1365
1366        if (!tp_head) {
1367                RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
1368                /* Wait for any in-flight RCU callback before it is freed. */
1369                rcu_barrier();
1370                return;
1371        }
1372
1373        miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
1374                &miniqp->miniq1 : &miniqp->miniq2;
1375
1376        /* We need to make sure that readers won't see the miniq
1377         * we are about to modify. So wait until previous call_rcu callback
1378         * is done.
1379         */
1380        rcu_barrier();
1381        miniq->filter_list = tp_head;
1382        rcu_assign_pointer(*miniqp->p_miniq, miniq);
1383
1384        if (miniq_old)
1385                /* This is the counterpart of the rcu barriers above. We need
1386                 * to block potential new users of miniq_old until all readers
1387                 * have stopped seeing it.
1388                 */
1389                call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func);
1390}
1391EXPORT_SYMBOL(mini_qdisc_pair_swap);
1392
1393void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
1394                          struct mini_Qdisc __rcu **p_miniq)
1395{
1396        miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats;
1397        miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
1398        miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
1399        miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
1400        miniqp->p_miniq = p_miniq;
1401}
1402EXPORT_SYMBOL(mini_qdisc_pair_init);
1403