linux/net/sched/sch_red.c
/*
 * net/sched/sch_red.c  Random Early Detection queue.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914: computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*      Parameters, settable by user:
        -----------------------------

        limit           - bytes (must be > qth_max + burst)

        Hard limit on queue length, should be chosen >qth_max
        to allow packet bursts. This parameter does not
        affect the algorithm's behaviour and can be chosen
        arbitrarily high (well, less than ram size).
        Really, this limit will never be reached
        if RED works correctly.
 */
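
/* Illustrative configuration (assumed example values, not from this file):
 * with qth_min/qth_max of 30000/90000 bytes and avpkt 1000, a limit of
 * 400000 bytes leaves ample room for bursts above qth_max, e.g.:
 *
 *   tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *       avpkt 1000 burst 50 bandwidth 10mbit probability 0.02 ecn
 */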

struct red_sched_data {
        u32                     limit;          /* HARD maximal queue length */
        unsigned char           flags;
        struct timer_list       adapt_timer;
        struct Qdisc            *sch;
        struct red_parms        parms;
        struct red_vars         vars;
        struct red_stats        stats;
        struct Qdisc            *qdisc;
};

static inline int red_use_ecn(struct red_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

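/* Enqueue: recompute the average queue size (qavg) from the child qdisc's
 * backlog, leave the idle period if one was in progress, then act on
 * red_action(): below qth_min the packet is queued untouched; between the
 * thresholds it is probabilistically ECN-marked (or dropped when ECN is
 * off); at or above qth_max it is always marked, or dropped when harddrop
 * is set or the packet cannot be marked.  Drops taken here return
 * NET_XMIT_CN; drops inside the child qdisc are accounted as pdrop.
 */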
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;
        int ret;

        q->vars.qavg = red_calc_qavg(&q->parms,
                                     &q->vars,
                                     child->qstats.backlog);

        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

        switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
        case RED_DONT_MARK:
                break;

        case RED_PROB_MARK:
                qdisc_qstats_overlimit(sch);
                if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
                        q->stats.prob_drop++;
                        goto congestion_drop;
                }

                q->stats.prob_mark++;
                break;

        case RED_HARD_MARK:
                qdisc_qstats_overlimit(sch);
                if (red_use_harddrop(q) || !red_use_ecn(q) ||
                    !INET_ECN_set_ce(skb)) {
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }

                q->stats.forced_mark++;
                break;
        }

        ret = qdisc_enqueue(skb, child, to_free);
        if (likely(ret == NET_XMIT_SUCCESS)) {
                qdisc_qstats_backlog_inc(sch, skb);
                sch->q.qlen++;
        } else if (net_xmit_drop_count(ret)) {
                q->stats.pdrop++;
                qdisc_qstats_drop(sch);
        }
        return ret;

congestion_drop:
        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
}

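/* Dequeue from the child qdisc; on success update the parent's byte/packet
 * accounting, otherwise start the RED idle period so that qavg keeps
 * decaying while the queue sits empty.
 */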
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb;
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        skb = child->dequeue(child);
        if (skb) {
                qdisc_bstats_update(sch, skb);
                qdisc_qstats_backlog_dec(sch, skb);
                sch->q.qlen--;
        } else {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
        }
        return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->qstats.backlog = 0;
        sch->q.qlen = 0;
        red_restart(&q->vars);
}

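/* Install (TC_RED_REPLACE) or remove (TC_RED_DESTROY) the RED configuration
 * in hardware via ndo_setup_tc().  qth_min/qth_max are stored internally in
 * scaled (<< Wlog) form, so they are shifted back down before being handed
 * to the driver.
 */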
static int red_offload(struct Qdisc *sch, bool enable)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_qopt_offload opt = {
                .handle = sch->handle,
                .parent = sch->parent,
        };

        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return -EOPNOTSUPP;

        if (enable) {
                opt.command = TC_RED_REPLACE;
                opt.set.min = q->parms.qth_min >> q->parms.Wlog;
                opt.set.max = q->parms.qth_max >> q->parms.Wlog;
                opt.set.probability = q->parms.max_P;
                opt.set.is_ecn = red_use_ecn(q);
                opt.set.qstats = &sch->qstats;
        } else {
                opt.command = TC_RED_DESTROY;
        }

        return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        del_timer_sync(&q->adapt_timer);
        red_offload(sch, false);
        qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
        [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
        [TCA_RED_STAB]  = { .len = RED_STAB_SIZE },
        [TCA_RED_MAX_P] = { .type = NLA_U32 },
};

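/* Parse and apply a new configuration.  TCA_RED_PARMS and TCA_RED_STAB are
 * mandatory, TCA_RED_MAX_P is optional.  When a limit is given, a bfifo
 * child qdisc of that size is created and swapped in under the tree lock;
 * the RED parameters and EWMA state are then reinitialised, the adaptive
 * timer is (re)armed if TC_RED_ADAPTATIVE is set, and finally the new
 * configuration is pushed to hardware.
 */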
static int red_change(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_RED_MAX + 1];
        struct tc_red_qopt *ctl;
        struct Qdisc *child = NULL;
        int err;
        u32 max_P;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
        if (err < 0)
                return err;

        if (tb[TCA_RED_PARMS] == NULL ||
            tb[TCA_RED_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_RED_PARMS]);
        if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
                return -EINVAL;

        if (ctl->limit > 0) {
                child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
                                         extack);
                if (IS_ERR(child))
                        return PTR_ERR(child);

                /* child is fifo, no need to check for noop_qdisc */
                qdisc_hash_add(child, true);
        }

        sch_tree_lock(sch);
        q->flags = ctl->flags;
        q->limit = ctl->limit;
        if (child) {
                qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
                                          q->qdisc->qstats.backlog);
                qdisc_put(q->qdisc);
                q->qdisc = child;
        }

        red_set_parms(&q->parms,
                      ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      nla_data(tb[TCA_RED_STAB]),
                      max_P);
        red_set_vars(&q->vars);

        del_timer(&q->adapt_timer);
        if (ctl->flags & TC_RED_ADAPTATIVE)
                mod_timer(&q->adapt_timer, jiffies + HZ/2);

        if (!q->qdisc->q.qlen)
                red_start_of_idle_period(&q->vars);

        sch_tree_unlock(sch);
        red_offload(sch, true);
        return 0;
}

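/* Periodic worker for adaptive RED: fires every HZ/2 and adjusts max_P so
 * that the average queue size stays within the target band between qth_min
 * and qth_max.  Runs under the root qdisc lock and re-arms itself.
 */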
static void red_adaptative_timer(struct timer_list *t)
{
        struct red_sched_data *q = from_timer(q, t, adapt_timer);
        struct Qdisc *sch = q->sch;
        spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

        spin_lock(root_lock);
        red_adaptative_algo(&q->parms, &q->vars);
        mod_timer(&q->adapt_timer, jiffies + HZ/2);
        spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);

        q->qdisc = &noop_qdisc;
        q->sch = sch;
        timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
        return red_change(sch, opt, extack);
}

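/* Refresh the qdisc's byte/packet/queue counters from the offloading driver,
 * if any.  Missing offload support or -EOPNOTSUPP simply means the qdisc
 * runs in software and is not treated as an error; TCQ_F_OFFLOADED is set
 * only when the driver reports statistics successfully.
 */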
static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_qopt_offload hw_stats = {
                .command = TC_RED_STATS,
                .handle = sch->handle,
                .parent = sch->parent,
                {
                        .stats.bstats = &sch->bstats,
                        .stats.qstats = &sch->qstats,
                },
        };
        int err;

        sch->flags &= ~TCQ_F_OFFLOADED;

        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return 0;

        err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
                                            &hw_stats);
        if (err == -EOPNOTSUPP)
                return 0;

        if (!err)
                sch->flags |= TCQ_F_OFFLOADED;

        return err;
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = q->flags,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };
        int err;

        err = red_dump_offload_stats(sch, &opt);
        if (err)
                goto nla_put_failure;

        opts = nla_nest_start(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;
        if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

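/* Export RED-specific statistics (early drops, marks, tail drops) to user
 * space.  When offloaded, the counters are first refreshed from the driver
 * with a TC_RED_XSTATS request.
 */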
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_xstats st = {0};

        if (sch->flags & TCQ_F_OFFLOADED) {
                struct tc_red_qopt_offload hw_stats_request = {
                        .command = TC_RED_XSTATS,
                        .handle = sch->handle,
                        .parent = sch->parent,
                        {
                                .xstats = &q->stats,
                        },
                };
                dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
                                              &hw_stats_request);
        }
        st.early = q->stats.prob_drop + q->stats.forced_drop;
        st.pdrop = q->stats.pdrop;
        st.other = q->stats.other;
        st.marked = q->stats.prob_mark + q->stats.forced_mark;

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

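/* RED is classful only in the sense that it exposes its single child qdisc:
 * class minor 1 maps to q->qdisc, red_graft() replaces it, and red_walk()
 * reports exactly one class.
 */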
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        struct red_sched_data *q = qdisc_priv(sch);

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;
        return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old, struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        *old = qdisc_replace(sch, new, &q->qdisc);
        return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct red_sched_data *q = qdisc_priv(sch);
        return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static const struct Qdisc_class_ops red_class_ops = {
        .graft          =       red_graft,
        .leaf           =       red_leaf,
        .find           =       red_find,
        .walk           =       red_walk,
        .dump           =       red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
        .id             =       "red",
        .priv_size      =       sizeof(struct red_sched_data),
        .cl_ops         =       &red_class_ops,
        .enqueue        =       red_enqueue,
        .dequeue        =       red_dequeue,
        .peek           =       red_peek,
        .init           =       red_init,
        .reset          =       red_reset,
        .destroy        =       red_destroy,
        .change         =       red_change,
        .dump           =       red_dump,
        .dump_stats     =       red_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init red_module_init(void)
{
        return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
        unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");