linux/net/sched/sch_red.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * net/sched/sch_red.c  Random Early Detection queue.
   4 *
   5 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   6 *
   7 * Changes:
   8 * J Hadi Salim 980914: computation fixes
   9 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
  10 * J Hadi Salim 980816:  ECN support
  11 */
  12
  13#include <linux/module.h>
  14#include <linux/types.h>
  15#include <linux/kernel.h>
  16#include <linux/skbuff.h>
  17#include <net/pkt_sched.h>
  18#include <net/pkt_cls.h>
  19#include <net/inet_ecn.h>
  20#include <net/red.h>
  21
  22
  23/*      Parameters, settable by user:
  24        -----------------------------
  25
  26        limit           - bytes (must be > qth_max + burst)
  27
  28        Hard limit on queue length, should be chosen >qth_max
  29        to allow packet bursts. This parameter does not
   30        affect the algorithm's behaviour and can be chosen
  31        arbitrarily high (well, less than ram size)
  32        Really, this limit will never be reached
  33        if RED works correctly.
  34 */
  35
  36struct red_sched_data {
  37        u32                     limit;          /* HARD maximal queue length */
  38
  39        unsigned char           flags;
  40        /* Non-flags in tc_red_qopt.flags. */
  41        unsigned char           userbits;
  42
  43        struct timer_list       adapt_timer;
  44        struct Qdisc            *sch;
  45        struct red_parms        parms;
  46        struct red_vars         vars;
  47        struct red_stats        stats;
  48        struct Qdisc            *qdisc;
  49        struct tcf_qevent       qe_early_drop;
  50        struct tcf_qevent       qe_mark;
  51};
  52
  53#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)
  54
  55static inline int red_use_ecn(struct red_sched_data *q)
  56{
  57        return q->flags & TC_RED_ECN;
  58}
  59
  60static inline int red_use_harddrop(struct red_sched_data *q)
  61{
  62        return q->flags & TC_RED_HARDDROP;
  63}
  64
  65static int red_use_nodrop(struct red_sched_data *q)
  66{
  67        return q->flags & TC_RED_NODROP;
  68}
  69
  70static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
  71                       struct sk_buff **to_free)
  72{
  73        struct red_sched_data *q = qdisc_priv(sch);
  74        struct Qdisc *child = q->qdisc;
  75        int ret;
  76
  77        q->vars.qavg = red_calc_qavg(&q->parms,
  78                                     &q->vars,
  79                                     child->qstats.backlog);
  80
  81        if (red_is_idling(&q->vars))
  82                red_end_of_idle_period(&q->vars);
  83
  84        switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
  85        case RED_DONT_MARK:
  86                break;
  87
  88        case RED_PROB_MARK:
  89                qdisc_qstats_overlimit(sch);
  90                if (!red_use_ecn(q)) {
  91                        q->stats.prob_drop++;
  92                        goto congestion_drop;
  93                }
  94
  95                if (INET_ECN_set_ce(skb)) {
  96                        q->stats.prob_mark++;
  97                        skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
  98                        if (!skb)
  99                                return NET_XMIT_CN | ret;
 100                } else if (!red_use_nodrop(q)) {
 101                        q->stats.prob_drop++;
 102                        goto congestion_drop;
 103                }
 104
 105                /* Non-ECT packet in ECN nodrop mode: queue it. */
 106                break;
 107
 108        case RED_HARD_MARK:
 109                qdisc_qstats_overlimit(sch);
 110                if (red_use_harddrop(q) || !red_use_ecn(q)) {
 111                        q->stats.forced_drop++;
 112                        goto congestion_drop;
 113                }
 114
 115                if (INET_ECN_set_ce(skb)) {
 116                        q->stats.forced_mark++;
 117                        skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
 118                        if (!skb)
 119                                return NET_XMIT_CN | ret;
 120                } else if (!red_use_nodrop(q)) {
 121                        q->stats.forced_drop++;
 122                        goto congestion_drop;
 123                }
 124
 125                /* Non-ECT packet in ECN nodrop mode: queue it. */
 126                break;
 127        }
 128
 129        ret = qdisc_enqueue(skb, child, to_free);
 130        if (likely(ret == NET_XMIT_SUCCESS)) {
 131                qdisc_qstats_backlog_inc(sch, skb);
 132                sch->q.qlen++;
 133        } else if (net_xmit_drop_count(ret)) {
 134                q->stats.pdrop++;
 135                qdisc_qstats_drop(sch);
 136        }
 137        return ret;
 138
 139congestion_drop:
 140        skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
 141        if (!skb)
 142                return NET_XMIT_CN | ret;
 143
 144        qdisc_drop(skb, sch, to_free);
 145        return NET_XMIT_CN;
 146}
 147
 148static struct sk_buff *red_dequeue(struct Qdisc *sch)
 149{
 150        struct sk_buff *skb;
 151        struct red_sched_data *q = qdisc_priv(sch);
 152        struct Qdisc *child = q->qdisc;
 153
 154        skb = child->dequeue(child);
 155        if (skb) {
 156                qdisc_bstats_update(sch, skb);
 157                qdisc_qstats_backlog_dec(sch, skb);
 158                sch->q.qlen--;
 159        } else {
 160                if (!red_is_idling(&q->vars))
 161                        red_start_of_idle_period(&q->vars);
 162        }
 163        return skb;
 164}
 165
 166static struct sk_buff *red_peek(struct Qdisc *sch)
 167{
 168        struct red_sched_data *q = qdisc_priv(sch);
 169        struct Qdisc *child = q->qdisc;
 170
 171        return child->ops->peek(child);
 172}
 173
 174static void red_reset(struct Qdisc *sch)
 175{
 176        struct red_sched_data *q = qdisc_priv(sch);
 177
 178        qdisc_reset(q->qdisc);
 179        sch->qstats.backlog = 0;
 180        sch->q.qlen = 0;
 181        red_restart(&q->vars);
 182}
 183
 184static int red_offload(struct Qdisc *sch, bool enable)
 185{
 186        struct red_sched_data *q = qdisc_priv(sch);
 187        struct net_device *dev = qdisc_dev(sch);
 188        struct tc_red_qopt_offload opt = {
 189                .handle = sch->handle,
 190                .parent = sch->parent,
 191        };
 192
 193        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
 194                return -EOPNOTSUPP;
 195
 196        if (enable) {
 197                opt.command = TC_RED_REPLACE;
 198                opt.set.min = q->parms.qth_min >> q->parms.Wlog;
 199                opt.set.max = q->parms.qth_max >> q->parms.Wlog;
 200                opt.set.probability = q->parms.max_P;
 201                opt.set.limit = q->limit;
 202                opt.set.is_ecn = red_use_ecn(q);
 203                opt.set.is_harddrop = red_use_harddrop(q);
 204                opt.set.is_nodrop = red_use_nodrop(q);
 205                opt.set.qstats = &sch->qstats;
 206        } else {
 207                opt.command = TC_RED_DESTROY;
 208        }
 209
 210        return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
 211}
 212
 213static void red_destroy(struct Qdisc *sch)
 214{
 215        struct red_sched_data *q = qdisc_priv(sch);
 216
 217        tcf_qevent_destroy(&q->qe_mark, sch);
 218        tcf_qevent_destroy(&q->qe_early_drop, sch);
 219        del_timer_sync(&q->adapt_timer);
 220        red_offload(sch, false);
 221        qdisc_put(q->qdisc);
 222}
 223
 224static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
 225        [TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
 226        [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
 227        [TCA_RED_STAB]  = { .len = RED_STAB_SIZE },
 228        [TCA_RED_MAX_P] = { .type = NLA_U32 },
 229        [TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
 230        [TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
 231        [TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
 232};
 233
 234static int __red_change(struct Qdisc *sch, struct nlattr **tb,
 235                        struct netlink_ext_ack *extack)
 236{
 237        struct Qdisc *old_child = NULL, *child = NULL;
 238        struct red_sched_data *q = qdisc_priv(sch);
 239        struct nla_bitfield32 flags_bf;
 240        struct tc_red_qopt *ctl;
 241        unsigned char userbits;
 242        unsigned char flags;
 243        int err;
 244        u32 max_P;
 245
 246        if (tb[TCA_RED_PARMS] == NULL ||
 247            tb[TCA_RED_STAB] == NULL)
 248                return -EINVAL;
 249
 250        max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
 251
 252        ctl = nla_data(tb[TCA_RED_PARMS]);
 253        if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
 254                return -EINVAL;
 255
 256        err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
 257                            tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
 258                            &flags_bf, &userbits, extack);
 259        if (err)
 260                return err;
 261
 262        if (ctl->limit > 0) {
 263                child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
 264                                         extack);
 265                if (IS_ERR(child))
 266                        return PTR_ERR(child);
 267
 268                /* child is fifo, no need to check for noop_qdisc */
 269                qdisc_hash_add(child, true);
 270        }
 271
 272        sch_tree_lock(sch);
 273
 274        flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
 275        err = red_validate_flags(flags, extack);
 276        if (err)
 277                goto unlock_out;
 278
 279        q->flags = flags;
 280        q->userbits = userbits;
 281        q->limit = ctl->limit;
 282        if (child) {
 283                qdisc_tree_flush_backlog(q->qdisc);
 284                old_child = q->qdisc;
 285                q->qdisc = child;
 286        }
 287
 288        red_set_parms(&q->parms,
 289                      ctl->qth_min, ctl->qth_max, ctl->Wlog,
 290                      ctl->Plog, ctl->Scell_log,
 291                      nla_data(tb[TCA_RED_STAB]),
 292                      max_P);
 293        red_set_vars(&q->vars);
 294
 295        del_timer(&q->adapt_timer);
 296        if (ctl->flags & TC_RED_ADAPTATIVE)
 297                mod_timer(&q->adapt_timer, jiffies + HZ/2);
 298
 299        if (!q->qdisc->q.qlen)
 300                red_start_of_idle_period(&q->vars);
 301
 302        sch_tree_unlock(sch);
 303
 304        red_offload(sch, true);
 305
 306        if (old_child)
 307                qdisc_put(old_child);
 308        return 0;
 309
 310unlock_out:
 311        sch_tree_unlock(sch);
 312        if (child)
 313                qdisc_put(child);
 314        return err;
 315}
 316
 317static inline void red_adaptative_timer(struct timer_list *t)
 318{
 319        struct red_sched_data *q = from_timer(q, t, adapt_timer);
 320        struct Qdisc *sch = q->sch;
 321        spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
 322
 323        spin_lock(root_lock);
 324        red_adaptative_algo(&q->parms, &q->vars);
 325        mod_timer(&q->adapt_timer, jiffies + HZ/2);
 326        spin_unlock(root_lock);
 327}
 328
 329static int red_init(struct Qdisc *sch, struct nlattr *opt,
 330                    struct netlink_ext_ack *extack)
 331{
 332        struct red_sched_data *q = qdisc_priv(sch);
 333        struct nlattr *tb[TCA_RED_MAX + 1];
 334        int err;
 335
 336        q->qdisc = &noop_qdisc;
 337        q->sch = sch;
 338        timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
 339
 340        if (!opt)
 341                return -EINVAL;
 342
 343        err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
 344                                          extack);
 345        if (err < 0)
 346                return err;
 347
 348        err = __red_change(sch, tb, extack);
 349        if (err)
 350                return err;
 351
 352        err = tcf_qevent_init(&q->qe_early_drop, sch,
 353                              FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
 354                              tb[TCA_RED_EARLY_DROP_BLOCK], extack);
 355        if (err)
 356                return err;
 357
 358        return tcf_qevent_init(&q->qe_mark, sch,
 359                               FLOW_BLOCK_BINDER_TYPE_RED_MARK,
 360                               tb[TCA_RED_MARK_BLOCK], extack);
 361}
 362
 363static int red_change(struct Qdisc *sch, struct nlattr *opt,
 364                      struct netlink_ext_ack *extack)
 365{
 366        struct red_sched_data *q = qdisc_priv(sch);
 367        struct nlattr *tb[TCA_RED_MAX + 1];
 368        int err;
 369
 370        if (!opt)
 371                return -EINVAL;
 372
 373        err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
 374                                          extack);
 375        if (err < 0)
 376                return err;
 377
 378        err = tcf_qevent_validate_change(&q->qe_early_drop,
 379                                         tb[TCA_RED_EARLY_DROP_BLOCK], extack);
 380        if (err)
 381                return err;
 382
 383        err = tcf_qevent_validate_change(&q->qe_mark,
 384                                         tb[TCA_RED_MARK_BLOCK], extack);
 385        if (err)
 386                return err;
 387
 388        return __red_change(sch, tb, extack);
 389}
 390
 391static int red_dump_offload_stats(struct Qdisc *sch)
 392{
 393        struct tc_red_qopt_offload hw_stats = {
 394                .command = TC_RED_STATS,
 395                .handle = sch->handle,
 396                .parent = sch->parent,
 397                {
 398                        .stats.bstats = &sch->bstats,
 399                        .stats.qstats = &sch->qstats,
 400                },
 401        };
 402
 403        return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
 404}
 405
 406static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 407{
 408        struct red_sched_data *q = qdisc_priv(sch);
 409        struct nlattr *opts = NULL;
 410        struct tc_red_qopt opt = {
 411                .limit          = q->limit,
 412                .flags          = (q->flags & TC_RED_HISTORIC_FLAGS) |
 413                                  q->userbits,
 414                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
 415                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
 416                .Wlog           = q->parms.Wlog,
 417                .Plog           = q->parms.Plog,
 418                .Scell_log      = q->parms.Scell_log,
 419        };
 420        int err;
 421
 422        err = red_dump_offload_stats(sch);
 423        if (err)
 424                goto nla_put_failure;
 425
 426        opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 427        if (opts == NULL)
 428                goto nla_put_failure;
 429        if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
 430            nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
 431            nla_put_bitfield32(skb, TCA_RED_FLAGS,
 432                               q->flags, TC_RED_SUPPORTED_FLAGS) ||
 433            tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
 434            tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
 435                goto nla_put_failure;
 436        return nla_nest_end(skb, opts);
 437
 438nla_put_failure:
 439        nla_nest_cancel(skb, opts);
 440        return -EMSGSIZE;
 441}
 442
 443static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 444{
 445        struct red_sched_data *q = qdisc_priv(sch);
 446        struct net_device *dev = qdisc_dev(sch);
 447        struct tc_red_xstats st = {0};
 448
 449        if (sch->flags & TCQ_F_OFFLOADED) {
 450                struct tc_red_qopt_offload hw_stats_request = {
 451                        .command = TC_RED_XSTATS,
 452                        .handle = sch->handle,
 453                        .parent = sch->parent,
 454                        {
 455                                .xstats = &q->stats,
 456                        },
 457                };
 458                dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
 459                                              &hw_stats_request);
 460        }
 461        st.early = q->stats.prob_drop + q->stats.forced_drop;
 462        st.pdrop = q->stats.pdrop;
 463        st.other = q->stats.other;
 464        st.marked = q->stats.prob_mark + q->stats.forced_mark;
 465
 466        return gnet_stats_copy_app(d, &st, sizeof(st));
 467}
 468
 469static int red_dump_class(struct Qdisc *sch, unsigned long cl,
 470                          struct sk_buff *skb, struct tcmsg *tcm)
 471{
 472        struct red_sched_data *q = qdisc_priv(sch);
 473
 474        tcm->tcm_handle |= TC_H_MIN(1);
 475        tcm->tcm_info = q->qdisc->handle;
 476        return 0;
 477}
 478
 479static void red_graft_offload(struct Qdisc *sch,
 480                              struct Qdisc *new, struct Qdisc *old,
 481                              struct netlink_ext_ack *extack)
 482{
 483        struct tc_red_qopt_offload graft_offload = {
 484                .handle         = sch->handle,
 485                .parent         = sch->parent,
 486                .child_handle   = new->handle,
 487                .command        = TC_RED_GRAFT,
 488        };
 489
 490        qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
 491                                   TC_SETUP_QDISC_RED, &graft_offload, extack);
 492}
 493
 494static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 495                     struct Qdisc **old, struct netlink_ext_ack *extack)
 496{
 497        struct red_sched_data *q = qdisc_priv(sch);
 498
 499        if (new == NULL)
 500                new = &noop_qdisc;
 501
 502        *old = qdisc_replace(sch, new, &q->qdisc);
 503
 504        red_graft_offload(sch, new, *old, extack);
 505        return 0;
 506}
 507
 508static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
 509{
 510        struct red_sched_data *q = qdisc_priv(sch);
 511        return q->qdisc;
 512}
 513
 514static unsigned long red_find(struct Qdisc *sch, u32 classid)
 515{
 516        return 1;
 517}
 518
 519static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 520{
 521        if (!walker->stop) {
 522                if (walker->count >= walker->skip)
 523                        if (walker->fn(sch, 1, walker) < 0) {
 524                                walker->stop = 1;
 525                                return;
 526                        }
 527                walker->count++;
 528        }
 529}
 530
 531static const struct Qdisc_class_ops red_class_ops = {
 532        .graft          =       red_graft,
 533        .leaf           =       red_leaf,
 534        .find           =       red_find,
 535        .walk           =       red_walk,
 536        .dump           =       red_dump_class,
 537};
 538
 539static struct Qdisc_ops red_qdisc_ops __read_mostly = {
 540        .id             =       "red",
 541        .priv_size      =       sizeof(struct red_sched_data),
 542        .cl_ops         =       &red_class_ops,
 543        .enqueue        =       red_enqueue,
 544        .dequeue        =       red_dequeue,
 545        .peek           =       red_peek,
 546        .init           =       red_init,
 547        .reset          =       red_reset,
 548        .destroy        =       red_destroy,
 549        .change         =       red_change,
 550        .dump           =       red_dump,
 551        .dump_stats     =       red_dump_stats,
 552        .owner          =       THIS_MODULE,
 553};
 554
 555static int __init red_module_init(void)
 556{
 557        return register_qdisc(&red_qdisc_ops);
 558}
 559
 560static void __exit red_module_exit(void)
 561{
 562        unregister_qdisc(&red_qdisc_ops);
 563}
 564
 565module_init(red_module_init)
 566module_exit(red_module_exit)
 567
 568MODULE_LICENSE("GPL");
 569