linux/net/sched/sch_red.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c  Random Early Detection queue.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914: computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*      Parameters, settable by user:
        -----------------------------

        limit           - bytes (must be > qth_max + burst)

        Hard limit on queue length, should be chosen > qth_max
        to allow packet bursts. This parameter does not
        affect the algorithm's behaviour and can be chosen
        arbitrarily high (well, less than RAM size).
        Really, this limit will never be reached
        if RED works correctly.
 */
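/* Illustrative sizing sketch (not part of the original source; parameter
 * names follow tc-red(8), the device name and values are only assumptions
 * chosen to satisfy the rule above): with qth_min = 30000 bytes,
 * qth_max = 90000 bytes and bursts of about 55 packets of ~1000 bytes,
 * limit must exceed 90000 + 55000 bytes, so e.g. 400000 leaves headroom:
 *
 *   tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *           avpkt 1000 burst 55 bandwidth 10mbit probability 0.02 ecn
 */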

struct red_sched_data {
        u32                     limit;          /* HARD maximal queue length */

        unsigned char           flags;
        /* Non-flags in tc_red_qopt.flags. */
        unsigned char           userbits;

        struct timer_list       adapt_timer;
        struct Qdisc            *sch;
        struct red_parms        parms;
        struct red_vars         vars;
        struct red_stats        stats;
        struct Qdisc            *qdisc;
        struct tcf_qevent       qe_early_drop;
        struct tcf_qevent       qe_mark;
};

#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

static inline int red_use_ecn(struct red_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
        return q->flags & TC_RED_NODROP;
}

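/* Enqueue path: update the EWMA queue average from the child backlog, then
 * let red_action() choose DONT_MARK, PROB_MARK or HARD_MARK. In ECN mode,
 * ECT packets are CE-marked (and the "mark" qevent may consume the skb);
 * non-ECT packets are early-dropped unless TC_RED_NODROP is set, in which
 * case they are queued normally.
 */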
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;
        int ret;

        q->vars.qavg = red_calc_qavg(&q->parms,
                                     &q->vars,
                                     child->qstats.backlog);

        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

        switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
        case RED_DONT_MARK:
                break;

        case RED_PROB_MARK:
                qdisc_qstats_overlimit(sch);
                if (!red_use_ecn(q)) {
                        q->stats.prob_drop++;
                        goto congestion_drop;
                }

                if (INET_ECN_set_ce(skb)) {
                        q->stats.prob_mark++;
                        skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
                        if (!skb)
                                return NET_XMIT_CN | ret;
                } else if (!red_use_nodrop(q)) {
                        q->stats.prob_drop++;
                        goto congestion_drop;
                }

                /* Non-ECT packet in ECN nodrop mode: queue it. */
                break;

        case RED_HARD_MARK:
                qdisc_qstats_overlimit(sch);
                if (red_use_harddrop(q) || !red_use_ecn(q)) {
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }

                if (INET_ECN_set_ce(skb)) {
                        q->stats.forced_mark++;
                        skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
                        if (!skb)
                                return NET_XMIT_CN | ret;
                } else if (!red_use_nodrop(q)) {
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }

                /* Non-ECT packet in ECN nodrop mode: queue it. */
                break;
        }

        ret = qdisc_enqueue(skb, child, to_free);
        if (likely(ret == NET_XMIT_SUCCESS)) {
                qdisc_qstats_backlog_inc(sch, skb);
                sch->q.qlen++;
        } else if (net_xmit_drop_count(ret)) {
                q->stats.pdrop++;
                qdisc_qstats_drop(sch);
        }
        return ret;

congestion_drop:
        skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
        if (!skb)
                return NET_XMIT_CN | ret;

        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
}

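/* Dequeue from the child qdisc; when the child goes empty, record the start
 * of an idle period so qavg decays correctly while the link is idle (see the
 * 990814 changelog entry above).
 */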
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb;
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        skb = child->dequeue(child);
        if (skb) {
                qdisc_bstats_update(sch, skb);
                qdisc_qstats_backlog_dec(sch, skb);
                sch->q.qlen--;
        } else {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
        }
        return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->qstats.backlog = 0;
        sch->q.qlen = 0;
        red_restart(&q->vars);
}

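/* Install the RED configuration on (or remove it from) the NIC via
 * ndo_setup_tc(TC_SETUP_QDISC_RED). qth_min/qth_max are kept scaled by
 * Wlog internally, so they are shifted back down before being handed to
 * the driver.
 */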
static int red_offload(struct Qdisc *sch, bool enable)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_qopt_offload opt = {
                .handle = sch->handle,
                .parent = sch->parent,
        };

        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return -EOPNOTSUPP;

        if (enable) {
                opt.command = TC_RED_REPLACE;
                opt.set.min = q->parms.qth_min >> q->parms.Wlog;
                opt.set.max = q->parms.qth_max >> q->parms.Wlog;
                opt.set.probability = q->parms.max_P;
                opt.set.limit = q->limit;
                opt.set.is_ecn = red_use_ecn(q);
                opt.set.is_harddrop = red_use_harddrop(q);
                opt.set.is_nodrop = red_use_nodrop(q);
                opt.set.qstats = &sch->qstats;
        } else {
                opt.command = TC_RED_DESTROY;
        }

        return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        tcf_qevent_destroy(&q->qe_mark, sch);
        tcf_qevent_destroy(&q->qe_early_drop, sch);
        del_timer_sync(&q->adapt_timer);
        red_offload(sch, false);
        qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
        [TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
        [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
        [TCA_RED_STAB]  = { .len = RED_STAB_SIZE },
        [TCA_RED_MAX_P] = { .type = NLA_U32 },
        [TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
        [TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
        [TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};

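/* Shared by red_init() and red_change(): validate TCA_RED_PARMS/STAB/FLAGS,
 * optionally create a bfifo child sized to ctl->limit, and apply the new
 * parameters under the qdisc tree lock.
 */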
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
                        struct netlink_ext_ack *extack)
{
        struct Qdisc *old_child = NULL, *child = NULL;
        struct red_sched_data *q = qdisc_priv(sch);
        struct nla_bitfield32 flags_bf;
        struct tc_red_qopt *ctl;
        unsigned char userbits;
        unsigned char flags;
        int err;
        u32 max_P;
        u8 *stab;

        if (tb[TCA_RED_PARMS] == NULL ||
            tb[TCA_RED_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_RED_PARMS]);
        stab = nla_data(tb[TCA_RED_STAB]);
        if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
                              ctl->Scell_log, stab))
                return -EINVAL;

        err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
                            tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
                            &flags_bf, &userbits, extack);
        if (err)
                return err;

        if (ctl->limit > 0) {
                child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
                                         extack);
                if (IS_ERR(child))
                        return PTR_ERR(child);

                /* child is fifo, no need to check for noop_qdisc */
                qdisc_hash_add(child, true);
        }

        sch_tree_lock(sch);

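        /* Merge the historic qopt flags with the TCA_RED_FLAGS bitfield:
         * only bits named in the selector are overwritten, the rest keep
         * their previous value.
         */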
        flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
        err = red_validate_flags(flags, extack);
        if (err)
                goto unlock_out;

        q->flags = flags;
        q->userbits = userbits;
        q->limit = ctl->limit;
        if (child) {
                qdisc_tree_flush_backlog(q->qdisc);
                old_child = q->qdisc;
                q->qdisc = child;
        }

        red_set_parms(&q->parms,
                      ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      stab,
                      max_P);
        red_set_vars(&q->vars);

        del_timer(&q->adapt_timer);
        if (ctl->flags & TC_RED_ADAPTATIVE)
                mod_timer(&q->adapt_timer, jiffies + HZ/2);

        if (!q->qdisc->q.qlen)
                red_start_of_idle_period(&q->vars);

        sch_tree_unlock(sch);

        red_offload(sch, true);

        if (old_child)
                qdisc_put(old_child);
        return 0;

unlock_out:
        sch_tree_unlock(sch);
        if (child)
                qdisc_put(child);
        return err;
}

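/* Periodic worker for adaptive RED, armed from __red_change() when
 * TC_RED_ADAPTATIVE is set: roughly every 500 ms, re-run
 * red_adaptative_algo() under the root qdisc lock to adjust max_P, then
 * re-arm the timer.
 */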
static inline void red_adaptative_timer(struct timer_list *t)
{
        struct red_sched_data *q = from_timer(q, t, adapt_timer);
        struct Qdisc *sch = q->sch;
        spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

        spin_lock(root_lock);
        red_adaptative_algo(&q->parms, &q->vars);
        mod_timer(&q->adapt_timer, jiffies + HZ/2);
        spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_RED_MAX + 1];
        int err;

        q->qdisc = &noop_qdisc;
        q->sch = sch;
        timer_setup(&q->adapt_timer, red_adaptative_timer, 0);

        if (!opt)
                return -EINVAL;

        err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
                                          extack);
        if (err < 0)
                return err;

        err = __red_change(sch, tb, extack);
        if (err)
                return err;

        err = tcf_qevent_init(&q->qe_early_drop, sch,
                              FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
                              tb[TCA_RED_EARLY_DROP_BLOCK], extack);
        if (err)
                return err;

        return tcf_qevent_init(&q->qe_mark, sch,
                               FLOW_BLOCK_BINDER_TYPE_RED_MARK,
                               tb[TCA_RED_MARK_BLOCK], extack);
}

static int red_change(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_RED_MAX + 1];
        int err;

        if (!opt)
                return -EINVAL;

        err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
                                          extack);
        if (err < 0)
                return err;

        err = tcf_qevent_validate_change(&q->qe_early_drop,
                                         tb[TCA_RED_EARLY_DROP_BLOCK], extack);
        if (err)
                return err;

        err = tcf_qevent_validate_change(&q->qe_mark,
                                         tb[TCA_RED_MARK_BLOCK], extack);
        if (err)
                return err;

        return __red_change(sch, tb, extack);
}

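/* Request TC_RED_STATS from the driver so hardware byte/packet and queue
 * counters land in sch->bstats / sch->qstats before the regular dump
 * (a no-op when the qdisc is not offloaded).
 */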
static int red_dump_offload_stats(struct Qdisc *sch)
{
        struct tc_red_qopt_offload hw_stats = {
                .command = TC_RED_STATS,
                .handle = sch->handle,
                .parent = sch->parent,
                {
                        .stats.bstats = &sch->bstats,
                        .stats.qstats = &sch->qstats,
                },
        };

        return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = (q->flags & TC_RED_HISTORIC_FLAGS) |
                                  q->userbits,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };
        int err;

        err = red_dump_offload_stats(sch);
        if (err)
                goto nla_put_failure;

        opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;
        if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
            nla_put_bitfield32(skb, TCA_RED_FLAGS,
                               q->flags, TC_RED_SUPPORTED_FLAGS) ||
            tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
            tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

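/* Dump RED-specific xstats: when the qdisc is offloaded (TCQ_F_OFFLOADED),
 * first ask the driver to refresh q->stats via TC_RED_XSTATS, then report
 * the combined early-drop, tail-drop and mark totals.
 */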
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_xstats st = {0};

        if (sch->flags & TCQ_F_OFFLOADED) {
                struct tc_red_qopt_offload hw_stats_request = {
                        .command = TC_RED_XSTATS,
                        .handle = sch->handle,
                        .parent = sch->parent,
                        {
                                .xstats = &q->stats,
                        },
                };
                dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
                                              &hw_stats_request);
        }
        st.early = q->stats.prob_drop + q->stats.forced_drop;
        st.pdrop = q->stats.pdrop;
        st.other = q->stats.other;
        st.marked = q->stats.prob_mark + q->stats.forced_mark;

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        struct red_sched_data *q = qdisc_priv(sch);

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;
        return 0;
}

static void red_graft_offload(struct Qdisc *sch,
                              struct Qdisc *new, struct Qdisc *old,
                              struct netlink_ext_ack *extack)
{
        struct tc_red_qopt_offload graft_offload = {
                .handle         = sch->handle,
                .parent         = sch->parent,
                .child_handle   = new->handle,
                .command        = TC_RED_GRAFT,
        };

        qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
                                   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old, struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        *old = qdisc_replace(sch, new, &q->qdisc);

        red_graft_offload(sch, new, *old, extack);
        return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct red_sched_data *q = qdisc_priv(sch);

        return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static const struct Qdisc_class_ops red_class_ops = {
        .graft          =       red_graft,
        .leaf           =       red_leaf,
        .find           =       red_find,
        .walk           =       red_walk,
        .dump           =       red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
        .id             =       "red",
        .priv_size      =       sizeof(struct red_sched_data),
        .cl_ops         =       &red_class_ops,
        .enqueue        =       red_enqueue,
        .dequeue        =       red_dequeue,
        .peek           =       red_peek,
        .init           =       red_init,
        .reset          =       red_reset,
        .destroy        =       red_destroy,
        .change         =       red_change,
        .dump           =       red_dump,
        .dump_stats     =       red_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init red_module_init(void)
{
        return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
        unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");