// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/sched/sch_choke.c        CHOKE scheduler
 *
 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_dissector.h>

/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
   unresponsive flows) is a variant of RED that penalizes misbehaving flows but
   maintains no flow state. The difference from RED is an additional step
   during the enqueuing process. If average queue size is over the
   low threshold (qmin), a packet is chosen at random from the queue.
   If both the new and chosen packet are from the same flow, both
   are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
   needs to access packets in queue randomly. It has a minimal class
   interface to allow overriding the builtin flow classifier with
   filters.

   Source:
   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
   Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
   IEEE INFOCOM, 2000.

   A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
   Characteristics", IEEE/ACM Transactions on Networking, 2004

 */
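
/*
 * Illustrative configuration (device name and numbers are placeholders,
 * not recommendations; see tc-choke(8) for the full option list, which
 * mirrors RED's):
 *
 *	tc qdisc add dev eth0 root choke limit 1000 bandwidth 10mbit ecn
 *
 * As with RED, "bandwidth" only feeds the average-queue-size calculation;
 * this qdisc does not rate-limit traffic.
 */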

/* Upper bound on size of sk_buff table (packets) */
#define CHOKE_MAX_QUEUE (128*1024 - 1)

struct choke_sched_data {
/* Parameters */
	u32		 limit;
	unsigned char	 flags;

	struct red_parms parms;

/* Variables */
	struct red_vars	 vars;
	struct {
		u32	prob_drop;	/* Early probability drops */
		u32	prob_mark;	/* Early probability marks */
		u32	forced_drop;	/* Forced drops, qavg > max_thresh */
		u32	forced_mark;	/* Forced marks, qavg > max_thresh */
		u32	pdrop;		/* Drops due to queue limits */
		u32	other;		/* Drops due to drop() calls */
		u32	matched;	/* Drops due to flow match */
	} stats;

	unsigned int	 head;
	unsigned int	 tail;

	unsigned int	 tab_mask; /* size - 1 */

	struct sk_buff **tab;
};

/* number of elements in queue including holes */
static unsigned int choke_len(const struct choke_sched_data *q)
{
	return (q->tail - q->head) & q->tab_mask;
}
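
/*
 * Worked example: with tab_mask = 0x3ff (a 1024-slot table), head = 1020
 * and tail = 4, (4 - 1020) & 0x3ff = 8, i.e. eight slots are in use, some
 * of which may be holes left by choke_drop_by_idx() below.
 */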

/* Is ECN parameter configured */
static int use_ecn(const struct choke_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

/* Should packets over max just be dropped (versus marked) */
static int use_harddrop(const struct choke_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

/* Move head pointer forward to skip over holes */
static void choke_zap_head_holes(struct choke_sched_data *q)
{
	do {
		q->head = (q->head + 1) & q->tab_mask;
		if (q->head == q->tail)
			break;
	} while (q->tab[q->head] == NULL);
}

/* Move tail pointer backwards to reuse holes */
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
	do {
		q->tail = (q->tail - 1) & q->tab_mask;
		if (q->head == q->tail)
			break;
	} while (q->tab[q->tail] == NULL);
}
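
/*
 * Hole-skipping example (illustrative): with head = 0, tail = 3 and
 * tab = { A, NULL, B, ... }, dequeuing A and calling
 * choke_zap_head_holes() advances head past the hole in slot 1 directly
 * to slot 2.
 */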

/* Drop packet from queue array by creating a "hole" */
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
			      struct sk_buff **to_free)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = q->tab[idx];

	q->tab[idx] = NULL;

	if (idx == q->head)
		choke_zap_head_holes(q);
	if (idx == q->tail)
		choke_zap_tail_holes(q);

	qdisc_qstats_backlog_dec(sch, skb);
	qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
	qdisc_drop(skb, sch, to_free);
	--sch->q.qlen;
}

struct choke_skb_cb {
	u8			keys_valid;
	struct			flow_keys_digest keys;
};

static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
	qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
	return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}

/*
 * Compare flow of two packets
 *  Returns true only if source and destination address and port match.
 *          false for special cases
 */
static bool choke_match_flow(struct sk_buff *skb1,
			     struct sk_buff *skb2)
{
	struct flow_keys temp;

	if (skb1->protocol != skb2->protocol)
		return false;

	if (!choke_skb_cb(skb1)->keys_valid) {
		choke_skb_cb(skb1)->keys_valid = 1;
		skb_flow_dissect_flow_keys(skb1, &temp, 0);
		make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
	}

	if (!choke_skb_cb(skb2)->keys_valid) {
		choke_skb_cb(skb2)->keys_valid = 1;
		skb_flow_dissect_flow_keys(skb2, &temp, 0);
		make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
	}

	return !memcmp(&choke_skb_cb(skb1)->keys,
		       &choke_skb_cb(skb2)->keys,
		       sizeof(choke_skb_cb(skb1)->keys));
}
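
/*
 * Note: the flow digest above is computed lazily and cached in the skb
 * control block, so a packet is dissected at most once even if it is
 * compared against several randomly chosen peers while queued.
 */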

/*
 * Select a packet at random from queue
 * HACK: since the queue can have holes from previous deletions, retry
 *   several times to find a random skb but then just give up and return
 *   the head
 * Will return NULL if queue is empty (q->head == q->tail)
 */
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
					 unsigned int *pidx)
{
	struct sk_buff *skb;
	int retries = 3;

	do {
		*pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
		skb = q->tab[*pidx];
		if (skb)
			return skb;
	} while (--retries > 0);

	return q->tab[*pidx = q->head];
}

/*
 * Compare new packet with random packet in queue
 * returns true if matched and sets *pidx
 */
static bool choke_match_random(const struct choke_sched_data *q,
			       struct sk_buff *nskb,
			       unsigned int *pidx)
{
	struct sk_buff *oskb;

	if (q->head == q->tail)
		return false;

	oskb = choke_peek_random(q, pidx);
	return choke_match_flow(oskb, nskb);
}

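/*
 * Enqueue decision, summarising the logic below:
 *  - qavg <= qth_min: admit, no CHOKe or RED action (qcount is reset).
 *  - qavg >  qth_min: first the CHOKe step - compare the new packet with a
 *    randomly chosen queued one and drop both on a flow match; then either
 *    mark/drop with the usual RED probability (qth_min < qavg <= qth_max)
 *    or unconditionally (qavg > qth_max), dropping rather than marking if
 *    ECN is off or harddrop is set.
 * Congestion drops return NET_XMIT_CN, which locally generated traffic
 * sees as congestion rather than a hard error; a drop at the queue limit
 * is reported via qdisc_drop() instead.
 */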
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			 struct sk_buff **to_free)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	const struct red_parms *p = &q->parms;

	choke_skb_cb(skb)->keys_valid = 0;
	/* Compute average queue usage (see RED) */
	q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	/* Is queue small? */
	if (q->vars.qavg <= p->qth_min)
		q->vars.qcount = -1;
	else {
		unsigned int idx;

		/* Draw a packet at random from queue and compare flow */
		if (choke_match_random(q, skb, &idx)) {
			q->stats.matched++;
			choke_drop_by_idx(sch, idx, to_free);
			goto congestion_drop;
		}

		/* Queue is large, always mark/drop */
		if (q->vars.qavg > p->qth_max) {
			q->vars.qcount = -1;

			qdisc_qstats_overlimit(sch);
			if (use_harddrop(q) || !use_ecn(q) ||
			    !INET_ECN_set_ce(skb)) {
				q->stats.forced_drop++;
				goto congestion_drop;
			}

			q->stats.forced_mark++;
		} else if (++q->vars.qcount) {
			if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
				q->vars.qcount = 0;
				q->vars.qR = red_random(p);

				qdisc_qstats_overlimit(sch);
				if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
					q->stats.prob_drop++;
					goto congestion_drop;
				}

				q->stats.prob_mark++;
			}
		} else
			q->vars.qR = red_random(p);
	}

	/* Admit new packet */
	if (sch->q.qlen < q->limit) {
		q->tab[q->tail] = skb;
		q->tail = (q->tail + 1) & q->tab_mask;
		++sch->q.qlen;
		qdisc_qstats_backlog_inc(sch, skb);
		return NET_XMIT_SUCCESS;
	}

	q->stats.pdrop++;
	return qdisc_drop(skb, sch, to_free);

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (q->head == q->tail) {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
		return NULL;
	}

	skb = q->tab[q->head];
	q->tab[q->head] = NULL;
	choke_zap_head_holes(q);
	--sch->q.qlen;
	qdisc_qstats_backlog_dec(sch, skb);
	qdisc_bstats_update(sch, skb);

	return skb;
}

static void choke_reset(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	while (q->head != q->tail) {
		struct sk_buff *skb = q->tab[q->head];

		q->head = (q->head + 1) & q->tab_mask;
		if (!skb)
			continue;
		rtnl_qdisc_drop(skb, sch);
	}

	sch->q.qlen = 0;
	sch->qstats.backlog = 0;
	if (q->tab)
		memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
	q->head = q->tail = 0;
	red_restart(&q->vars);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_CHOKE_MAX_P]	= { .type = NLA_U32 },
};

static void choke_free(void *addr)
{
	kvfree(addr);
}

static int choke_change(struct Qdisc *sch, struct nlattr *opt,
			struct netlink_ext_ack *extack)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CHOKE_MAX + 1];
	const struct tc_red_qopt *ctl;
	int err;
	struct sk_buff **old = NULL;
	unsigned int mask;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt,
					  choke_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_CHOKE_PARMS] == NULL ||
	    tb[TCA_CHOKE_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_CHOKE_PARMS]);

	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	if (ctl->limit > CHOKE_MAX_QUEUE)
		return -EINVAL;

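	/*
	 * Size the table to the next power of two above the limit, e.g.
	 * limit = 1000 gives roundup_pow_of_two(1001) - 1 = 1023, i.e. a
	 * 1024-slot table.
	 */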
	mask = roundup_pow_of_two(ctl->limit + 1) - 1;
	if (mask != q->tab_mask) {
		struct sk_buff **ntab;

		ntab = kvcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
		if (!ntab)
			return -ENOMEM;

		sch_tree_lock(sch);
		old = q->tab;
		if (old) {
			unsigned int oqlen = sch->q.qlen, tail = 0;
			unsigned dropped = 0;

			while (q->head != q->tail) {
				struct sk_buff *skb = q->tab[q->head];

				q->head = (q->head + 1) & q->tab_mask;
				if (!skb)
					continue;
				if (tail < mask) {
					ntab[tail++] = skb;
					continue;
				}
				dropped += qdisc_pkt_len(skb);
				qdisc_qstats_backlog_dec(sch, skb);
				--sch->q.qlen;
				rtnl_qdisc_drop(skb, sch);
			}
			qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
			q->head = 0;
			q->tail = tail;
		}

		q->tab_mask = mask;
		q->tab = ntab;
	} else
		sch_tree_lock(sch);

	q->flags = ctl->flags;
	q->limit = ctl->limit;

	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_CHOKE_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	if (q->head == q->tail)
		red_end_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	choke_free(old);
	return 0;
}

static int choke_init(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	return choke_change(sch, opt, extack);
}

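/*
 * red_set_parms() stores qth_min/qth_max left-shifted by Wlog, so shift
 * them back here to report plain packet counts to userspace.
 */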
static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;

	if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct tc_choke_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.matched = q->stats.matched,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	choke_free(q->tab);
}

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
	.id		=	"choke",
	.priv_size	=	sizeof(struct choke_sched_data),

	.enqueue	=	choke_enqueue,
	.dequeue	=	choke_dequeue,
	.peek		=	choke_peek_head,
	.init		=	choke_init,
	.destroy	=	choke_destroy,
	.reset		=	choke_reset,
	.change		=	choke_change,
	.dump		=	choke_dump,
	.dump_stats	=	choke_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init choke_module_init(void)
{
	return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
	unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");