// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/sched/sch_choke.c        CHOKE scheduler
 *
 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_dissector.h>

/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
   unresponsive flows) is a variant of RED that penalizes misbehaving flows but
   maintains no flow state. The difference from RED is an additional step
   during the enqueuing process: if the average queue size exceeds the
   low threshold (qmin), a packet is chosen at random from the queue.
   If both the new and the chosen packet belong to the same flow, both
   are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
   needs to access packets in the queue randomly. It has a minimal class
   interface to allow overriding the builtin flow classifier with
   filters.

   Source:
   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
   Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
   IEEE INFOCOM, 2000.

   A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
   Characteristics", IEEE/ACM Transactions on Networking, 2004.

 */
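
/*
 * Worked example of the enqueue decision (illustrative numbers only):
 * with qth_min = 20, qth_max = 60 and a current average qavg = 35,
 * qavg exceeds qth_min, so a victim packet is sampled at a random
 * index in the table. If it belongs to the same flow as the arriving
 * packet, both are dropped; otherwise the arriving packet still faces
 * the usual RED probabilistic mark/drop before being admitted. A flow
 * holding most of the buffer is therefore sampled (and drained)
 * roughly in proportion to its share of the queue, with no per-flow
 * state kept.
 */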

/* Upper bound on size of sk_buff table (packets) */
#define CHOKE_MAX_QUEUE (128*1024 - 1)

struct choke_sched_data {
/* Parameters */
        u32              limit;
        unsigned char    flags;

        struct red_parms parms;

/* Variables */
        struct red_vars  vars;
        struct {
                u32     prob_drop;      /* Early probability drops */
                u32     prob_mark;      /* Early probability marks */
                u32     forced_drop;    /* Forced drops, qavg > max_thresh */
                u32     forced_mark;    /* Forced marks, qavg > max_thresh */
                u32     pdrop;          /* Drops due to queue limits */
                u32     other;          /* Drops due to drop() calls */
                u32     matched;        /* Drops due to flow match */
        } stats;

        unsigned int     head;
        unsigned int     tail;

        unsigned int     tab_mask; /* size - 1 */

        struct sk_buff **tab;
};

/* number of elements in queue including holes */
static unsigned int choke_len(const struct choke_sched_data *q)
{
        return (q->tail - q->head) & q->tab_mask;
}
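
/*
 * Example: with tab_mask = 7 (an 8-slot table), head = 6 and tail = 2,
 * choke_len() returns (2 - 6) & 7 = 4; slots 6, 7, 0 and 1 are counted,
 * some of which may be holes left by earlier CHOKe drops.
 */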

/* Is ECN parameter configured */
static int use_ecn(const struct choke_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

/* Should packets over max just be dropped (versus marked) */
static int use_harddrop(const struct choke_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

/* Move head pointer forward to skip over holes */
static void choke_zap_head_holes(struct choke_sched_data *q)
{
        do {
                q->head = (q->head + 1) & q->tab_mask;
                if (q->head == q->tail)
                        break;
        } while (q->tab[q->head] == NULL);
}

/* Move tail pointer backwards to reuse holes */
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
        do {
                q->tail = (q->tail - 1) & q->tab_mask;
                if (q->head == q->tail)
                        break;
        } while (q->tab[q->tail] == NULL);
}
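
/*
 * A CHOKe drop in the middle of the ring leaves a NULL "hole" instead
 * of shifting packets. Example: dropping index 4 of a table holding
 * indices 3..6 just sets tab[4] = NULL. Holes at the head or tail are
 * collapsed immediately via the zap helpers above (see
 * choke_drop_by_idx() below); interior holes are skipped lazily when
 * the head pointer walks over them on dequeue.
 */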

/* Drop packet from queue array by creating a "hole" */
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
                              struct sk_buff **to_free)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb = q->tab[idx];

        q->tab[idx] = NULL;

        if (idx == q->head)
                choke_zap_head_holes(q);
        if (idx == q->tail)
                choke_zap_tail_holes(q);

        qdisc_qstats_backlog_dec(sch, skb);
        qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
        qdisc_drop(skb, sch, to_free);
        --sch->q.qlen;
}

struct choke_skb_cb {
        u8                      keys_valid;
        struct                  flow_keys_digest keys;
};

static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
        qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
        return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}

/*
 * Compare the flows of two packets.
 * Returns true only if source and destination address and port match;
 * returns false for special cases (e.g. packets of different protocols).
 */
static bool choke_match_flow(struct sk_buff *skb1,
                             struct sk_buff *skb2)
{
        struct flow_keys temp;

        if (skb1->protocol != skb2->protocol)
                return false;

        if (!choke_skb_cb(skb1)->keys_valid) {
                choke_skb_cb(skb1)->keys_valid = 1;
                skb_flow_dissect_flow_keys(skb1, &temp, 0);
                make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
        }

        if (!choke_skb_cb(skb2)->keys_valid) {
                choke_skb_cb(skb2)->keys_valid = 1;
                skb_flow_dissect_flow_keys(skb2, &temp, 0);
                make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
        }

        return !memcmp(&choke_skb_cb(skb1)->keys,
                       &choke_skb_cb(skb2)->keys,
                       sizeof(choke_skb_cb(skb1)->keys));
}

/*
 * Select a packet at random from the queue.
 * HACK: since the queue can have holes from previous deletions, retry
 *   several times to find a random skb, then just give up and return
 *   the head. Returns NULL if the queue is empty (q->head == q->tail).
 */
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
                                         unsigned int *pidx)
{
        struct sk_buff *skb;
        int retries = 3;

        do {
                *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
                skb = q->tab[*pidx];
                if (skb)
                        return skb;
        } while (--retries > 0);

        return q->tab[*pidx = q->head];
}
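
/*
 * When the table is sparse, all three probes may land on holes, in
 * which case the head packet stands in as the victim. This biases the
 * sample slightly toward the oldest packet but keeps the peek O(1)
 * rather than scanning for an occupied slot.
 */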

/*
 * Compare new packet with random packet in queue
 * returns true if matched and sets *pidx
 */
static bool choke_match_random(const struct choke_sched_data *q,
                               struct sk_buff *nskb,
                               unsigned int *pidx)
{
        struct sk_buff *oskb;

        if (q->head == q->tail)
                return false;

        oskb = choke_peek_random(q, pidx);
        return choke_match_flow(oskb, nskb);
}

static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                         struct sk_buff **to_free)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        const struct red_parms *p = &q->parms;

        choke_skb_cb(skb)->keys_valid = 0;
        /* Compute average queue usage (see RED) */
        q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

        /* Is queue small? */
        if (q->vars.qavg <= p->qth_min)
                q->vars.qcount = -1;
        else {
                unsigned int idx;

                /* Draw a packet at random from queue and compare flow */
                if (choke_match_random(q, skb, &idx)) {
                        q->stats.matched++;
                        choke_drop_by_idx(sch, idx, to_free);
                        goto congestion_drop;
                }

                /* Queue is large, always mark/drop */
                if (q->vars.qavg > p->qth_max) {
                        q->vars.qcount = -1;

                        qdisc_qstats_overlimit(sch);
                        if (use_harddrop(q) || !use_ecn(q) ||
                            !INET_ECN_set_ce(skb)) {
                                q->stats.forced_drop++;
                                goto congestion_drop;
                        }

                        q->stats.forced_mark++;
                } else if (++q->vars.qcount) {
                        if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
                                q->vars.qcount = 0;
                                q->vars.qR = red_random(p);

                                qdisc_qstats_overlimit(sch);
                                if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
                                        q->stats.prob_drop++;
                                        goto congestion_drop;
                                }

                                q->stats.prob_mark++;
                        }
                } else
                        q->vars.qR = red_random(p);
        }

        /* Admit new packet */
        if (sch->q.qlen < q->limit) {
                q->tab[q->tail] = skb;
                q->tail = (q->tail + 1) & q->tab_mask;
                ++sch->q.qlen;
                qdisc_qstats_backlog_inc(sch, skb);
                return NET_XMIT_SUCCESS;
        }

        q->stats.pdrop++;
        return qdisc_drop(skb, sch, to_free);

congestion_drop:
        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
}
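
/*
 * The average used above is RED's exponentially weighted moving
 * average (see include/net/red.h): approximately
 * qavg = (1 - W) * qavg + W * qlen, with W = 2^-Wlog. Example with
 * Wlog = 9: each enqueue closes about 1/512 (~0.2%) of the gap between
 * qavg and the instantaneous queue length, so short bursts barely move
 * the average while a sustained backlog eventually pushes it past
 * qth_min.
 */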

static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;

        if (q->head == q->tail) {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
                return NULL;
        }

        skb = q->tab[q->head];
        q->tab[q->head] = NULL;
        choke_zap_head_holes(q);
        --sch->q.qlen;
        qdisc_qstats_backlog_dec(sch, skb);
        qdisc_bstats_update(sch, skb);

        return skb;
}

static void choke_reset(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        while (q->head != q->tail) {
                struct sk_buff *skb = q->tab[q->head];

                q->head = (q->head + 1) & q->tab_mask;
                if (!skb)
                        continue;
                rtnl_qdisc_drop(skb, sch);
        }

        sch->q.qlen = 0;
        sch->qstats.backlog = 0;
        if (q->tab)
                memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
        q->head = q->tail = 0;
        red_restart(&q->vars);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
        [TCA_CHOKE_PARMS]       = { .len = sizeof(struct tc_red_qopt) },
        [TCA_CHOKE_STAB]        = { .len = RED_STAB_SIZE },
        [TCA_CHOKE_MAX_P]       = { .type = NLA_U32 },
};

static void choke_free(void *addr)
{
        kvfree(addr);
}

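/*
 * The packet table is sized to the power of two just above the
 * configured limit so that ring indexing can use a simple mask.
 * Example: limit = 1000 gives roundup_pow_of_two(1001) = 1024, i.e.
 * tab_mask = 1023 and room for 1024 sk_buff pointers.
 */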
static int choke_change(struct Qdisc *sch, struct nlattr *opt,
                        struct netlink_ext_ack *extack)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_CHOKE_MAX + 1];
        const struct tc_red_qopt *ctl;
        int err;
        struct sk_buff **old = NULL;
        unsigned int mask;
        u32 max_P;
        u8 *stab;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt,
                                          choke_policy, NULL);
        if (err < 0)
                return err;

        if (tb[TCA_CHOKE_PARMS] == NULL ||
            tb[TCA_CHOKE_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_CHOKE_PARMS]);
        stab = nla_data(tb[TCA_CHOKE_STAB]);
        if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
                              ctl->Scell_log, stab))
                return -EINVAL;

        if (ctl->limit > CHOKE_MAX_QUEUE)
                return -EINVAL;

        mask = roundup_pow_of_two(ctl->limit + 1) - 1;
        if (mask != q->tab_mask) {
                struct sk_buff **ntab;

                ntab = kvcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
                if (!ntab)
                        return -ENOMEM;

                sch_tree_lock(sch);
                old = q->tab;
                if (old) {
                        unsigned int oqlen = sch->q.qlen, tail = 0;
                        unsigned int dropped = 0;

                        while (q->head != q->tail) {
                                struct sk_buff *skb = q->tab[q->head];

                                q->head = (q->head + 1) & q->tab_mask;
                                if (!skb)
                                        continue;
                                if (tail < mask) {
                                        ntab[tail++] = skb;
                                        continue;
                                }
                                dropped += qdisc_pkt_len(skb);
                                qdisc_qstats_backlog_dec(sch, skb);
                                --sch->q.qlen;
                                rtnl_qdisc_drop(skb, sch);
                        }
                        qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
                        q->head = 0;
                        q->tail = tail;
                }

                q->tab_mask = mask;
                q->tab = ntab;
        } else
                sch_tree_lock(sch);

        q->flags = ctl->flags;
        q->limit = ctl->limit;

        red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      stab,
                      max_P);
        red_set_vars(&q->vars);

        if (q->head == q->tail)
                red_end_of_idle_period(&q->vars);

        sch_tree_unlock(sch);
        choke_free(old);
        return 0;
}

static int choke_init(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        return choke_change(sch, opt, extack);
}

static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = q->flags,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };

        opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;

        if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}
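
/*
 * Note: red_set_parms() stores qth_min/qth_max pre-scaled by Wlog, the
 * same fixed-point scale as the running average, so the fast path can
 * compare qavg against them without shifting; the dump above undoes
 * that scaling. Example with Wlog = 9: a configured qth_min of 100 is
 * held internally as 100 << 9 = 51200 and reported back as 100.
 */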

static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct tc_choke_xstats st = {
                .early  = q->stats.prob_drop + q->stats.forced_drop,
                .marked = q->stats.prob_mark + q->stats.forced_mark,
                .pdrop  = q->stats.pdrop,
                .other  = q->stats.other,
                .matched = q->stats.matched,
        };

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        choke_free(q->tab);
}

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
        .id             =       "choke",
        .priv_size      =       sizeof(struct choke_sched_data),

        .enqueue        =       choke_enqueue,
        .dequeue        =       choke_dequeue,
        .peek           =       choke_peek_head,
        .init           =       choke_init,
        .destroy        =       choke_destroy,
        .reset          =       choke_reset,
        .change         =       choke_change,
        .dump           =       choke_dump,
        .dump_stats     =       choke_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init choke_module_init(void)
{
        return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
        unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");
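
/*
 * Example configuration (assuming iproute2's tc-choke(8) syntax; the
 * parameter values are only illustrative):
 *
 *   tc qdisc add dev eth0 root choke limit 1000 bandwidth 10mbit \
 *           min 83 max 250 avpkt 1000 ecn
 *
 * tc builds the RED lookup table (TCA_CHOKE_STAB) and the tc_red_qopt
 * parameters from this and hands them to choke_change() above.
 */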