linux/net/sched/sch_choke.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/sched/sch_choke.c        CHOKE scheduler
 *
 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_dissector.h>

/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
   unresponsive flows) is a variant of RED that penalizes misbehaving flows but
   maintains no flow state. The difference from RED is an additional step
   during the enqueuing process. If the average queue size is over the
   low threshold (qmin), a packet is chosen at random from the queue.
   If both the new and the chosen packet are from the same flow, both
   are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
   needs to access packets in the queue at random. It has a minimal class
   interface to allow overriding the builtin flow classifier with
   filters.

   Source:
   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
   Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
   IEEE INFOCOM, 2000.

   A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
   Characteristics", IEEE/ACM Transactions on Networking, 2004

 */

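/*
 * Rough sketch of the enqueue decision implemented below; this is only an
 * illustration, choke_enqueue() is the authoritative logic:
 *
 *      qavg = RED moving average of the queue length
 *      if (qavg > qth_min) {
 *              pick a queued packet at random
 *              if (it belongs to the same flow as the arriving packet)
 *                      drop both packets               (the CHOKe step)
 *              else
 *                      apply the usual RED mark/drop probability test
 *      }
 *      if (queue length < limit)
 *              append the arriving packet at the tail
 *      else
 *              drop it
 *
 * Example setup from userspace (illustrative values only; see tc-choke(8)
 * for the exact parameter syntax):
 *
 *      tc qdisc add dev eth0 root choke limit 1000 bandwidth 10mbit \
 *              min 100 max 300 avpkt 1000 burst 200 probability 0.02
 */
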
/* Upper bound on size of sk_buff table (packets) */
#define CHOKE_MAX_QUEUE (128*1024 - 1)

struct choke_sched_data {
/* Parameters */
        u32              limit;
        unsigned char    flags;

        struct red_parms parms;

/* Variables */
        struct red_vars  vars;
        struct {
                u32     prob_drop;      /* Early probability drops */
                u32     prob_mark;      /* Early probability marks */
                u32     forced_drop;    /* Forced drops, qavg > max_thresh */
                u32     forced_mark;    /* Forced marks, qavg > max_thresh */
                u32     pdrop;          /* Drops due to queue limits */
                u32     other;          /* Drops due to drop() calls */
                u32     matched;        /* Drops due to flow match */
        } stats;

        unsigned int     head;
        unsigned int     tail;

        unsigned int     tab_mask; /* size - 1 */

        struct sk_buff **tab;
};

/* number of elements in queue including holes */
static unsigned int choke_len(const struct choke_sched_data *q)
{
        return (q->tail - q->head) & q->tab_mask;
}

/* Is ECN parameter configured */
static int use_ecn(const struct choke_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

/* Should packets over max just be dropped (versus marked) */
static int use_harddrop(const struct choke_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

/* Move head pointer forward to skip over holes */
static void choke_zap_head_holes(struct choke_sched_data *q)
{
        do {
                q->head = (q->head + 1) & q->tab_mask;
                if (q->head == q->tail)
                        break;
        } while (q->tab[q->head] == NULL);
}

/* Move tail pointer backwards to reuse holes */
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
        do {
                q->tail = (q->tail - 1) & q->tab_mask;
                if (q->head == q->tail)
                        break;
        } while (q->tab[q->tail] == NULL);
}

/* Drop packet from queue array by creating a "hole" */
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
                              struct sk_buff **to_free)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb = q->tab[idx];

        q->tab[idx] = NULL;

        if (idx == q->head)
                choke_zap_head_holes(q);
        if (idx == q->tail)
                choke_zap_tail_holes(q);

        qdisc_qstats_backlog_dec(sch, skb);
        qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
        qdisc_drop(skb, sch, to_free);
        --sch->q.qlen;
}

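/*
 * Per-packet private state kept in the qdisc skb control block: the classid
 * used by the minimal class interface, plus a cached flow_keys digest so
 * that flow dissection runs at most once while a packet sits in the queue.
 */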
struct choke_skb_cb {
        u16                     classid;
        u8                      keys_valid;
        struct                  flow_keys_digest keys;
};

static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
        qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
        return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}

static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
{
        choke_skb_cb(skb)->classid = classid;
}

/*
 * Compare the flow of two packets.
 *  Returns true only if source and destination address and port match;
 *          false otherwise and in special cases (e.g. differing protocols).
 */
static bool choke_match_flow(struct sk_buff *skb1,
                             struct sk_buff *skb2)
{
        struct flow_keys temp;

        if (skb1->protocol != skb2->protocol)
                return false;

        if (!choke_skb_cb(skb1)->keys_valid) {
                choke_skb_cb(skb1)->keys_valid = 1;
                skb_flow_dissect_flow_keys(skb1, &temp, 0);
                make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
        }

        if (!choke_skb_cb(skb2)->keys_valid) {
                choke_skb_cb(skb2)->keys_valid = 1;
                skb_flow_dissect_flow_keys(skb2, &temp, 0);
                make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
        }

        return !memcmp(&choke_skb_cb(skb1)->keys,
                       &choke_skb_cb(skb2)->keys,
                       sizeof(choke_skb_cb(skb1)->keys));
}

/*
 * Select a packet at random from the queue.
 * HACK: since the queue can have holes from previous deletions, retry several
 *   times to find a random skb, then just give up and return the head.
 * Will return NULL if queue is empty (q->head == q->tail)
 */
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
                                         unsigned int *pidx)
{
        struct sk_buff *skb;
        int retries = 3;

        do {
                *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
                skb = q->tab[*pidx];
                if (skb)
                        return skb;
        } while (--retries > 0);

        return q->tab[*pidx = q->head];
}

/*
 * Compare new packet with random packet in queue
 * returns true if matched and sets *pidx
 */
static bool choke_match_random(const struct choke_sched_data *q,
                               struct sk_buff *nskb,
                               unsigned int *pidx)
{
        struct sk_buff *oskb;

        if (q->head == q->tail)
                return false;

        oskb = choke_peek_random(q, pidx);
        return choke_match_flow(oskb, nskb);
}

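/*
 * Enqueue: update the RED average queue size; once it exceeds qth_min,
 * compare the new packet against one drawn at random from the queue and
 * drop both on a flow match (the CHOKe step), otherwise fall back to the
 * normal RED mark/drop decisions.  Packets are admitted at the tail while
 * the configured limit allows it.
 */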
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                         struct sk_buff **to_free)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        const struct red_parms *p = &q->parms;

        choke_skb_cb(skb)->keys_valid = 0;
        /* Compute average queue usage (see RED) */
        q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

        /* Is queue small? */
        if (q->vars.qavg <= p->qth_min)
                q->vars.qcount = -1;
        else {
                unsigned int idx;

                /* Draw a packet at random from queue and compare flow */
                if (choke_match_random(q, skb, &idx)) {
                        q->stats.matched++;
                        choke_drop_by_idx(sch, idx, to_free);
                        goto congestion_drop;
                }

                /* Queue is large, always mark/drop */
                if (q->vars.qavg > p->qth_max) {
                        q->vars.qcount = -1;

                        qdisc_qstats_overlimit(sch);
                        if (use_harddrop(q) || !use_ecn(q) ||
                            !INET_ECN_set_ce(skb)) {
                                q->stats.forced_drop++;
                                goto congestion_drop;
                        }

                        q->stats.forced_mark++;
                } else if (++q->vars.qcount) {
                        if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
                                q->vars.qcount = 0;
                                q->vars.qR = red_random(p);

                                qdisc_qstats_overlimit(sch);
                                if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
                                        q->stats.prob_drop++;
                                        goto congestion_drop;
                                }

                                q->stats.prob_mark++;
                        }
                } else
                        q->vars.qR = red_random(p);
        }

        /* Admit new packet */
        if (sch->q.qlen < q->limit) {
                q->tab[q->tail] = skb;
                q->tail = (q->tail + 1) & q->tab_mask;
                ++sch->q.qlen;
                qdisc_qstats_backlog_inc(sch, skb);
                return NET_XMIT_SUCCESS;
        }

        q->stats.pdrop++;
        return qdisc_drop(skb, sch, to_free);

congestion_drop:
        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
}

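/*
 * Dequeue the packet at the head of the table and advance the head pointer
 * past any holes left by CHOKe drops.
 */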
static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;

        if (q->head == q->tail) {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
                return NULL;
        }

        skb = q->tab[q->head];
        q->tab[q->head] = NULL;
        choke_zap_head_holes(q);
        --sch->q.qlen;
        qdisc_qstats_backlog_dec(sch, skb);
        qdisc_bstats_update(sch, skb);

        return skb;
}

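/* Drop every queued packet, zero the table and reset the RED averaging state */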
static void choke_reset(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        while (q->head != q->tail) {
                struct sk_buff *skb = q->tab[q->head];

                q->head = (q->head + 1) & q->tab_mask;
                if (!skb)
                        continue;
                rtnl_qdisc_drop(skb, sch);
        }

        sch->q.qlen = 0;
        sch->qstats.backlog = 0;
        memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
        q->head = q->tail = 0;
        red_restart(&q->vars);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
        [TCA_CHOKE_PARMS]       = { .len = sizeof(struct tc_red_qopt) },
        [TCA_CHOKE_STAB]        = { .len = RED_STAB_SIZE },
        [TCA_CHOKE_MAX_P]       = { .type = NLA_U32 },
};

static void choke_free(void *addr)
{
        kvfree(addr);
}

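/*
 * Configure or reconfigure the qdisc: validate the netlink attributes,
 * (re)size the packet table to a power of two of at least limit + 1 entries
 * (migrating any queued packets), and install the RED parameters.  Used for
 * both choke_init() and the change operation.
 */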
static int choke_change(struct Qdisc *sch, struct nlattr *opt,
                        struct netlink_ext_ack *extack)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_CHOKE_MAX + 1];
        const struct tc_red_qopt *ctl;
        int err;
        struct sk_buff **old = NULL;
        unsigned int mask;
        u32 max_P;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt,
                                          choke_policy, NULL);
        if (err < 0)
                return err;

        if (tb[TCA_CHOKE_PARMS] == NULL ||
            tb[TCA_CHOKE_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_CHOKE_PARMS]);

        if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
                return -EINVAL;

        if (ctl->limit > CHOKE_MAX_QUEUE)
                return -EINVAL;

        mask = roundup_pow_of_two(ctl->limit + 1) - 1;
        if (mask != q->tab_mask) {
                struct sk_buff **ntab;

                ntab = kvmalloc_array((mask + 1), sizeof(struct sk_buff *),
                                      GFP_KERNEL | __GFP_ZERO);
                if (!ntab)
                        return -ENOMEM;

                sch_tree_lock(sch);
                old = q->tab;
                if (old) {
                        unsigned int oqlen = sch->q.qlen, tail = 0;
                        unsigned dropped = 0;

                        while (q->head != q->tail) {
                                struct sk_buff *skb = q->tab[q->head];

                                q->head = (q->head + 1) & q->tab_mask;
                                if (!skb)
                                        continue;
                                if (tail < mask) {
                                        ntab[tail++] = skb;
                                        continue;
                                }
                                dropped += qdisc_pkt_len(skb);
                                qdisc_qstats_backlog_dec(sch, skb);
                                --sch->q.qlen;
                                rtnl_qdisc_drop(skb, sch);
                        }
                        qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
                        q->head = 0;
                        q->tail = tail;
                }

                q->tab_mask = mask;
                q->tab = ntab;
        } else
                sch_tree_lock(sch);

        q->flags = ctl->flags;
        q->limit = ctl->limit;

        red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      nla_data(tb[TCA_CHOKE_STAB]),
                      max_P);
        red_set_vars(&q->vars);

        if (q->head == q->tail)
                red_end_of_idle_period(&q->vars);

        sch_tree_unlock(sch);
        choke_free(old);
        return 0;
}

static int choke_init(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        return choke_change(sch, opt, extack);
}

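/* Dump the current RED/CHOKe configuration back to userspace as netlink attributes */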
static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = q->flags,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };

        opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;

        if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

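/* Export the accumulated drop/mark counters (struct tc_choke_xstats) to userspace */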
static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct tc_choke_xstats st = {
                .early  = q->stats.prob_drop + q->stats.forced_drop,
                .marked = q->stats.prob_mark + q->stats.forced_mark,
                .pdrop  = q->stats.pdrop,
                .other  = q->stats.other,
                .matched = q->stats.matched,
        };

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        choke_free(q->tab);
}

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
        .id             =       "choke",
        .priv_size      =       sizeof(struct choke_sched_data),

        .enqueue        =       choke_enqueue,
        .dequeue        =       choke_dequeue,
        .peek           =       choke_peek_head,
        .init           =       choke_init,
        .destroy        =       choke_destroy,
        .reset          =       choke_reset,
        .change         =       choke_change,
        .dump           =       choke_dump,
        .dump_stats     =       choke_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init choke_module_init(void)
{
        return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
        unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");