/*
 * net/sched/sch_choke.c        CHOKE scheduler
 *
 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_dissector.h>

/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
   unresponsive flows) is a variant of RED that penalizes misbehaving flows but
   maintains no flow state. The difference from RED is an additional step
   during the enqueuing process. If the average queue size is over the
   low threshold (qmin), a packet is chosen at random from the queue.
   If both the new and the chosen packet are from the same flow, both
   are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
   needs to access packets in the queue randomly. It has a minimal class
   interface to allow overriding the builtin flow classifier with
   filters.

   Source:
   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
   Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
   IEEE INFOCOM, 2000.

   A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
   Characteristics", IEEE/ACM Transactions on Networking, 2004

 */
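
/*
 * Example (editorial addition, not part of the original source): CHOKe is
 * configured from user space through tc with RED-style parameters. A setup
 * for a 10 Mbit link might look roughly like:
 *
 *   tc qdisc add dev eth0 root choke limit 1000 bandwidth 10mbit ecn
 *
 * Optional min/max/burst thresholds (in packets) can be given as well; see
 * tc-choke(8) for the authoritative option list and defaults.
 */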

/* Upper bound on size of sk_buff table (packets) */
#define CHOKE_MAX_QUEUE (128*1024 - 1)

struct choke_sched_data {
/* Parameters */
        u32              limit;
        unsigned char    flags;

        struct red_parms parms;

/* Variables */
        struct red_vars  vars;
        struct {
                u32     prob_drop;      /* Early probability drops */
                u32     prob_mark;      /* Early probability marks */
                u32     forced_drop;    /* Forced drops, qavg > max_thresh */
                u32     forced_mark;    /* Forced marks, qavg > max_thresh */
                u32     pdrop;          /* Drops due to queue limits */
                u32     other;          /* Drops due to drop() calls */
                u32     matched;        /* Drops due to flow match */
        } stats;

        unsigned int     head;
        unsigned int     tail;

        unsigned int     tab_mask; /* size - 1 */

        struct sk_buff **tab;
};

/* number of elements in queue including holes */
static unsigned int choke_len(const struct choke_sched_data *q)
{
        return (q->tail - q->head) & q->tab_mask;
}
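
/*
 * Illustrative example (editorial addition): the table size is a power of
 * two, so index arithmetic is done modulo (tab_mask + 1) with a bitwise AND.
 * With tab_mask = 7 (8 slots), head = 6 and tail = 2, choke_len() returns
 * (2 - 6) & 7 = 4: the four slots between head and tail are occupied or are
 * holes, even though tail has wrapped past the end of the table.
 */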

/* Is ECN parameter configured */
static int use_ecn(const struct choke_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

/* Should packets over max just be dropped (versus marked) */
static int use_harddrop(const struct choke_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

/* Move head pointer forward to skip over holes */
static void choke_zap_head_holes(struct choke_sched_data *q)
{
        do {
                q->head = (q->head + 1) & q->tab_mask;
                if (q->head == q->tail)
                        break;
        } while (q->tab[q->head] == NULL);
}

/* Move tail pointer backwards to reuse holes */
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
        do {
                q->tail = (q->tail - 1) & q->tab_mask;
                if (q->head == q->tail)
                        break;
        } while (q->tab[q->tail] == NULL);
}
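
/*
 * Illustrative example (editorial addition): choke_drop_by_idx() below leaves
 * NULL "holes" in the table. If the slots from head onwards hold A, NULL,
 * NULL, B and A is then dequeued, choke_zap_head_holes() steps head past the
 * two holes so that the next dequeue finds B immediately.
 * choke_zap_tail_holes() performs the mirror-image walk, moving tail
 * backwards over trailing holes.
 */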

/* Drop packet from queue array by creating a "hole" */
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
                              struct sk_buff **to_free)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb = q->tab[idx];

        q->tab[idx] = NULL;

        if (idx == q->head)
                choke_zap_head_holes(q);
        if (idx == q->tail)
                choke_zap_tail_holes(q);

        qdisc_qstats_backlog_dec(sch, skb);
        qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
        qdisc_drop(skb, sch, to_free);
        --sch->q.qlen;
}

struct choke_skb_cb {
        u8                      keys_valid;
        struct                  flow_keys_digest keys;
};

static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
        qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
        return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}

/*
 * Compare flow of two packets
 *  Returns true only if source and destination address and port match;
 *  false for special cases.
 */
static bool choke_match_flow(struct sk_buff *skb1,
                             struct sk_buff *skb2)
{
        struct flow_keys temp;

        if (skb1->protocol != skb2->protocol)
                return false;

        if (!choke_skb_cb(skb1)->keys_valid) {
                choke_skb_cb(skb1)->keys_valid = 1;
                skb_flow_dissect_flow_keys(skb1, &temp, 0);
                make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
        }

        if (!choke_skb_cb(skb2)->keys_valid) {
                choke_skb_cb(skb2)->keys_valid = 1;
                skb_flow_dissect_flow_keys(skb2, &temp, 0);
                make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
        }

        return !memcmp(&choke_skb_cb(skb1)->keys,
                       &choke_skb_cb(skb2)->keys,
                       sizeof(choke_skb_cb(skb1)->keys));
}
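
/*
 * Editorial note: packets of the same connection (same protocol, addresses
 * and ports) produce identical flow_keys_digest values, so the memcmp()
 * above reports a match. The digest is only a compact summary of the
 * dissected keys, so distinct flows may occasionally compare equal; such a
 * false positive merely causes one extra CHOKe drop.
 */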

/*
 * Select a packet at random from queue
 * HACK: since the queue can have holes from previous deletions, retry several
 *   times to find a random skb but then just give up and return the head
 * Will return NULL if queue is empty (q->head == q->tail)
 */
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
                                         unsigned int *pidx)
{
        struct sk_buff *skb;
        int retrys = 3;

        do {
                *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
                skb = q->tab[*pidx];
                if (skb)
                        return skb;
        } while (--retrys > 0);

        return q->tab[*pidx = q->head];
}
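
/*
 * Editorial note: if all three random probes land on holes, the head slot is
 * used as a fallback. That slot is never a hole while the queue is non-empty,
 * because dequeue and choke_drop_by_idx() always advance head past holes via
 * choke_zap_head_holes().
 */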

/*
 * Compare new packet with random packet in queue
 * returns true if matched and sets *pidx
 */
static bool choke_match_random(const struct choke_sched_data *q,
                               struct sk_buff *nskb,
                               unsigned int *pidx)
{
        struct sk_buff *oskb;

        if (q->head == q->tail)
                return false;

        oskb = choke_peek_random(q, pidx);
        return choke_match_flow(oskb, nskb);
}

static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                         struct sk_buff **to_free)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        const struct red_parms *p = &q->parms;

        choke_skb_cb(skb)->keys_valid = 0;
        /* Compute average queue usage (see RED) */
        q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

        /* Is queue small? */
        if (q->vars.qavg <= p->qth_min)
                q->vars.qcount = -1;
        else {
                unsigned int idx;

                /* Draw a packet at random from queue and compare flow */
                if (choke_match_random(q, skb, &idx)) {
                        q->stats.matched++;
                        choke_drop_by_idx(sch, idx, to_free);
                        goto congestion_drop;
                }

                /* Queue is large, always mark/drop */
                if (q->vars.qavg > p->qth_max) {
                        q->vars.qcount = -1;

                        qdisc_qstats_overlimit(sch);
                        if (use_harddrop(q) || !use_ecn(q) ||
                            !INET_ECN_set_ce(skb)) {
                                q->stats.forced_drop++;
                                goto congestion_drop;
                        }

                        q->stats.forced_mark++;
                } else if (++q->vars.qcount) {
                        if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
                                q->vars.qcount = 0;
                                q->vars.qR = red_random(p);

                                qdisc_qstats_overlimit(sch);
                                if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
                                        q->stats.prob_drop++;
                                        goto congestion_drop;
                                }

                                q->stats.prob_mark++;
                        }
                } else
                        q->vars.qR = red_random(p);
        }

        /* Admit new packet */
        if (sch->q.qlen < q->limit) {
                q->tab[q->tail] = skb;
                q->tail = (q->tail + 1) & q->tab_mask;
                ++sch->q.qlen;
                qdisc_qstats_backlog_inc(sch, skb);
                return NET_XMIT_SUCCESS;
        }

        q->stats.pdrop++;
        return qdisc_drop(skb, sch, to_free);

congestion_drop:
        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
}
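
/*
 * Editorial summary of the enqueue logic above:
 *   qavg <= qth_min: no AQM action, just try to admit the packet.
 *   Otherwise a queued packet is drawn at random; if it belongs to the same
 *   flow as the new packet, both are dropped (the CHOKe step).
 *   qavg > qth_max: ECN-mark if permitted, else drop (forced).
 *   qth_min < qavg <= qth_max: mark/drop with RED's probability (early).
 * A packet that survives these checks is admitted only while qlen < limit;
 * otherwise it is counted as pdrop.
 */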

static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;

        if (q->head == q->tail) {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
                return NULL;
        }

        skb = q->tab[q->head];
        q->tab[q->head] = NULL;
        choke_zap_head_holes(q);
        --sch->q.qlen;
        qdisc_qstats_backlog_dec(sch, skb);
        qdisc_bstats_update(sch, skb);

        return skb;
}

static void choke_reset(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        while (q->head != q->tail) {
                struct sk_buff *skb = q->tab[q->head];

                q->head = (q->head + 1) & q->tab_mask;
                if (!skb)
                        continue;
                rtnl_qdisc_drop(skb, sch);
        }

        sch->q.qlen = 0;
        sch->qstats.backlog = 0;
        if (q->tab)
                memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
        q->head = q->tail = 0;
        red_restart(&q->vars);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
        [TCA_CHOKE_PARMS]       = { .len = sizeof(struct tc_red_qopt) },
        [TCA_CHOKE_STAB]        = { .len = RED_STAB_SIZE },
        [TCA_CHOKE_MAX_P]       = { .type = NLA_U32 },
};


static void choke_free(void *addr)
{
        kvfree(addr);
}

static int choke_change(struct Qdisc *sch, struct nlattr *opt,
                        struct netlink_ext_ack *extack)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_CHOKE_MAX + 1];
        const struct tc_red_qopt *ctl;
        int err;
        struct sk_buff **old = NULL;
        unsigned int mask;
        u32 max_P;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt,
                                          choke_policy, NULL);
        if (err < 0)
                return err;

        if (tb[TCA_CHOKE_PARMS] == NULL ||
            tb[TCA_CHOKE_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_CHOKE_PARMS]);

        if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
                return -EINVAL;

        if (ctl->limit > CHOKE_MAX_QUEUE)
                return -EINVAL;

        mask = roundup_pow_of_two(ctl->limit + 1) - 1;
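
        /* Editorial note: the table gets at least limit + 1 slots, rounded up
         * to a power of two, so a full queue (limit packets) is still
         * distinguishable from an empty one (head == tail) and wrap-around is
         * a simple AND with the mask. E.g. limit = 1000 gives
         * roundup_pow_of_two(1001) = 1024, mask = 1023.
         */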
        if (mask != q->tab_mask) {
                struct sk_buff **ntab;

                ntab = kvcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
                if (!ntab)
                        return -ENOMEM;

                sch_tree_lock(sch);
                old = q->tab;
                if (old) {
                        unsigned int oqlen = sch->q.qlen, tail = 0;
                        unsigned dropped = 0;

                        while (q->head != q->tail) {
                                struct sk_buff *skb = q->tab[q->head];

                                q->head = (q->head + 1) & q->tab_mask;
                                if (!skb)
                                        continue;
                                if (tail < mask) {
                                        ntab[tail++] = skb;
                                        continue;
                                }
                                dropped += qdisc_pkt_len(skb);
                                qdisc_qstats_backlog_dec(sch, skb);
                                --sch->q.qlen;
                                rtnl_qdisc_drop(skb, sch);
                        }
                        qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
                        q->head = 0;
                        q->tail = tail;
                }

                q->tab_mask = mask;
                q->tab = ntab;
        } else
                sch_tree_lock(sch);

        q->flags = ctl->flags;
        q->limit = ctl->limit;

        red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      nla_data(tb[TCA_CHOKE_STAB]),
                      max_P);
        red_set_vars(&q->vars);

        if (q->head == q->tail)
                red_end_of_idle_period(&q->vars);

        sch_tree_unlock(sch);
        choke_free(old);
        return 0;
}

static int choke_init(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        return choke_change(sch, opt, extack);
}

static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = q->flags,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };

        opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;

        if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct tc_choke_xstats st = {
                .early  = q->stats.prob_drop + q->stats.forced_drop,
                .marked = q->stats.prob_mark + q->stats.forced_mark,
                .pdrop  = q->stats.pdrop,
                .other  = q->stats.other,
                .matched = q->stats.matched,
        };

        return gnet_stats_copy_app(d, &st, sizeof(st));
}
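
/*
 * Editorial note: these counters reach user space as struct tc_choke_xstats
 * and, with a choke-aware iproute2, typically appear in the output of
 * "tc -s qdisc show" as the marked/early/pdrop/other/matched figures (the
 * exact formatting depends on the iproute2 version).
 */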

static void choke_destroy(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        choke_free(q->tab);
}

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
        .id             =       "choke",
        .priv_size      =       sizeof(struct choke_sched_data),

        .enqueue        =       choke_enqueue,
        .dequeue        =       choke_dequeue,
        .peek           =       choke_peek_head,
        .init           =       choke_init,
        .destroy        =       choke_destroy,
        .reset          =       choke_reset,
        .change         =       choke_change,
        .dump           =       choke_dump,
        .dump_stats     =       choke_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init choke_module_init(void)
{
        return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
        unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");