/*
 * net/sched/sch_choke.c        CHOKE scheduler
 *
 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_dissector.h>

/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
   unresponsive flows) is a variant of RED that penalizes misbehaving flows
   but maintains no flow state. The difference from RED is an additional step
   during the enqueuing process. If the average queue size is over the
   low threshold (qmin), a packet is chosen at random from the queue.
   If the new packet and the chosen packet are from the same flow, both
   are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
   needs to access packets in the queue randomly. It has a minimal class
   interface to allow overriding the builtin flow classifier with
   filters.

   Source:
   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
   Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
   IEEE INFOCOM, 2000.

   A. Tang, J. Wang, and S. Low, "Understanding CHOKe: Throughput and Spatial
   Characteristics", IEEE/ACM Transactions on Networking, 2004.

 */

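/*
   Illustrative walk-through (numbers are made up, not defaults): with
   qth_min = 5 and qth_max = 15 packets and qavg = 8 when a packet arrives,
   one queued packet is drawn at random; if it dissects to the same flow
   keys digest as the new packet, both are dropped and counted in
   stats.matched.  Otherwise the ordinary RED decision applies:
   5 < qavg <= 15 gives a probabilistic mark/drop, qavg > 15 a forced
   mark/drop, and qavg <= 5 plain admission up to the queue limit.

   A userspace configuration looks roughly like this (illustrative values,
   see tc-choke(8) for the authoritative syntax):

     tc qdisc add dev eth0 root choke limit 1000 bandwidth 10mbit \
        min 100 max 300 avpkt 1000 burst 200 probability 0.02 ecn
 */
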
/* Upper bound on size of sk_buff table (packets) */
#define CHOKE_MAX_QUEUE (128*1024 - 1)

struct choke_sched_data {
/* Parameters */
        u32              limit;
        unsigned char    flags;

        struct red_parms parms;

/* Variables */
        struct red_vars  vars;
        struct {
                u32     prob_drop;      /* Early probability drops */
                u32     prob_mark;      /* Early probability marks */
                u32     forced_drop;    /* Forced drops, qavg > max_thresh */
                u32     forced_mark;    /* Forced marks, qavg > max_thresh */
                u32     pdrop;          /* Drops due to queue limits */
                u32     other;          /* Drops due to drop() calls */
                u32     matched;        /* Drops due to flow match */
        } stats;

        unsigned int     head;
        unsigned int     tail;

        unsigned int     tab_mask; /* size - 1 */

        struct sk_buff **tab;
};

/* number of elements in queue including holes */
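/* Example (illustrative): with tab_mask = 7 (an 8-slot table), head = 6 and
 * tail = 2, (2 - 6) & 7 = 4 slots are in use, holes included.
 */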
static unsigned int choke_len(const struct choke_sched_data *q)
{
        return (q->tail - q->head) & q->tab_mask;
}

/* Is ECN parameter configured */
static int use_ecn(const struct choke_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

/* Should packets over max just be dropped (versus marked) */
static int use_harddrop(const struct choke_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

/* Move head pointer forward to skip over holes */
static void choke_zap_head_holes(struct choke_sched_data *q)
{
        do {
                q->head = (q->head + 1) & q->tab_mask;
                if (q->head == q->tail)
                        break;
        } while (q->tab[q->head] == NULL);
}

/* Move tail pointer backwards to reuse holes */
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
        do {
                q->tail = (q->tail - 1) & q->tab_mask;
                if (q->head == q->tail)
                        break;
        } while (q->tab[q->tail] == NULL);
}

/* Drop packet from queue array by creating a "hole" */
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
                              struct sk_buff **to_free)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb = q->tab[idx];

        q->tab[idx] = NULL;

        if (idx == q->head)
                choke_zap_head_holes(q);
        if (idx == q->tail)
                choke_zap_tail_holes(q);

        qdisc_qstats_backlog_dec(sch, skb);
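        /* This drop happens outside the normal dequeue path, so any parent
         * qdiscs must also be told that a packet and its bytes are gone.
         */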
        qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
        qdisc_drop(skb, sch, to_free);
        --sch->q.qlen;
}

struct choke_skb_cb {
        u16                     classid;
        u8                      keys_valid;
        struct                  flow_keys_digest keys;
};

static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
        qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
        return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}

static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
{
        choke_skb_cb(skb)->classid = classid;
}

/*
 * Compare flow of two packets
 *  Returns true only if source and destination addresses and ports match;
 *          false for special cases (e.g. when the protocols differ).
 */
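/*
 * The dissected flow keys digest is cached in the skb control block
 * (keys_valid), so a packet is dissected at most once while it sits in
 * the queue, no matter how many arriving packets it is compared against.
 */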
static bool choke_match_flow(struct sk_buff *skb1,
                             struct sk_buff *skb2)
{
        struct flow_keys temp;

        if (skb1->protocol != skb2->protocol)
                return false;

        if (!choke_skb_cb(skb1)->keys_valid) {
                choke_skb_cb(skb1)->keys_valid = 1;
                skb_flow_dissect_flow_keys(skb1, &temp, 0);
                make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
        }

        if (!choke_skb_cb(skb2)->keys_valid) {
                choke_skb_cb(skb2)->keys_valid = 1;
                skb_flow_dissect_flow_keys(skb2, &temp, 0);
                make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
        }

        return !memcmp(&choke_skb_cb(skb1)->keys,
                       &choke_skb_cb(skb2)->keys,
                       sizeof(choke_skb_cb(skb1)->keys));
}

/*
 * Select a packet at random from queue
 * HACK: since the queue can have holes from previous deletions, retry several
 *   times to find a random skb, then just give up and return the head.
 * Will return NULL if queue is empty (q->head == q->tail)
 */
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
                                         unsigned int *pidx)
{
        struct sk_buff *skb;
        int retrys = 3;

        do {
                *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
                skb = q->tab[*pidx];
                if (skb)
                        return skb;
        } while (--retrys > 0);

        return q->tab[*pidx = q->head];
}

/*
 * Compare new packet with random packet in queue
 * returns true if matched and sets *pidx
 */
static bool choke_match_random(const struct choke_sched_data *q,
                               struct sk_buff *nskb,
                               unsigned int *pidx)
{
        struct sk_buff *oskb;

        if (q->head == q->tail)
                return false;

        oskb = choke_peek_random(q, pidx);
        return choke_match_flow(oskb, nskb);
}

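/*
 * Enqueue decision, following the scheme described at the top of this file:
 * the RED average qavg is updated first; while qavg is above qth_min the
 * arriving packet is compared against one drawn at random from the queue
 * and both are dropped on a flow match, otherwise the usual RED
 * forced/probabilistic mark-or-drop logic runs.  Packets that survive are
 * tail-added, subject to the configured limit.
 */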
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                         struct sk_buff **to_free)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        const struct red_parms *p = &q->parms;

        choke_skb_cb(skb)->keys_valid = 0;
        /* Compute average queue usage (see RED) */
        q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

        /* Is queue small? */
        if (q->vars.qavg <= p->qth_min)
                q->vars.qcount = -1;
        else {
                unsigned int idx;

                /* Draw a packet at random from queue and compare flow */
                if (choke_match_random(q, skb, &idx)) {
                        q->stats.matched++;
                        choke_drop_by_idx(sch, idx, to_free);
                        goto congestion_drop;
                }

                /* Queue is large, always mark/drop */
                if (q->vars.qavg > p->qth_max) {
                        q->vars.qcount = -1;

                        qdisc_qstats_overlimit(sch);
                        if (use_harddrop(q) || !use_ecn(q) ||
                            !INET_ECN_set_ce(skb)) {
                                q->stats.forced_drop++;
                                goto congestion_drop;
                        }

                        q->stats.forced_mark++;
                } else if (++q->vars.qcount) {
                        if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
                                q->vars.qcount = 0;
                                q->vars.qR = red_random(p);

                                qdisc_qstats_overlimit(sch);
                                if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
                                        q->stats.prob_drop++;
                                        goto congestion_drop;
                                }

                                q->stats.prob_mark++;
                        }
                } else
                        q->vars.qR = red_random(p);
        }

        /* Admit new packet */
        if (sch->q.qlen < q->limit) {
                q->tab[q->tail] = skb;
                q->tail = (q->tail + 1) & q->tab_mask;
                ++sch->q.qlen;
                qdisc_qstats_backlog_inc(sch, skb);
                return NET_XMIT_SUCCESS;
        }

        q->stats.pdrop++;
        return qdisc_drop(skb, sch, to_free);

congestion_drop:
        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
}

static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;

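        /* Empty queue: note the start of an idle period so RED can age
         * qavg down accordingly when traffic resumes.
         */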
        if (q->head == q->tail) {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
                return NULL;
        }

        skb = q->tab[q->head];
        q->tab[q->head] = NULL;
        choke_zap_head_holes(q);
        --sch->q.qlen;
        qdisc_qstats_backlog_dec(sch, skb);
        qdisc_bstats_update(sch, skb);

        return skb;
}

static void choke_reset(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        while (q->head != q->tail) {
                struct sk_buff *skb = q->tab[q->head];

                q->head = (q->head + 1) & q->tab_mask;
                if (!skb)
                        continue;
                rtnl_qdisc_drop(skb, sch);
        }

        sch->q.qlen = 0;
        sch->qstats.backlog = 0;
        if (q->tab)
                memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
        q->head = q->tail = 0;
        red_restart(&q->vars);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
        [TCA_CHOKE_PARMS]       = { .len = sizeof(struct tc_red_qopt) },
        [TCA_CHOKE_STAB]        = { .len = RED_STAB_SIZE },
        [TCA_CHOKE_MAX_P]       = { .type = NLA_U32 },
};

static void choke_free(void *addr)
{
        kvfree(addr);
}

static int choke_change(struct Qdisc *sch, struct nlattr *opt)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_CHOKE_MAX + 1];
        const struct tc_red_qopt *ctl;
        int err;
        struct sk_buff **old = NULL;
        unsigned int mask;
        u32 max_P;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy, NULL);
        if (err < 0)
                return err;

        if (tb[TCA_CHOKE_PARMS] == NULL ||
            tb[TCA_CHOKE_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_CHOKE_PARMS]);

        if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
                return -EINVAL;

        if (ctl->limit > CHOKE_MAX_QUEUE)
                return -EINVAL;

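        /* Allocate at least limit + 1 slots, rounded up to a power of two;
         * e.g. (illustrative) a limit of 1000 packets gives 1024 slots and
         * tab_mask = 1023.
         */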
        mask = roundup_pow_of_two(ctl->limit + 1) - 1;
        if (mask != q->tab_mask) {
                struct sk_buff **ntab;

                ntab = kvmalloc_array((mask + 1), sizeof(struct sk_buff *), GFP_KERNEL | __GFP_ZERO);
                if (!ntab)
                        return -ENOMEM;

                sch_tree_lock(sch);
                old = q->tab;
                if (old) {
                        unsigned int oqlen = sch->q.qlen, tail = 0;
                        unsigned dropped = 0;

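                        /* Migrate queued packets, in order, into the new
                         * table; whatever no longer fits is dropped and the
                         * qlen/backlog accounting is corrected afterwards.
                         */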
                        while (q->head != q->tail) {
                                struct sk_buff *skb = q->tab[q->head];

                                q->head = (q->head + 1) & q->tab_mask;
                                if (!skb)
                                        continue;
                                if (tail < mask) {
                                        ntab[tail++] = skb;
                                        continue;
                                }
                                dropped += qdisc_pkt_len(skb);
                                qdisc_qstats_backlog_dec(sch, skb);
                                --sch->q.qlen;
                                rtnl_qdisc_drop(skb, sch);
                        }
                        qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
                        q->head = 0;
                        q->tail = tail;
                }

                q->tab_mask = mask;
                q->tab = ntab;
        } else
                sch_tree_lock(sch);

        q->flags = ctl->flags;
        q->limit = ctl->limit;

        red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      nla_data(tb[TCA_CHOKE_STAB]),
                      max_P);
        red_set_vars(&q->vars);

        if (q->head == q->tail)
                red_end_of_idle_period(&q->vars);

        sch_tree_unlock(sch);
        choke_free(old);
        return 0;
}

static int choke_init(struct Qdisc *sch, struct nlattr *opt)
{
        return choke_change(sch, opt);
}

static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
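        /* parms store qth_min/qth_max scaled up by Wlog (see red_set_parms()),
         * so scale them back down for the netlink dump.
         */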
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = q->flags,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };

        opts = nla_nest_start(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;

        if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct tc_choke_xstats st = {
                .early  = q->stats.prob_drop + q->stats.forced_drop,
                .marked = q->stats.prob_mark + q->stats.forced_mark,
                .pdrop  = q->stats.pdrop,
                .other  = q->stats.other,
                .matched = q->stats.matched,
        };

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        choke_free(q->tab);
}

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
        .id             =       "choke",
        .priv_size      =       sizeof(struct choke_sched_data),

        .enqueue        =       choke_enqueue,
        .dequeue        =       choke_dequeue,
        .peek           =       choke_peek_head,
        .init           =       choke_init,
        .destroy        =       choke_destroy,
        .reset          =       choke_reset,
        .change         =       choke_change,
        .dump           =       choke_dump,
        .dump_stats     =       choke_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init choke_module_init(void)
{
        return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
        unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");