linux/net/ipv4/tcp_illinois.c
<<
>>
Prefs
   1/*
   2 * TCP Illinois congestion control.
   3 * Home page:
   4 *      http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
   5 *
   6 * The algorithm is described in:
   7 * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
   8 *  for High-Speed Networks"
   9 * http://www.ews.uiuc.edu/~shaoliu/papersandslides/liubassri06perf.pdf
  10 *
  11 * Implemented from description in paper and ns-2 simulation.
  12 * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
  13 */
  14
  15#include <linux/module.h>
  16#include <linux/skbuff.h>
  17#include <linux/inet_diag.h>
  18#include <asm/div64.h>
  19#include <net/tcp.h>
  20
  21#define ALPHA_SHIFT     7
  22#define ALPHA_SCALE     (1u<<ALPHA_SHIFT)
  23#define ALPHA_MIN       ((3*ALPHA_SCALE)/10)    /* ~0.3 */
  24#define ALPHA_MAX       (10*ALPHA_SCALE)        /* 10.0 */
  25#define ALPHA_BASE      ALPHA_SCALE             /* 1.0 */
  26#define U32_MAX         ((u32)~0U)
  27#define RTT_MAX         (U32_MAX / ALPHA_MAX)   /* 3.3 secs */
  28
  29#define BETA_SHIFT      6
  30#define BETA_SCALE      (1u<<BETA_SHIFT)
  31#define BETA_MIN        (BETA_SCALE/8)          /* 0.125 */
  32#define BETA_MAX        (BETA_SCALE/2)          /* 0.5 */
  33#define BETA_BASE       BETA_MAX
  34
  35static int win_thresh __read_mostly = 15;
  36module_param(win_thresh, int, 0);
  37MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing");
  38
  39static int theta __read_mostly = 5;
  40module_param(theta, int, 0);
  41MODULE_PARM_DESC(theta, "# of fast RTT's before full growth");
  42
  43/* TCP Illinois Parameters */
  44struct illinois {
  45        u64     sum_rtt;        /* sum of rtt's measured within last rtt */
  46        u16     cnt_rtt;        /* # of rtts measured within last rtt */
  47        u32     base_rtt;       /* min of all rtt in usec */
  48        u32     max_rtt;        /* max of all rtt in usec */
  49        u32     end_seq;        /* right edge of current RTT */
  50        u32     alpha;          /* Additive increase */
  51        u32     beta;           /* Muliplicative decrease */
  52        u16     acked;          /* # packets acked by current ACK */
  53        u8      rtt_above;      /* average rtt has gone above threshold */
  54        u8      rtt_low;        /* # of rtts measurements below threshold */
  55};
  56
  57static void rtt_reset(struct sock *sk)
  58{
  59        struct tcp_sock *tp = tcp_sk(sk);
  60        struct illinois *ca = inet_csk_ca(sk);
  61
  62        ca->end_seq = tp->snd_nxt;
  63        ca->cnt_rtt = 0;
  64        ca->sum_rtt = 0;
  65
  66        /* TODO: age max_rtt? */
  67}
  68
  69static void tcp_illinois_init(struct sock *sk)
  70{
  71        struct illinois *ca = inet_csk_ca(sk);
  72
  73        ca->alpha = ALPHA_MAX;
  74        ca->beta = BETA_BASE;
  75        ca->base_rtt = 0x7fffffff;
  76        ca->max_rtt = 0;
  77
  78        ca->acked = 0;
  79        ca->rtt_low = 0;
  80        ca->rtt_above = 0;
  81
  82        rtt_reset(sk);
  83}
  84
  85/* Measure RTT for each ack. */
  86static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, s32 rtt)
  87{
  88        struct illinois *ca = inet_csk_ca(sk);
  89
  90        ca->acked = pkts_acked;
  91
  92        /* dup ack, no rtt sample */
  93        if (rtt < 0)
  94                return;
  95
  96        /* ignore bogus values, this prevents wraparound in alpha math */
  97        if (rtt > RTT_MAX)
  98                rtt = RTT_MAX;
  99
 100        /* keep track of minimum RTT seen so far */
 101        if (ca->base_rtt > rtt)
 102                ca->base_rtt = rtt;
 103
 104        /* and max */
 105        if (ca->max_rtt < rtt)
 106                ca->max_rtt = rtt;
 107
 108        ++ca->cnt_rtt;
 109        ca->sum_rtt += rtt;
 110}
 111
 112/* Maximum queuing delay */
 113static inline u32 max_delay(const struct illinois *ca)
 114{
 115        return ca->max_rtt - ca->base_rtt;
 116}
 117
 118/* Average queuing delay */
 119static inline u32 avg_delay(const struct illinois *ca)
 120{
 121        u64 t = ca->sum_rtt;
 122
 123        do_div(t, ca->cnt_rtt);
 124        return t - ca->base_rtt;
 125}
 126
 127/*
 128 * Compute value of alpha used for additive increase.
 129 * If small window then use 1.0, equivalent to Reno.
 130 *
 131 * For larger windows, adjust based on average delay.
 132 * A. If average delay is at minimum (we are uncongested),
 133 *    then use large alpha (10.0) to increase faster.
 134 * B. If average delay is at maximum (getting congested)
 135 *    then use small alpha (0.3)
 136 *
 137 * The result is a convex window growth curve.
 138 */
 139static u32 alpha(struct illinois *ca, u32 da, u32 dm)
 140{
 141        u32 d1 = dm / 100;      /* Low threshold */
 142
 143        if (da <= d1) {
 144                /* If never got out of low delay zone, then use max */
 145                if (!ca->rtt_above)
 146                        return ALPHA_MAX;
 147
 148                /* Wait for 5 good RTT's before allowing alpha to go alpha max.
 149                 * This prevents one good RTT from causing sudden window increase.
 150                 */
 151                if (++ca->rtt_low < theta)
 152                        return ca->alpha;
 153
 154                ca->rtt_low = 0;
 155                ca->rtt_above = 0;
 156                return ALPHA_MAX;
 157        }
 158
 159        ca->rtt_above = 1;
 160
 161        /*
 162         * Based on:
 163         *
 164         *      (dm - d1) amin amax
 165         * k1 = -------------------
 166         *         amax - amin
 167         *
 168         *       (dm - d1) amin
 169         * k2 = ----------------  - d1
 170         *        amax - amin
 171         *
 172         *             k1
 173         * alpha = ----------
 174         *          k2 + da
 175         */
 176
 177        dm -= d1;
 178        da -= d1;
 179        return (dm * ALPHA_MAX) /
 180                (dm + (da  * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
 181}
 182
 183/*
 184 * Beta used for multiplicative decrease.
 185 * For small window sizes returns same value as Reno (0.5)
 186 *
 187 * If delay is small (10% of max) then beta = 1/8
 188 * If delay is up to 80% of max then beta = 1/2
 189 * In between is a linear function
 190 */
 191static u32 beta(u32 da, u32 dm)
 192{
 193        u32 d2, d3;
 194
 195        d2 = dm / 10;
 196        if (da <= d2)
 197                return BETA_MIN;
 198
 199        d3 = (8 * dm) / 10;
 200        if (da >= d3 || d3 <= d2)
 201                return BETA_MAX;
 202
 203        /*
 204         * Based on:
 205         *
 206         *       bmin d3 - bmax d2
 207         * k3 = -------------------
 208         *         d3 - d2
 209         *
 210         *       bmax - bmin
 211         * k4 = -------------
 212         *         d3 - d2
 213         *
 214         * b = k3 + k4 da
 215         */
 216        return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da)
 217                / (d3 - d2);
 218}
 219
 220/* Update alpha and beta values once per RTT */
 221static void update_params(struct sock *sk)
 222{
 223        struct tcp_sock *tp = tcp_sk(sk);
 224        struct illinois *ca = inet_csk_ca(sk);
 225
 226        if (tp->snd_cwnd < win_thresh) {
 227                ca->alpha = ALPHA_BASE;
 228                ca->beta = BETA_BASE;
 229        } else if (ca->cnt_rtt > 0) {
 230                u32 dm = max_delay(ca);
 231                u32 da = avg_delay(ca);
 232
 233                ca->alpha = alpha(ca, da, dm);
 234                ca->beta = beta(da, dm);
 235        }
 236
 237        rtt_reset(sk);
 238}
 239
 240/*
 241 * In case of loss, reset to default values
 242 */
 243static void tcp_illinois_state(struct sock *sk, u8 new_state)
 244{
 245        struct illinois *ca = inet_csk_ca(sk);
 246
 247        if (new_state == TCP_CA_Loss) {
 248                ca->alpha = ALPHA_BASE;
 249                ca->beta = BETA_BASE;
 250                ca->rtt_low = 0;
 251                ca->rtt_above = 0;
 252                rtt_reset(sk);
 253        }
 254}
 255
 256/*
 257 * Increase window in response to successful acknowledgment.
 258 */
 259static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 260{
 261        struct tcp_sock *tp = tcp_sk(sk);
 262        struct illinois *ca = inet_csk_ca(sk);
 263
 264        if (after(ack, ca->end_seq))
 265                update_params(sk);
 266
 267        /* RFC2861 only increase cwnd if fully utilized */
 268        if (!tcp_is_cwnd_limited(sk, in_flight))
 269                return;
 270
 271        /* In slow start */
 272        if (tp->snd_cwnd <= tp->snd_ssthresh)
 273                tcp_slow_start(tp);
 274
 275        else {
 276                u32 delta;
 277
 278                /* snd_cwnd_cnt is # of packets since last cwnd increment */
 279                tp->snd_cwnd_cnt += ca->acked;
 280                ca->acked = 1;
 281
 282                /* This is close approximation of:
 283                 * tp->snd_cwnd += alpha/tp->snd_cwnd
 284                */
 285                delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
 286                if (delta >= tp->snd_cwnd) {
 287                        tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
 288                                           (u32) tp->snd_cwnd_clamp);
 289                        tp->snd_cwnd_cnt = 0;
 290                }
 291        }
 292}
 293
 294static u32 tcp_illinois_ssthresh(struct sock *sk)
 295{
 296        struct tcp_sock *tp = tcp_sk(sk);
 297        struct illinois *ca = inet_csk_ca(sk);
 298
 299        /* Multiplicative decrease */
 300        return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
 301}
 302
 303
 304/* Extract info for Tcp socket info provided via netlink. */
 305static void tcp_illinois_info(struct sock *sk, u32 ext,
 306                              struct sk_buff *skb)
 307{
 308        const struct illinois *ca = inet_csk_ca(sk);
 309
 310        if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
 311                struct tcpvegas_info info = {
 312                        .tcpv_enabled = 1,
 313                        .tcpv_rttcnt = ca->cnt_rtt,
 314                        .tcpv_minrtt = ca->base_rtt,
 315                };
 316                u64 t = ca->sum_rtt;
 317
 318                do_div(t, ca->cnt_rtt);
 319                info.tcpv_rtt = t;
 320
 321                nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
 322        }
 323}
 324
 325static struct tcp_congestion_ops tcp_illinois = {
 326        .flags          = TCP_CONG_RTT_STAMP,
 327        .init           = tcp_illinois_init,
 328        .ssthresh       = tcp_illinois_ssthresh,
 329        .min_cwnd       = tcp_reno_min_cwnd,
 330        .cong_avoid     = tcp_illinois_cong_avoid,
 331        .set_state      = tcp_illinois_state,
 332        .get_info       = tcp_illinois_info,
 333        .pkts_acked     = tcp_illinois_acked,
 334
 335        .owner          = THIS_MODULE,
 336        .name           = "illinois",
 337};
 338
 339static int __init tcp_illinois_register(void)
 340{
 341        BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE);
 342        return tcp_register_congestion_control(&tcp_illinois);
 343}
 344
 345static void __exit tcp_illinois_unregister(void)
 346{
 347        tcp_unregister_congestion_control(&tcp_illinois);
 348}
 349
 350module_init(tcp_illinois_register);
 351module_exit(tcp_illinois_unregister);
 352
 353MODULE_AUTHOR("Stephen Hemminger, Shao Liu");
 354MODULE_LICENSE("GPL");
 355MODULE_DESCRIPTION("TCP Illinois");
 356MODULE_VERSION("1.0");
 357