linux/net/ipv4/tcp_lp.c
<<
>>
Prefs
// SPDX-License-Identifier: GPL-2.0-only
/*
 * TCP Low Priority (TCP-LP)
 *
 * TCP Low Priority is a distributed algorithm whose goal is to utilize only
 *   the excess network bandwidth as compared to the ``fair share`` of
 *   bandwidth as targeted by TCP.
 *
 * As of 2.6.13, Linux supports pluggable congestion control algorithms.
 * Due to the limitations of the API, we make the following changes from
 * the original TCP-LP implementation:
 *   o We use newReno in most core CA handling. Only add some checking
 *     within cong_avoid.
 *   o Error correction of the remote HZ: the remote HZ estimate is kept
 *     under continuous checking and updating.
 *   o Handling calculation of One-Way-Delay (OWD) within rtt_sample, since
 *     OWD has a similar meaning to RTT. Also correct the buggy formula.
 *   o Handle reaction for Early Congestion Indication (ECI) within
 *     pkts_acked, as mentioned within pseudo code.
 *   o OWD is handled in relative format, where the local time stamp is in
 *     tcp_time_stamp format.
 *
 * Original Author:
 *   Aleksandar Kuzmanovic <akuzma@northwestern.edu>
 * Available from:
 *   http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf
 * Original implementation for 2.4.19:
 *   http://www-ece.rice.edu/networks/TCP-LP/
 *
 * 2.6.x module Authors:
 *   Wong Hoi Sing, Edison <hswong3i@gmail.com>
 *   Hung Hing Lun, Mike <hlhung3i@gmail.com>
 * SourceForge project page:
 *   http://tcp-lp-mod.sourceforge.net/
 */
  36
#include <linux/math64.h>
#include <linux/module.h>
#include <net/tcp.h>
  39
  40/* resolution of owd */
  41#define LP_RESOL       TCP_TS_HZ
  42
  43/**
  44 * enum tcp_lp_state
  45 * @LP_VALID_RHZ: is remote HZ valid?
  46 * @LP_VALID_OWD: is OWD valid?
  47 * @LP_WITHIN_THR: are we within threshold?
  48 * @LP_WITHIN_INF: are we within inference?
  49 *
  50 * TCP-LP's state flags.
  51 * We create this set of state flag mainly for debugging.
  52 */
  53enum tcp_lp_state {
  54        LP_VALID_RHZ = (1 << 0),
  55        LP_VALID_OWD = (1 << 1),
  56        LP_WITHIN_THR = (1 << 3),
  57        LP_WITHIN_INF = (1 << 4),
  58};
  59
  60/**
  61 * struct lp
  62 * @flag: TCP-LP state flag
  63 * @sowd: smoothed OWD << 3
  64 * @owd_min: min OWD
  65 * @owd_max: max OWD
  66 * @owd_max_rsv: reserved max owd
  67 * @remote_hz: estimated remote HZ
  68 * @remote_ref_time: remote reference time
  69 * @local_ref_time: local reference time
  70 * @last_drop: time for last active drop
  71 * @inference: current inference
  72 *
  73 * TCP-LP's private struct.
  74 * We get the idea from original TCP-LP implementation where only left those we
  75 * found are really useful.
  76 */
  77struct lp {
  78        u32 flag;
  79        u32 sowd;
  80        u32 owd_min;
  81        u32 owd_max;
  82        u32 owd_max_rsv;
  83        u32 remote_hz;
  84        u32 remote_ref_time;
  85        u32 local_ref_time;
  86        u32 last_drop;
  87        u32 inference;
  88};
  89
  90/**
  91 * tcp_lp_init
  92 * @sk: socket to initialize congestion control algorithm for
  93 *
  94 * Init all required variables.
  95 * Clone the handling from Vegas module implementation.
  96 */
  97static void tcp_lp_init(struct sock *sk)
  98{
  99        struct lp *lp = inet_csk_ca(sk);
 100
 101        lp->flag = 0;
 102        lp->sowd = 0;
 103        lp->owd_min = 0xffffffff;
 104        lp->owd_max = 0;
 105        lp->owd_max_rsv = 0;
 106        lp->remote_hz = 0;
 107        lp->remote_ref_time = 0;
 108        lp->local_ref_time = 0;
 109        lp->last_drop = 0;
 110        lp->inference = 0;
 111}
 112
 113/**
 114 * tcp_lp_cong_avoid
 115 * @sk: socket to avoid congesting
 116 *
 117 * Implementation of cong_avoid.
 118 * Will only call newReno CA when away from inference.
 119 * From TCP-LP's paper, this will be handled in additive increasement.
 120 */
 121static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 122{
 123        struct lp *lp = inet_csk_ca(sk);
 124
 125        if (!(lp->flag & LP_WITHIN_INF))
 126                tcp_reno_cong_avoid(sk, ack, acked);
 127}
 128
 129/**
 130 * tcp_lp_remote_hz_estimator
 131 * @sk: socket which needs an estimate for the remote HZs
 132 *
 133 * Estimate remote HZ.
 134 * We keep on updating the estimated value, where original TCP-LP
 135 * implementation only guest it for once and use forever.
 136 */
 137static u32 tcp_lp_remote_hz_estimator(struct sock *sk)
 138{
 139        struct tcp_sock *tp = tcp_sk(sk);
 140        struct lp *lp = inet_csk_ca(sk);
 141        s64 rhz = lp->remote_hz << 6;   /* remote HZ << 6 */
 142        s64 m = 0;
 143
 144        /* not yet record reference time
 145         * go away!! record it before come back!! */
 146        if (lp->remote_ref_time == 0 || lp->local_ref_time == 0)
 147                goto out;
 148
 149        /* we can't calc remote HZ with no different!! */
 150        if (tp->rx_opt.rcv_tsval == lp->remote_ref_time ||
 151            tp->rx_opt.rcv_tsecr == lp->local_ref_time)
 152                goto out;
 153
 154        m = TCP_TS_HZ *
 155            (tp->rx_opt.rcv_tsval - lp->remote_ref_time) /
 156            (tp->rx_opt.rcv_tsecr - lp->local_ref_time);
 157        if (m < 0)
 158                m = -m;
 159
 160        if (rhz > 0) {
 161                m -= rhz >> 6;  /* m is now error in remote HZ est */
 162                rhz += m;       /* 63/64 old + 1/64 new */
 163        } else
 164                rhz = m << 6;
 165
 166 out:
 167        /* record time for successful remote HZ calc */
 168        if ((rhz >> 6) > 0)
 169                lp->flag |= LP_VALID_RHZ;
 170        else
 171                lp->flag &= ~LP_VALID_RHZ;
 172
 173        /* record reference time stamp */
 174        lp->remote_ref_time = tp->rx_opt.rcv_tsval;
 175        lp->local_ref_time = tp->rx_opt.rcv_tsecr;
 176
 177        return rhz >> 6;
 178}
 179
 180/**
 181 * tcp_lp_owd_calculator
 182 * @sk: socket to calculate one way delay for
 183 *
 184 * Calculate one way delay (in relative format).
 185 * Original implement OWD as minus of remote time difference to local time
 186 * difference directly. As this time difference just simply equal to RTT, when
 187 * the network status is stable, remote RTT will equal to local RTT, and result
 188 * OWD into zero.
 189 * It seems to be a bug and so we fixed it.
 190 */
 191static u32 tcp_lp_owd_calculator(struct sock *sk)
 192{
 193        struct tcp_sock *tp = tcp_sk(sk);
 194        struct lp *lp = inet_csk_ca(sk);
 195        s64 owd = 0;
 196
 197        lp->remote_hz = tcp_lp_remote_hz_estimator(sk);
 198
 199        if (lp->flag & LP_VALID_RHZ) {
 200                owd =
 201                    tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) -
 202                    tp->rx_opt.rcv_tsecr * (LP_RESOL / TCP_TS_HZ);
 203                if (owd < 0)
 204                        owd = -owd;
 205        }
 206
 207        if (owd > 0)
 208                lp->flag |= LP_VALID_OWD;
 209        else
 210                lp->flag &= ~LP_VALID_OWD;
 211
 212        return owd;
 213}
 214
 215/**
 216 * tcp_lp_rtt_sample
 217 * @sk: socket to add a rtt sample to
 218 * @rtt: round trip time, which is ignored!
 219 *
 220 * Implementation or rtt_sample.
 221 * Will take the following action,
 222 *   1. calc OWD,
 223 *   2. record the min/max OWD,
 224 *   3. calc smoothed OWD (SOWD).
 225 * Most ideas come from the original TCP-LP implementation.
 226 */
 227static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt)
 228{
 229        struct lp *lp = inet_csk_ca(sk);
 230        s64 mowd = tcp_lp_owd_calculator(sk);
 231
 232        /* sorry that we don't have valid data */
 233        if (!(lp->flag & LP_VALID_RHZ) || !(lp->flag & LP_VALID_OWD))
 234                return;
 235
 236        /* record the next min owd */
 237        if (mowd < lp->owd_min)
 238                lp->owd_min = mowd;
 239
 240        /* always forget the max of the max
 241         * we just set owd_max as one below it */
 242        if (mowd > lp->owd_max) {
 243                if (mowd > lp->owd_max_rsv) {
 244                        if (lp->owd_max_rsv == 0)
 245                                lp->owd_max = mowd;
 246                        else
 247                                lp->owd_max = lp->owd_max_rsv;
 248                        lp->owd_max_rsv = mowd;
 249                } else
 250                        lp->owd_max = mowd;
 251        }
 252
 253        /* calc for smoothed owd */
 254        if (lp->sowd != 0) {
 255                mowd -= lp->sowd >> 3;  /* m is now error in owd est */
 256                lp->sowd += mowd;       /* owd = 7/8 owd + 1/8 new */
 257        } else
 258                lp->sowd = mowd << 3;   /* take the measured time be owd */
 259}
 260
 261/**
 262 * tcp_lp_pkts_acked
 263 * @sk: socket requiring congestion avoidance calculations
 264 *
 265 * Implementation of pkts_acked.
 266 * Deal with active drop under Early Congestion Indication.
 267 * Only drop to half and 1 will be handle, because we hope to use back
 268 * newReno in increase case.
 269 * We work it out by following the idea from TCP-LP's paper directly
 270 */
 271static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
 272{
 273        struct tcp_sock *tp = tcp_sk(sk);
 274        struct lp *lp = inet_csk_ca(sk);
 275        u32 now = tcp_time_stamp(tp);
 276        u32 delta;
 277
 278        if (sample->rtt_us > 0)
 279                tcp_lp_rtt_sample(sk, sample->rtt_us);
 280
 281        /* calc inference */
 282        delta = now - tp->rx_opt.rcv_tsecr;
 283        if ((s32)delta > 0)
 284                lp->inference = 3 * delta;
 285
 286        /* test if within inference */
 287        if (lp->last_drop && (now - lp->last_drop < lp->inference))
 288                lp->flag |= LP_WITHIN_INF;
 289        else
 290                lp->flag &= ~LP_WITHIN_INF;
 291
 292        /* test if within threshold */
 293        if (lp->sowd >> 3 <
 294            lp->owd_min + 15 * (lp->owd_max - lp->owd_min) / 100)
 295                lp->flag |= LP_WITHIN_THR;
 296        else
 297                lp->flag &= ~LP_WITHIN_THR;
 298
 299        pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag,
 300                 tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max,
 301                 lp->sowd >> 3);
 302
 303        if (lp->flag & LP_WITHIN_THR)
 304                return;
 305
 306        /* FIXME: try to reset owd_min and owd_max here
 307         * so decrease the chance the min/max is no longer suitable
 308         * and will usually within threshold when within inference */
 309        lp->owd_min = lp->sowd >> 3;
 310        lp->owd_max = lp->sowd >> 2;
 311        lp->owd_max_rsv = lp->sowd >> 2;
 312
 313        /* happened within inference
 314         * drop snd_cwnd into 1 */
 315        if (lp->flag & LP_WITHIN_INF)
 316                tp->snd_cwnd = 1U;
 317
 318        /* happened after inference
 319         * cut snd_cwnd into half */
 320        else
 321                tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U);
 322
 323        /* record this drop time */
 324        lp->last_drop = now;
 325}
 326
 327static struct tcp_congestion_ops tcp_lp __read_mostly = {
 328        .init = tcp_lp_init,
 329        .ssthresh = tcp_reno_ssthresh,
 330        .undo_cwnd = tcp_reno_undo_cwnd,
 331        .cong_avoid = tcp_lp_cong_avoid,
 332        .pkts_acked = tcp_lp_pkts_acked,
 333
 334        .owner = THIS_MODULE,
 335        .name = "lp"
 336};
 337
 338static int __init tcp_lp_register(void)
 339{
 340        BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
 341        return tcp_register_congestion_control(&tcp_lp);
 342}
 343
 344static void __exit tcp_lp_unregister(void)
 345{
 346        tcp_unregister_congestion_control(&tcp_lp);
 347}
 348
 349module_init(tcp_lp_register);
 350module_exit(tcp_lp_unregister);
 351
 352MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun Mike");
 353MODULE_LICENSE("GPL");
 354MODULE_DESCRIPTION("TCP Low Priority");
 355