1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26#include <linux/mm.h>
27#include <linux/module.h>
28#include <linux/math64.h>
29#include <net/tcp.h>
30
31#define BICTCP_BETA_SCALE 1024
32
33
34#define BICTCP_HZ 10
35
36
37#define HYSTART_ACK_TRAIN 0x1
38#define HYSTART_DELAY 0x2
39
40
41#define HYSTART_MIN_SAMPLES 8
42#define HYSTART_DELAY_MIN (4U<<3)
43#define HYSTART_DELAY_MAX (16U<<3)
44#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
45
46static int fast_convergence __read_mostly = 1;
47static int beta __read_mostly = 717;
48static int initial_ssthresh __read_mostly;
49static int bic_scale __read_mostly = 41;
50static int tcp_friendliness __read_mostly = 1;
51
52static int hystart __read_mostly = 1;
53static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
54static int hystart_low_window __read_mostly = 16;
55static int hystart_ack_delta __read_mostly = 2;
56
57static u32 cube_rtt_scale __read_mostly;
58static u32 beta_scale __read_mostly;
59static u64 cube_factor __read_mostly;
60
61
62module_param(fast_convergence, int, 0644);
63MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
64module_param(beta, int, 0644);
65MODULE_PARM_DESC(beta, "beta for multiplicative increase");
66module_param(initial_ssthresh, int, 0644);
67MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
68module_param(bic_scale, int, 0444);
69MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
70module_param(tcp_friendliness, int, 0644);
71MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
72module_param(hystart, int, 0644);
73MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm");
74module_param(hystart_detect, int, 0644);
75MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms"
76 " 1: packet-train 2: delay 3: both packet-train and delay");
77module_param(hystart_low_window, int, 0644);
78MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
79module_param(hystart_ack_delta, int, 0644);
80MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)");
81
82
83struct bictcp {
84 u32 cnt;
85 u32 last_max_cwnd;
86 u32 loss_cwnd;
87 u32 last_cwnd;
88 u32 last_time;
89 u32 bic_origin_point;
90 u32 bic_K;
91
92 u32 delay_min;
93 u32 epoch_start;
94 u32 ack_cnt;
95 u32 tcp_cwnd;
96 u16 unused;
97 u8 sample_cnt;
98 u8 found;
99 u32 round_start;
100 u32 end_seq;
101 u32 last_ack;
102 u32 curr_rtt;
103};
104
105static inline void bictcp_reset(struct bictcp *ca)
106{
107 ca->cnt = 0;
108 ca->last_max_cwnd = 0;
109 ca->last_cwnd = 0;
110 ca->last_time = 0;
111 ca->bic_origin_point = 0;
112 ca->bic_K = 0;
113 ca->delay_min = 0;
114 ca->epoch_start = 0;
115 ca->ack_cnt = 0;
116 ca->tcp_cwnd = 0;
117 ca->found = 0;
118}
119
120static inline u32 bictcp_clock(void)
121{
122#if HZ < 1000
123 return ktime_to_ms(ktime_get_real());
124#else
125 return jiffies_to_msecs(jiffies);
126#endif
127}
128
129static inline void bictcp_hystart_reset(struct sock *sk)
130{
131 struct tcp_sock *tp = tcp_sk(sk);
132 struct bictcp *ca = inet_csk_ca(sk);
133
134 ca->round_start = ca->last_ack = bictcp_clock();
135 ca->end_seq = tp->snd_nxt;
136 ca->curr_rtt = 0;
137 ca->sample_cnt = 0;
138}
139
140static void bictcp_init(struct sock *sk)
141{
142 struct bictcp *ca = inet_csk_ca(sk);
143
144 bictcp_reset(ca);
145 ca->loss_cwnd = 0;
146
147 if (hystart)
148 bictcp_hystart_reset(sk);
149
150 if (!hystart && initial_ssthresh)
151 tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
152}
153
154
155
156
157
158static u32 cubic_root(u64 a)
159{
160 u32 x, b, shift;
161
162
163
164
165
166
167
168
169 static const u8 v[] = {
170 0, 54, 54, 54, 118, 118, 118, 118,
171 123, 129, 134, 138, 143, 147, 151, 156,
172 157, 161, 164, 168, 170, 173, 176, 179,
173 181, 185, 187, 190, 192, 194, 197, 199,
174 200, 202, 204, 206, 209, 211, 213, 215,
175 217, 219, 221, 222, 224, 225, 227, 229,
176 231, 232, 234, 236, 237, 239, 240, 242,
177 244, 245, 246, 248, 250, 251, 252, 254,
178 };
179
180 b = fls64(a);
181 if (b < 7) {
182
183 return ((u32)v[(u32)a] + 35) >> 6;
184 }
185
186 b = ((b * 84) >> 8) - 1;
187 shift = (a >> (b * 3));
188
189 x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
190
191
192
193
194
195
196
197 x = (2 * x + (u32)div64_u64(a, (u64)x * (u64)(x - 1)));
198 x = ((x * 341) >> 10);
199 return x;
200}
201
202
203
204
205static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
206{
207 u32 delta, bic_target, max_cnt;
208 u64 offs, t;
209
210 ca->ack_cnt += acked;
211
212 if (ca->last_cwnd == cwnd &&
213 (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
214 return;
215
216
217
218
219
220 if (ca->epoch_start && tcp_time_stamp == ca->last_time)
221 goto tcp_friendliness;
222
223 ca->last_cwnd = cwnd;
224 ca->last_time = tcp_time_stamp;
225
226 if (ca->epoch_start == 0) {
227 ca->epoch_start = tcp_time_stamp;
228 ca->ack_cnt = acked;
229 ca->tcp_cwnd = cwnd;
230
231 if (ca->last_max_cwnd <= cwnd) {
232 ca->bic_K = 0;
233 ca->bic_origin_point = cwnd;
234 } else {
235
236
237
238 ca->bic_K = cubic_root(cube_factor
239 * (ca->last_max_cwnd - cwnd));
240 ca->bic_origin_point = ca->last_max_cwnd;
241 }
242 }
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258 t = (s32)(tcp_time_stamp - ca->epoch_start);
259 t += msecs_to_jiffies(ca->delay_min >> 3);
260
261 t <<= BICTCP_HZ;
262 do_div(t, HZ);
263
264 if (t < ca->bic_K)
265 offs = ca->bic_K - t;
266 else
267 offs = t - ca->bic_K;
268
269
270 delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
271 if (t < ca->bic_K)
272 bic_target = ca->bic_origin_point - delta;
273 else
274 bic_target = ca->bic_origin_point + delta;
275
276
277 if (bic_target > cwnd) {
278 ca->cnt = cwnd / (bic_target - cwnd);
279 } else {
280 ca->cnt = 100 * cwnd;
281 }
282
283
284
285
286
287 if (ca->last_max_cwnd == 0 && ca->cnt > 20)
288 ca->cnt = 20;
289
290tcp_friendliness:
291
292 if (tcp_friendliness) {
293 u32 scale = beta_scale;
294
295 delta = (cwnd * scale) >> 3;
296 while (ca->ack_cnt > delta) {
297 ca->ack_cnt -= delta;
298 ca->tcp_cwnd++;
299 }
300
301 if (ca->tcp_cwnd > cwnd) {
302 delta = ca->tcp_cwnd - cwnd;
303 max_cnt = cwnd / delta;
304 if (ca->cnt > max_cnt)
305 ca->cnt = max_cnt;
306 }
307 }
308
309 if (ca->cnt == 0)
310 ca->cnt = 1;
311}
312
313static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
314{
315 struct tcp_sock *tp = tcp_sk(sk);
316 struct bictcp *ca = inet_csk_ca(sk);
317
318 if (!tcp_is_cwnd_limited(sk))
319 return;
320
321 if (tp->snd_cwnd <= tp->snd_ssthresh) {
322 if (hystart && after(ack, ca->end_seq))
323 bictcp_hystart_reset(sk);
324 acked = tcp_slow_start(tp, acked);
325 if (!acked)
326 return;
327 }
328 bictcp_update(ca, tp->snd_cwnd, acked);
329 tcp_cong_avoid_ai(tp, ca->cnt, acked);
330}
331
332static u32 bictcp_recalc_ssthresh(struct sock *sk)
333{
334 const struct tcp_sock *tp = tcp_sk(sk);
335 struct bictcp *ca = inet_csk_ca(sk);
336
337 ca->epoch_start = 0;
338
339
340 if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
341 ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
342 / (2 * BICTCP_BETA_SCALE);
343 else
344 ca->last_max_cwnd = tp->snd_cwnd;
345
346 ca->loss_cwnd = tp->snd_cwnd;
347
348 return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
349}
350
351static u32 bictcp_undo_cwnd(struct sock *sk)
352{
353 struct bictcp *ca = inet_csk_ca(sk);
354
355 return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
356}
357
358static void bictcp_state(struct sock *sk, u8 new_state)
359{
360 if (new_state == TCP_CA_Loss) {
361 bictcp_reset(inet_csk_ca(sk));
362 bictcp_hystart_reset(sk);
363 }
364}
365
366static void hystart_update(struct sock *sk, u32 delay)
367{
368 struct tcp_sock *tp = tcp_sk(sk);
369 struct bictcp *ca = inet_csk_ca(sk);
370
371 if (ca->found & hystart_detect)
372 return;
373
374 if (hystart_detect & HYSTART_ACK_TRAIN) {
375 u32 now = bictcp_clock();
376
377
378 if ((s32)(now - ca->last_ack) <= hystart_ack_delta) {
379 ca->last_ack = now;
380 if ((s32)(now - ca->round_start) > ca->delay_min >> 4) {
381 ca->found |= HYSTART_ACK_TRAIN;
382 NET_INC_STATS_BH(sock_net(sk),
383 LINUX_MIB_TCPHYSTARTTRAINDETECT);
384 NET_ADD_STATS_BH(sock_net(sk),
385 LINUX_MIB_TCPHYSTARTTRAINCWND,
386 tp->snd_cwnd);
387 tp->snd_ssthresh = tp->snd_cwnd;
388 }
389 }
390 }
391
392 if (hystart_detect & HYSTART_DELAY) {
393
394 if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
395 if (ca->curr_rtt == 0 || ca->curr_rtt > delay)
396 ca->curr_rtt = delay;
397
398 ca->sample_cnt++;
399 } else {
400 if (ca->curr_rtt > ca->delay_min +
401 HYSTART_DELAY_THRESH(ca->delay_min >> 3)) {
402 ca->found |= HYSTART_DELAY;
403 NET_INC_STATS_BH(sock_net(sk),
404 LINUX_MIB_TCPHYSTARTDELAYDETECT);
405 NET_ADD_STATS_BH(sock_net(sk),
406 LINUX_MIB_TCPHYSTARTDELAYCWND,
407 tp->snd_cwnd);
408 tp->snd_ssthresh = tp->snd_cwnd;
409 }
410 }
411 }
412}
413
414
415
416
417static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
418{
419 const struct tcp_sock *tp = tcp_sk(sk);
420 struct bictcp *ca = inet_csk_ca(sk);
421 u32 delay;
422
423
424 if (rtt_us < 0)
425 return;
426
427
428 if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
429 return;
430
431 delay = (rtt_us << 3) / USEC_PER_MSEC;
432 if (delay == 0)
433 delay = 1;
434
435
436 if (ca->delay_min == 0 || ca->delay_min > delay)
437 ca->delay_min = delay;
438
439
440 if (hystart && tp->snd_cwnd <= tp->snd_ssthresh &&
441 tp->snd_cwnd >= hystart_low_window)
442 hystart_update(sk, delay);
443}
444
445static struct tcp_congestion_ops cubictcp __read_mostly = {
446 .init = bictcp_init,
447 .ssthresh = bictcp_recalc_ssthresh,
448 .cong_avoid = bictcp_cong_avoid,
449 .set_state = bictcp_state,
450 .undo_cwnd = bictcp_undo_cwnd,
451 .pkts_acked = bictcp_acked,
452 .owner = THIS_MODULE,
453 .name = "cubic",
454};
455
456static int __init cubictcp_register(void)
457{
458 BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
459
460
461
462
463
464 beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3
465 / (BICTCP_BETA_SCALE - beta);
466
467 cube_rtt_scale = (bic_scale * 10);
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483 cube_factor = 1ull << (10+3*BICTCP_HZ);
484
485
486 do_div(cube_factor, bic_scale * 10);
487
488 return tcp_register_congestion_control(&cubictcp);
489}
490
491static void __exit cubictcp_unregister(void)
492{
493 tcp_unregister_congestion_control(&cubictcp);
494}
495
496module_init(cubictcp_register);
497module_exit(cubictcp_unregister);
498
499MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
500MODULE_LICENSE("GPL");
501MODULE_DESCRIPTION("CUBIC TCP");
502MODULE_VERSION("2.3");
503