1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26#include <linux/mm.h>
27#include <linux/module.h>
28#include <linux/math64.h>
29#include <net/tcp.h>
30
31#define BICTCP_BETA_SCALE 1024
32
33
34#define BICTCP_HZ 10
35
36
37#define HYSTART_ACK_TRAIN 0x1
38#define HYSTART_DELAY 0x2
39
40
41#define HYSTART_MIN_SAMPLES 8
42#define HYSTART_DELAY_MIN (4U<<3)
43#define HYSTART_DELAY_MAX (16U<<3)
44#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
45
46static int fast_convergence __read_mostly = 1;
47static int beta __read_mostly = 717;
48static int initial_ssthresh __read_mostly;
49static int bic_scale __read_mostly = 41;
50static int tcp_friendliness __read_mostly = 1;
51
52static int hystart __read_mostly = 1;
53static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
54static int hystart_low_window __read_mostly = 16;
55static int hystart_ack_delta __read_mostly = 2;
56
57static u32 cube_rtt_scale __read_mostly;
58static u32 beta_scale __read_mostly;
59static u64 cube_factor __read_mostly;
60
61
62module_param(fast_convergence, int, 0644);
63MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
64module_param(beta, int, 0644);
65MODULE_PARM_DESC(beta, "beta for multiplicative increase");
66module_param(initial_ssthresh, int, 0644);
67MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
68module_param(bic_scale, int, 0444);
69MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
70module_param(tcp_friendliness, int, 0644);
71MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
72module_param(hystart, int, 0644);
73MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm");
74module_param(hystart_detect, int, 0644);
75MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms"
76 " 1: packet-train 2: delay 3: both packet-train and delay");
77module_param(hystart_low_window, int, 0644);
78MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
79module_param(hystart_ack_delta, int, 0644);
80MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)");
81
82
83struct bictcp {
84 u32 cnt;
85 u32 last_max_cwnd;
86 u32 loss_cwnd;
87 u32 last_cwnd;
88 u32 last_time;
89 u32 bic_origin_point;
90 u32 bic_K;
91
92 u32 delay_min;
93 u32 epoch_start;
94 u32 ack_cnt;
95 u32 tcp_cwnd;
96 u16 unused;
97 u8 sample_cnt;
98 u8 found;
99 u32 round_start;
100 u32 end_seq;
101 u32 last_ack;
102 u32 curr_rtt;
103};
104
105static inline void bictcp_reset(struct bictcp *ca)
106{
107 ca->cnt = 0;
108 ca->last_max_cwnd = 0;
109 ca->last_cwnd = 0;
110 ca->last_time = 0;
111 ca->bic_origin_point = 0;
112 ca->bic_K = 0;
113 ca->delay_min = 0;
114 ca->epoch_start = 0;
115 ca->ack_cnt = 0;
116 ca->tcp_cwnd = 0;
117 ca->found = 0;
118}
119
120static inline u32 bictcp_clock(void)
121{
122#if HZ < 1000
123 return ktime_to_ms(ktime_get_real());
124#else
125 return jiffies_to_msecs(jiffies);
126#endif
127}
128
129static inline void bictcp_hystart_reset(struct sock *sk)
130{
131 struct tcp_sock *tp = tcp_sk(sk);
132 struct bictcp *ca = inet_csk_ca(sk);
133
134 ca->round_start = ca->last_ack = bictcp_clock();
135 ca->end_seq = tp->snd_nxt;
136 ca->curr_rtt = 0;
137 ca->sample_cnt = 0;
138}
139
140static void bictcp_init(struct sock *sk)
141{
142 struct bictcp *ca = inet_csk_ca(sk);
143
144 bictcp_reset(ca);
145 ca->loss_cwnd = 0;
146
147 if (hystart)
148 bictcp_hystart_reset(sk);
149
150 if (!hystart && initial_ssthresh)
151 tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
152}
153
154
155
156
157
158static u32 cubic_root(u64 a)
159{
160 u32 x, b, shift;
161
162
163
164
165
166
167
168
169 static const u8 v[] = {
170 0, 54, 54, 54, 118, 118, 118, 118,
171 123, 129, 134, 138, 143, 147, 151, 156,
172 157, 161, 164, 168, 170, 173, 176, 179,
173 181, 185, 187, 190, 192, 194, 197, 199,
174 200, 202, 204, 206, 209, 211, 213, 215,
175 217, 219, 221, 222, 224, 225, 227, 229,
176 231, 232, 234, 236, 237, 239, 240, 242,
177 244, 245, 246, 248, 250, 251, 252, 254,
178 };
179
180 b = fls64(a);
181 if (b < 7) {
182
183 return ((u32)v[(u32)a] + 35) >> 6;
184 }
185
186 b = ((b * 84) >> 8) - 1;
187 shift = (a >> (b * 3));
188
189 x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
190
191
192
193
194
195
196
197 x = (2 * x + (u32)div64_u64(a, (u64)x * (u64)(x - 1)));
198 x = ((x * 341) >> 10);
199 return x;
200}
201
202
203
204
205static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
206{
207 u32 delta, bic_target, max_cnt;
208 u64 offs, t;
209
210 ca->ack_cnt += acked;
211
212 if (ca->last_cwnd == cwnd &&
213 (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
214 return;
215
216
217
218
219
220 if (ca->epoch_start && tcp_time_stamp == ca->last_time)
221 goto tcp_friendliness;
222
223 ca->last_cwnd = cwnd;
224 ca->last_time = tcp_time_stamp;
225
226 if (ca->epoch_start == 0) {
227 ca->epoch_start = tcp_time_stamp;
228 ca->ack_cnt = acked;
229 ca->tcp_cwnd = cwnd;
230
231 if (ca->last_max_cwnd <= cwnd) {
232 ca->bic_K = 0;
233 ca->bic_origin_point = cwnd;
234 } else {
235
236
237
238 ca->bic_K = cubic_root(cube_factor
239 * (ca->last_max_cwnd - cwnd));
240 ca->bic_origin_point = ca->last_max_cwnd;
241 }
242 }
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258 t = (s32)(tcp_time_stamp - ca->epoch_start);
259 t += msecs_to_jiffies(ca->delay_min >> 3);
260
261 t <<= BICTCP_HZ;
262 do_div(t, HZ);
263
264 if (t < ca->bic_K)
265 offs = ca->bic_K - t;
266 else
267 offs = t - ca->bic_K;
268
269
270 delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
271 if (t < ca->bic_K)
272 bic_target = ca->bic_origin_point - delta;
273 else
274 bic_target = ca->bic_origin_point + delta;
275
276
277 if (bic_target > cwnd) {
278 ca->cnt = cwnd / (bic_target - cwnd);
279 } else {
280 ca->cnt = 100 * cwnd;
281 }
282
283
284
285
286
287 if (ca->last_max_cwnd == 0 && ca->cnt > 20)
288 ca->cnt = 20;
289
290tcp_friendliness:
291
292 if (tcp_friendliness) {
293 u32 scale = beta_scale;
294
295 delta = (cwnd * scale) >> 3;
296 while (ca->ack_cnt > delta) {
297 ca->ack_cnt -= delta;
298 ca->tcp_cwnd++;
299 }
300
301 if (ca->tcp_cwnd > cwnd) {
302 delta = ca->tcp_cwnd - cwnd;
303 max_cnt = cwnd / delta;
304 if (ca->cnt > max_cnt)
305 ca->cnt = max_cnt;
306 }
307 }
308
309
310
311
312 ca->cnt = max(ca->cnt, 2U);
313}
314
315static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
316{
317 struct tcp_sock *tp = tcp_sk(sk);
318 struct bictcp *ca = inet_csk_ca(sk);
319
320 if (!tcp_is_cwnd_limited(sk))
321 return;
322
323 if (tp->snd_cwnd <= tp->snd_ssthresh) {
324 if (hystart && after(ack, ca->end_seq))
325 bictcp_hystart_reset(sk);
326 acked = tcp_slow_start(tp, acked);
327 if (!acked)
328 return;
329 }
330 bictcp_update(ca, tp->snd_cwnd, acked);
331 tcp_cong_avoid_ai(tp, ca->cnt, acked);
332}
333
334static u32 bictcp_recalc_ssthresh(struct sock *sk)
335{
336 const struct tcp_sock *tp = tcp_sk(sk);
337 struct bictcp *ca = inet_csk_ca(sk);
338
339 ca->epoch_start = 0;
340
341
342 if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
343 ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
344 / (2 * BICTCP_BETA_SCALE);
345 else
346 ca->last_max_cwnd = tp->snd_cwnd;
347
348 ca->loss_cwnd = tp->snd_cwnd;
349
350 return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
351}
352
353static u32 bictcp_undo_cwnd(struct sock *sk)
354{
355 struct bictcp *ca = inet_csk_ca(sk);
356
357 return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
358}
359
360static void bictcp_state(struct sock *sk, u8 new_state)
361{
362 if (new_state == TCP_CA_Loss) {
363 bictcp_reset(inet_csk_ca(sk));
364 bictcp_hystart_reset(sk);
365 }
366}
367
368static void hystart_update(struct sock *sk, u32 delay)
369{
370 struct tcp_sock *tp = tcp_sk(sk);
371 struct bictcp *ca = inet_csk_ca(sk);
372
373 if (ca->found & hystart_detect)
374 return;
375
376 if (hystart_detect & HYSTART_ACK_TRAIN) {
377 u32 now = bictcp_clock();
378
379
380 if ((s32)(now - ca->last_ack) <= hystart_ack_delta) {
381 ca->last_ack = now;
382 if ((s32)(now - ca->round_start) > ca->delay_min >> 4) {
383 ca->found |= HYSTART_ACK_TRAIN;
384 NET_INC_STATS_BH(sock_net(sk),
385 LINUX_MIB_TCPHYSTARTTRAINDETECT);
386 NET_ADD_STATS_BH(sock_net(sk),
387 LINUX_MIB_TCPHYSTARTTRAINCWND,
388 tp->snd_cwnd);
389 tp->snd_ssthresh = tp->snd_cwnd;
390 }
391 }
392 }
393
394 if (hystart_detect & HYSTART_DELAY) {
395
396 if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
397 if (ca->curr_rtt == 0 || ca->curr_rtt > delay)
398 ca->curr_rtt = delay;
399
400 ca->sample_cnt++;
401 } else {
402 if (ca->curr_rtt > ca->delay_min +
403 HYSTART_DELAY_THRESH(ca->delay_min >> 3)) {
404 ca->found |= HYSTART_DELAY;
405 NET_INC_STATS_BH(sock_net(sk),
406 LINUX_MIB_TCPHYSTARTDELAYDETECT);
407 NET_ADD_STATS_BH(sock_net(sk),
408 LINUX_MIB_TCPHYSTARTDELAYCWND,
409 tp->snd_cwnd);
410 tp->snd_ssthresh = tp->snd_cwnd;
411 }
412 }
413 }
414}
415
416
417
418
419static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
420{
421 const struct tcp_sock *tp = tcp_sk(sk);
422 struct bictcp *ca = inet_csk_ca(sk);
423 u32 delay;
424
425
426 if (rtt_us < 0)
427 return;
428
429
430 if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
431 return;
432
433 delay = (rtt_us << 3) / USEC_PER_MSEC;
434 if (delay == 0)
435 delay = 1;
436
437
438 if (ca->delay_min == 0 || ca->delay_min > delay)
439 ca->delay_min = delay;
440
441
442 if (hystart && tp->snd_cwnd <= tp->snd_ssthresh &&
443 tp->snd_cwnd >= hystart_low_window)
444 hystart_update(sk, delay);
445}
446
447static struct tcp_congestion_ops cubictcp __read_mostly = {
448 .init = bictcp_init,
449 .ssthresh = bictcp_recalc_ssthresh,
450 .cong_avoid = bictcp_cong_avoid,
451 .set_state = bictcp_state,
452 .undo_cwnd = bictcp_undo_cwnd,
453 .pkts_acked = bictcp_acked,
454 .owner = THIS_MODULE,
455 .name = "cubic",
456};
457
458static int __init cubictcp_register(void)
459{
460 BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
461
462
463
464
465
466 beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3
467 / (BICTCP_BETA_SCALE - beta);
468
469 cube_rtt_scale = (bic_scale * 10);
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485 cube_factor = 1ull << (10+3*BICTCP_HZ);
486
487
488 do_div(cube_factor, bic_scale * 10);
489
490 return tcp_register_congestion_control(&cubictcp);
491}
492
493static void __exit cubictcp_unregister(void)
494{
495 tcp_unregister_congestion_control(&cubictcp);
496}
497
498module_init(cubictcp_register);
499module_exit(cubictcp_unregister);
500
501MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
502MODULE_LICENSE("GPL");
503MODULE_DESCRIPTION("CUBIC TCP");
504MODULE_VERSION("2.3");
505