1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26#include <linux/mm.h>
27#include <linux/module.h>
28#include <linux/math64.h>
29#include <net/tcp.h>
30
31#define BICTCP_BETA_SCALE 1024
32
33
34#define BICTCP_HZ 10
35
36
37#define HYSTART_ACK_TRAIN 0x1
38#define HYSTART_DELAY 0x2
39
40
41#define HYSTART_MIN_SAMPLES 8
42#define HYSTART_DELAY_MIN (4U<<3)
43#define HYSTART_DELAY_MAX (16U<<3)
44#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
45
46static int fast_convergence __read_mostly = 1;
47static int beta __read_mostly = 717;
48static int initial_ssthresh __read_mostly;
49static int bic_scale __read_mostly = 41;
50static int tcp_friendliness __read_mostly = 1;
51
52static int hystart __read_mostly = 1;
53static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
54static int hystart_low_window __read_mostly = 16;
55static int hystart_ack_delta __read_mostly = 2;
56
57static u32 cube_rtt_scale __read_mostly;
58static u32 beta_scale __read_mostly;
59static u64 cube_factor __read_mostly;
60
61
62module_param(fast_convergence, int, 0644);
63MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
64module_param(beta, int, 0644);
65MODULE_PARM_DESC(beta, "beta for multiplicative increase");
66module_param(initial_ssthresh, int, 0644);
67MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
68module_param(bic_scale, int, 0444);
69MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
70module_param(tcp_friendliness, int, 0644);
71MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
72module_param(hystart, int, 0644);
73MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm");
74module_param(hystart_detect, int, 0644);
75MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms"
76 " 1: packet-train 2: delay 3: both packet-train and delay");
77module_param(hystart_low_window, int, 0644);
78MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
79module_param(hystart_ack_delta, int, 0644);
80MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)");
81
82
83struct bictcp {
84 u32 cnt;
85 u32 last_max_cwnd;
86 u32 last_cwnd;
87 u32 last_time;
88 u32 bic_origin_point;
89 u32 bic_K;
90
91 u32 delay_min;
92 u32 epoch_start;
93 u32 ack_cnt;
94 u32 tcp_cwnd;
95 u16 unused;
96 u8 sample_cnt;
97 u8 found;
98 u32 round_start;
99 u32 end_seq;
100 u32 last_ack;
101 u32 curr_rtt;
102};
103
104static inline void bictcp_reset(struct bictcp *ca)
105{
106 ca->cnt = 0;
107 ca->last_max_cwnd = 0;
108 ca->last_cwnd = 0;
109 ca->last_time = 0;
110 ca->bic_origin_point = 0;
111 ca->bic_K = 0;
112 ca->delay_min = 0;
113 ca->epoch_start = 0;
114 ca->ack_cnt = 0;
115 ca->tcp_cwnd = 0;
116 ca->found = 0;
117}
118
119static inline u32 bictcp_clock(void)
120{
121#if HZ < 1000
122 return ktime_to_ms(ktime_get_real());
123#else
124 return jiffies_to_msecs(jiffies);
125#endif
126}
127
128static inline void bictcp_hystart_reset(struct sock *sk)
129{
130 struct tcp_sock *tp = tcp_sk(sk);
131 struct bictcp *ca = inet_csk_ca(sk);
132
133 ca->round_start = ca->last_ack = bictcp_clock();
134 ca->end_seq = tp->snd_nxt;
135 ca->curr_rtt = 0;
136 ca->sample_cnt = 0;
137}
138
139static void bictcp_init(struct sock *sk)
140{
141 struct bictcp *ca = inet_csk_ca(sk);
142
143 bictcp_reset(ca);
144
145 if (hystart)
146 bictcp_hystart_reset(sk);
147
148 if (!hystart && initial_ssthresh)
149 tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
150}
151
152static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
153{
154 if (event == CA_EVENT_TX_START) {
155 struct bictcp *ca = inet_csk_ca(sk);
156 u32 now = tcp_jiffies32;
157 s32 delta;
158
159 delta = now - tcp_sk(sk)->lsndtime;
160
161
162
163
164 if (ca->epoch_start && delta > 0) {
165 ca->epoch_start += delta;
166 if (after(ca->epoch_start, now))
167 ca->epoch_start = now;
168 }
169 return;
170 }
171}
172
173
174
175
176
177static u32 cubic_root(u64 a)
178{
179 u32 x, b, shift;
180
181
182
183
184
185
186
187
188 static const u8 v[] = {
189 0, 54, 54, 54, 118, 118, 118, 118,
190 123, 129, 134, 138, 143, 147, 151, 156,
191 157, 161, 164, 168, 170, 173, 176, 179,
192 181, 185, 187, 190, 192, 194, 197, 199,
193 200, 202, 204, 206, 209, 211, 213, 215,
194 217, 219, 221, 222, 224, 225, 227, 229,
195 231, 232, 234, 236, 237, 239, 240, 242,
196 244, 245, 246, 248, 250, 251, 252, 254,
197 };
198
199 b = fls64(a);
200 if (b < 7) {
201
202 return ((u32)v[(u32)a] + 35) >> 6;
203 }
204
205 b = ((b * 84) >> 8) - 1;
206 shift = (a >> (b * 3));
207
208 x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
209
210
211
212
213
214
215
216 x = (2 * x + (u32)div64_u64(a, (u64)x * (u64)(x - 1)));
217 x = ((x * 341) >> 10);
218 return x;
219}
220
221
222
223
224static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
225{
226 u32 delta, bic_target, max_cnt;
227 u64 offs, t;
228
229 ca->ack_cnt += acked;
230
231 if (ca->last_cwnd == cwnd &&
232 (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32)
233 return;
234
235
236
237
238
239 if (ca->epoch_start && tcp_jiffies32 == ca->last_time)
240 goto tcp_friendliness;
241
242 ca->last_cwnd = cwnd;
243 ca->last_time = tcp_jiffies32;
244
245 if (ca->epoch_start == 0) {
246 ca->epoch_start = tcp_jiffies32;
247 ca->ack_cnt = acked;
248 ca->tcp_cwnd = cwnd;
249
250 if (ca->last_max_cwnd <= cwnd) {
251 ca->bic_K = 0;
252 ca->bic_origin_point = cwnd;
253 } else {
254
255
256
257 ca->bic_K = cubic_root(cube_factor
258 * (ca->last_max_cwnd - cwnd));
259 ca->bic_origin_point = ca->last_max_cwnd;
260 }
261 }
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277 t = (s32)(tcp_jiffies32 - ca->epoch_start);
278 t += msecs_to_jiffies(ca->delay_min >> 3);
279
280 t <<= BICTCP_HZ;
281 do_div(t, HZ);
282
283 if (t < ca->bic_K)
284 offs = ca->bic_K - t;
285 else
286 offs = t - ca->bic_K;
287
288
289 delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
290 if (t < ca->bic_K)
291 bic_target = ca->bic_origin_point - delta;
292 else
293 bic_target = ca->bic_origin_point + delta;
294
295
296 if (bic_target > cwnd) {
297 ca->cnt = cwnd / (bic_target - cwnd);
298 } else {
299 ca->cnt = 100 * cwnd;
300 }
301
302
303
304
305
306 if (ca->last_max_cwnd == 0 && ca->cnt > 20)
307 ca->cnt = 20;
308
309tcp_friendliness:
310
311 if (tcp_friendliness) {
312 u32 scale = beta_scale;
313
314 delta = (cwnd * scale) >> 3;
315 while (ca->ack_cnt > delta) {
316 ca->ack_cnt -= delta;
317 ca->tcp_cwnd++;
318 }
319
320 if (ca->tcp_cwnd > cwnd) {
321 delta = ca->tcp_cwnd - cwnd;
322 max_cnt = cwnd / delta;
323 if (ca->cnt > max_cnt)
324 ca->cnt = max_cnt;
325 }
326 }
327
328
329
330
331 ca->cnt = max(ca->cnt, 2U);
332}
333
334static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
335{
336 struct tcp_sock *tp = tcp_sk(sk);
337 struct bictcp *ca = inet_csk_ca(sk);
338
339 if (!tcp_is_cwnd_limited(sk))
340 return;
341
342 if (tcp_in_slow_start(tp)) {
343 if (hystart && after(ack, ca->end_seq))
344 bictcp_hystart_reset(sk);
345 acked = tcp_slow_start(tp, acked);
346 if (!acked)
347 return;
348 }
349 bictcp_update(ca, tp->snd_cwnd, acked);
350 tcp_cong_avoid_ai(tp, ca->cnt, acked);
351}
352
353static u32 bictcp_recalc_ssthresh(struct sock *sk)
354{
355 const struct tcp_sock *tp = tcp_sk(sk);
356 struct bictcp *ca = inet_csk_ca(sk);
357
358 ca->epoch_start = 0;
359
360
361 if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
362 ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
363 / (2 * BICTCP_BETA_SCALE);
364 else
365 ca->last_max_cwnd = tp->snd_cwnd;
366
367 return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
368}
369
370static void bictcp_state(struct sock *sk, u8 new_state)
371{
372 if (new_state == TCP_CA_Loss) {
373 bictcp_reset(inet_csk_ca(sk));
374 bictcp_hystart_reset(sk);
375 }
376}
377
378static void hystart_update(struct sock *sk, u32 delay)
379{
380 struct tcp_sock *tp = tcp_sk(sk);
381 struct bictcp *ca = inet_csk_ca(sk);
382
383 if (ca->found & hystart_detect)
384 return;
385
386 if (hystart_detect & HYSTART_ACK_TRAIN) {
387 u32 now = bictcp_clock();
388
389
390 if ((s32)(now - ca->last_ack) <= hystart_ack_delta) {
391 ca->last_ack = now;
392 if ((s32)(now - ca->round_start) > ca->delay_min >> 4) {
393 ca->found |= HYSTART_ACK_TRAIN;
394 NET_INC_STATS(sock_net(sk),
395 LINUX_MIB_TCPHYSTARTTRAINDETECT);
396 NET_ADD_STATS(sock_net(sk),
397 LINUX_MIB_TCPHYSTARTTRAINCWND,
398 tp->snd_cwnd);
399 tp->snd_ssthresh = tp->snd_cwnd;
400 }
401 }
402 }
403
404 if (hystart_detect & HYSTART_DELAY) {
405
406 if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
407 if (ca->curr_rtt == 0 || ca->curr_rtt > delay)
408 ca->curr_rtt = delay;
409
410 ca->sample_cnt++;
411 } else {
412 if (ca->curr_rtt > ca->delay_min +
413 HYSTART_DELAY_THRESH(ca->delay_min >> 3)) {
414 ca->found |= HYSTART_DELAY;
415 NET_INC_STATS(sock_net(sk),
416 LINUX_MIB_TCPHYSTARTDELAYDETECT);
417 NET_ADD_STATS(sock_net(sk),
418 LINUX_MIB_TCPHYSTARTDELAYCWND,
419 tp->snd_cwnd);
420 tp->snd_ssthresh = tp->snd_cwnd;
421 }
422 }
423 }
424}
425
426
427
428
429static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
430{
431 const struct tcp_sock *tp = tcp_sk(sk);
432 struct bictcp *ca = inet_csk_ca(sk);
433 u32 delay;
434
435
436 if (sample->rtt_us < 0)
437 return;
438
439
440 if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ)
441 return;
442
443 delay = (sample->rtt_us << 3) / USEC_PER_MSEC;
444 if (delay == 0)
445 delay = 1;
446
447
448 if (ca->delay_min == 0 || ca->delay_min > delay)
449 ca->delay_min = delay;
450
451
452 if (hystart && tcp_in_slow_start(tp) &&
453 tp->snd_cwnd >= hystart_low_window)
454 hystart_update(sk, delay);
455}
456
457static struct tcp_congestion_ops cubictcp __read_mostly = {
458 .init = bictcp_init,
459 .ssthresh = bictcp_recalc_ssthresh,
460 .cong_avoid = bictcp_cong_avoid,
461 .set_state = bictcp_state,
462 .undo_cwnd = tcp_reno_undo_cwnd,
463 .cwnd_event = bictcp_cwnd_event,
464 .pkts_acked = bictcp_acked,
465 .owner = THIS_MODULE,
466 .name = "cubic",
467};
468
469static int __init cubictcp_register(void)
470{
471 BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
472
473
474
475
476
477 beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3
478 / (BICTCP_BETA_SCALE - beta);
479
480 cube_rtt_scale = (bic_scale * 10);
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496 cube_factor = 1ull << (10+3*BICTCP_HZ);
497
498
499 do_div(cube_factor, bic_scale * 10);
500
501 return tcp_register_congestion_control(&cubictcp);
502}
503
504static void __exit cubictcp_unregister(void)
505{
506 tcp_unregister_congestion_control(&cubictcp);
507}
508
509module_init(cubictcp_register);
510module_exit(cubictcp_unregister);
511
512MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
513MODULE_LICENSE("GPL");
514MODULE_DESCRIPTION("CUBIC TCP");
515MODULE_VERSION("2.3");
516