/* Bottleneck Bandwidth and RTT (BBR) congestion control
 *
 * BBR congestion control computes the sending rate based on the delivery
 * rate (throughput) estimated from ACKs. In a nutshell:
 *
 *   On each ACK, update our model of the network path:
 *      bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips)
 *      min_rtt = windowed_min(rtt, 10 seconds)
 *   pacing_rate = pacing_gain * bottleneck_bandwidth
 *   cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4)
 *
 * The core algorithm does not react directly to packet losses or delays,
 * although BBR may adjust the size of next send per ACK when loss is
 * observed, or adjust the sending rate if it estimates there is a
 * traffic policer, in order to keep the drop rate reasonable.
 *
 * Here is a state transition diagram for BBR:
 *
 *             |
 *             V
 *    +---> STARTUP  ----+
 *    |        |         |
 *    |        V         |
 *    |      DRAIN   ----+
 *    |        |         |
 *    |        V         |
 *    +---> PROBE_BW ----+
 *    |      ^    |      |
 *    |      |    |      |
 *    |      +----+      |
 *    |                  |
 *    +---- PROBE_RTT <--+
 *
 * A BBR flow starts in STARTUP, and ramps up its sending rate quickly.
 * When it estimates the pipe is full, it enters DRAIN to drain the queue.
 * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT.
 * A long-lived BBR flow spends the vast majority of its time remaining
 * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth
 * in a fair manner, with a small, bounded queue. *If* a flow has been
 * continuously sending for the entire min_rtt window, and hasn't seen an RTT
 * sample that matches or decreases its min_rtt estimate for 10 seconds, then
 * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe
 * the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if
 * we estimated that we reached the full bw of the pipe then we enter PROBE_BW;
 * otherwise we enter STARTUP to try to fill the pipe.
 *
 * BBR is described in detail in:
 *   "BBR: Congestion-Based Congestion Control",
 *   Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh,
 *   Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016.
 *
 * There is a public e-mail list for discussing BBR development and testing:
 *   https://groups.google.com/forum/#!forum/bbr-dev
 *
 * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled,
 * otherwise the TCP stack falls back to internal pacing using one high
 * resolution timer per TCP socket, which may use more resources.
 */
#include <linux/module.h>
#include <net/tcp.h>
#include <linux/inet_diag.h>
#include <linux/inet.h>
#include <linux/random.h>
#include <linux/win_minmax.h>

/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth
 * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps.
 * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32.
 * Since the minimum window is >=4 packets, the lower bound isn't
 * an issue. The upper bound isn't an issue with existing technologies.
 */
#define BW_SCALE 24
#define BW_UNIT (1 << BW_SCALE)

#define BBR_SCALE 8	/* scaling factor for fractions in BBR (e.g. gains) */
#define BBR_UNIT (1 << BBR_SCALE)

/* BBR has the following modes for deciding how fast to send: */
enum bbr_mode {
	BBR_STARTUP,	/* ramp up sending rate rapidly to fill pipe */
	BBR_DRAIN,	/* drain any queue created during startup */
	BBR_PROBE_BW,	/* discover, share bw: pace around estimated bw */
	BBR_PROBE_RTT,	/* cut cwnd to min to probe min_rtt */
};

/* BBR congestion control block */
struct bbr {
	u32	min_rtt_us;	        /* min RTT in min_rtt_win_sec window */
	u32	min_rtt_stamp;	        /* timestamp of min_rtt_us */
	u32	probe_rtt_done_stamp;   /* end time for BBR_PROBE_RTT mode */
	struct minmax bw;	/* Max recent delivery rate in pkts/uS << 24 */
	u32	rtt_cnt;	    /* count of packet-timed rounds elapsed */
	u32	next_rtt_delivered; /* scb->tx.delivered at end of round */
	u64	cycle_mstamp;	     /* time of this cycle phase start */
	u32	mode:3,		     /* current bbr_mode in state machine */
		prev_ca_state:3,     /* CA state on previous ACK */
		packet_conservation:1,  /* use packet conservation? */
		restore_cwnd:1,	     /* decided to revert cwnd to old value */
		round_start:1,	     /* start of packet-timed tx->ack round? */
		tso_segs_goal:7,     /* segs we want in each skb we send */
		idle_restart:1,	     /* restarting after idle? */
		probe_rtt_round_done:1,  /* a BBR_PROBE_RTT round at 4 pkts? */
		unused:5,
		lt_is_sampling:1,    /* taking long-term ("LT") samples now? */
		lt_rtt_cnt:7,	     /* round trips in long-term interval */
		lt_use_bw:1;	     /* use lt_bw as our bw estimate? */
	u32	lt_bw;		     /* LT est delivery rate in pkts/uS << 24 */
	u32	lt_last_delivered;   /* LT intvl start: tp->delivered */
	u32	lt_last_stamp;	     /* LT intvl start: tp->delivered_mstamp */
	u32	lt_last_lost;	     /* LT intvl start: tp->lost */
	u32	pacing_gain:10,	/* current gain for setting pacing rate */
		cwnd_gain:10,	/* current gain for setting cwnd */
		full_bw_cnt:3,	/* number of rounds without large bw gains */
		cycle_idx:3,	/* current index in pacing_gain cycle array */
		has_seen_rtt:1,	/* have we seen an RTT sample yet? */
		unused_b:5;
	u32	prior_cwnd;	/* prior cwnd upon entering loss recovery */
	u32	full_bw;	/* recent bw, to estimate if pipe is full */
};

#define CYCLE_LEN	8	/* number of phases in a pacing gain cycle */

/* Window length of bw filter (in rounds): */
static const int bbr_bw_rtts = CYCLE_LEN + 2;
/* Window length of min_rtt filter (in sec): */
static const u32 bbr_min_rtt_win_sec = 10;
/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */
static const u32 bbr_probe_rtt_mode_ms = 200;
/* Skip TSO below the following bandwidth (bits/sec): */
static const int bbr_min_tso_rate = 1200000;

/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
 * that will allow a smoothly increasing pacing rate that will double each RTT
 * and send the same number of packets per RTT that an un-paced, slow-starting
 * Reno or CUBIC flow would:
 */
static const int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1;
/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain
 * the queue created in BBR_STARTUP in a single round:
 */
static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885;
/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs: */
static const int bbr_cwnd_gain = BBR_UNIT * 2;
/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */
static const int bbr_pacing_gain[] = {
	BBR_UNIT * 5 / 4,	/* probe for more available bw */
	BBR_UNIT * 3 / 4,	/* drain queue and/or yield bw to other flows */
	BBR_UNIT, BBR_UNIT, BBR_UNIT,	/* cruise at 1.0*bw to utilize pipe, */
	BBR_UNIT, BBR_UNIT, BBR_UNIT	/* without creating excess queue */
};
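
/* Note that the gains above average to 1.0 across a full 8-phase cycle:
 * (5/4 + 3/4 + 6 * 1) / 8 = 1.0. So a steady PROBE_BW flow paces at the
 * estimated bottleneck bandwidth on average: one phase probes at 1.25x,
 * the next drains at 0.75x, and the remaining six cruise at 1.0x.
 */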
/* Randomize the starting gain cycling phase over N phases: */
static const u32 bbr_cycle_rand = 7;

/* Try to keep at least this many packets in flight, if things go smoothly. For
 * smooth functioning, a sliding window protocol ACKing every other packet
 * needs at least 4 packets in flight:
 */
static const u32 bbr_cwnd_min_target = 4;

/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled the pipe:
 * if bw has increased significantly (1.25x), there may be more bw available:
 */
static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4;
/* But after 3 rounds w/o significant bw growth, estimate pipe is full: */
static const u32 bbr_full_bw_cnt = 3;

/* "long-term" ("LT") bandwidth estimator parameters: */
/* The minimum number of rounds in an LT bw sampling interval: */
static const u32 bbr_lt_intvl_min_rtts = 4;
/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */
static const u32 bbr_lt_loss_thresh = 50;
/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */
static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8;
/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */
static const u32 bbr_lt_bw_diff = 4000 / 8;
/* If we estimate we're policed, use lt_bw for this many round trips: */
static const u32 bbr_lt_bw_max_rtts = 48;

/* Do we estimate that STARTUP filled the pipe? */
static bool bbr_full_bw_reached(const struct sock *sk)
{
	const struct bbr *bbr = inet_csk_ca(sk);

	return bbr->full_bw_cnt >= bbr_full_bw_cnt;
}

/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
static u32 bbr_max_bw(const struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	return minmax_get(&bbr->bw);
}

/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */
static u32 bbr_bw(const struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk);
}

/* Return rate in bytes per second, optionally with a gain.
 * The order here is chosen carefully to avoid overflow of u64. This should
 * work for input rates of up to 2.9Tbit/sec and gain of 2.89x.
 */
static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
{
	rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache);
	rate *= gain;
	rate >>= BBR_SCALE;
	rate *= USEC_PER_SEC;
	return rate >> BW_SCALE;
}
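
/* For example, one unit of the internal rate (1 pkt/uSec >> BW_SCALE) is
 * 10^6 / 2^24 ~= 0.0596 pkt/sec; with a 1500-byte MTU and gain = BBR_UNIT
 * that converts to ~89 bytes/sec (~715 bit/sec), which is the granularity
 * of the bandwidth model.
 */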

/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
{
	u64 rate = bw;

	rate = bbr_rate_bytes_per_sec(sk, rate, gain);
	rate = min_t(u64, rate, sk->sk_max_pacing_rate);
	return rate;
}

/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u64 bw;
	u32 rtt_us;

	if (tp->srtt_us) {		/* any RTT sample yet? */
		rtt_us = max(tp->srtt_us >> 3, 1U);
		bbr->has_seen_rtt = 1;
	} else {			/* no RTT sample yet */
		rtt_us = USEC_PER_MSEC;	/* use nominal default RTT */
	}
	bw = (u64)tp->snd_cwnd * BW_UNIT;
	do_div(bw, rtt_us);
	sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
}
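
/* For example, with the default initial cwnd of 10 packets and a first
 * smoothed RTT of 10 ms, the initial bw estimate is 10 pkts / 10000 us =
 * 1000 pkts/sec, and high_gain (~2.885) yields an initial pacing rate of
 * roughly 2885 pkts/sec (~4.3 MB/sec with 1500-byte packets).
 */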

/* Pace using current bw estimate and a gain factor. In order to help drive the
 * network toward lower queues while maintaining high utilization and low
 * latency, the average pacing rate aims to be slightly (~1%) lower than the
 * estimated bandwidth. This is an important aspect of the design. In this
 * implementation this slightly lower pacing rate is achieved implicitly by not
 * including link-layer headers in the packet size used for the pacing rate.
 */
static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);

	if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
		bbr_init_pacing_rate_from_rtt(sk);
	if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
		sk->sk_pacing_rate = rate;
}

/* override sysctl_tcp_min_tso_segs */
static u32 bbr_tso_segs_goal(struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	return bbr->tso_segs_goal;
}

static void bbr_set_tso_segs_goal(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u32 min_segs;

	min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
	bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
				 0x7FU);
}

/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
static void bbr_save_cwnd(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
		bbr->prior_cwnd = tp->snd_cwnd;  /* this cwnd is good enough */
	else  /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
		bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
}

static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	if (event == CA_EVENT_TX_START && tp->app_limited) {
		bbr->idle_restart = 1;
		/* Avoid pointless buffer overflows: pace at est. bw if we don't
		 * need more speed (we're restarting from idle and app-limited).
		 */
		if (bbr->mode == BBR_PROBE_BW)
			bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT);
	}
}

/* Find target cwnd. Right-size the cwnd based on min RTT and the
 * estimated bottleneck bandwidth:
 *
 *   cwnd = bw * min_rtt * gain = BDP * gain
 *
 * The key factor, gain, controls the amount of queue. While a small gain
 * builds a small queue, a cwnd of exactly one BDP would not keep the pipe
 * full in the presence of delayed, stretched, or aggregated ACKs, so the
 * steady-state cwnd_gain of 2 and the extra headroom below provision for
 * those effects.
 */
static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
{
	struct bbr *bbr = inet_csk_ca(sk);
	u32 cwnd;
	u64 w;

	/* If we've never had a valid RTT sample, cap cwnd at the initial
	 * default. This should only happen when the connection is not using
	 * TCP timestamps and has retransmitted all of the SYN/SYNACK/data
	 * packets ACKed so far. In this case, an RTO can cut cwnd to 1, in
	 * which case we need to slow-start up toward something safe:
	 * TCP_INIT_CWND.
	 */
	if (unlikely(bbr->min_rtt_us == ~0U))	 /* no valid RTT samples yet? */
		return TCP_INIT_CWND;  /* be safe: cap at default initial cwnd */

	w = (u64)bw * bbr->min_rtt_us;

	/* Apply a gain to the given value, then remove the BW_SCALE shift. */
	cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;

	/* Allow enough full-sized skbs in flight to utilize end systems. */
	cwnd += 3 * bbr->tso_segs_goal;

	/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
	cwnd = (cwnd + 1) & ~1U;

	return cwnd;
}
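
/* For example, on a path with bw ~= 833 pkts/sec (10 Mbit/sec of 1500-byte
 * packets) and min_rtt = 40 ms, the BDP is ~33 packets; with the default
 * cwnd_gain of 2 and tso_segs_goal = 2 the target works out to ~74 packets
 * (2 * BDP rounded up, plus 3 * 2 extra segs, rounded up to an even number).
 */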

/* An optimization in BBR to reduce losses: On the first round of recovery, we
 * follow the packet conservation principle: send P packets per P packets
 * acked. After that, we slow-start and send at most 2*P packets per P packets
 * acked. After recovery finishes, or upon undo, we restore the cwnd we had
 * when recovery started (capped by the target cwnd based on estimated BDP).
 */
static bool bbr_set_cwnd_to_recover_or_restore(
	struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
	u32 cwnd = tp->snd_cwnd;

	/* An ACK for P pkts should release at most 2*P packets. We do this
	 * in two steps. First, here we deduct the number of lost packets.
	 * Then, in bbr_set_cwnd() we slow start up toward the target cwnd.
	 */
	if (rs->losses > 0)
		cwnd = max_t(s32, cwnd - rs->losses, 1);

	if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) {
		/* Starting 1st round of Recovery, so do packet conservation. */
		bbr->packet_conservation = 1;
		bbr->next_rtt_delivered = tp->delivered;  /* start round now */
		/* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */
		cwnd = tcp_packets_in_flight(tp) + acked;
	} else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) {
		/* Exiting loss recovery; restore cwnd saved before recovery. */
		bbr->restore_cwnd = 1;
		bbr->packet_conservation = 0;
	}
	bbr->prev_ca_state = state;

	if (bbr->restore_cwnd) {
		/* Restore cwnd after exiting loss recovery or PROBE_RTT. */
		cwnd = max(cwnd, bbr->prior_cwnd);
		bbr->restore_cwnd = 0;
	}

	if (bbr->packet_conservation) {
		*new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked);
		return true;	/* yes, using packet conservation */
	}
	*new_cwnd = cwnd;
	return false;
}

/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss
 * has drawn us down below target), or snap down to target if we're above it.
 */
static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
			 u32 acked, u32 bw, int gain)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u32 cwnd = 0, target_cwnd = 0;

	if (!acked)
		return;

	if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd))
		goto done;

	/* If we're below target cwnd, slow start cwnd toward target cwnd. */
	target_cwnd = bbr_target_cwnd(sk, bw, gain);
	if (bbr_full_bw_reached(sk))  /* only cut cwnd if we filled the pipe */
		cwnd = min(cwnd + acked, target_cwnd);
	else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND)
		cwnd = cwnd + acked;
	cwnd = max(cwnd, bbr_cwnd_min_target);

done:
	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);	/* apply global cap */
	if (bbr->mode == BBR_PROBE_RTT)  /* drain queue, refresh min_rtt */
		tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target);
}
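
/* The net effect: before the pipe is estimated full, cwnd grows by the number
 * of packets acked on every ACK (roughly doubling per round, like slow start);
 * once full bandwidth is reached, cwnd is clamped to the BDP-based target, and
 * outside of recovery it never drops below bbr_cwnd_min_target (4) packets.
 */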

/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
static bool bbr_is_next_cycle_phase(struct sock *sk,
				    const struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	bool is_full_length =
		tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) >
		bbr->min_rtt_us;
	u32 inflight, bw;

	/* The pacing_gain of 1.0 paces at the estimated bw to try to fully
	 * use the pipe without increasing the queue.
	 */
	if (bbr->pacing_gain == BBR_UNIT)
		return is_full_length;		/* just use wall clock time */

	inflight = rs->prior_in_flight;  /* what was in-flight before ACK? */
	bw = bbr_max_bw(sk);

	/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
	 * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is
	 * small (e.g. on a LAN). We do not persist if packets are lost, since
	 * a path with small buffers may not hold that much.
	 */
	if (bbr->pacing_gain > BBR_UNIT)
		return is_full_length &&
			(rs->losses ||  /* perhaps pacing_gain*BDP won't fit */
			 inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain));

	/* A pacing_gain < 1.0 tries to drain extra queue we added if bw
	 * probing didn't find more bw. If inflight falls to match BDP then we
	 * estimate queue is drained; persisting would underutilize the pipe.
	 */
	return is_full_length ||
		inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT);
}

static void bbr_advance_cycle_phase(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
	bbr->cycle_mstamp = tp->delivered_mstamp;
	bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx];
}

/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
static void bbr_update_cycle_phase(struct sock *sk,
				   const struct rate_sample *rs)
{
	struct bbr *bbr = inet_csk_ca(sk);

	if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw &&
	    bbr_is_next_cycle_phase(sk, rs))
		bbr_advance_cycle_phase(sk);
}

static void bbr_reset_startup_mode(struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->mode = BBR_STARTUP;
	bbr->pacing_gain = bbr_high_gain;
	bbr->cwnd_gain = bbr_high_gain;
}

static void bbr_reset_probe_bw_mode(struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->mode = BBR_PROBE_BW;
	bbr->pacing_gain = BBR_UNIT;
	bbr->cwnd_gain = bbr_cwnd_gain;
	bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
	bbr_advance_cycle_phase(sk);	/* flip to next phase of gain cycle */
}

static void bbr_reset_mode(struct sock *sk)
{
	if (!bbr_full_bw_reached(sk))
		bbr_reset_startup_mode(sk);
	else
		bbr_reset_probe_bw_mode(sk);
}

/* Start a new long-term sampling interval. */
static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC);
	bbr->lt_last_delivered = tp->delivered;
	bbr->lt_last_lost = tp->lost;
	bbr->lt_rtt_cnt = 0;
}

/* Completely reset long-term bandwidth sampling. */
static void bbr_reset_lt_bw_sampling(struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->lt_bw = 0;
	bbr->lt_use_bw = 0;
	bbr->lt_is_sampling = false;
	bbr_reset_lt_bw_sampling_interval(sk);
}

/* Long-term bw sampling interval is done. Estimate whether we're policed. */
static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw)
{
	struct bbr *bbr = inet_csk_ca(sk);
	u32 diff;

	if (bbr->lt_bw) {  /* do we have bw from a previous interval? */
		/* Is new bw close to the lt_bw from the previous interval? */
		diff = abs(bw - bbr->lt_bw);
		if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) ||
		    (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <=
		     bbr_lt_bw_diff)) {
			/* All criteria are met; estimate we're policed. */
			bbr->lt_bw = (bw + bbr->lt_bw) >> 1;  /* avg 2 intvls */
			bbr->lt_use_bw = 1;
			bbr->pacing_gain = BBR_UNIT;  /* try to avoid drops */
			bbr->lt_rtt_cnt = 0;
			return;
		}
	}
	bbr->lt_bw = bw;
	bbr_reset_lt_bw_sampling_interval(sk);
}
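
/* For example, if the previous interval measured lt_bw = 1000 (in the
 * pkts/uS << BW_SCALE unit) and the new interval measures bw = 1080, then
 * diff = 80 and 80 * BBR_UNIT = 20480 <= (BBR_UNIT / 8) * 1000 = 32000, so
 * the two intervals are "consistent" and lt_bw becomes their average, 1040.
 */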

/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of
 * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and
 * explicitly models their policed rate, to reduce unnecessary losses. We
 * estimate that we're policed if we see 2 consecutive sampling intervals with
 * consistent throughput and high packet loss. If we think we're being policed,
 * set lt_bw to the "long-term" average delivery rate from those 2 intervals.
 */
static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u32 lost, delivered;
	u64 bw;
	u32 t;

	if (bbr->lt_use_bw) {	/* already using long-term rate, lt_bw? */
		if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
		    ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) {
			bbr_reset_lt_bw_sampling(sk);    /* stop using lt_bw */
			bbr_reset_probe_bw_mode(sk);  /* restart gain cycling */
		}
		return;
	}

	/* Wait for the first loss before sampling, to let the policer exhaust
	 * its tokens and estimate the steady-state rate allowed by the policer.
	 * Starting samples earlier includes bursts that over-estimate the bw.
	 */
	if (!bbr->lt_is_sampling) {
		if (!rs->losses)
			return;
		bbr_reset_lt_bw_sampling_interval(sk);
		bbr->lt_is_sampling = true;
	}

	/* To avoid underestimates, reset sampling if we run out of data. */
	if (rs->is_app_limited) {
		bbr_reset_lt_bw_sampling(sk);
		return;
	}

	if (bbr->round_start)
		bbr->lt_rtt_cnt++;	/* count round trips in this interval */
	if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts)
		return;		/* sampling interval needs to be longer */
	if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) {
		bbr_reset_lt_bw_sampling(sk);  /* interval is too long */
		return;
	}

	/* End sampling interval when a packet is lost, so we estimate the
	 * policer tokens were exhausted. Stopping the sampling before the
	 * tokens are exhausted under-estimates the policed rate.
	 */
	if (!rs->losses)
		return;

	/* Calculate packets lost and delivered in sampling interval. */
	lost = tp->lost - bbr->lt_last_lost;
	delivered = tp->delivered - bbr->lt_last_delivered;
	/* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */
	if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered)
		return;

	/* Find average delivery rate in this sampling interval. */
	t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp;
	if ((s32)t < 1)
		return;		/* interval is less than one ms, so wait */
	/* Check if we can multiply without overflow: */
	if (t >= ~0U / USEC_PER_MSEC) {
		bbr_reset_lt_bw_sampling(sk);  /* interval too long; reset */
		return;
	}
	t *= USEC_PER_MSEC;
	bw = (u64)delivered * BW_UNIT;
	do_div(bw, t);
	bbr_lt_bw_interval_done(sk, bw);
}
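
/* The loss-rate test above is fixed-point: with bbr_lt_loss_thresh = 50 and
 * BBR_SCALE = 8, an interval is "lossy" when lost/delivered >= 50/256, i.e.
 * a loss rate of roughly 20%. For example, 30 losses over 100 delivered
 * packets (30 << 8 = 7680 >= 50 * 100 = 5000) qualifies.
 */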

/* Estimate the bandwidth based on how fast packets are delivered. */
static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u64 bw;

	bbr->round_start = 0;
	if (rs->delivered < 0 || rs->interval_us <= 0)
		return; /* not a valid observation */

	/* See if we've reached the next RTT: */
	if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) {
		bbr->next_rtt_delivered = tp->delivered;
		bbr->rtt_cnt++;
		bbr->round_start = 1;
		bbr->packet_conservation = 0;
	}

	bbr_lt_bw_sampling(sk, rs);

	/* Divide delivered by the interval to find a (lower bound) bottleneck
	 * bandwidth sample. Delivered is in packets and interval_us in uS and
	 * ratio will be <<1 for most connections. So delivered is first scaled.
	 */
	bw = (u64)rs->delivered * BW_UNIT;
	do_div(bw, rs->interval_us);

	/* If this sample is application-limited, it is likely to have a very
	 * low delivered count that represents application behavior rather than
	 * the available network rate. Such a sample could drag down estimated
	 * bw, causing needless slow-down. Thus, to continue to send at the
	 * last measured network rate, we filter out app-limited samples unless
	 * they describe the path bw at least as well as our bw model.
	 *
	 * So the goal during app-limited phase is to proceed with the best
	 * network rate no matter how long. We automatically leave this
	 * phase when app writes faster than the network can deliver.
	 */
	if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) {
		/* Incorporate new sample into our max bw filter. */
		minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw);
	}
}
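
/* The max filter above keeps the best bw sample seen over the last
 * bbr_bw_rtts = CYCLE_LEN + 2 = 10 packet-timed round trips, long enough to
 * cover one full PROBE_BW gain cycle, so the 1.25x probing phase can refresh
 * the estimate before older samples age out of the window.
 */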

/* Estimate when the pipe is full, using the change in delivery rate: BBR
 * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by
 * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
 * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the
 * higher rwin, 3: we get higher delivery rate samples. Or transient
 * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar
 * design goal, but uses delay and inter-ACK spacing instead of bandwidth.
 */
static void bbr_check_full_bw_reached(struct sock *sk,
				      const struct rate_sample *rs)
{
	struct bbr *bbr = inet_csk_ca(sk);
	u32 bw_thresh;

	if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited)
		return;

	bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE;
	if (bbr_max_bw(sk) >= bw_thresh) {
		bbr->full_bw = bbr_max_bw(sk);
		bbr->full_bw_cnt = 0;
		return;
	}
	++bbr->full_bw_cnt;
}
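
/* For example, if full_bw was last recorded as 400 (in bw-filter units), the
 * threshold is 400 * 5/4 = 500; if the windowed max bw stays below 500 for
 * three consecutive round starts, full_bw_cnt reaches 3 and
 * bbr_full_bw_reached() starts returning true, ending STARTUP.
 */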

/* If pipe is probably full, drain the queue and then enter steady-state. */
static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
{
	struct bbr *bbr = inet_csk_ca(sk);

	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
		bbr->mode = BBR_DRAIN;	/* drain queue we created */
		bbr->pacing_gain = bbr_drain_gain;	/* pace slowly to drain */
		bbr->cwnd_gain = bbr_high_gain;	/* maintain cwnd */
	}	/* fall through to check if in-flight is already small: */
	if (bbr->mode == BBR_DRAIN &&
	    tcp_packets_in_flight(tcp_sk(sk)) <=
	    bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT))
		bbr_reset_probe_bw_mode(sk);  /* we estimate queue is drained */
}

/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and
 * periodically drain the bottleneck queue, to converge to measure the true
 * min_rtt (unloaded propagation delay). This allows the flows to keep queues
 * small (reducing queuing delay and packet loss) and achieve fairness among
 * BBR flows.
 *
 * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires,
 * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets.
 * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed
 * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and
 * re-enter the previous mode. BBR uses 200ms to approximately bound the
 * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s).
 *
 * Note that flows need only pay 2% if they are busy sending over the last 10
 * seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have
 * natural silences or low-rate periods within 10 seconds where the rate is low
 * enough for long enough to drain its queue in the bottleneck. We pick up
 * these min RTT measurements opportunistically with our min_rtt filter.
 */
static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	bool filter_expired;

	/* Track min RTT seen in the min_rtt_win_sec filter window: */
	filter_expired = after(tcp_jiffies32,
			       bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
	if (rs->rtt_us >= 0 &&
	    (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) {
		bbr->min_rtt_us = rs->rtt_us;
		bbr->min_rtt_stamp = tcp_jiffies32;
	}

	if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
	    !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
		bbr->mode = BBR_PROBE_RTT;  /* dip, drain queue */
		bbr->pacing_gain = BBR_UNIT;
		bbr->cwnd_gain = BBR_UNIT;
		bbr_save_cwnd(sk);  /* note cwnd so we can restore it */
		bbr->probe_rtt_done_stamp = 0;
	}

	if (bbr->mode == BBR_PROBE_RTT) {
		/* Ignore low rate samples during this mode. */
		tp->app_limited =
			(tp->delivered + tcp_packets_in_flight(tp)) ? : 1;
		/* Maintain min packets in flight for max(200 ms, 1 round). */
		if (!bbr->probe_rtt_done_stamp &&
		    tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) {
			bbr->probe_rtt_done_stamp = tcp_jiffies32 +
				msecs_to_jiffies(bbr_probe_rtt_mode_ms);
			bbr->probe_rtt_round_done = 0;
			bbr->next_rtt_delivered = tp->delivered;
		} else if (bbr->probe_rtt_done_stamp) {
			if (bbr->round_start)
				bbr->probe_rtt_round_done = 1;
			if (bbr->probe_rtt_round_done &&
			    after(tcp_jiffies32, bbr->probe_rtt_done_stamp)) {
				bbr->min_rtt_stamp = tcp_jiffies32;
				bbr->restore_cwnd = 1;  /* snap to prior_cwnd */
				bbr_reset_mode(sk);
			}
		}
	}
	bbr->idle_restart = 0;
}

static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
{
	bbr_update_bw(sk, rs);
	bbr_update_cycle_phase(sk, rs);
	bbr_check_full_bw_reached(sk, rs);
	bbr_check_drain(sk, rs);
	bbr_update_min_rtt(sk, rs);
}

static void bbr_main(struct sock *sk, const struct rate_sample *rs)
{
	struct bbr *bbr = inet_csk_ca(sk);
	u32 bw;

	bbr_update_model(sk, rs);

	bw = bbr_bw(sk);
	bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
	bbr_set_tso_segs_goal(sk);
	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}

static void bbr_init(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->prior_cwnd = 0;
	bbr->tso_segs_goal = 0;	 /* default segs per skb until first ACK */
	bbr->rtt_cnt = 0;
	bbr->next_rtt_delivered = 0;
	bbr->prev_ca_state = TCP_CA_Open;
	bbr->packet_conservation = 0;

	bbr->probe_rtt_done_stamp = 0;
	bbr->probe_rtt_round_done = 0;
	bbr->min_rtt_us = tcp_min_rtt(tp);
	bbr->min_rtt_stamp = tcp_jiffies32;

	minmax_reset(&bbr->bw, bbr->rtt_cnt, 0);  /* init max bw to 0 */

	bbr->has_seen_rtt = 0;
	bbr_init_pacing_rate_from_rtt(sk);

	bbr->restore_cwnd = 0;
	bbr->round_start = 0;
	bbr->idle_restart = 0;
	bbr->full_bw = 0;
	bbr->full_bw_cnt = 0;
	bbr->cycle_mstamp = 0;
	bbr->cycle_idx = 0;
	bbr_reset_lt_bw_sampling(sk);
	bbr_reset_startup_mode(sk);

	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
}

static u32 bbr_sndbuf_expand(struct sock *sk)
{
	/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
	return 3;
}

/* In theory BBR does not need to undo the cwnd since it does not
 * always reduce cwnd on losses (see bbr_main()). Keep it for now.
 */
static u32 bbr_undo_cwnd(struct sock *sk)
{
	return tcp_sk(sk)->snd_cwnd;
}

/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
static u32 bbr_ssthresh(struct sock *sk)
{
	bbr_save_cwnd(sk);
	return TCP_INFINITE_SSTHRESH;	/* BBR does not use ssthresh */
}

static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
			   union tcp_cc_info *info)
{
	if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
	    ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct bbr *bbr = inet_csk_ca(sk);
		u64 bw = bbr_bw(sk);

		/* Convert bw to bytes/sec for reporting: */
		bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE;
		memset(&info->bbr, 0, sizeof(info->bbr));
		info->bbr.bbr_bw_lo = (u32)bw;
		info->bbr.bbr_bw_hi = (u32)(bw >> 32);
		info->bbr.bbr_min_rtt = bbr->min_rtt_us;
		info->bbr.bbr_pacing_gain = bbr->pacing_gain;
		info->bbr.bbr_cwnd_gain = bbr->cwnd_gain;
		*attr = INET_DIAG_BBRINFO;
		return sizeof(info->bbr);
	}
	return 0;
}

static void bbr_set_state(struct sock *sk, u8 new_state)
{
	struct bbr *bbr = inet_csk_ca(sk);

	if (new_state == TCP_CA_Loss) {
		struct rate_sample rs = { .losses = 1 };

		bbr->prev_ca_state = TCP_CA_Loss;
		bbr->full_bw = 0;
		bbr->round_start = 1;	/* treat RTO like end of a round */
		bbr_lt_bw_sampling(sk, &rs);
	}
}

static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
	.flags		= TCP_CONG_NON_RESTRICTED,
	.name		= "bbr",
	.owner		= THIS_MODULE,
	.init		= bbr_init,
	.cong_control	= bbr_main,
	.sndbuf_expand	= bbr_sndbuf_expand,
	.undo_cwnd	= bbr_undo_cwnd,
	.cwnd_event	= bbr_cwnd_event,
	.ssthresh	= bbr_ssthresh,
	.tso_segs_goal	= bbr_tso_segs_goal,
	.get_info	= bbr_get_info,
	.set_state	= bbr_set_state,
};

static int __init bbr_register(void)
{
	BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
	return tcp_register_congestion_control(&tcp_bbr_cong_ops);
}

static void __exit bbr_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
}

module_init(bbr_register);
module_exit(bbr_unregister);

MODULE_AUTHOR("Van Jacobson <vanj@google.com>");
MODULE_AUTHOR("Neal Cardwell <ncardwell@google.com>");
MODULE_AUTHOR("Yuchung Cheng <ycheng@google.com>");
MODULE_AUTHOR("Soheil Hassas Yeganeh <soheil@google.com>");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)");