/* Bottleneck Bandwidth and RTT (BBR) congestion control
 *
 * BBR congestion control computes the sending rate based on the delivery
 * rate (throughput) estimated from ACKs. In a nutshell:
 *
 *   On each ACK, update our model of the network path:
 *      bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips)
 *      min_rtt = windowed_min(rtt, 10 seconds)
 *   pacing_rate = pacing_gain * bottleneck_bandwidth
 *   cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4)
 *
 * The core algorithm does not react directly to packet losses or delays,
 * although BBR may adjust the size of next send per ACK when loss is
 * observed, or adjust the sending rate if it estimates there is a
 * traffic policer, in order to keep the drop rate reasonable.
 *
 * Here is a state transition diagram for BBR:
 *
 *             |
 *             V
 *    +---> STARTUP  ----+
 *    |        |         |
 *    |        V         |
 *    |      DRAIN   ----+
 *    |        |         |
 *    |        V         |
 *    +---> PROBE_BW ----+
 *    |      ^    |      |
 *    |      |    |      |
 *    |      +----+      |
 *    |                  |
 *    +---- PROBE_RTT <--+
 *
 * A BBR flow starts in STARTUP, and ramps up its sending rate quickly.
 * When it estimates the pipe is full, it enters DRAIN to drain the queue.
 * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT.
 * A long-lived BBR flow spends the vast majority of its time remaining
 * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth
 * in a fair manner, with a small, bounded queue. *If* a flow has been
 * continuously sending for the entire min_rtt window, and hasn't seen an RTT
 * sample that matches or decreases its min_rtt estimate for 10 seconds, then
 * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe
 * the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if
 * we estimated that we reached the full bw of the pipe then we enter PROBE_BW;
 * otherwise we enter STARTUP to try to fill the pipe.
 *
 * BBR is described in detail in:
 *   "BBR: Congestion-Based Congestion Control",
 *   Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh,
 *   Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016.
 *
 * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled,
 * otherwise TCP stack falls back to an internal pacing using one high
 * resolution timer per TCP socket and may use more resources.
 */
#include <linux/module.h>
#include <net/tcp.h>
#include <linux/inet_diag.h>
#include <linux/inet.h>
#include <linux/random.h>
#include <linux/win_minmax.h>

/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth
 * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps.
 * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32.
 * Since the minimum window is >= 4 packets, the lower bound isn't an issue.
 * The upper bound isn't an issue with existing technologies.
 */
#define BW_SCALE 24
#define BW_UNIT (1 << BW_SCALE)

#define BBR_SCALE 8	/* scaling factor for fractions in BBR (e.g. gains) */
#define BBR_UNIT (1 << BBR_SCALE)

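/* Worked example (illustrative, not from the original source): a delivery
 * rate of 100 packets per 10000 usec is 0.01 pkt/usec, stored in the bw
 * filter as 0.01 * BW_UNIT ~= 167772; at 1500-byte packets that is
 * 15 MByte/sec, i.e. ~120 Mbit/sec.
 */
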
/* BBR has the following modes for deciding how fast to send: */
enum bbr_mode {
	BBR_STARTUP,	/* ramp up sending rate rapidly to fill pipe */
	BBR_DRAIN,	/* drain any queue created during startup */
	BBR_PROBE_BW,	/* discover, share bw: pace around estimated bw */
	BBR_PROBE_RTT,	/* cut inflight to min to probe min_rtt */
};

/* BBR congestion control block */
struct bbr {
	u32	min_rtt_us;	        /* min RTT in min_rtt_win_sec window */
	u32	min_rtt_stamp;	        /* timestamp of min_rtt_us */
	u32	probe_rtt_done_stamp;   /* end time for BBR_PROBE_RTT mode */
	struct minmax bw;	/* Max recent delivery rate in pkts/uS << 24 */
	u32	rtt_cnt;	    /* count of packet-timed rounds elapsed */
	u32	next_rtt_delivered; /* scb->tx.delivered at end of round */
	u64	cycle_mstamp;	     /* time of this cycle phase start */
	u32	mode:3,		     /* current bbr_mode in state machine */
		prev_ca_state:3,     /* CA state on previous ACK */
		packet_conservation:1,  /* use packet conservation? */
		round_start:1,	     /* start of packet-timed tx->ack round? */
		idle_restart:1,	     /* restarting after idle? */
		probe_rtt_round_done:1,  /* a BBR_PROBE_RTT round at 4 pkts? */
		unused:13,
		lt_is_sampling:1,    /* taking long-term ("LT") samples now? */
		lt_rtt_cnt:7,	     /* round trips in long-term interval */
		lt_use_bw:1;	     /* use lt_bw as our bw estimate? */
	u32	lt_bw;		     /* LT est delivery rate in pkts/uS << 24 */
	u32	lt_last_delivered;   /* LT intvl start: tp->delivered */
	u32	lt_last_stamp;	     /* LT intvl start: tp->delivered_mstamp */
	u32	lt_last_lost;	     /* LT intvl start: tp->lost */
	u32	pacing_gain:10,	/* current gain for setting pacing rate */
		cwnd_gain:10,	/* current gain for setting cwnd */
		full_bw_reached:1,   /* reached full bw in Startup? */
		full_bw_cnt:2,	/* number of rounds without large bw gains */
		cycle_idx:3,	/* current index in pacing_gain cycle array */
		has_seen_rtt:1, /* have we seen an RTT sample yet? */
		unused_b:5;
	u32	prior_cwnd;	/* prior cwnd upon entering loss recovery */
	u32	full_bw;	/* recent bw, to estimate if pipe is full */
};

#define CYCLE_LEN	8	/* number of phases in a pacing gain cycle */

/* Window length of bw filter (in rounds): */
static const int bbr_bw_rtts = CYCLE_LEN + 2;
/* Window length of min_rtt filter (in sec): */
static const u32 bbr_min_rtt_win_sec = 10;
/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */
static const u32 bbr_probe_rtt_mode_ms = 200;
/* Skip TSO below the following bandwidth (bits/sec): */
static const int bbr_min_tso_rate = 1200000;

/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck.
 * In order to help drive the network toward lower queues and low latency while
 * maintaining high utilization, the average pacing rate aims to be slightly
 * lower than the estimated bandwidth. This is an important aspect of the
 * design.
 */
static const int bbr_pacing_margin_percent = 1;

/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
 * that will allow a smoothly increasing pacing rate that will double each RTT
 * and send the same number of packets per RTT that an un-paced, slow-starting
 * Reno or CUBIC flow would:
 */
static const int bbr_high_gain  = BBR_UNIT * 2885 / 1000 + 1;
/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain
 * the queue created in BBR_STARTUP in a single round:
 */
static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885;
/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs: */
static const int bbr_cwnd_gain  = BBR_UNIT * 2;
/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */
static const int bbr_pacing_gain[] = {
	BBR_UNIT * 5 / 4,	/* probe for more available bw */
	BBR_UNIT * 3 / 4,	/* drain queue and/or yield bw to other flows */
	BBR_UNIT, BBR_UNIT, BBR_UNIT,	/* cruise at 1.0*bw to utilize pipe, */
	BBR_UNIT, BBR_UNIT, BBR_UNIT	/* without creating excess queue... */
};
/* Randomize the starting gain cycling phase over N phases: */
static const u32 bbr_cycle_rand = 7;

/* Try to keep at least this many packets in flight, if things go smoothly. For
 * smooth functioning, a sliding window protocol ACKing every other packet
 * needs at least 4 packets in flight:
 */
static const u32 bbr_cwnd_min_target = 4;

/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe: */
/* If bw has increased significantly (1.25x), there may be more bw available: */
static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4;
/* But after 3 rounds w/o significant bw growth, estimate pipe is full: */
static const u32 bbr_full_bw_cnt = 3;

/* "long-term" ("LT") bandwidth estimator parameters: */
/* The minimum number of rounds in an LT bw sampling interval: */
static const u32 bbr_lt_intvl_min_rtts = 4;
/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */
static const u32 bbr_lt_loss_thresh = 50;
/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */
static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8;
/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */
static const u32 bbr_lt_bw_diff = 4000 / 8;
/* If we estimate we're policed, use lt_bw for this many round trips: */
static const u32 bbr_lt_bw_max_rtts = 48;

static void bbr_check_probe_rtt_done(struct sock *sk);

/* Do we estimate that STARTUP filled the pipe? */
static bool bbr_full_bw_reached(const struct sock *sk)
{
	const struct bbr *bbr = inet_csk_ca(sk);

	return bbr->full_bw_reached;
}

/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
static u32 bbr_max_bw(const struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	return minmax_get(&bbr->bw);
}

/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */
static u32 bbr_bw(const struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk);
}

/* Convert a BBR bw and gain factor to a pacing rate in bytes per second.
 * The order of operations limits truncation error while avoiding overflow:
 * scale by mss and gain first, then fold in the pacing margin before
 * removing the BW_SCALE shift.
 */
static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
{
	unsigned int mss = tcp_sk(sk)->mss_cache;

	rate *= mss;
	rate *= gain;
	rate >>= BBR_SCALE;
	rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent);
	return rate >> BW_SCALE;
}
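
/* Illustrative check (not from the original source): with mss = 1500,
 * gain = BBR_UNIT and rate = 0.01 pkt/usec << BW_SCALE, the result is
 * 0.01 * 1500 * 990000 ~= 14.85 MByte/sec: ~1% below the raw 15 MByte/sec
 * estimate, matching bbr_pacing_margin_percent.
 */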

/* Convert a BBR bw and gain factor to a pacing rate in bytes per second,
 * capped at the socket's configured max pacing rate.
 */
static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
{
	u64 rate = bw;

	rate = bbr_rate_bytes_per_sec(sk, rate, gain);
	rate = min_t(u64, rate, sk->sk_max_pacing_rate);
	return rate;
}

/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u64 bw;
	u32 rtt_us;

	if (tp->srtt_us) {		/* any RTT sample yet? */
		rtt_us = max(tp->srtt_us >> 3, 1U);
		bbr->has_seen_rtt = 1;
	} else {			 /* no RTT sample yet */
		rtt_us = USEC_PER_MSEC;	 /* use nominal default RTT */
	}
	bw = (u64)tp->snd_cwnd * BW_UNIT;
	do_div(bw, rtt_us);
	sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
}

/* Pace using current bw estimate and a gain factor. */
static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	unsigned long rate = bbr_bw_to_pacing_rate(sk, bw, gain);

	if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
		bbr_init_pacing_rate_from_rtt(sk);
	if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
		sk->sk_pacing_rate = rate;
}
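
/* Note (explanatory, not from the original source): until
 * bbr_full_bw_reached() the pacing rate is only ever raised, never
 * lowered, so a noisy low bw sample cannot stall STARTUP's exponential
 * ramp-up.
 */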

static u32 bbr_min_tso_segs(struct sock *sk)
{
	return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
}

static u32 bbr_tso_segs_goal(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 segs, bytes;

	/* Sort of tcp_tso_autosize() but ignoring
	 * driver provided sk_gso_max_size.
	 */
	bytes = min_t(unsigned long, sk->sk_pacing_rate >> sk->sk_pacing_shift,
		      GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
	segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));

	return min(segs, 0x7FU);
}
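
/* Illustrative sizing (not from the original source): at a pacing rate of
 * 10 MByte/sec and the default sk_pacing_shift of 10, the burst budget is
 * ~9.8 KBytes, i.e. ~6 full-sized segments at an mss of 1448 bytes,
 * clamped to at least bbr_min_tso_segs(sk) and at most 0x7F segments.
 */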

/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
static void bbr_save_cwnd(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
		bbr->prior_cwnd = tp->snd_cwnd;  /* this cwnd is good enough */
	else  /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
		bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
}

static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	if (event == CA_EVENT_TX_START && tp->app_limited) {
		bbr->idle_restart = 1;
		/* Avoid pointless buffer overflows: pace at est. bw if we
		 * don't need more speed (we're restarting from idle and
		 * app-limited).
		 */
		if (bbr->mode == BBR_PROBE_BW)
			bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT);
		else if (bbr->mode == BBR_PROBE_RTT)
			bbr_check_probe_rtt_done(sk);
	}
}

/* Find target cwnd. Right-size the cwnd based on min RTT and the
 * estimated bottleneck bandwidth:
 *
 * cwnd = bw * min_rtt * gain = BDP * gain
 *
 * The key factor, gain, controls the amount of queue. While a small gain
 * builds a smaller queue, it becomes more vulnerable to noise in RTT
 * measurements (e.g., delayed ACKs or other ACK compression effects). This
 * noise may cause BBR to under-estimate the rate.
 *
 * To achieve full performance in high-speed paths, we budget enough cwnd to
 * fit full-sized skbs in-flight on both end hosts to fully utilize the path:
 *   - one skb in sending host Qdisc,
 *   - one skb in sending host TSO/GSO engine
 *   - one skb being received by receiver host LRO/GRO/delayed-ACK engine
 * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because
 * in such cases tso_segs_goal is small. The minimum cwnd is 4 packets,
 * which allows 2 outstanding 2-packet sequences, to try to keep pipe
 * full even with ACK-every-other-packet delayed ACKs.
 */
static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
{
	struct bbr *bbr = inet_csk_ca(sk);
	u32 cwnd;
	u64 w;

	/* If we've never had a valid RTT sample, cap cwnd at the initial
	 * default. This should be a rare case, and should only happen when
	 * the connection is not using TCP timestamps and has retransmitted
	 * all of the SYN/SYNACK/data packets ACKed so far.
	 */
	if (unlikely(bbr->min_rtt_us == ~0U))	 /* no valid RTT samples yet? */
		return TCP_INIT_CWND;  /* be safe: cap at default initial cwnd*/

	w = (u64)bw * bbr->min_rtt_us;

	/* Apply a gain to the given value, then remove the BW_SCALE shift. */
	cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;

	/* Allow enough full-sized skbs in flight to utilize end systems. */
	cwnd += 3 * bbr_tso_segs_goal(sk);

	/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
	cwnd = (cwnd + 1) & ~1U;

	/* Ensure gain cycling gets inflight above BDP even for small BDPs. */
	if (bbr->mode == BBR_PROBE_BW && gain > BBR_UNIT)
		cwnd += 2;

	return cwnd;
}
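
/* Worked example (illustrative, not from the original source): with
 * bw = 0.01 pkt/usec << BW_SCALE and min_rtt_us = 10000, the BDP is
 * w / BW_UNIT = 100 packets; a gain of 2 * BBR_UNIT yields a base cwnd
 * of 200 packets before the TSO headroom and even-number rounding above.
 */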

/* With pacing at lower layers, there's often less data "in the network" than
 * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq),
 * we often have several skbs queued in the pacing layer with a pre-scheduled
 * earliest departure time (EDT). BBR adapts its pacing rate based on the
 * inflight level that it estimates has already been "baked in" by previous
 * departure time decisions. We calculate a rough estimate of the number of our
 * packets that might be in the network at the earliest departure time for the
 * next skb scheduled:
 *   in_network_at_edt = inflight_at_edt - (EDT - now) * bw
 * If we're increasing inflight, then we want to know if the transmit of the
 * EDT skb will push inflight above the target, so inflight_at_edt includes
 * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight,
 * then we want to know if inflight will stay below the target even if the skb
 * departing at EDT has bbr_tso_segs_goal() segments in the network.
 */
static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u64 now_ns, edt_ns, interval_us;
	u32 interval_delivered, inflight_at_edt;

	now_ns = tp->tcp_clock_cache;
	edt_ns = max(tp->tcp_wstamp_ns, now_ns);
	interval_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC);
	interval_delivered = (u64)bbr_bw(sk) * interval_us >> BW_SCALE;
	inflight_at_edt = inflight_now;
	if (bbr->pacing_gain > BBR_UNIT)              /* increasing inflight */
		inflight_at_edt += bbr_tso_segs_goal(sk);  /* include EDT skb */
	if (interval_delivered >= inflight_at_edt)
		return 0;
	return inflight_at_edt - interval_delivered;
}

/* An optimization in BBR to reduce losses: On the first round of recovery, we
 * follow the packet conservation principle: send P packets per P packets
 * acked. After that, we slow-start and send at most 2*P packets per P packets
 * acked. After recovery finishes, or upon undo, we restore the cwnd we had
 * when recovery started (capped by the target cwnd based on estimated BDP).
 */
static bool bbr_set_cwnd_to_recover_or_restore(
	struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
	u32 cwnd = tp->snd_cwnd;

	/* An ACK for P pkts should release at most 2*P packets. We do this
	 * in two steps. First, here we deduct the number of lost packets.
	 * Then, in bbr_set_cwnd() we slow start up toward the target cwnd.
	 */
	if (rs->losses > 0)
		cwnd = max_t(s32, cwnd - rs->losses, 1);

	if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) {
		/* Starting 1st round of Recovery, so do packet conservation. */
		bbr->packet_conservation = 1;
		bbr->next_rtt_delivered = tp->delivered;  /* start round now */
		/* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */
		cwnd = tcp_packets_in_flight(tp) + acked;
	} else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) {
		/* Exiting loss recovery; restore cwnd saved before recovery. */
		cwnd = max(cwnd, bbr->prior_cwnd);
		bbr->packet_conservation = 0;
	}
	bbr->prev_ca_state = state;

	if (bbr->packet_conservation) {
		*new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked);
		return true;	/* yes, using packet conservation */
	}
	*new_cwnd = cwnd;
	return false;
}
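
/* Illustrative (not from the original source): if an ACK frees 2 packets,
 * leaving 8 in flight, packet conservation sets cwnd to at least
 * 8 + 2 = 10, allowing exactly the 2 newly-freed packets' worth of new
 * data to be sent in the first round of recovery.
 */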

/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss
 * has temporarily cut the cwnd below the target), or snap down to target if
 * we're above it.
 */
static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
			 u32 acked, u32 bw, int gain)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u32 cwnd = tp->snd_cwnd, target_cwnd = 0;

	if (!acked)
		goto done;  /* no packet fully ACKed; just apply caps */

	if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd))
		goto done;

	/* If we're below target cwnd, slow start cwnd toward target cwnd. */
	target_cwnd = bbr_target_cwnd(sk, bw, gain);
	if (bbr_full_bw_reached(sk))  /* only cut cwnd if we filled the pipe */
		cwnd = min(cwnd + acked, target_cwnd);
	else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND)
		cwnd = cwnd + acked;
	cwnd = max(cwnd, bbr_cwnd_min_target);

done:
	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);	/* apply global cap */
	if (bbr->mode == BBR_PROBE_RTT)  /* drain queue, refresh min_rtt */
		tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target);
}

/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
static bool bbr_is_next_cycle_phase(struct sock *sk,
				    const struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	bool is_full_length =
		tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) >
		bbr->min_rtt_us;
	u32 inflight, bw;

	/* The pacing_gain of 1.0 paces at the estimated bw to try to fully
	 * use the pipe without increasing the queue.
	 */
	if (bbr->pacing_gain == BBR_UNIT)
		return is_full_length;		/* just use wall clock time */

	inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight);
	bw = bbr_max_bw(sk);

	/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
	 * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is
	 * small (e.g. on a LAN). We do not persist if packets are lost, since
	 * a path with small buffers may not hold that much.
	 */
	if (bbr->pacing_gain > BBR_UNIT)
		return is_full_length &&
			(rs->losses ||  /* perhaps pacing_gain*BDP won't fit */
			 inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain));

	/* A pacing_gain < 1.0 tries to drain extra queue we added if bw
	 * probing didn't find more bw. If inflight falls to match BDP then we
	 * estimate queue is drained; persisting would underutilize the pipe.
	 */
	return is_full_length ||
		inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT);
}
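
/* Illustrative timing (not from the original source): with min_rtt = 10 ms,
 * a full pass through the 8-phase gain cycle spans roughly 80 ms: ~10 ms
 * probing at 5/4, ~10 ms draining at 3/4, and six ~10 ms cruise phases at
 * 1.0, subject to the inflight conditions above.
 */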

static void bbr_advance_cycle_phase(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
	bbr->cycle_mstamp = tp->delivered_mstamp;
}

/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
static void bbr_update_cycle_phase(struct sock *sk,
				   const struct rate_sample *rs)
{
	struct bbr *bbr = inet_csk_ca(sk);

	if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs))
		bbr_advance_cycle_phase(sk);
}

static void bbr_reset_startup_mode(struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->mode = BBR_STARTUP;
}

static void bbr_reset_probe_bw_mode(struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->mode = BBR_PROBE_BW;
	bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
	bbr_advance_cycle_phase(sk);	/* flip to next phase of gain cycle */
}

static void bbr_reset_mode(struct sock *sk)
{
	if (!bbr_full_bw_reached(sk))
		bbr_reset_startup_mode(sk);
	else
		bbr_reset_probe_bw_mode(sk);
}

/* Start a new long-term sampling interval. */
static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC);
	bbr->lt_last_delivered = tp->delivered;
	bbr->lt_last_lost = tp->lost;
	bbr->lt_rtt_cnt = 0;
}

/* Completely reset long-term bandwidth sampling. */
static void bbr_reset_lt_bw_sampling(struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->lt_bw = 0;
	bbr->lt_use_bw = 0;
	bbr->lt_is_sampling = false;
	bbr_reset_lt_bw_sampling_interval(sk);
}

/* Long-term bw sampling interval is done. Estimate whether we're policed. */
static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw)
{
	struct bbr *bbr = inet_csk_ca(sk);
	u32 diff;

	if (bbr->lt_bw) {  /* do we have bw from a previous interval? */
		/* Is new bw close to the lt_bw from the previous interval? */
		diff = abs(bw - bbr->lt_bw);
		if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) ||
		    (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <=
		     bbr_lt_bw_diff)) {
			/* All criteria are met; estimate we're policed. */
			bbr->lt_bw = (bw + bbr->lt_bw) >> 1;  /* avg 2 intvls */
			bbr->lt_use_bw = 1;
			bbr->pacing_gain = BBR_UNIT;  /* try to avoid drops */
			bbr->lt_rtt_cnt = 0;
			return;
		}
	}
	bbr->lt_bw = bw;
	bbr_reset_lt_bw_sampling_interval(sk);
}
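
/* Illustrative (not from the original source): if the prior interval
 * measured lt_bw = 80 and this one measures bw = 88 (same units), then
 * diff * BBR_UNIT = 8 * 256 = 2048 <= bbr_lt_bw_ratio * 80 = 2560, so the
 * intervals are "consistent" and we model a policer at (80 + 88) / 2 = 84.
 */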

/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of
 * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and
 * explicitly models their policed rate, to reduce unnecessary losses. We
 * estimate that we're policed if we see 2 consecutive sampling intervals with
 * consistent throughput and high packet loss. If we think we're being policed,
 * set lt_bw to the "long-term" average delivery rate from those 2 intervals.
 */
static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u32 lost, delivered;
	u64 bw;
	u32 t;

	if (bbr->lt_use_bw) {	/* already using long-term rate, lt_bw? */
		if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
		    ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) {
			bbr_reset_lt_bw_sampling(sk);    /* stop using lt_bw */
			bbr_reset_probe_bw_mode(sk);  /* restart gain cycling */
		}
		return;
	}

	/* Wait for the first loss before sampling, to let the policer exhaust
	 * its tokens and estimate the steady-state rate allowed by the policer.
	 * Starting samples earlier includes bursts that over-estimate the bw.
	 */
	if (!bbr->lt_is_sampling) {
		if (!rs->losses)
			return;
		bbr_reset_lt_bw_sampling_interval(sk);
		bbr->lt_is_sampling = true;
	}

	/* To avoid underestimates, reset sampling if we run out of data. */
	if (rs->is_app_limited) {
		bbr_reset_lt_bw_sampling(sk);
		return;
	}

	if (bbr->round_start)
		bbr->lt_rtt_cnt++;	/* count round trips in this interval */
	if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts)
		return;		/* sampling interval needs to be longer */
	if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) {
		bbr_reset_lt_bw_sampling(sk);  /* interval is too long */
		return;
	}

	/* End sampling interval when a packet is lost, so we estimate the
	 * policer tokens were exhausted. Stopping the sampling before the
	 * tokens are exhausted under-estimates the policed rate.
	 */
	if (!rs->losses)
		return;

	/* Calculate packets lost and delivered in sampling interval. */
	lost = tp->lost - bbr->lt_last_lost;
	delivered = tp->delivered - bbr->lt_last_delivered;
	/* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */
	if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered)
		return;

	/* Find average delivery rate in this sampling interval. */
	t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp;
	if ((s32)t < 1)
		return;		/* interval is less than one ms, so wait */
	/* Check if can multiply without overflow */
	if (t >= ~0U / USEC_PER_MSEC) {
		bbr_reset_lt_bw_sampling(sk);  /* interval too long; reset */
		return;
	}
	t *= USEC_PER_MSEC;
	bw = (u64)delivered * BW_UNIT;
	do_div(bw, t);
	bbr_lt_bw_interval_done(sk, bw);
}
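
/* Note (explanatory, not from the original source): bbr_lt_loss_thresh = 50
 * with BBR_SCALE = 8 means the "lossy interval" test above fires at a loss
 * rate of 50/256, i.e. roughly 20% of packets lost in the interval.
 */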

/* Estimate the bandwidth based on how fast packets are delivered */
static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u64 bw;

	bbr->round_start = 0;
	if (rs->delivered < 0 || rs->interval_us <= 0)
		return; /* Not a valid observation */

	/* See if we've reached the next RTT */
	if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) {
		bbr->next_rtt_delivered = tp->delivered;
		bbr->rtt_cnt++;
		bbr->round_start = 1;
		bbr->packet_conservation = 0;
	}

	bbr_lt_bw_sampling(sk, rs);

	/* Divide delivered by the interval to find a (lower bound) bottleneck
	 * bandwidth sample. Delivered is in packets and interval_us in uS and
	 * ratio will be <<1 for most connections. So delivered is first scaled.
	 */
	bw = (u64)rs->delivered * BW_UNIT;
	do_div(bw, rs->interval_us);

	/* If this sample is application-limited, it is likely to have a very
	 * low delivered count that represents application behavior rather than
	 * the available network rate. Such a sample could drag down estimated
	 * bw, causing needless slow-down. Thus, to continue to send at the
	 * last measured network rate, we filter out app-limited samples unless
	 * they describe the path bw at least as well as our bw model.
	 *
	 * So the goal during app-limited phase is to proceed with the best
	 * network rate no matter how long. We automatically leave this
	 * phase when app writes faster than the network can deliver :)
	 */
	if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) {
		/* Incorporate new sample into our max bw filter. */
		minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw);
	}
}

/* Estimate when the pipe is full, using the change in delivery rate: BBR
 * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by
 * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
 * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the
 * higher rwin, 3: we get higher delivery rate samples. Or transient
 * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar
 * design goal, but uses delay and inter-ACK spacing instead of bandwidth.
 */
static void bbr_check_full_bw_reached(struct sock *sk,
				      const struct rate_sample *rs)
{
	struct bbr *bbr = inet_csk_ca(sk);
	u32 bw_thresh;

	if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited)
		return;

	bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE;
	if (bbr_max_bw(sk) >= bw_thresh) {
		bbr->full_bw = bbr_max_bw(sk);
		bbr->full_bw_cnt = 0;
		return;
	}
	++bbr->full_bw_cnt;
	bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt;
}
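
/* Illustrative (not from the original source): if full_bw was 100 at the
 * start of a round, the filter must reach 100 * 5/4 = 125 in that round to
 * count as significant growth; three consecutive rounds below that mark
 * set full_bw_reached and end STARTUP.
 */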

/* If pipe is probably full, drain the queue and then enter steady-state. */
static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
{
	struct bbr *bbr = inet_csk_ca(sk);

	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
		bbr->mode = BBR_DRAIN;	/* drain queue we created */
		tcp_sk(sk)->snd_ssthresh =
				bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
	}	/* fall through to check if in-flight is already small: */
	if (bbr->mode == BBR_DRAIN &&
	    bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
	    bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT))
		bbr_reset_probe_bw_mode(sk);  /* we estimate queue is drained */
}

static void bbr_check_probe_rtt_done(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	if (!(bbr->probe_rtt_done_stamp &&
	      after(tcp_jiffies32, bbr->probe_rtt_done_stamp)))
		return;

	bbr->min_rtt_stamp = tcp_jiffies32;  /* wait a while until PROBE_RTT */
	tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd);
	bbr_reset_mode(sk);
}

/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and
 * periodically drain the bottleneck queue, to converge to measure the true
 * min_rtt (unloaded propagation delay). This allows the flows to keep queues
 * small (reducing queuing delay and packet loss) and achieve fairness among
 * BBR flows.
 *
 * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires,
 * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets.
 * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed
 * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and
 * re-enter the previous mode. BBR uses 200ms to approximately bound the
 * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s).
 *
 * Note that flows need only pay 2% if they are busy sending over the last 10
 * seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have
 * natural silences or low-rate periods within 10 seconds where the rate is low
 * enough for long enough to drain its queue in the bottleneck. We pick up
 * these min RTT measurements opportunistically with our min_rtt filter. :-)
 */
static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	bool filter_expired;

	/* Track min RTT seen in the min_rtt_win_sec filter window: */
	filter_expired = after(tcp_jiffies32,
			       bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
	if (rs->rtt_us >= 0 &&
	    (rs->rtt_us <= bbr->min_rtt_us ||
	     (filter_expired && !rs->is_ack_delayed))) {
		bbr->min_rtt_us = rs->rtt_us;
		bbr->min_rtt_stamp = tcp_jiffies32;
	}

	if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
	    !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
		bbr->mode = BBR_PROBE_RTT;  /* dip, drain queue */
		bbr_save_cwnd(sk);  /* note cwnd so we can restore it */
		bbr->probe_rtt_done_stamp = 0;
	}

	if (bbr->mode == BBR_PROBE_RTT) {
		/* Ignore low rate samples during this mode. */
		tp->app_limited =
			(tp->delivered + tcp_packets_in_flight(tp)) ? : 1;
		/* Maintain min packets in flight for max(200 ms, 1 round). */
		if (!bbr->probe_rtt_done_stamp &&
		    tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) {
			bbr->probe_rtt_done_stamp = tcp_jiffies32 +
				msecs_to_jiffies(bbr_probe_rtt_mode_ms);
			bbr->probe_rtt_round_done = 0;
			bbr->next_rtt_delivered = tp->delivered;
		} else if (bbr->probe_rtt_done_stamp) {
			if (bbr->round_start)
				bbr->probe_rtt_round_done = 1;
			if (bbr->probe_rtt_round_done)
				bbr_check_probe_rtt_done(sk);
		}
	}

	/* Restart after idle ends only once we process a new S/ACK for data */
	if (rs->delivered > 0)
		bbr->idle_restart = 0;
}

static void bbr_update_gains(struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	switch (bbr->mode) {
	case BBR_STARTUP:
		bbr->pacing_gain = bbr_high_gain;
		bbr->cwnd_gain	 = bbr_high_gain;
		break;
	case BBR_DRAIN:
		bbr->pacing_gain = bbr_drain_gain;	/* slow, to drain */
		bbr->cwnd_gain	 = bbr_high_gain;	/* keep cwnd */
		break;
	case BBR_PROBE_BW:
		bbr->pacing_gain = (bbr->lt_use_bw ?
				    BBR_UNIT :
				    bbr_pacing_gain[bbr->cycle_idx]);
		bbr->cwnd_gain	 = bbr_cwnd_gain;
		break;
	case BBR_PROBE_RTT:
		bbr->pacing_gain = BBR_UNIT;
		bbr->cwnd_gain	 = BBR_UNIT;
		break;
	default:
		WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode);
		break;
	}
}

static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
{
	bbr_update_bw(sk, rs);
	bbr_update_cycle_phase(sk, rs);
	bbr_check_full_bw_reached(sk, rs);
	bbr_check_drain(sk, rs);
	bbr_update_min_rtt(sk, rs);
	bbr_update_gains(sk);
}

static void bbr_main(struct sock *sk, const struct rate_sample *rs)
{
	struct bbr *bbr = inet_csk_ca(sk);
	u32 bw;

	bbr_update_model(sk, rs);

	bw = bbr_bw(sk);
	bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}
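
/* Note (explanatory, not from the original source): bbr_main() is BBR's
 * cong_control hook, invoked by the TCP stack for each ACK with a fresh
 * rate_sample. BBR first updates its path model (bw filter, gain-cycle
 * phase, full-pipe and min_rtt state, gains), then applies the model by
 * setting the pacing rate and cwnd.
 */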

static void bbr_init(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->prior_cwnd = 0;
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	bbr->rtt_cnt = 0;
	bbr->next_rtt_delivered = 0;
	bbr->prev_ca_state = TCP_CA_Open;
	bbr->packet_conservation = 0;

	bbr->probe_rtt_done_stamp = 0;
	bbr->probe_rtt_round_done = 0;
	bbr->min_rtt_us = tcp_min_rtt(tp);
	bbr->min_rtt_stamp = tcp_jiffies32;

	minmax_reset(&bbr->bw, bbr->rtt_cnt, 0);  /* init max bw to 0 */

	bbr->has_seen_rtt = 0;
	bbr_init_pacing_rate_from_rtt(sk);

	bbr->round_start = 0;
	bbr->idle_restart = 0;
	bbr->full_bw_reached = 0;
	bbr->full_bw = 0;
	bbr->full_bw_cnt = 0;
	bbr->cycle_mstamp = 0;
	bbr->cycle_idx = 0;
	bbr_reset_lt_bw_sampling(sk);
	bbr_reset_startup_mode(sk);

	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
}

static u32 bbr_sndbuf_expand(struct sock *sk)
{
	/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
	return 3;
}

/* In theory BBR does not need to undo the cwnd since it does not
 * always reduce cwnd on losses (see bbr_main()). Keep it for now.
 */
static u32 bbr_undo_cwnd(struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->full_bw = 0;   /* spurious slow-down; reset full pipe detection */
	bbr->full_bw_cnt = 0;
	bbr_reset_lt_bw_sampling(sk);
	return tcp_sk(sk)->snd_cwnd;
}

/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
static u32 bbr_ssthresh(struct sock *sk)
{
	bbr_save_cwnd(sk);
	return tcp_sk(sk)->snd_ssthresh;
}

static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
			   union tcp_cc_info *info)
{
	if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
	    ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct bbr *bbr = inet_csk_ca(sk);
		u64 bw = bbr_bw(sk);

		bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE;
		memset(&info->bbr, 0, sizeof(info->bbr));
		info->bbr.bbr_bw_lo		= (u32)bw;
		info->bbr.bbr_bw_hi		= (u32)(bw >> 32);
		info->bbr.bbr_min_rtt		= bbr->min_rtt_us;
		info->bbr.bbr_pacing_gain	= bbr->pacing_gain;
		info->bbr.bbr_cwnd_gain		= bbr->cwnd_gain;
		*attr = INET_DIAG_BBRINFO;
		return sizeof(info->bbr);
	}
	return 0;
}

static void bbr_set_state(struct sock *sk, u8 new_state)
{
	struct bbr *bbr = inet_csk_ca(sk);

	if (new_state == TCP_CA_Loss) {
		struct rate_sample rs = { .losses = 1 };

		bbr->prev_ca_state = TCP_CA_Loss;
		bbr->full_bw = 0;
		bbr->round_start = 1;	/* treat RTO like end of a round */
		bbr_lt_bw_sampling(sk, &rs);
	}
}

static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
	.flags		= TCP_CONG_NON_RESTRICTED,
	.name		= "bbr",
	.owner		= THIS_MODULE,
	.init		= bbr_init,
	.cong_control	= bbr_main,
	.sndbuf_expand	= bbr_sndbuf_expand,
	.undo_cwnd	= bbr_undo_cwnd,
	.cwnd_event	= bbr_cwnd_event,
	.ssthresh	= bbr_ssthresh,
	.min_tso_segs	= bbr_min_tso_segs,
	.get_info	= bbr_get_info,
	.set_state	= bbr_set_state,
};

static int __init bbr_register(void)
{
	BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
	return tcp_register_congestion_control(&tcp_bbr_cong_ops);
}

static void __exit bbr_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
}

module_init(bbr_register);
module_exit(bbr_unregister);

MODULE_AUTHOR("Van Jacobson <vanj@google.com>");
MODULE_AUTHOR("Neal Cardwell <ncardwell@google.com>");
MODULE_AUTHOR("Yuchung Cheng <ycheng@google.com>");
MODULE_AUTHOR("Soheil Hassas Yeganeh <soheil@google.com>");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)");