/* Bottleneck Bandwidth and RTT (BBR) congestion control.
 *
 * BBR congestion control computes the sending rate based on the delivery
 * rate (throughput) estimated from ACKs. In a nutshell:
 *
 *   On each ACK, update our model of the network path:
 *      bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips)
 *      min_rtt = windowed_min(rtt, 10 seconds)
 *   pacing_rate = pacing_gain * bottleneck_bandwidth
 *   cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4)
 *
 * The core algorithm does not react directly to packet losses or delays,
 * although BBR may adjust the size of next send per ACK when loss is
 * observed, or adjust the sending rate if it estimates there is a
 * traffic policer, in order to keep the drop rate reasonable.
 *
 * Here is a state transition diagram for BBR:
 *
 *             |
 *             V
 *    +---> STARTUP  ----+
 *    |        |         |
 *    |        V         |
 *    |      DRAIN   ----+
 *    |        |         |
 *    |        V         |
 *    +---> PROBE_BW ----+
 *    |      ^    |      |
 *    |      |    |      |
 *    |      +----+      |
 *    |                  |
 *    +---- PROBE_RTT <--+
 *
 * A BBR flow starts in STARTUP, and ramps up its sending rate quickly.
 * When it estimates the pipe is full, it enters DRAIN to drain the queue.
 * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT.
 * A long-lived BBR flow spends the vast majority of its time remaining
 * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth
 * in a fair manner, with a small, bounded queue. If a flow has not seen an
 * RTT sample that matches or decreases its min_rtt estimate for 10 seconds,
 * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe
 * the path's two-way propagation delay (min_rtt). Upon exiting PROBE_RTT, if
 * we estimated that we reached the full bw of the pipe then we enter PROBE_BW;
 * otherwise we enter STARTUP to try to fill the pipe.
 *
 * BBR is described in detail in:
 *   "BBR: Congestion-Based Congestion Control",
 *   Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh,
 *   Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016.
 *
 * NOTE: BBR requires pacing, either via the fq qdisc ("man tc-fq") or via the
 * TCP stack's internal per-socket pacing fallback.
 */
#include <linux/module.h>
#include <net/tcp.h>
#include <linux/inet_diag.h>
#include <linux/inet.h>
#include <linux/random.h>
#include <linux/win_minmax.h>

/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth
 * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps.
 * This handles bandwidths from 71.5 Kbps to 256 Tbps.
 */
#define BW_SCALE 24
#define BW_UNIT (1 << BW_SCALE)

#define BBR_SCALE 8	/* scaling factor for fractions in BBR (e.g. gains) */
#define BBR_UNIT (1 << BBR_SCALE)

/* BBR has the following modes for deciding how fast to send: */
enum bbr_mode {
	BBR_STARTUP,	/* ramp up sending rate rapidly to fill pipe */
	BBR_DRAIN,	/* drain any queue created during startup */
	BBR_PROBE_BW,	/* discover, share bw: pace around estimated bw */
	BBR_PROBE_RTT,	/* cut inflight to min to probe min_rtt */
};

/* BBR congestion control block */
struct bbr {
	u32	min_rtt_us;	        /* min RTT in min_rtt_win_sec window */
	u32	min_rtt_stamp;	        /* timestamp of min_rtt_us */
	u32	probe_rtt_done_stamp;   /* end time for BBR_PROBE_RTT mode */
	struct minmax bw;	/* Max recent delivery rate in pkts/uS << 24 */
	u32	rtt_cnt;	    /* count of packet-timed rounds elapsed */
	u32	next_rtt_delivered; /* scb->tx.delivered at end of round */
	u64	cycle_mstamp;	     /* time of this cycle phase start */
	u32	mode:3,		     /* current bbr_mode in state machine */
		prev_ca_state:3,     /* CA state on previous ACK */
		packet_conservation:1,  /* use packet conservation? */
		round_start:1,	     /* start of packet-timed tx->ack round? */
		idle_restart:1,	     /* restarting after idle? */
		probe_rtt_round_done:1,  /* a BBR_PROBE_RTT round at 4 pkts? */
		unused:13,
		lt_is_sampling:1,    /* taking long-term ("LT") samples now? */
		lt_rtt_cnt:7,	     /* round trips in long-term interval */
		lt_use_bw:1;	     /* use lt_bw as our bw estimate? */
	u32	lt_bw;		     /* LT est delivery rate in pkts/uS << 24 */
	u32	lt_last_delivered;   /* LT intvl start: tp->delivered */
	u32	lt_last_stamp;	     /* LT intvl start: tp->delivered_mstamp */
	u32	lt_last_lost;	     /* LT intvl start: tp->lost */
	u32	pacing_gain:10,	/* current gain for setting pacing rate */
		cwnd_gain:10,	/* current gain for setting cwnd */
		full_bw_reached:1,   /* reached full bw in Startup? */
		full_bw_cnt:2,	/* number of rounds without large bw gains */
		cycle_idx:3,	/* current index in pacing_gain cycle array */
		has_seen_rtt:1, /* have we seen an RTT sample yet? */
		unused_b:5;
	u32	prior_cwnd;	/* prior cwnd upon entering loss recovery */
	u32	full_bw;	/* recent bw, to estimate if pipe is full */
};

#define CYCLE_LEN	8	/* number of phases in a pacing gain cycle */

/* Window length of bw filter (in rounds): */
static const int bbr_bw_rtts = CYCLE_LEN + 2;
/* Window length of min_rtt filter (in sec): */
static const u32 bbr_min_rtt_win_sec = 10;
/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */
static const u32 bbr_probe_rtt_mode_ms = 200;
/* Skip TSO below the following bandwidth (bits/sec): */
static const int bbr_min_tso_rate = 1200000;

/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
 * that will allow a smoothly increasing pacing rate that will double each RTT
 * and send the same number of packets per RTT that an un-paced, slow-starting
 * Reno or CUBIC flow would.
 */
static const int bbr_high_gain  = BBR_UNIT * 2885 / 1000 + 1;
/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain
 * the queue created in BBR_STARTUP in a single round.
 */
static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885;
/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs: */
static const int bbr_cwnd_gain  = BBR_UNIT * 2;
/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */
static const int bbr_pacing_gain[] = {
	BBR_UNIT * 5 / 4,	/* probe for more available bw */
	BBR_UNIT * 3 / 4,	/* drain queue and/or yield bw to other flows */
	BBR_UNIT, BBR_UNIT, BBR_UNIT,	/* cruise at 1.0*bw to utilize pipe, */
	BBR_UNIT, BBR_UNIT, BBR_UNIT	/* without creating excess queue... */
};
/* Randomize the starting gain cycling phase over N phases: */
static const u32 bbr_cycle_rand = 7;

/* Try to keep at least this many packets in flight, if things go smoothly. For
 * smooth functioning, a sliding window protocol ACKing every other packet
 * needs at least 4 packets in flight.
 */
static const u32 bbr_cwnd_min_target = 4;

/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe:
 * if bw has increased significantly (1.25x), there may be more bw available:
 */
static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4;
/* But after 3 rounds w/o significant bw growth, estimate pipe is full: */
static const u32 bbr_full_bw_cnt = 3;

/* "long-term" ("LT") bandwidth estimator parameters: */
/* The minimum number of rounds in an LT bw sampling interval: */
static const u32 bbr_lt_intvl_min_rtts = 4;
/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */
static const u32 bbr_lt_loss_thresh = 50;
/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */
static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8;
/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */
static const u32 bbr_lt_bw_diff = 4000 / 8;
/* If we estimate we're policed, use lt_bw for this many round trips: */
static const u32 bbr_lt_bw_max_rtts = 48;

static void bbr_check_probe_rtt_done(struct sock *sk);

/* Do we estimate that STARTUP filled the pipe? */
static bool bbr_full_bw_reached(const struct sock *sk)
{
	const struct bbr *bbr = inet_csk_ca(sk);

	return bbr->full_bw_reached;
}

/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
static u32 bbr_max_bw(const struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	return minmax_get(&bbr->bw);
}

/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */
static u32 bbr_bw(const struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk);
}

/* Return rate in bytes per second, optionally with a gain.
 * The order here is chosen carefully to avoid overflow of u64. This should
 * work for input rates of up to 2.9Tbit/sec and gain of 2.89x.
 */
static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
{
	rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache);
	rate *= gain;
	rate >>= BBR_SCALE;
	rate *= USEC_PER_SEC;
	return rate >> BW_SCALE;
}

/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
{
	u64 rate = bw;

	rate = bbr_rate_bytes_per_sec(sk, rate, gain);
	rate = min_t(u64, rate, sk->sk_max_pacing_rate);
	return rate;
}
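
/* Illustrative example (not part of the original source): with an estimated
 * bw of 0.1 pkt/usec (stored internally as 0.1 << BW_SCALE ~= 1677722), a
 * 1500-byte MTU and gain == BBR_UNIT (1.0), bbr_rate_bytes_per_sec() yields
 * roughly 1677722 * 1500 * 1e6 >> 24 ~= 150,000,000 bytes/sec (~1.2 Gbit/s),
 * i.e. 0.1 pkt/usec * 1500 bytes/pkt. bbr_bw_to_pacing_rate() then clamps
 * the result to sk_max_pacing_rate.
 */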

/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u64 bw;
	u32 rtt_us;

	if (tp->srtt_us) {		/* any RTT sample yet? */
		rtt_us = max(tp->srtt_us >> 3, 1U);
		bbr->has_seen_rtt = 1;
	} else {			 /* no RTT sample yet */
		rtt_us = USEC_PER_MSEC;	 /* use nominal default RTT */
	}
	bw = (u64)tp->snd_cwnd * BW_UNIT;
	do_div(bw, rtt_us);
	sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
}

/* Pace using current bw estimate and a gain factor. In order to help drive the
 * network toward lower queues while maintaining high utilization and low
 * latency, the average pacing rate aims to be slightly lower than the
 * estimated bandwidth. This is an important aspect of the design. In this
 * implementation this is achieved by not bumping the pacing rate unless the
 * new rate is faster than the current pacing rate.
 */
static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);

	if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
		bbr_init_pacing_rate_from_rtt(sk);
	if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
		sk->sk_pacing_rate = rate;
}

/* override sysctl_tcp_min_tso_segs */
static u32 bbr_min_tso_segs(struct sock *sk)
{
	return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
}

static u32 bbr_tso_segs_goal(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 segs, bytes;

	/* Sort of tcp_tso_autosize() but ignoring
	 * driver provided sk_gso_max_size.
	 */
	bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift,
		      GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
	segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));

	return min(segs, 0x7FU);
}
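
/* Illustrative example (not part of the original source): at a pacing rate of
 * 150,000,000 bytes/sec and the default sk_pacing_shift of 10, each TSO burst
 * is sized for roughly 1 ms of data: 150000000 >> 10 ~= 146484 bytes. With a
 * 1448-byte MSS that is ~101 segments, well under the 0x7F (127) cap; very
 * slow flows instead fall back to bbr_min_tso_segs() (1 or 2 segments).
 */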

/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
static void bbr_save_cwnd(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
		bbr->prior_cwnd = tp->snd_cwnd;  /* this cwnd is good enough */
	else  /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
		bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
}

static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	if (event == CA_EVENT_TX_START && tp->app_limited) {
		bbr->idle_restart = 1;
		/* Avoid pointless buffer overflows: pace at est. bw if we
		 * don't need more speed (we're restarting from idle and
		 * app-limited).
		 */
		if (bbr->mode == BBR_PROBE_BW)
			bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT);
		else if (bbr->mode == BBR_PROBE_RTT)
			bbr_check_probe_rtt_done(sk);
	}
}

/* Calculate bdp based on min RTT and the estimated bottleneck bandwidth:
 *
 * bdp = ceil(bw * min_rtt * gain)
 *
 * The key factor, gain, controls the amount of queue. While a small gain
 * builds a smaller queue, it becomes more vulnerable to noise in RTT
 * measurements (e.g., delayed ACKs or other ACK compression effects). This
 * noise may cause BBR to under-estimate the rate.
 */
static u32 bbr_bdp(struct sock *sk, u32 bw, int gain)
{
	struct bbr *bbr = inet_csk_ca(sk);
	u32 bdp;
	u64 w;

	/* If we've never had a valid RTT sample, cap cwnd at the initial
	 * default. This should only happen when the connection is not using
	 * TCP timestamps and has retransmitted all of the SYN/SYNACK/data
	 * packets ACKed so far.
	 */
	if (unlikely(bbr->min_rtt_us == ~0U))	 /* no valid RTT samples yet? */
		return TCP_INIT_CWND;  /* be safe: cap at default initial cwnd */

	w = (u64)bw * bbr->min_rtt_us;

	/* Apply a gain to the given value, remove the BW_SCALE shift, and
	 * round the value up to avoid a negative feedback loop.
	 */
	bdp = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;

	return bdp;
}
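
/* Illustrative example (not part of the original source): with bw of
 * 0.1 pkt/usec (0.1 << BW_SCALE), min_rtt_us = 10000 (10 ms) and
 * gain == BBR_UNIT, w ~= 1677722 * 10000 and bdp = ceil(w / BW_UNIT) ~= 1000
 * packets, i.e. one bandwidth-delay product. With bbr_cwnd_gain (2.0) the
 * result would be ~2000 packets; with bbr_high_gain (~2.89) roughly 2890.
 */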

/* To achieve full performance in high-speed paths, we budget enough cwnd to
 * fit full-sized skbs in-flight on both end hosts to fully utilize the path:
 *   - one skb in sending host Qdisc,
 *   - one skb in sending host TSO/GSO engine
 *   - one skb being received by receiver host LRO/GRO/delayed-ACK engine
 * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because
 * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets,
 * which allows 2 outstanding 2-packet sequences, to try to keep pipe
 * full even with ACK-every-other-packet delayed ACKs.
 */
static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd)
{
	struct bbr *bbr = inet_csk_ca(sk);

	/* Allow enough full-sized skbs in flight to utilize end systems. */
	cwnd += 3 * bbr_tso_segs_goal(sk);

	/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
	cwnd = (cwnd + 1) & ~1U;

	/* Ensure gain cycling gets inflight above BDP even for small BDPs. */
	if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == 0)
		cwnd += 2;

	return cwnd;
}

/* An optimization in BBR to reduce losses: On the first round of recovery, we
 * follow the packet conservation principle: send P packets per P packets acked.
 * After that, we slow-start and send at most 2*P packets per P packets acked.
 * After recovery finishes, or upon undo, we restore the cwnd we had when
 * recovery started (capped by the target cwnd based on estimated BDP).
 *
 * TODO(ycheng/ncardwell): implement a rate-based approach.
 */
static bool bbr_set_cwnd_to_recover_or_restore(
	struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
	u32 cwnd = tp->snd_cwnd;

	/* An ACK for P pkts should release at most 2*P packets. We do this
	 * in two steps. First, here we deduct the number of lost packets.
	 * Then, in bbr_set_cwnd() we slow start up toward the target cwnd.
	 */
	if (rs->losses > 0)
		cwnd = max_t(s32, cwnd - rs->losses, 1);

	if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) {
		/* Starting 1st round of Recovery, so do packet conservation. */
		bbr->packet_conservation = 1;
		bbr->next_rtt_delivered = tp->delivered;  /* start round now */
		/* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */
		cwnd = tcp_packets_in_flight(tp) + acked;
	} else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) {
		/* Exiting loss recovery; restore cwnd saved before recovery. */
		cwnd = max(cwnd, bbr->prior_cwnd);
		bbr->packet_conservation = 0;
	}
	bbr->prev_ca_state = state;

	if (bbr->packet_conservation) {
		*new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked);
		return true;	/* yes, using packet conservation */
	}
	*new_cwnd = cwnd;
	return false;
}

/* Find inflight based on min RTT and the estimated bottleneck bandwidth. */
static u32 bbr_inflight(struct sock *sk, u32 bw, int gain)
{
	u32 inflight;

	inflight = bbr_bdp(sk, bw, gain);
	inflight = bbr_quantization_budget(sk, inflight);

	return inflight;
}

/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss
 * has drawn us down below target), or snap down to target if we're above it.
 */
static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
			 u32 acked, u32 bw, int gain)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u32 cwnd = tp->snd_cwnd, target_cwnd = 0;

	if (!acked)
		goto done;  /* no packet fully ACKed; just apply caps */

	if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd))
		goto done;

	/* If we're below target cwnd, slow start cwnd toward target cwnd. */
	target_cwnd = bbr_bdp(sk, bw, gain);
	target_cwnd = bbr_quantization_budget(sk, target_cwnd);
	if (bbr_full_bw_reached(sk))  /* only cut cwnd if we filled the pipe */
		cwnd = min(cwnd + acked, target_cwnd);
	else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND)
		cwnd = cwnd + acked;
	cwnd = max(cwnd, bbr_cwnd_min_target);

done:
	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);	/* apply global cap */
	if (bbr->mode == BBR_PROBE_RTT)  /* drain queue, refresh min_rtt */
		tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target);
}
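
/* Illustrative example (not part of the original source): continuing the BDP
 * example above (~1000 packets), in PROBE_BW the cwnd_gain of 2.0 gives a
 * target_cwnd of ~2000 packets plus the quantization budget. Until the pipe
 * is estimated full, cwnd grows by the number of newly ACKed packets on each
 * ACK; afterwards cwnd is allowed to grow only up to that target.
 */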

/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
static bool bbr_is_next_cycle_phase(struct sock *sk,
				    const struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	bool is_full_length =
		tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) >
		bbr->min_rtt_us;
	u32 inflight, bw;

	/* A pacing_gain of 1.0 paces at the estimated bw to try to fully
	 * use the pipe without increasing the queue.
	 */
	if (bbr->pacing_gain == BBR_UNIT)
		return is_full_length;		/* just use wall clock time */

	inflight = rs->prior_in_flight;  /* what was in-flight before ACK? */
	bw = bbr_max_bw(sk);

	/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
	 * least pacing_gain * BDP; this may take more than min_rtt if min_rtt
	 * is small (e.g. on a LAN). We do not persist if packets are lost,
	 * since a path with small buffers may not hold that much.
	 */
	if (bbr->pacing_gain > BBR_UNIT)
		return is_full_length &&
			(rs->losses ||  /* perhaps pacing_gain*BDP won't fit */
			 inflight >= bbr_inflight(sk, bw, bbr->pacing_gain));

	/* A pacing_gain < 1.0 tries to drain extra queue we added if bw
	 * probing didn't find more bw. If inflight falls to match BDP then we
	 * estimate queue is drained; persisting would underutilize the pipe.
	 */
	return is_full_length ||
		inflight <= bbr_inflight(sk, bw, BBR_UNIT);
}

static void bbr_advance_cycle_phase(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
	bbr->cycle_mstamp = tp->delivered_mstamp;
}

/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
static void bbr_update_cycle_phase(struct sock *sk,
				   const struct rate_sample *rs)
{
	struct bbr *bbr = inet_csk_ca(sk);

	if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs))
		bbr_advance_cycle_phase(sk);
}

static void bbr_reset_startup_mode(struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->mode = BBR_STARTUP;
}

static void bbr_reset_probe_bw_mode(struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->mode = BBR_PROBE_BW;
	bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
	bbr_advance_cycle_phase(sk);	/* flip to next phase of gain cycle */
}

static void bbr_reset_mode(struct sock *sk)
{
	if (!bbr_full_bw_reached(sk))
		bbr_reset_startup_mode(sk);
	else
		bbr_reset_probe_bw_mode(sk);
}

/* Start a new long-term sampling interval. */
static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC);
	bbr->lt_last_delivered = tp->delivered;
	bbr->lt_last_lost = tp->lost;
	bbr->lt_rtt_cnt = 0;
}

/* Completely reset long-term bandwidth sampling. */
static void bbr_reset_lt_bw_sampling(struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->lt_bw = 0;
	bbr->lt_use_bw = 0;
	bbr->lt_is_sampling = false;
	bbr_reset_lt_bw_sampling_interval(sk);
}

/* Long-term bw sampling interval is done. Estimate whether we're policed. */
static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw)
{
	struct bbr *bbr = inet_csk_ca(sk);
	u32 diff;

	if (bbr->lt_bw) {  /* do we have bw from a previous interval? */
		/* Is new bw close to the lt_bw from the previous interval? */
		diff = abs(bw - bbr->lt_bw);
		if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) ||
		    (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <=
		     bbr_lt_bw_diff)) {
			/* All criteria are met; estimate we're policed. */
			bbr->lt_bw = (bw + bbr->lt_bw) >> 1;  /* avg 2 intvls */
			bbr->lt_use_bw = 1;
			bbr->pacing_gain = BBR_UNIT;  /* try to avoid drops */
			bbr->lt_rtt_cnt = 0;
			return;
		}
	}
	bbr->lt_bw = bw;
	bbr_reset_lt_bw_sampling_interval(sk);
}
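
/* Illustrative example (not part of the original source): two consecutive
 * lossy sampling intervals are treated as "consistent" (i.e. likely policed)
 * when their bw samples differ by at most 1/8 (bbr_lt_bw_ratio, ~12.5%) or by
 * at most bbr_lt_bw_diff = 4000/8 bytes/sec (~4 Kbit/sec). For instance,
 * intervals measuring roughly 1.00 and 1.10 Mbit/sec match on the ratio test,
 * so BBR would then pace at their average for up to bbr_lt_bw_max_rtts (48)
 * round trips before re-probing.
 */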

/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of
 * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and
 * explicitly models their policed rate, to reduce unnecessary losses. We
 * estimate we're policed if we see 2 consecutive sampling intervals with
 * consistent throughput and high packet loss. If we think we're being policed,
 * set lt_bw to the "long-term" average delivery rate from those 2 intervals.
 */
static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u32 lost, delivered;
	u64 bw;
	u32 t;

	if (bbr->lt_use_bw) {	/* already using long-term rate, lt_bw? */
		if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
		    ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) {
			bbr_reset_lt_bw_sampling(sk);    /* stop using lt_bw */
			bbr_reset_probe_bw_mode(sk);  /* restart gain cycling */
		}
		return;
	}

	/* Wait for the first loss before sampling, to let the policer exhaust
	 * its tokens and estimate the steady-state rate allowed by the policer.
	 * Starting samples earlier includes bursts that over-estimate the bw.
	 */
	if (!bbr->lt_is_sampling) {
		if (!rs->losses)
			return;
		bbr_reset_lt_bw_sampling_interval(sk);
		bbr->lt_is_sampling = true;
	}

	/* To avoid underestimates, reset sampling if we run out of data. */
	if (rs->is_app_limited) {
		bbr_reset_lt_bw_sampling(sk);
		return;
	}

	if (bbr->round_start)
		bbr->lt_rtt_cnt++;	/* count round trips in this interval */
	if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts)
		return;		/* sampling interval needs to be long enough */
	if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) {
		bbr_reset_lt_bw_sampling(sk);  /* interval is too long */
		return;
	}

	/* End sampling interval when a packet is lost, so we estimate the
	 * policer tokens were exhausted. Stopping the sampling before the
	 * tokens are exhausted under-estimates the policed rate.
	 */
	if (!rs->losses)
		return;

	/* Calculate packets lost and delivered in sampling interval. */
	lost = tp->lost - bbr->lt_last_lost;
	delivered = tp->delivered - bbr->lt_last_delivered;
	/* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */
	if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered)
		return;

	/* Find average delivery rate in this sampling interval. */
	t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp;
	if ((s32)t < 1)
		return;		/* interval is less than one ms, so wait */
	/* Check if we can multiply without overflow: */
	if (t >= ~0U / USEC_PER_MSEC) {
		bbr_reset_lt_bw_sampling(sk);  /* interval too long; reset */
		return;
	}
	t *= USEC_PER_MSEC;
	bw = (u64)delivered * BW_UNIT;
	do_div(bw, t);
	bbr_lt_bw_interval_done(sk, bw);
}

/* Estimate the bandwidth based on how fast packets are delivered */
static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	u64 bw;

	bbr->round_start = 0;
	if (rs->delivered < 0 || rs->interval_us <= 0)
		return; /* Not a valid observation */

	/* See if we've reached the next RTT */
	if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) {
		bbr->next_rtt_delivered = tp->delivered;
		bbr->rtt_cnt++;
		bbr->round_start = 1;
		bbr->packet_conservation = 0;
	}

	bbr_lt_bw_sampling(sk, rs);

	/* Divide delivered by the interval to find a (lower bound) bottleneck
	 * bandwidth sample. Delivered is in packets and interval_us in uS and
	 * ratio will be <<1 for most connections. So delivered is first scaled.
	 */
	bw = (u64)rs->delivered * BW_UNIT;
	do_div(bw, rs->interval_us);

	/* If this sample is application-limited, it is likely to have a very
	 * low delivered count that represents application behavior rather than
	 * the available network rate. Such a sample could drag down estimated
	 * bw, causing needless slow-down. Thus, to continue to send at the
	 * last measured network rate, we filter out app-limited samples unless
	 * they describe the path bw at least as well as our bw model.
	 *
	 * So the goal during app-limited phase is to proceed with the best
	 * network rate no matter how long. We automatically leave this
	 * phase when app writes faster than the network can deliver :)
	 */
	if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) {
		/* Incorporate new sample into our max bw filter. */
		minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw);
	}
}
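
/* Illustrative example (not part of the original source): an ACK that reports
 * rs->delivered = 10 packets over rs->interval_us = 1000 yields a sample of
 * bw = (10 << BW_SCALE) / 1000 ~= 167772, i.e. 0.01 pkt/usec. With 1500-byte
 * packets that is ~15 MB/sec (~120 Mbit/sec). The max filter then keeps the
 * largest such sample seen over the last bbr_bw_rtts (10) packet-timed rounds.
 */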

/* Estimate when the pipe is full, using the change in delivery rate: BBR
 * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by
 * at least 25 percent after 3 non-app-limited rounds. Why 3 rounds: 1: rwin
 * autotuning grows the rwin, 2: we fill the higher rwin, 3: we get higher
 * delivery rate samples. Or transient cross-traffic or radio noise can go
 * away. CUBIC Hystart shares a similar design goal, but uses delay and
 * inter-ACK spacing instead of bandwidth.
 */
static void bbr_check_full_bw_reached(struct sock *sk,
				      const struct rate_sample *rs)
{
	struct bbr *bbr = inet_csk_ca(sk);
	u32 bw_thresh;

	if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited)
		return;

	bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE;
	if (bbr_max_bw(sk) >= bw_thresh) {
		bbr->full_bw = bbr_max_bw(sk);
		bbr->full_bw_cnt = 0;
		return;
	}
	++bbr->full_bw_cnt;
	bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt;
}
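
/* Illustrative example (not part of the original source): suppose the max bw
 * filter reads 100, 128, 130, 131 (in bw units) at the starts of successive
 * rounds. 128 >= 1.25 * 100, so the baseline advances and full_bw_cnt stays 0.
 * But 130 and 131 are both below 1.25 * 128 = 160, so full_bw_cnt reaches 2;
 * one more round without 25% growth makes it bbr_full_bw_cnt (3) and BBR
 * declares the pipe full, ending STARTUP.
 */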

/* If pipe is probably full, drain the queue and then enter steady-state. */
static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
{
	struct bbr *bbr = inet_csk_ca(sk);

	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
		bbr->mode = BBR_DRAIN;	/* drain queue we created */
		tcp_sk(sk)->snd_ssthresh =
				bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT);
	}	/* fall through to check if in-flight is already small: */
	if (bbr->mode == BBR_DRAIN &&
	    tcp_packets_in_flight(tcp_sk(sk)) <=
	    bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT))
		bbr_reset_probe_bw_mode(sk);  /* we estimate queue is drained */
}

static void bbr_check_probe_rtt_done(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	if (!(bbr->probe_rtt_done_stamp &&
	      after(tcp_jiffies32, bbr->probe_rtt_done_stamp)))
		return;

	bbr->min_rtt_stamp = tcp_jiffies32;  /* wait a while until PROBE_RTT */
	tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd);
	bbr_reset_mode(sk);
}

/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and
 * periodically drain the bottleneck queue, to converge to measure the true
 * min_rtt (unloaded propagation delay). This allows the flows to keep queues
 * small (reducing queuing delay and packet loss) and achieve fairness among
 * BBR flows.
 *
 * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires,
 * a flow enters PROBE_RTT mode and caps the cwnd at bbr_cwnd_min_target=4
 * packets. After at least bbr_probe_rtt_mode_ms=200ms and at least one
 * packet-timed round trip has elapsed with that flight size <= 4, it leaves
 * PROBE_RTT and re-enters the previous mode. BBR uses 200ms to approximately
 * bound the performance penalty of PROBE_RTT's cwnd capping to roughly 2%
 * (200ms/10s).
 *
 * A flow enters PROBE_RTT only if the min_rtt filter has expired and it is
 * not restarting from idle. Because PROBE_RTT drains the bottleneck queue,
 * flows sharing a bottleneck tend to see their filters expire around the same
 * time and enter PROBE_RTT together, which helps each of them observe the
 * unloaded propagation delay.
 */
static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);
	bool filter_expired;

	/* Track min RTT seen in the min_rtt_win_sec filter window: */
	filter_expired = after(tcp_jiffies32,
			       bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
	if (rs->rtt_us >= 0 &&
	    (rs->rtt_us <= bbr->min_rtt_us ||
	     (filter_expired && !rs->is_ack_delayed))) {
		bbr->min_rtt_us = rs->rtt_us;
		bbr->min_rtt_stamp = tcp_jiffies32;
	}

	if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
	    !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
		bbr->mode = BBR_PROBE_RTT;  /* dip, drain queue */
		bbr_save_cwnd(sk);  /* note cwnd so we can restore it */
		bbr->probe_rtt_done_stamp = 0;
	}

	if (bbr->mode == BBR_PROBE_RTT) {
		/* Ignore low rate samples during this mode. */
		tp->app_limited =
			(tp->delivered + tcp_packets_in_flight(tp)) ? : 1;
		/* Maintain min packets in flight for max(200 ms, 1 round). */
		if (!bbr->probe_rtt_done_stamp &&
		    tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) {
			bbr->probe_rtt_done_stamp = tcp_jiffies32 +
				msecs_to_jiffies(bbr_probe_rtt_mode_ms);
			bbr->probe_rtt_round_done = 0;
			bbr->next_rtt_delivered = tp->delivered;
		} else if (bbr->probe_rtt_done_stamp) {
			if (bbr->round_start)
				bbr->probe_rtt_round_done = 1;
			if (bbr->probe_rtt_round_done)
				bbr_check_probe_rtt_done(sk);
		}
	}
	/* Restart after idle ends only once we process a new S/ACK for data */
	if (rs->delivered > 0)
		bbr->idle_restart = 0;
}
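
/* Illustrative example (not part of the original source): a flow whose min_rtt
 * estimate has not been refreshed for 10 seconds (bbr_min_rtt_win_sec) enters
 * PROBE_RTT, caps cwnd at 4 packets, and once inflight has fallen to <= 4 it
 * holds there for max(200 ms, one packet-timed round trip) before restoring
 * the saved cwnd and returning to PROBE_BW (or STARTUP if the pipe was never
 * estimated to be full). The 200 ms / 10 s duty cycle bounds the throughput
 * cost of PROBE_RTT to roughly 2%.
 */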

static void bbr_update_gains(struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	switch (bbr->mode) {
	case BBR_STARTUP:
		bbr->pacing_gain = bbr_high_gain;
		bbr->cwnd_gain	 = bbr_high_gain;
		break;
	case BBR_DRAIN:
		bbr->pacing_gain = bbr_drain_gain;
		bbr->cwnd_gain	 = bbr_high_gain;
		break;
	case BBR_PROBE_BW:
		bbr->pacing_gain = (bbr->lt_use_bw ?
				    BBR_UNIT :
				    bbr_pacing_gain[bbr->cycle_idx]);
		bbr->cwnd_gain	 = bbr_cwnd_gain;
		break;
	case BBR_PROBE_RTT:
		bbr->pacing_gain = BBR_UNIT;
		bbr->cwnd_gain	 = BBR_UNIT;
		break;
	default:
		WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode);
		break;
	}
}

static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
{
	bbr_update_bw(sk, rs);
	bbr_update_cycle_phase(sk, rs);
	bbr_check_full_bw_reached(sk, rs);
	bbr_check_drain(sk, rs);
	bbr_update_min_rtt(sk, rs);
	bbr_update_gains(sk);
}

static void bbr_main(struct sock *sk, const struct rate_sample *rs)
{
	struct bbr *bbr = inet_csk_ca(sk);
	u32 bw;

	bbr_update_model(sk, rs);

	bw = bbr_bw(sk);
	bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}

static void bbr_init(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->prior_cwnd = 0;
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	bbr->rtt_cnt = 0;
	bbr->next_rtt_delivered = 0;
	bbr->prev_ca_state = TCP_CA_Open;
	bbr->packet_conservation = 0;

	bbr->probe_rtt_done_stamp = 0;
	bbr->probe_rtt_round_done = 0;
	bbr->min_rtt_us = tcp_min_rtt(tp);
	bbr->min_rtt_stamp = tcp_jiffies32;

	minmax_reset(&bbr->bw, bbr->rtt_cnt, 0);  /* init max bw to 0 */

	bbr->has_seen_rtt = 0;
	bbr_init_pacing_rate_from_rtt(sk);

	bbr->round_start = 0;
	bbr->idle_restart = 0;
	bbr->full_bw_reached = 0;
	bbr->full_bw = 0;
	bbr->full_bw_cnt = 0;
	bbr->cycle_mstamp = 0;
	bbr->cycle_idx = 0;
	bbr_reset_lt_bw_sampling(sk);
	bbr_reset_startup_mode(sk);

	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
}

static u32 bbr_sndbuf_expand(struct sock *sk)
{
	/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
	return 3;
}

/* In theory BBR does not need to undo the cwnd since it does not
 * always reduce cwnd on losses (see bbr_main()). Keep it for now.
 */
static u32 bbr_undo_cwnd(struct sock *sk)
{
	struct bbr *bbr = inet_csk_ca(sk);

	bbr->full_bw = 0;   /* spurious slow-down; reset full pipe detection */
	bbr->full_bw_cnt = 0;
	bbr_reset_lt_bw_sampling(sk);
	return tcp_sk(sk)->snd_cwnd;
}

/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
static u32 bbr_ssthresh(struct sock *sk)
{
	bbr_save_cwnd(sk);
	return tcp_sk(sk)->snd_ssthresh;
}

static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
			   union tcp_cc_info *info)
{
	if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
	    ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct bbr *bbr = inet_csk_ca(sk);
		u64 bw = bbr_bw(sk);

		bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE;
		memset(&info->bbr, 0, sizeof(info->bbr));
		info->bbr.bbr_bw_lo		= (u32)bw;
		info->bbr.bbr_bw_hi		= (u32)(bw >> 32);
		info->bbr.bbr_min_rtt		= bbr->min_rtt_us;
		info->bbr.bbr_pacing_gain	= bbr->pacing_gain;
		info->bbr.bbr_cwnd_gain		= bbr->cwnd_gain;
		*attr = INET_DIAG_BBRINFO;
		return sizeof(info->bbr);
	}
	return 0;
}

static void bbr_set_state(struct sock *sk, u8 new_state)
{
	struct bbr *bbr = inet_csk_ca(sk);

	if (new_state == TCP_CA_Loss) {
		struct rate_sample rs = { .losses = 1 };

		bbr->prev_ca_state = TCP_CA_Loss;
		bbr->full_bw = 0;
		bbr->round_start = 1;	/* treat RTO like end of a round */
		bbr_lt_bw_sampling(sk, &rs);
	}
}

static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
	.flags		= TCP_CONG_NON_RESTRICTED,
	.name		= "bbr",
	.owner		= THIS_MODULE,
	.init		= bbr_init,
	.cong_control	= bbr_main,
	.sndbuf_expand	= bbr_sndbuf_expand,
	.undo_cwnd	= bbr_undo_cwnd,
	.cwnd_event	= bbr_cwnd_event,
	.ssthresh	= bbr_ssthresh,
	.min_tso_segs	= bbr_min_tso_segs,
	.get_info	= bbr_get_info,
	.set_state	= bbr_set_state,
};

static int __init bbr_register(void)
{
	BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
	return tcp_register_congestion_control(&tcp_bbr_cong_ops);
}

static void __exit bbr_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
}

module_init(bbr_register);
module_exit(bbr_unregister);

MODULE_AUTHOR("Van Jacobson <vanj@google.com>");
MODULE_AUTHOR("Neal Cardwell <ncardwell@google.com>");
MODULE_AUTHOR("Yuchung Cheng <ycheng@google.com>");
MODULE_AUTHOR("Soheil Hassas Yeganeh <soheil@google.com>");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)");