/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __BPF_TCP_HELPERS_H
#define __BPF_TCP_HELPERS_H

#include <stdbool.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>

#define BPF_STRUCT_OPS(name, args...) \
SEC("struct_ops/"#name) \
BPF_PROG(name, args)
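
/*
 * Example (sketch, disabled by default; the guard macro and the
 * "example_*" names below are hypothetical): a congestion-control
 * callback is defined with BPF_STRUCT_OPS, which expands to a
 * SEC("struct_ops/<name>") annotation plus a BPF_PROG wrapper that
 * unpacks the raw BPF context into typed arguments.
 */
#ifdef BPF_TCP_HELPERS_EXAMPLES
struct sock;	/* completed later in this header */

void BPF_STRUCT_OPS(example_init, struct sock *sk)
{
	/* Per-connection private state would be initialized here. */
}
#endif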

/* BPF-side stand-in for the kernel's tcp_jiffies32 clock. */
#define tcp_jiffies32 ((__u32)bpf_jiffies64())

/*
 * Minimal CO-RE mirrors of the kernel structures: only the fields used
 * by BPF congestion-control programs are declared, and the
 * preserve_access_index attribute relocates each access against the
 * running kernel's actual layout at load time.
 */
struct sock_common {
	unsigned char	skc_state;
	__u16		skc_num;
} __attribute__((preserve_access_index));

enum sk_pacing {
	SK_PACING_NONE		= 0,
	SK_PACING_NEEDED	= 1,
	SK_PACING_FQ		= 2,
};

struct sock {
	struct sock_common	__sk_common;
	unsigned long		sk_pacing_rate;
	__u32			sk_pacing_status; /* see enum sk_pacing */
} __attribute__((preserve_access_index));

struct inet_sock {
	struct sock	sk;
} __attribute__((preserve_access_index));

struct inet_connection_sock {
	struct inet_sock	  icsk_inet;
	__u8			  icsk_ca_state:6,
				  icsk_ca_setsockopt:1,
				  icsk_ca_dst_locked:1;
	struct {
		__u8		  pending;
	} icsk_ack;
	__u64			  icsk_ca_priv[104 / sizeof(__u64)]; /* CA private data; see inet_csk_ca() */
} __attribute__((preserve_access_index));

struct request_sock {
	struct sock_common	__req_common;
} __attribute__((preserve_access_index));

struct tcp_sock {
	struct inet_connection_sock	inet_conn;

	__u32	rcv_nxt;
	__u32	snd_nxt;
	__u32	snd_una;
	__u8	ecn_flags;
	__u32	delivered;
	__u32	delivered_ce;
	__u32	snd_cwnd;
	__u32	snd_cwnd_cnt;
	__u32	snd_cwnd_clamp;
	__u32	snd_ssthresh;
	__u8	syn_data:1,	/* SYN includes data */
		syn_fastopen:1,	/* SYN includes Fast Open option */
		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
		syn_fastopen_ch:1, /* Active TFO re-enabling probe */
		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
		save_syn:1,	/* Save headers of SYN packet */
		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
		syn_smc:1;	/* SYN includes SMC */
	__u32	max_packets_out;
	__u32	lsndtime;
	__u32	prior_cwnd;
	__u64	tcp_mstamp;	/* most recent packet received/sent */
} __attribute__((preserve_access_index));

/* The casts below are safe because struct tcp_sock and struct
 * inet_connection_sock both begin with the embedded struct sock, so the
 * pointer value itself never changes.
 */
static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	return (struct inet_connection_sock *)sk;
}

static __always_inline void *inet_csk_ca(const struct sock *sk)
{
	return (void *)inet_csk(sk)->icsk_ca_priv;
}
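
/*
 * Example (sketch, disabled by default; the guard macro and all
 * "example_*" names are hypothetical): an algorithm keeps its
 * per-connection state in icsk_ca_priv, reached via inet_csk_ca().
 * The only hard constraint is that the state fits the 104-byte area.
 */
#ifdef BPF_TCP_HELPERS_EXAMPLES
struct example_ca {
	__u32	start_seq;	/* snd_nxt when the algorithm took over */
	__u32	epoch_start;	/* tcp_jiffies32 at the current epoch */
};

static __always_inline struct example_ca *example_ca(struct sock *sk)
{
	return inet_csk_ca(sk);	/* points into icsk_ca_priv */
}
#endif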

static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	return (struct tcp_sock *)sk;
}

static __always_inline bool before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1 - seq2) < 0;
}
#define after(seq2, seq1)	before(seq1, seq2)
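
/*
 * Worked example: before()/after() compare sequence numbers modulo 2^32,
 * so they stay correct across wraparound. With seq1 = 0xfffffff0 and
 * seq2 = 0x10, seq1 - seq2 = 0xffffffe0, which is negative as an __s32,
 * so before(0xfffffff0, 0x10) is true even though seq1 > seq2 as plain
 * unsigned integers.
 */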

#define TCP_ECN_OK		1
#define TCP_ECN_QUEUE_CWR	2
#define TCP_ECN_DEMAND_CWR	4
#define TCP_ECN_SEEN		8

enum inet_csk_ack_state_t {
	ICSK_ACK_SCHED	= 1,
	ICSK_ACK_TIMER	= 2,
	ICSK_ACK_PUSHED	= 4,
	ICSK_ACK_PUSHED2 = 8,
	ICSK_ACK_NOW	= 16	/* Send the next ACK immediately (once) */
};

enum tcp_ca_event {
	CA_EVENT_TX_START = 0,		/* first transmit when no packets in flight */
	CA_EVENT_CWND_RESTART = 1,	/* congestion window restart */
	CA_EVENT_COMPLETE_CWR = 2,	/* end of congestion recovery */
	CA_EVENT_LOSS = 3,		/* loss timeout */
	CA_EVENT_ECN_NO_CE = 4,		/* ECT set, but not CE marked */
	CA_EVENT_ECN_IS_CE = 5,		/* received CE marked IP packet */
};

struct ack_sample {
	__u32	pkts_acked;
	__s32	rtt_us;		/* RTT sample in usec, or -1 if unavailable */
	__u32	in_flight;
} __attribute__((preserve_access_index));

struct rate_sample {
	__u64	prior_mstamp;	/* starting timestamp for interval */
	__u32	prior_delivered; /* tp->delivered at "prior_mstamp" */
	__s32	delivered;	/* number of packets delivered over interval */
	long	interval_us;	/* time for tp->delivered to incr "delivered" */
	__u32	snd_interval_us; /* snd interval for delivered packets */
	__u32	rcv_interval_us; /* rcv interval for delivered packets */
	long	rtt_us;		/* RTT of last (S)ACKed packet (or -1) */
	int	losses;		/* number of packets marked lost upon ACK */
	__u32	acked_sacked;	/* number of packets newly (S)ACKed upon ACK */
	__u32	prior_in_flight; /* in flight before this ACK */
	bool	is_app_limited;	/* is sample from packet with bubble in pipe? */
	bool	is_retrans;	/* is sample from retransmission? */
	bool	is_ack_delayed;	/* is this (likely) a delayed ACK? */
} __attribute__((preserve_access_index));
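
/*
 * Example (sketch, disabled by default; names are hypothetical): a
 * cong_control-style algorithm can turn a rate_sample into a delivery
 * rate, i.e. packets delivered over the sampling interval. A real
 * implementation would also filter samples and treat app-limited ones
 * specially.
 */
#ifdef BPF_TCP_HELPERS_EXAMPLES
static __always_inline __u64 example_delivery_rate(const struct rate_sample *rs)
{
	/* Reject empty or invalid samples. */
	if (rs->delivered <= 0 || rs->interval_us <= 0)
		return 0;

	/* Packets per second: delivered * USEC_PER_SEC / interval_us. */
	return (__u64)rs->delivered * 1000000 / rs->interval_us;
}
#endif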

#define TCP_CA_NAME_MAX		16
#define TCP_CONG_NEEDS_ECN	0x2

/*
 * BPF-side view of the kernel's tcp_congestion_ops; a registration
 * sketch that fills it in appears at the end of this header.
 */
struct tcp_congestion_ops {
	char name[TCP_CA_NAME_MAX];
	__u32 flags;

	/* initialize private data (optional) */
	void (*init)(struct sock *sk);
	/* cleanup private data  (optional) */
	void (*release)(struct sock *sk);

	/* return slow start threshold (required) */
	__u32 (*ssthresh)(struct sock *sk);
	/* do new cwnd calculation (required) */
	void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
	/* call before changing ca_state (optional) */
	void (*set_state)(struct sock *sk, __u8 new_state);
	/* call when cwnd event occurs (optional) */
	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
	/* call when ack arrives (optional) */
	void (*in_ack_event)(struct sock *sk, __u32 flags);
	/* new value of cwnd after loss (required) */
	__u32 (*undo_cwnd)(struct sock *sk);
	/* hook for packet ack accounting (optional) */
	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
	/* override sysctl_tcp_min_tso_segs */
	__u32 (*min_tso_segs)(struct sock *sk);
	/* returns the multiplier used in tcp_sndbuf_expand (optional) */
	__u32 (*sndbuf_expand)(struct sock *sk);
	/* call when packets are delivered to update cwnd and pacing rate,
	 * after all the ca_state processing. (optional)
	 */
	void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
};

#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min_not_zero(x, y) ({			\
	typeof(x) __x = (x);			\
	typeof(y) __y = (y);			\
	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })

/* Grow cwnd by one packet per ACKed packet until ssthresh is reached;
 * any leftover ACKs are returned for congestion-avoidance processing.
 */
static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked)
{
	__u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);

	acked -= cwnd - tp->snd_cwnd;
	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);

	return acked;
}

static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
	return tp->snd_cwnd < tp->snd_ssthresh;
}

static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
	if (tcp_in_slow_start(tp))
		return tp->snd_cwnd < 2 * tp->max_packets_out;

	return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
}

/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w),
 * for every packet that was ACKed.
 */
static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked)
{
	/* If credits accumulated at a higher w, apply them gently now. */
	if (tp->snd_cwnd_cnt >= w) {
		tp->snd_cwnd_cnt = 0;
		tp->snd_cwnd++;
	}

	tp->snd_cwnd_cnt += acked;
	if (tp->snd_cwnd_cnt >= w) {
		__u32 delta = tp->snd_cwnd_cnt / w;

		tp->snd_cwnd_cnt -= delta * w;
		tp->snd_cwnd += delta;
	}
	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
}
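
/*
 * Example (sketch, disabled by default; the guard macro and all
 * "example_*" names are hypothetical): the helpers above compose into a
 * Reno-style congestion controller, registered through a ".struct_ops"
 * map that libbpf wires into the kernel at load time. See bpf_cubic.c
 * and bpf_dctcp.c in the kernel selftests for complete algorithms.
 */
#ifdef BPF_TCP_HELPERS_EXAMPLES
void BPF_STRUCT_OPS(example_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tcp_is_cwnd_limited(sk))
		return;

	/* Slow start: one extra packet of cwnd per ACKed packet. */
	if (tcp_in_slow_start(tp)) {
		acked = tcp_slow_start(tp, acked);
		if (!acked)
			return;
	}
	/* Congestion avoidance: one extra packet per cwnd-worth of ACKs. */
	tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
}

__u32 BPF_STRUCT_OPS(example_ssthresh, struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->prior_cwnd = tp->snd_cwnd;		/* saved for undo_cwnd() */
	return max(tp->snd_cwnd >> 1, 2U);	/* halve cwnd, floor of two */
}

__u32 BPF_STRUCT_OPS(example_undo_cwnd, struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	return max(tp->snd_cwnd, tp->prior_cwnd);
}

SEC(".struct_ops")
struct tcp_congestion_ops example_cc = {
	.init		= (void *)example_init,	/* from the sketch up top */
	.ssthresh	= (void *)example_ssthresh,
	.cong_avoid	= (void *)example_cong_avoid,
	.undo_cwnd	= (void *)example_undo_cwnd,
	.name		= "example_cc",
};
#endif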

#endif /* __BPF_TCP_HELPERS_H */