1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#ifndef _TCP_H
19#define _TCP_H
20
21#define FASTRETRANS_DEBUG 1
22
23#include <linux/list.h>
24#include <linux/tcp.h>
25#include <linux/bug.h>
26#include <linux/slab.h>
27#include <linux/cache.h>
28#include <linux/percpu.h>
29#include <linux/skbuff.h>
30#include <linux/cryptohash.h>
31#include <linux/kref.h>
32#include <linux/ktime.h>
33
34#include <net/inet_connection_sock.h>
35#include <net/inet_timewait_sock.h>
36#include <net/inet_hashtables.h>
37#include <net/checksum.h>
38#include <net/request_sock.h>
39#include <net/sock.h>
40#include <net/snmp.h>
41#include <net/ip.h>
42#include <net/tcp_states.h>
43#include <net/inet_ecn.h>
44#include <net/dst.h>
45
46#include <linux/seq_file.h>
47#include <linux/memcontrol.h>
48
49#include <linux/bpf.h>
50#include <linux/filter.h>
51#include <linux/bpf-cgroup.h>
52
/*
 * TCP-wide objects and the TIME-WAIT entry point. Only declarations live
 * here; the definitions are outside this header.
 */
extern struct inet_hashinfo tcp_hashinfo;

extern struct percpu_counter tcp_orphan_count;
void tcp_time_wait(struct sock *sk, int state, int timeo);
57
/*
 * Sizing limits and protocol thresholds.
 * MAX_TCP_HEADER builds on MAX_HEADER, which is defined outside this file.
 * NOTE(review): the meaning of each threshold below is inferred from its
 * name only; confirm against the users before relying on it.
 */
#define MAX_TCP_HEADER (128 + MAX_HEADER)
#define MAX_TCP_OPTION_SPACE 40

/* Window and MSS bounds (unsigned where suffixed with U). */
#define MAX_TCP_WINDOW 32767U

#define TCP_MIN_MSS 88U

#define TCP_BASE_MSS 1024

/* Probing parameters (units are not visible in this header). */
#define TCP_PROBE_INTERVAL 600

#define TCP_PROBE_THRESHOLD 8

#define TCP_FASTRETRANS_THRESH 3

#define TCP_MAX_QUICKACKS 16U

#define TCP_MAX_WSCALE 14U

/* Urgent-data state bits; the three values are distinct single bits. */
#define TCP_URG_VALID 0x0100
#define TCP_URG_NOTYET 0x0200
#define TCP_URG_READ 0x0400
92
/*
 * Retry counts and TIME-WAIT / FIN timeouts.
 * NOTE(review): presumably these are compile-time defaults for the
 * corresponding tunables; confirm where they are consumed.
 */
#define TCP_RETR1 3

#define TCP_RETR2 15

#define TCP_SYN_RETRIES 6

#define TCP_SYNACK_RETRIES 5

/* 60 * HZ ticks; TCP_FIN_TIMEOUT is an alias for the same value. */
#define TCP_TIMEWAIT_LEN (60*HZ)

#define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN
126
127
128
129
130
131
/*
 * Timer bounds, all scaled by HZ and cast to unsigned.
 * For HZ < 100 the quotient HZ/25 would be below 4, so the delayed-ACK and
 * ATO minimums fall back to a fixed 4 ticks in that configuration.
 */
#define TCP_DELACK_MAX ((unsigned)(HZ/5))
#if HZ >= 100
#define TCP_DELACK_MIN ((unsigned)(HZ/25))
#define TCP_ATO_MIN ((unsigned)(HZ/25))
#else
#define TCP_DELACK_MIN 4U
#define TCP_ATO_MIN 4U
#endif
/* Retransmission timeout is clamped to TCP_RTO_MAX by tcp_bound_rto(). */
#define TCP_RTO_MAX ((unsigned)(120*HZ))
#define TCP_RTO_MIN ((unsigned)(HZ/5))
#define TCP_TIMEOUT_MIN (2U)
#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))
#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))
145
146
147
148
149
150
/* Resource-probe interval: HZ/2 ticks. */
#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U))

/* Keepalive defaults, scaled by HZ where applicable. */
#define TCP_KEEPALIVE_TIME (120*60*HZ)
#define TCP_KEEPALIVE_PROBES 9
#define TCP_KEEPALIVE_INTVL (75*HZ)

/*
 * Upper limits for the keepalive and SYN-count settings.
 * NOTE(review): presumably enforced where socket options are set; confirm.
 */
#define MAX_TCP_KEEPIDLE 32767
#define MAX_TCP_KEEPINTVL 32767
#define MAX_TCP_KEEPCNT 127
#define MAX_TCP_SYNCNT 127

#define TCP_SYNQ_INTERVAL (HZ/5)

/* PAWS constants; TCP_PAWS_24DAYS evaluates to the seconds in 24 days. */
#define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)
#define TCP_PAWS_MSL 60

#define TCP_PAWS_WINDOW 1
173
174
175
176
177
178
179
/* TCP option kind codes (TCPOPT_*). */
#define TCPOPT_NOP 1
#define TCPOPT_EOL 0
#define TCPOPT_MSS 2
#define TCPOPT_WINDOW 3
#define TCPOPT_SACK_PERM 4
#define TCPOPT_SACK 5
#define TCPOPT_TIMESTAMP 8
#define TCPOPT_MD5SIG 19
#define TCPOPT_FASTOPEN 34
#define TCPOPT_EXP 254

/*
 * 16-bit magic value for Fast Open.
 * NOTE(review): presumably carried inside a TCPOPT_EXP option (see
 * TCPOLEN_EXP_FASTOPEN_BASE); confirm against the option parser.
 */
#define TCPOPT_FASTOPEN_MAGIC 0xF989
194
195
196
197
198
/* Lengths associated with the TCPOPT_* options above. */
#define TCPOLEN_MSS 4
#define TCPOLEN_WINDOW 3
#define TCPOLEN_SACK_PERM 2
#define TCPOLEN_TIMESTAMP 10
#define TCPOLEN_MD5SIG 18
#define TCPOLEN_FASTOPEN_BASE 2
#define TCPOLEN_EXP_FASTOPEN_BASE 4

/*
 * *_ALIGNED variants: each is the matching length rounded up to a multiple
 * of four (e.g. 10 -> 12, 3 -> 4, 18 -> 20).
 */
#define TCPOLEN_TSTAMP_ALIGNED 12
#define TCPOLEN_WSCALE_ALIGNED 4
#define TCPOLEN_SACKPERM_ALIGNED 4
#define TCPOLEN_SACK_BASE 2
#define TCPOLEN_SACK_BASE_ALIGNED 4
#define TCPOLEN_SACK_PERBLOCK 8
#define TCPOLEN_MD5SIG_ALIGNED 20
#define TCPOLEN_MSS_ALIGNED 4
216
217
/* Nagle control flags; distinct bits so they can be OR-ed together. */
#define TCP_NAGLE_OFF 1
#define TCP_NAGLE_CORK 2
#define TCP_NAGLE_PUSH 4

#define TCP_THIN_LINEAR_RETRIES 6

#define TCP_INIT_CWND 10

/*
 * TCP Fast Open (TFO_*) flag bits.
 * NOTE(review): presumably tested against sysctl_tcp_fastopen; confirm.
 */
#define TFO_CLIENT_ENABLE 1
#define TFO_SERVER_ENABLE 2
#define TFO_CLIENT_NO_COOKIE 4

#define TFO_SERVER_COOKIE_NOT_REQD 0x200

#define TFO_SERVER_WO_SOCKOPT1 0x400
240
241
242
/*
 * Tunables and global accounting state. These are declarations only; the
 * variables themselves are defined outside this header.
 */
extern int sysctl_tcp_fastopen;
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337;
extern int sysctl_tcp_abort_on_overflow;
extern int sysctl_tcp_max_orphans;
extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_max_reordering;
extern int sysctl_tcp_dsack;
/* Three-element arrays (index 2 is used as the hard limit in tcp_out_of_memory() via sk_prot_mem_limits — TODO confirm mapping). */
extern long sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_frto;
extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor;
extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_thin_linear_timeouts;
extern int sysctl_tcp_thin_dupack;
extern int sysctl_tcp_early_retrans;
extern int sysctl_tcp_recovery;
/* Flag bit; presumably tested against sysctl_tcp_recovery — confirm. */
#define TCP_RACK_LOSS_DETECTION 0x1

extern int sysctl_tcp_limit_output_bytes;
extern int sysctl_tcp_challenge_ack_limit;
extern int sysctl_tcp_min_tso_segs;
extern int sysctl_tcp_min_rtt_wlen;
extern int sysctl_tcp_autocorking;
extern int sysctl_tcp_invalid_ratelimit;
extern int sysctl_tcp_pacing_ss_ratio;
extern int sysctl_tcp_pacing_ca_ratio;

/* Global memory accounting; tcp_memory_pressure is read by tcp_under_memory_pressure(). */
extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
extern unsigned long tcp_memory_pressure;
282
283
284static inline bool tcp_under_memory_pressure(const struct sock *sk)
285{
286 if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
287 mem_cgroup_under_socket_pressure(sk->sk_memcg))
288 return true;
289
290 return tcp_memory_pressure;
291}
292
293
294
295
296
/*
 * before - wrap-safe "seq1 precedes seq2" test on 32-bit sequence numbers.
 * @seq1: first sequence number.
 * @seq2: second sequence number.
 *
 * The unsigned difference is reinterpreted as signed, so the comparison
 * stays correct across wrap-around of the 32-bit space.
 * after(a, b) is simply before(b, a).
 */
static inline bool before(__u32 seq1, __u32 seq2)
{
	__s32 distance = (__s32)(seq1 - seq2);

	return distance < 0;
}
#define after(seq2, seq1) before(seq1, seq2)
302
303
/*
 * between - wrap-safe test for seq2 <= seq1 <= seq3.
 * @seq1: value being tested.
 * @seq2: lower bound of the range.
 * @seq3: upper bound of the range.
 *
 * Both distances are measured from seq2 in unsigned 32-bit arithmetic, so
 * the check holds when the range straddles the wrap point.
 */
static inline bool between(__u32 seq1, __u32 seq2, __u32 seq3)
{
	__u32 range_len = seq3 - seq2;
	__u32 offset = seq1 - seq2;

	return offset <= range_len;
}
308
309static inline bool tcp_out_of_memory(struct sock *sk)
310{
311 if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
312 sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
313 return true;
314 return false;
315}
316
/* Declaration only; implemented outside this header. */
void sk_forced_mem_schedule(struct sock *sk, int size);
318
319static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
320{
321 struct percpu_counter *ocp = sk->sk_prot->orphan_count;
322 int orphans = percpu_counter_read_positive(ocp);
323
324 if (orphans << shift > sysctl_tcp_max_orphans) {
325 orphans = percpu_counter_sum_positive(ocp);
326 if (orphans << shift > sysctl_tcp_max_orphans)
327 return true;
328 }
329 return false;
330}
331
/* Declarations only; implementations live outside this header. */
bool tcp_check_oom(struct sock *sk, int shift);


extern struct proto tcp_prot;

/*
 * Statistics helpers: each forwards to the matching SNMP_* macro using the
 * per-namespace (net)->mib.tcp_statistics table.
 */
#define TCP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.tcp_statistics, field)
#define __TCP_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.tcp_statistics, field)
#define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
#define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)

void tcp_tasklet_init(void);

void tcp_v4_err(struct sk_buff *skb, u32);

void tcp_shutdown(struct sock *sk, int how);

/* IPv4 receive path. */
int tcp_v4_early_demux(struct sk_buff *skb);
int tcp_v4_rcv(struct sk_buff *skb);

/* Send path; the *_locked variants presumably expect the socket lock held — confirm. */
int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
		 int flags);
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
			size_t size, int flags);
ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
			 size_t size, int flags);
void tcp_release_cb(struct sock *sk);
void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
void tcp_delack_timer_handler(struct sock *sk);
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
			 const struct tcphdr *th);
void tcp_rcv_space_adjust(struct sock *sk);
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
void tcp_twsk_destructor(struct sock *sk);
ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
			struct pipe_inode_info *pipe, size_t len,
			unsigned int flags);
374
375static inline void tcp_dec_quickack_mode(struct sock *sk,
376 const unsigned int pkts)
377{
378 struct inet_connection_sock *icsk = inet_csk(sk);
379
380 if (icsk->icsk_ack.quick) {
381 if (pkts >= icsk->icsk_ack.quick) {
382 icsk->icsk_ack.quick = 0;
383
384 icsk->icsk_ack.ato = TCP_ATO_MIN;
385 } else
386 icsk->icsk_ack.quick -= pkts;
387 }
388}
389
/* ECN state flags; distinct bits so several can be set at once. */
#define TCP_ECN_OK 1
#define TCP_ECN_QUEUE_CWR 2
#define TCP_ECN_DEMAND_CWR 4
#define TCP_ECN_SEEN 8
394
/*
 * Result codes returned by tcp_timewait_state_process().
 * NOTE(review): the names suggest the action the caller should take
 * (nothing / send RST / send ACK / treat as new SYN); confirm at call sites.
 */
enum tcp_tw_status {
	TCP_TW_SUCCESS = 0,
	TCP_TW_RST = 1,
	TCP_TW_ACK = 2,
	TCP_TW_SYN = 3
};
401
402
/*
 * Connection-lifecycle, socket-option and IPv4 entry points.
 * Declarations only; implementations live outside this header.
 */
enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
					      struct sk_buff *skb,
					      const struct tcphdr *th);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
			   struct request_sock *req, bool fastopen);
int tcp_child_process(struct sock *parent, struct sock *child,
		      struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk);
void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag);
void tcp_clear_retrans(struct tcp_sock *tp);
void tcp_update_metrics(struct sock *sk);
void tcp_init_metrics(struct sock *sk);
void tcp_metrics_init(void);
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
void tcp_disable_fack(struct tcp_sock *tp);
void tcp_close(struct sock *sk, long timeout);
void tcp_init_sock(struct sock *sk);
unsigned int tcp_poll(struct file *file, struct socket *sock,
		      struct poll_table_struct *wait);
int tcp_getsockopt(struct sock *sk, int level, int optname,
		   char __user *optval, int __user *optlen);
int tcp_setsockopt(struct sock *sk, int level, int optname,
		   char __user *optval, unsigned int optlen);
int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen);
int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, unsigned int optlen);
void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		int flags, int *addr_len);
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
		       struct tcp_options_received *opt_rx,
		       int estab, struct tcp_fastopen_cookie *foc);
const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);

/* IPv4-specific entry points. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
void tcp_v4_mtu_reduced(struct sock *sk);
void tcp_req_err(struct sock *sk, u32 seq, bool abort);
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_create_openreq_child(const struct sock *sk,
				      struct request_sock *req,
				      struct sk_buff *skb);
void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst);
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst,
				  struct request_sock *req_unhash,
				  bool *own_req);
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int tcp_connect(struct sock *sk);
/* Kind of SYN-ACK being built; passed as the last argument of tcp_make_synack(). */
enum tcp_synack_type {
	TCP_SYNACK_NORMAL,
	TCP_SYNACK_FASTOPEN,
	TCP_SYNACK_COOKIE,
};
/* Declarations only; implementations live outside this header. */
struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
				struct request_sock *req,
				struct tcp_fastopen_cookie *foc,
				enum tcp_synack_type synack_type);
int tcp_disconnect(struct sock *sk, int flags);

void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size);
void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);

/* SYN-cookie validation helpers; declared regardless of CONFIG_SYN_COOKIES. */
struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
				 struct request_sock *req,
				 struct dst_entry *dst, u32 tsoff);
int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
		      u32 cookie);
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
481#ifdef CONFIG_SYN_COOKIES
482
483
484
485
486
487
488
489
/*
 * SYN-cookie timing: one period is 60 * HZ ticks, and TCP_SYNCOOKIE_VALID
 * is MAX_SYNCOOKIE_AGE such periods (2 * 60 * HZ ticks).
 */
#define MAX_SYNCOOKIE_AGE 2
#define TCP_SYNCOOKIE_PERIOD (60 * HZ)
#define TCP_SYNCOOKIE_VALID (MAX_SYNCOOKIE_AGE * TCP_SYNCOOKIE_PERIOD)
493
494
495
496
497
498static inline void tcp_synq_overflow(const struct sock *sk)
499{
500 unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
501 unsigned long now = jiffies;
502
503 if (time_after(now, last_overflow + HZ))
504 tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
505}
506
507
508static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
509{
510 unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
511
512 return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID);
513}
514
515static inline u32 tcp_cookie_time(void)
516{
517 u64 val = get_jiffies_64();
518
519 do_div(val, TCP_SYNCOOKIE_PERIOD);
520 return val;
521}
522
/*
 * SYN-cookie generation and option helpers (IPv4).
 * Declarations only; implementations live outside this header.
 */
u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
			      u16 *mssp);
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
u64 cookie_init_timestamp(struct request_sock *req);
bool cookie_timestamp_decode(const struct net *net,
			     struct tcp_options_received *opt);
bool cookie_ecn_ok(const struct tcp_options_received *opt,
		   const struct net *net, const struct dst_entry *dst);

/* IPv6 counterparts of the cookie helpers. */
int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
		      u32 cookie);
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);

u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
			      const struct tcphdr *th, u16 *mssp);
__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
540#endif
541
542
/*
 * Transmit, retransmit and timer entry points.
 * Declarations only; implementations live outside this header.
 */
u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
		     int min_tso_segs);
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
			       int nonagle);
int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
void tcp_retransmit_timer(struct sock *sk);
void tcp_xmit_retransmit_queue(struct sock *);
void tcp_simple_retransmit(struct sock *);
void tcp_enter_recovery(struct sock *sk, bool ece_ack);
int tcp_trim_head(struct sock *, struct sk_buff *, u32);
int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t);

void tcp_send_probe0(struct sock *);
void tcp_send_partial(struct sock *);
int tcp_write_wakeup(struct sock *, int mib);
void tcp_send_fin(struct sock *sk);
void tcp_send_active_reset(struct sock *sk, gfp_t priority);
int tcp_send_synack(struct sock *);
void tcp_push_one(struct sock *, unsigned int mss_now);
void tcp_send_ack(struct sock *sk);
void tcp_send_delayed_ack(struct sock *sk);
void tcp_send_loss_probe(struct sock *sk);
bool tcp_schedule_loss_probe(struct sock *sk);
void tcp_skb_collapse_tstamp(struct sk_buff *skb,
			     const struct sk_buff *next_skb);

/* Input-side helpers. */
void tcp_rearm_rto(struct sock *sk);
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
void tcp_reset(struct sock *sk);
void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
void tcp_fin(struct sock *sk);

/* Timer setup. */
void tcp_init_xmit_timers(struct sock *);
579static inline void tcp_clear_xmit_timers(struct sock *sk)
580{
581 hrtimer_cancel(&tcp_sk(sk)->pacing_timer);
582 inet_csk_clear_xmit_timers(sk);
583}
584
/* MSS helpers; declarations only, implemented outside this header. */
unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
unsigned int tcp_current_mss(struct sock *sk);
587
588
589static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
590{
591 int cutoff;
592
593
594
595
596
597
598
599
600 if (tp->max_window > TCP_MSS_DEFAULT)
601 cutoff = (tp->max_window >> 1);
602 else
603 cutoff = tp->max_window;
604
605 if (cutoff && pktsize > cutoff)
606 return max_t(int, cutoff, 68U - tp->tcp_header_len);
607 else
608 return pktsize;
609}
610
611
/* Declarations only; implementations live outside this header. */
void tcp_get_info(struct sock *, struct tcp_info *);

/* Reads from the socket via a caller-supplied actor callback. */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
		  sk_read_actor_t recv_actor);

void tcp_initialize_rcv_mss(struct sock *sk);

/* MTU <-> MSS conversion and initialisation helpers. */
int tcp_mtu_to_mss(struct sock *sk, int pmtu);
int tcp_mss_to_mtu(struct sock *sk, int mss);
void tcp_mtup_init(struct sock *sk);
void tcp_init_buffer_space(struct sock *sk);
624
625static inline void tcp_bound_rto(const struct sock *sk)
626{
627 if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
628 inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
629}
630
631static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
632{
633 return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
634}
635
636static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
637{
638 tp->pred_flags = htonl((tp->tcp_header_len << 26) |
639 ntohl(TCP_FLAG_ACK) |
640 snd_wnd);
641}
642
643static inline void tcp_fast_path_on(struct tcp_sock *tp)
644{
645 __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
646}
647
648static inline void tcp_fast_path_check(struct sock *sk)
649{
650 struct tcp_sock *tp = tcp_sk(sk);
651
652 if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
653 tp->rcv_wnd &&
654 atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
655 !tp->urg_data)
656 tcp_fast_path_on(tp);
657}
658
659
660static inline u32 tcp_rto_min(struct sock *sk)
661{
662 const struct dst_entry *dst = __sk_dst_get(sk);
663 u32 rto_min = TCP_RTO_MIN;
664
665 if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
666 rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
667 return rto_min;
668}
669
670static inline u32 tcp_rto_min_us(struct sock *sk)
671{
672 return jiffies_to_usecs(tcp_rto_min(sk));
673}
674
675static inline bool tcp_ca_dst_locked(const struct dst_entry *dst)
676{
677 return dst_metric_locked(dst, RTAX_CC_ALGO);
678}
679
680
681static inline u32 tcp_min_rtt(const struct tcp_sock *tp)
682{
683 return minmax_get(&tp->rtt_min);
684}
685
686
687
688
689
690static inline u32 tcp_receive_window(const struct tcp_sock *tp)
691{
692 s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
693
694 if (win < 0)
695 win = 0;
696 return (u32) win;
697}
698
699
700
701
702
/* Window helpers; declarations only, implemented outside this header. */
u32 __tcp_select_window(struct sock *sk);

void tcp_send_window_probe(struct sock *sk);
706
707
708
709
710
/* The jiffies counter truncated to 32 bits. */
#define tcp_jiffies32 ((u32)jiffies)

/*
 * Tick rate of the TCP timestamp clock used by tcp_time_stamp(),
 * tcp_time_stamp_raw() and tcp_skb_timestamp(): 1000 ticks per second.
 */
#define TCP_TS_HZ 1000
719
720static inline u64 tcp_clock_ns(void)
721{
722 return local_clock();
723}
724
725static inline u64 tcp_clock_us(void)
726{
727 return div_u64(tcp_clock_ns(), NSEC_PER_USEC);
728}
729
730
731static inline u32 tcp_time_stamp(const struct tcp_sock *tp)
732{
733 return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
734}
735
736
737static inline u32 tcp_time_stamp_raw(void)
738{
739 return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ);
740}
741
742
743
744
745
746static inline void tcp_mstamp_refresh(struct tcp_sock *tp)
747{
748 u64 val = tcp_clock_us();
749
750 if (val > tp->tcp_mstamp)
751 tp->tcp_mstamp = val;
752}
753
754static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
755{
756 return max_t(s64, t1 - t0, 0);
757}
758
759static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
760{
761 return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ);
762}
763
764
/*
 * tcp_flag_byte - lvalue for the byte at offset 13 of a TCP header.
 * @th: pointer to the start of the header (any object pointer type).
 *
 * The argument is parenthesised before the cast so that expression
 * arguments are converted as a whole: tcp_flag_byte(p + 1) indexes from
 * the element after p, not from one byte past p.
 */
#define tcp_flag_byte(th) (((u_int8_t *)(th))[13])
766
/* Single-bit masks for the TCP header flags, one per bit of an 8-bit value. */
#define TCPHDR_FIN 0x01
#define TCPHDR_SYN 0x02
#define TCPHDR_RST 0x04
#define TCPHDR_PSH 0x08
#define TCPHDR_ACK 0x10
#define TCPHDR_URG 0x20
#define TCPHDR_ECE 0x40
#define TCPHDR_CWR 0x80

/* SYN combined with both ECN bits (ECE and CWR). */
#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR)
777
778
779
780
781
782
783
/*
 * Per-skb TCP control block. TCP_SKB_CB() overlays this structure on the
 * start of skb->cb[], so its size and field order are layout-sensitive.
 * NOTE(review): per-field meanings below are inferred from names and from
 * the accessors in this header; confirm against the users.
 */
struct tcp_skb_cb {
	__u32 seq;		/* presumably the starting sequence number */
	__u32 end_seq;		/* presumably the ending sequence number */
	union {
		/* The three members below share storage. */
		__u32 tcp_tw_isn;
		struct {
			u16 tcp_gso_segs;	/* read by tcp_skb_pcount() */
			u16 tcp_gso_size;	/* read by tcp_skb_mss() */
		};

		ktime_t swtstamp;
	};
	__u8 tcp_flags;		/* presumably TCPHDR_* bits */

	__u8 sacked;		/* TCPCB_* bits defined below */
#define TCPCB_SACKED_ACKED 0x01
#define TCPCB_SACKED_RETRANS 0x02
#define TCPCB_LOST 0x04
#define TCPCB_TAGBITS 0x07	/* union of the three bits above */
#define TCPCB_REPAIRED 0x10
#define TCPCB_EVER_RETRANS 0x80
#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
		       TCPCB_REPAIRED)

	__u8 ip_dsfield;
	__u8 txstamp_ack:1,
	     eor:1,		/* tested by tcp_skb_can_collapse_to() */
	     has_rxtstamp:1,
	     unused:5;
	__u32 ack_seq;
	union {
		/* tx, header and bpf share storage; only one view is valid at a time. */
		struct {

			__u32 in_flight:30,
			      is_app_limited:1,
			      unused:1;

			__u32 delivered;

			u64 first_tx_mstamp;

			u64 delivered_mstamp;
		} tx;
		union {
			struct inet_skb_parm h4;	/* used by tcp_v4_sdif() */
#if IS_ENABLED(CONFIG_IPV6)
			struct inet6_skb_parm h6;	/* used by tcp_v6_iif()/tcp_v6_sdif() */
#endif
		} header;
		struct {
			__u32 key;
			__u32 flags;
			struct bpf_map *map;
			void *data_end;
		} bpf;
	};
};
851
/* View the start of an skb's cb[] array as a struct tcp_skb_cb. */
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
853
854
855#if IS_ENABLED(CONFIG_IPV6)
856
857
858
859static inline int tcp_v6_iif(const struct sk_buff *skb)
860{
861 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
862
863 return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif;
864}
865
866
867static inline int tcp_v6_sdif(const struct sk_buff *skb)
868{
869#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
870 if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags))
871 return TCP_SKB_CB(skb)->header.h6.iif;
872#endif
873 return 0;
874}
875#endif
876
877
878static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
879{
880#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
881 if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
882 skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
883 return true;
884#endif
885 return false;
886}
887
888
889static inline int tcp_v4_sdif(struct sk_buff *skb)
890{
891#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
892 if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
893 return TCP_SKB_CB(skb)->header.h4.iif;
894#endif
895 return 0;
896}
897
898
899
900
901static inline int tcp_skb_pcount(const struct sk_buff *skb)
902{
903 return TCP_SKB_CB(skb)->tcp_gso_segs;
904}
905
906static inline void tcp_skb_pcount_set(struct sk_buff *skb, int segs)
907{
908 TCP_SKB_CB(skb)->tcp_gso_segs = segs;
909}
910
911static inline void tcp_skb_pcount_add(struct sk_buff *skb, int segs)
912{
913 TCP_SKB_CB(skb)->tcp_gso_segs += segs;
914}
915
916
917static inline int tcp_skb_mss(const struct sk_buff *skb)
918{
919 return TCP_SKB_CB(skb)->tcp_gso_size;
920}
921
922static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb)
923{
924 return likely(!TCP_SKB_CB(skb)->eor);
925}
926
927
/*
 * Events delivered to a congestion-control module through its cwnd_event
 * callback (see tcp_ca_event() and struct tcp_congestion_ops).
 * NOTE(review): the trigger for each event is not visible in this header.
 */
enum tcp_ca_event {
	CA_EVENT_TX_START,
	CA_EVENT_CWND_RESTART,
	CA_EVENT_COMPLETE_CWR,
	CA_EVENT_LOSS,
	CA_EVENT_ECN_NO_CE,
	CA_EVENT_ECN_IS_CE,
	CA_EVENT_DELAYED_ACK,
	CA_EVENT_NON_DELAYED_ACK,
};
938
939
/*
 * Bit flags, one bit each so they can be OR-ed together.
 * NOTE(review): presumably the "flags" argument of the in_ack_event
 * callback in struct tcp_congestion_ops; confirm at the call site.
 */
enum tcp_ca_ack_event_flags {
	CA_ACK_SLOWPATH = (1 << 0),
	CA_ACK_WIN_UPDATE = (1 << 1),
	CA_ACK_ECE = (1 << 2),
};
945
946
947
948
/*
 * Congestion-control naming limits. TCP_CA_NAME_MAX sizes the name[] array
 * in struct tcp_congestion_ops; TCP_CA_BUF_MAX is room for TCP_CA_MAX names.
 */
#define TCP_CA_NAME_MAX 16
#define TCP_CA_MAX 128
#define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX)

#define TCP_CA_UNSPEC 0

/* Bits for tcp_congestion_ops.flags. */
#define TCP_CONG_NON_RESTRICTED 0x1

/* Tested by tcp_ca_needs_ecn(). */
#define TCP_CONG_NEEDS_ECN 0x2

/* Forward declaration; only used by pointer in this header. */
union tcp_cc_info;
961
/*
 * Sample passed to the pkts_acked callback of struct tcp_congestion_ops.
 * NOTE(review): field meanings are inferred from names; rtt_us is signed,
 * so a negative value presumably means "no sample" — confirm.
 */
struct ack_sample {
	u32 pkts_acked;
	s32 rtt_us;
	u32 in_flight;
};
967
968
969
970
971
972
973
974
975
/*
 * Delivery-rate sample passed to the cong_control callback and filled in
 * by the tcp_rate_*() helpers declared in this header.
 * NOTE(review): field meanings and units are inferred from names only.
 */
struct rate_sample {
	u64 prior_mstamp;
	u32 prior_delivered;
	s32 delivered;
	long interval_us;
	long rtt_us;
	int losses;
	u32 acked_sacked;
	u32 prior_in_flight;
	bool is_app_limited;
	bool is_retrans;
};
988
/*
 * Identity and callback table for one congestion-control algorithm.
 * Instances are passed to tcp_register_congestion_control() and
 * tcp_unregister_congestion_control().
 *
 * Within this header, set_state and cwnd_event are NULL-checked before
 * being called (tcp_set_ca_state(), tcp_ca_event()), and cong_control is
 * tested for NULL in tcp_slow_start_after_idle_check(), so those three are
 * optional. NOTE(review): which of the remaining callbacks are mandatory
 * is not visible here; confirm in the registration code.
 */
struct tcp_congestion_ops {
	struct list_head list;
	u32 key;
	u32 flags;		/* TCP_CONG_* bits */

	/* Per-socket setup and teardown hooks. */
	void (*init)(struct sock *sk);

	void (*release)(struct sock *sk);

	/* Returns a slow-start threshold. */
	u32 (*ssthresh)(struct sock *sk);

	void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked);

	/* Invoked from tcp_set_ca_state() before the new state is stored. */
	void (*set_state)(struct sock *sk, u8 new_state);

	/* Invoked from tcp_ca_event(). */
	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);

	void (*in_ack_event)(struct sock *sk, u32 flags);

	u32 (*undo_cwnd)(struct sock *sk);

	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);

	u32 (*tso_segs_goal)(struct sock *sk);

	u32 (*sndbuf_expand)(struct sock *sk);

	/* When non-NULL, tcp_slow_start_after_idle_check() does nothing. */
	void (*cong_control)(struct sock *sk, const struct rate_sample *rs);

	size_t (*get_info)(struct sock *sk, u32 ext, int *attr,
			   union tcp_cc_info *info);

	char name[TCP_CA_NAME_MAX];
	struct module *owner;
};
1028
/*
 * Congestion-control registration, selection and shared algorithm helpers.
 * Declarations only; implementations live outside this header.
 */
int tcp_register_congestion_control(struct tcp_congestion_ops *type);
void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);

void tcp_assign_congestion_control(struct sock *sk);
void tcp_init_congestion_control(struct sock *sk);
void tcp_cleanup_congestion_control(struct sock *sk);
int tcp_set_default_congestion_control(const char *name);
void tcp_get_default_congestion_control(char *name);
void tcp_get_available_congestion_control(char *buf, size_t len);
void tcp_get_allowed_congestion_control(char *buf, size_t len);
int tcp_set_allowed_congestion_control(char *allowed);
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit);
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);

/* Reno callbacks and its ops table. */
u32 tcp_reno_ssthresh(struct sock *sk);
u32 tcp_reno_undo_cwnd(struct sock *sk);
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
extern struct tcp_congestion_ops tcp_reno;

/* Lookup between algorithm keys and names. */
struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca);
/*
 * tcp_ca_get_name_by_key - map a congestion-control key to its name.
 * With CONFIG_INET the real implementation is provided elsewhere; without
 * it, this inline stub ignores both arguments and returns NULL.
 */
#ifdef CONFIG_INET
char *tcp_ca_get_name_by_key(u32 key, char *buffer);
#else
static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer)
{
	return NULL;
}
#endif
1059
1060static inline bool tcp_ca_needs_ecn(const struct sock *sk)
1061{
1062 const struct inet_connection_sock *icsk = inet_csk(sk);
1063
1064 return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN;
1065}
1066
1067static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
1068{
1069 struct inet_connection_sock *icsk = inet_csk(sk);
1070
1071 if (icsk->icsk_ca_ops->set_state)
1072 icsk->icsk_ca_ops->set_state(sk, ca_state);
1073 icsk->icsk_ca_state = ca_state;
1074}
1075
1076static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
1077{
1078 const struct inet_connection_sock *icsk = inet_csk(sk);
1079
1080 if (icsk->icsk_ca_ops->cwnd_event)
1081 icsk->icsk_ca_ops->cwnd_event(sk, event);
1082}
1083
1084
/*
 * Delivery-rate sampling helpers operating on struct rate_sample.
 * Declarations only; implementations live outside this header.
 */
void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
			    struct rate_sample *rs);
void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
		  struct rate_sample *rs);
void tcp_rate_check_app_limited(struct sock *sk);
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100static inline int tcp_is_sack(const struct tcp_sock *tp)
1101{
1102 return tp->rx_opt.sack_ok;
1103}
1104
1105static inline bool tcp_is_reno(const struct tcp_sock *tp)
1106{
1107 return !tcp_is_sack(tp);
1108}
1109
1110static inline bool tcp_is_fack(const struct tcp_sock *tp)
1111{
1112 return tp->rx_opt.sack_ok & TCP_FACK_ENABLED;
1113}
1114
1115static inline void tcp_enable_fack(struct tcp_sock *tp)
1116{
1117 tp->rx_opt.sack_ok |= TCP_FACK_ENABLED;
1118}
1119
1120static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
1121{
1122 return tp->sacked_out + tp->lost_out;
1123}
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
1140{
1141 return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
1142}
1143
/*
 * Sentinel ssthresh value: tcp_in_initial_slowstart() treats any
 * snd_ssthresh at or above this as "still in the initial slow start".
 */
#define TCP_INFINITE_SSTHRESH 0x7fffffff
1145
1146static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
1147{
1148 return tp->snd_cwnd < tp->snd_ssthresh;
1149}
1150
1151static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
1152{
1153 return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
1154}
1155
1156static inline bool tcp_in_cwnd_reduction(const struct sock *sk)
1157{
1158 return (TCPF_CA_CWR | TCPF_CA_Recovery) &
1159 (1 << inet_csk(sk)->icsk_ca_state);
1160}
1161
1162
1163
1164
1165
1166static inline __u32 tcp_current_ssthresh(const struct sock *sk)
1167{
1168 const struct tcp_sock *tp = tcp_sk(sk);
1169
1170 if (tcp_in_cwnd_reduction(sk))
1171 return tp->snd_ssthresh;
1172 else
1173 return max(tp->snd_ssthresh,
1174 ((tp->snd_cwnd >> 1) +
1175 (tp->snd_cwnd >> 2)));
1176}
1177
1178
/*
 * tcp_verify_left_out - warn when sacked_out + lost_out exceeds packets_out.
 * @tp: pointer to the struct tcp_sock to check.
 *
 * The parameter is parenthesised at the member access so that an
 * expression argument (a cast, a conditional, pointer arithmetic) binds
 * as a whole instead of being split by the "->" operator.
 */
#define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > (tp)->packets_out)
1180
/* Declarations only; implemented outside this header. */
void tcp_enter_cwr(struct sock *sk);
__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst);
1183
1184
1185
1186
1187static inline __u32 tcp_max_tso_deferred_mss(const struct tcp_sock *tp)
1188{
1189 return 3;
1190}
1191
1192
1193static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
1194{
1195 return tp->snd_una + tp->snd_wnd;
1196}
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211static inline bool tcp_is_cwnd_limited(const struct sock *sk)
1212{
1213 const struct tcp_sock *tp = tcp_sk(sk);
1214
1215
1216 if (tcp_in_slow_start(tp))
1217 return tp->snd_cwnd < 2 * tp->max_packets_out;
1218
1219 return tp->is_cwnd_limited;
1220}
1221
1222
1223
1224
1225
1226
1227
1228static inline unsigned long tcp_probe0_base(const struct sock *sk)
1229{
1230 return max_t(unsigned long, inet_csk(sk)->icsk_rto, TCP_RTO_MIN);
1231}
1232
1233
1234static inline unsigned long tcp_probe0_when(const struct sock *sk,
1235 unsigned long max_when)
1236{
1237 u64 when = (u64)tcp_probe0_base(sk) << inet_csk(sk)->icsk_backoff;
1238
1239 return (unsigned long)min_t(u64, when, max_when);
1240}
1241
1242static inline void tcp_check_probe_timer(struct sock *sk)
1243{
1244 if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
1245 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
1246 tcp_probe0_base(sk), TCP_RTO_MAX);
1247}
1248
1249static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
1250{
1251 tp->snd_wl1 = seq;
1252}
1253
1254static inline void tcp_update_wl(struct tcp_sock *tp, u32 seq)
1255{
1256 tp->snd_wl1 = seq;
1257}
1258
1259
1260
1261
1262static inline __sum16 tcp_v4_check(int len, __be32 saddr,
1263 __be32 daddr, __wsum base)
1264{
1265 return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
1266}
1267
1268static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
1269{
1270 return __skb_checksum_complete(skb);
1271}
1272
1273static inline bool tcp_checksum_complete(struct sk_buff *skb)
1274{
1275 return !skb_csum_unnecessary(skb) &&
1276 __tcp_checksum_complete(skb);
1277}
1278
/* Declarations only; implemented outside this header. */
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
int tcp_filter(struct sock *sk, struct sk_buff *skb);

/*
 * STATE_TRACE is undefined immediately before it is tested, so the
 * state-name table below is compiled out unless the #undef is removed.
 */
#undef STATE_TRACE

#ifdef STATE_TRACE
static const char *statename[]={
	"Unused","Established","Syn Sent","Syn Recv",
	"Fin Wait 1","Fin Wait 2","Time Wait", "Close",
	"Close Wait","Last ACK","Listen","Closing"
};
#endif
void tcp_set_state(struct sock *sk, int state);

void tcp_done(struct sock *sk);

int tcp_abort(struct sock *sk, int err);
1296
/* Clear the received-SACK bookkeeping: zero both the dsack flag and the
 * num_sacks counter in @rx_opt.
 */
static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
{
	rx_opt->dsack = 0;
	rx_opt->num_sacks = 0;
}
1302
/* Window helpers declared here and implemented elsewhere.
 * tcp_cwnd_restart() is called by tcp_slow_start_after_idle_check() with
 * the measured idle time in jiffies as @delta.
 */
u32 tcp_default_init_rwnd(u32 mss);
void tcp_cwnd_restart(struct sock *sk, s32 delta);
1305
1306static inline void tcp_slow_start_after_idle_check(struct sock *sk)
1307{
1308 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
1309 struct tcp_sock *tp = tcp_sk(sk);
1310 s32 delta;
1311
1312 if (!sysctl_tcp_slow_start_after_idle || tp->packets_out ||
1313 ca_ops->cong_control)
1314 return;
1315 delta = tcp_jiffies32 - tp->lsndtime;
1316 if (delta > inet_csk(sk)->icsk_rto)
1317 tcp_cwnd_restart(sk, delta);
1318}
1319
1320
/* Initial receive-window selection; declared here, implemented elsewhere.
 * Results are returned through the @rcv_wnd, @window_clamp and @rcv_wscale
 * pointers.
 */
void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
			       __u32 *window_clamp, int wscale_ok,
			       __u8 *rcv_wscale, __u32 init_rcv_wnd);
1324
1325static inline int tcp_win_from_space(int space)
1326{
1327 int tcp_adv_win_scale = sysctl_tcp_adv_win_scale;
1328
1329 return tcp_adv_win_scale <= 0 ?
1330 (space>>(-tcp_adv_win_scale)) :
1331 space - (space>>tcp_adv_win_scale);
1332}
1333
1334
1335static inline int tcp_space(const struct sock *sk)
1336{
1337 return tcp_win_from_space(sk->sk_rcvbuf -
1338 atomic_read(&sk->sk_rmem_alloc));
1339}
1340
/* Window equivalent of the whole receive buffer (sk_rcvbuf), regardless of
 * how much of it is currently in use.
 */
static inline int tcp_full_space(const struct sock *sk)
{
	return tcp_win_from_space(sk->sk_rcvbuf);
}
1345
/* Request-socket receive window setup; declared here, implemented
 * elsewhere.
 */
extern void tcp_openreq_init_rwin(struct request_sock *req,
				  const struct sock *sk_listener,
				  const struct dst_entry *dst);

/* Memory-pressure state transitions; declared here, implemented elsewhere. */
void tcp_enter_memory_pressure(struct sock *sk);
void tcp_leave_memory_pressure(struct sock *sk);
1352
1353static inline int keepalive_intvl_when(const struct tcp_sock *tp)
1354{
1355 struct net *net = sock_net((struct sock *)tp);
1356
1357 return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
1358}
1359
1360static inline int keepalive_time_when(const struct tcp_sock *tp)
1361{
1362 struct net *net = sock_net((struct sock *)tp);
1363
1364 return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
1365}
1366
1367static inline int keepalive_probes(const struct tcp_sock *tp)
1368{
1369 struct net *net = sock_net((struct sock *)tp);
1370
1371 return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
1372}
1373
1374static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
1375{
1376 const struct inet_connection_sock *icsk = &tp->inet_conn;
1377
1378 return min_t(u32, tcp_jiffies32 - icsk->icsk_ack.lrcvtime,
1379 tcp_jiffies32 - tp->rcv_tstamp);
1380}
1381
1382static inline int tcp_fin_time(const struct sock *sk)
1383{
1384 int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
1385 const int rto = inet_csk(sk)->icsk_rto;
1386
1387 if (fin_timeout < (rto << 2) - (rto >> 1))
1388 fin_timeout = (rto << 2) - (rto >> 1);
1389
1390 return fin_timeout;
1391}
1392
1393static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
1394 int paws_win)
1395{
1396 if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
1397 return true;
1398 if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
1399 return true;
1400
1401
1402
1403
1404
1405 if (!rx_opt->ts_recent)
1406 return true;
1407 return false;
1408}
1409
1410static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
1411 int rst)
1412{
1413 if (tcp_paws_check(rx_opt, 0))
1414 return false;
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428 if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
1429 return false;
1430 return true;
1431}
1432
/* Declared here, implemented elsewhere.  @last_oow_ack_time is passed by
 * pointer, so the callee can update the caller's timestamp.
 */
bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
			  int mib_idx, u32 *last_oow_ack_time);
1435
/* Seed the constant TCP MIB entries for @net: RTO algorithm id, minimum and
 * maximum RTO converted from jiffies to milliseconds, and a max-connections
 * value of -1.
 */
static inline void tcp_mib_init(struct net *net)
{
	/* NOTE(review): the values 1 and -1 presumably follow the TCP MIB
	 * definitions (RFC 2012) - confirm.
	 */
	TCP_ADD_STATS(net, TCP_MIB_RTOALGORITHM, 1);
	TCP_ADD_STATS(net, TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ);
	TCP_ADD_STATS(net, TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ);
	TCP_ADD_STATS(net, TCP_MIB_MAXCONN, -1);
}
1444
1445
/* Drop the cached lost_skb_hint only; retransmit_skb_hint is left alone. */
static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp)
{
	tp->lost_skb_hint = NULL;
}
1450
/* Drop both cached retransmit hints: lost_skb_hint (via the partial helper)
 * and retransmit_skb_hint.
 */
static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
{
	tcp_clear_retrans_hints_partial(tp);
	tp->retransmit_skb_hint = NULL;
}
1456
/* Peer address an MD5 key applies to: IPv4 always, IPv6 only when
 * CONFIG_IPV6 is enabled.
 */
union tcp_md5_addr {
	struct in_addr  a4;
#if IS_ENABLED(CONFIG_IPV6)
	struct in6_addr	a6;
#endif
};
1463
1464
/* One TCP-MD5 key entry. */
struct tcp_md5sig_key {
	struct hlist_node	node;		/* hash-list linkage */
	u8			keylen;		/* presumably bytes used in key[] - confirm */
	u8			family;		/* address family of addr */
	union tcp_md5_addr	addr;		/* address the key is bound to */
	u8			prefixlen;	/* prefix length applied to addr */
	u8			key[TCP_MD5SIG_MAXKEYLEN];	/* key material */
	struct rcu_head		rcu;		/* for RCU-deferred freeing */
};
1474
1475
/* Container for a list of MD5 keys (hlist of struct tcp_md5sig_key,
 * presumably one per socket - confirm against users).
 */
struct tcp_md5sig_info {
	struct hlist_head	head;	/* list of keys */
	struct rcu_head		rcu;	/* for RCU-deferred freeing */
};
1480
1481
/* IPv4 TCP pseudo-header layout; all multi-byte fields are big-endian. */
struct tcp4_pseudohdr {
	__be32		saddr;		/* source address */
	__be32		daddr;		/* destination address */
	__u8		pad;		/* padding byte */
	__u8		protocol;	/* IP protocol number */
	__be16		len;		/* length field */
};
1489
/* IPv6 TCP pseudo-header layout; len and protocol are 32-bit big-endian. */
struct tcp6_pseudohdr {
	struct in6_addr	saddr;		/* source address */
	struct in6_addr daddr;		/* destination address */
	__be32		len;		/* length field */
	__be32		protocol;	/* including padding */
};
1496
/* Storage large enough for either pseudo-header variant; the IPv6 member
 * exists only when CONFIG_IPV6 is enabled.
 */
union tcp_md5sum_block {
	struct tcp4_pseudohdr ip4;
#if IS_ENABLED(CONFIG_IPV6)
	struct tcp6_pseudohdr ip6;
#endif
};
1503
1504
/* Working state handed out by tcp_get_md5sig_pool(): an ahash request plus
 * a scratch buffer.
 */
struct tcp_md5sig_pool {
	struct ahash_request	*md5_req;	/* hash request object */
	void			*scratch;	/* scratch memory; size not visible here */
};
1509
1510
/* MD5 signature API: hash an skb, add/delete a key for an address/prefix,
 * and look up the key for a socket pair.  Declared here, implemented
 * elsewhere.
 */
int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk, const struct sk_buff *skb);
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
		   gfp_t gfp);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, u8 prefixlen);
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
					 const struct sock *addr_sk);
1520
/* With CONFIG_TCP_MD5SIG the lookup is a real function and
 * tcp_twsk_md5_key() reads the timewait socket's tw_md5_key.  Without it,
 * the lookup is a stub returning NULL and the macro evaluates to NULL.
 */
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family);
#define tcp_twsk_md5_key(twsk)	((twsk)->tw_md5_key)
#else
static inline struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	return NULL;
}
#define tcp_twsk_md5_key(twsk)	NULL
#endif
1535
/* MD5 pool allocation and acquisition; declared here, implemented
 * elsewhere.
 */
bool tcp_alloc_md5sig_pool(void);

struct tcp_md5sig_pool *tcp_get_md5sig_pool(void);
/* Release side of the pool: re-enables bottom halves.
 * NOTE(review): presumably pairs with a local_bh_disable() taken in
 * tcp_get_md5sig_pool() - confirm.
 */
static inline void tcp_put_md5sig_pool(void)
{
	local_bh_enable();
}
1543
/* Feed skb payload / key material into an MD5 pool's hash; declared here,
 * implemented elsewhere.
 */
int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *,
			  unsigned int header_len);
int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
		     const struct tcp_md5sig_key *key);
1548
1549
/* Fast Open cache accessors; declared here, implemented elsewhere.
 * The getter returns its results through the @mss, @cookie, @syn_loss and
 * @last_syn_loss pointers.
 */
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
			    struct tcp_fastopen_cookie *cookie, int *syn_loss,
			    unsigned long *last_syn_loss);
void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
			    struct tcp_fastopen_cookie *cookie, bool syn_lost,
			    u16 try_exp);
/* State of an in-progress Fast Open request. */
struct tcp_fastopen_request {
	/* Fast Open cookie associated with the request */
	struct tcp_fastopen_cookie	cookie;
	struct msghdr			*data;  /* data in MSG_FASTOPEN */
	size_t				size;	/* NOTE(review): presumably size of data - confirm */
	int				copied;	/* queued in tcp_connect() */
};
/* Fast Open API; declared here, implemented elsewhere. */
void tcp_free_fastopen_req(struct tcp_sock *tp);

/* RCU-managed pointer to the current Fast Open context. */
extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
int tcp_fastopen_reset_cipher(void *key, unsigned int len);
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc);
void tcp_fastopen_init_key_once(bool publish);
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
			     struct tcp_fastopen_cookie *cookie);
bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
/* Size in bytes of the key[] array in struct tcp_fastopen_context. */
#define TCP_FASTOPEN_KEY_LENGTH 16
1576
1577
/* Fast Open key context: cipher transform plus the raw key it was set up
 * with.
 */
struct tcp_fastopen_context {
	struct crypto_cipher	*tfm;	/* cipher transform handle */
	__u8			key[TCP_FASTOPEN_KEY_LENGTH];	/* raw key bytes */
	struct rcu_head		rcu;	/* for RCU-deferred freeing */
};
1583
/* Active Fast Open disable/blackhole handling; the sysctl variable and the
 * functions are defined elsewhere.
 */
extern unsigned int sysctl_tcp_fastopen_blackhole_timeout;
void tcp_fastopen_active_disable(struct sock *sk);
bool tcp_fastopen_active_should_disable(struct sock *sk);
void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
void tcp_fastopen_active_timeout_reset(void);
1589
1590
1591
1592
/* Chronograph types passed to tcp_chrono_start()/tcp_chrono_stop().
 * __TCP_CHRONO_MAX is the number of enumerators, not a real type.
 */
enum tcp_chrono {
	TCP_CHRONO_UNSPEC,
	TCP_CHRONO_BUSY, /* started when the send head is set, see tcp_add_write_queue_tail() */
	TCP_CHRONO_RWND_LIMITED, /* NOTE(review): presumably stalled on receive window - confirm */
	TCP_CHRONO_SNDBUF_LIMITED, /* NOTE(review): presumably stalled on send buffer - confirm */
	__TCP_CHRONO_MAX,
};
1600
/* Start/stop a chronograph of the given type on @sk; declared here,
 * implemented elsewhere.
 */
void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type);
void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type);
1603
1604
1605static inline void tcp_write_queue_purge(struct sock *sk)
1606{
1607 struct sk_buff *skb;
1608
1609 tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
1610 while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
1611 sk_wmem_free_skb(sk, skb);
1612 sk_mem_reclaim(sk);
1613 tcp_clear_all_retrans_hints(tcp_sk(sk));
1614}
1615
/* First skb on the write queue, as returned by skb_peek() (the skb is not
 * removed from the queue).
 */
static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
{
	return skb_peek(&sk->sk_write_queue);
}
1620
/* Last skb on the write queue, as returned by skb_peek_tail() (the skb is
 * not removed from the queue).
 */
static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk)
{
	return skb_peek_tail(&sk->sk_write_queue);
}
1625
/* skb following @skb on the write queue (skb_queue_next()). */
static inline struct sk_buff *tcp_write_queue_next(const struct sock *sk,
						   const struct sk_buff *skb)
{
	return skb_queue_next(&sk->sk_write_queue, skb);
}
1631
/* skb preceding @skb on the write queue (skb_queue_prev()). */
static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk,
						   const struct sk_buff *skb)
{
	return skb_queue_prev(&sk->sk_write_queue, skb);
}
1637
/* Iterate over every skb on the write queue of @sk. */
#define tcp_for_write_queue(skb, sk)					\
	skb_queue_walk(&(sk)->sk_write_queue, skb)

/* Iterate over the write queue starting at the current value of @skb. */
#define tcp_for_write_queue_from(skb, sk)				\
	skb_queue_walk_from(&(sk)->sk_write_queue, skb)

/* As tcp_for_write_queue_from(), using @tmp as the extra cursor required by
 * skb_queue_walk_from_safe().
 */
#define tcp_for_write_queue_from_safe(skb, tmp, sk)			\
	skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
1646
/* Current send head (sk_send_head); NULL when none is set. */
static inline struct sk_buff *tcp_send_head(const struct sock *sk)
{
	return sk->sk_send_head;
}
1651
/* True when @skb is the last entry on the write queue. */
static inline bool tcp_skb_is_last(const struct sock *sk,
				   const struct sk_buff *skb)
{
	return skb_queue_is_last(&sk->sk_write_queue, skb);
}
1657
1658static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb)
1659{
1660 if (tcp_skb_is_last(sk, skb))
1661 sk->sk_send_head = NULL;
1662 else
1663 sk->sk_send_head = tcp_write_queue_next(sk, skb);
1664}
1665
1666static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
1667{
1668 if (sk->sk_send_head == skb_unlinked) {
1669 sk->sk_send_head = NULL;
1670 tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
1671 }
1672 if (tcp_sk(sk)->highest_sack == skb_unlinked)
1673 tcp_sk(sk)->highest_sack = NULL;
1674}
1675
/* Reset the send head to "nothing to send" (NULL). */
static inline void tcp_init_send_head(struct sock *sk)
{
	sk->sk_send_head = NULL;
}
1680
/* Append @skb to the write queue without touching the send head or
 * highest_sack; see tcp_add_write_queue_tail() for the full version.
 */
static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
{
	__skb_queue_tail(&sk->sk_write_queue, skb);
}
1685
1686static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
1687{
1688 __tcp_add_write_queue_tail(sk, skb);
1689
1690
1691 if (sk->sk_send_head == NULL) {
1692 sk->sk_send_head = skb;
1693 tcp_chrono_start(sk, TCP_CHRONO_BUSY);
1694
1695 if (tcp_sk(sk)->highest_sack == NULL)
1696 tcp_sk(sk)->highest_sack = skb;
1697 }
1698}
1699
/* Insert @skb at the front of the write queue; no other socket state is
 * updated.
 */
static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb)
{
	__skb_queue_head(&sk->sk_write_queue, skb);
}
1704
1705
/* Insert @buff on the write queue immediately after @skb. */
static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
						struct sk_buff *buff,
						struct sock *sk)
{
	__skb_queue_after(&sk->sk_write_queue, skb, buff);
}
1712
1713
/* Insert @new on the write queue immediately before @skb.  If @skb was the
 * send head, @new takes over as send head so it is not skipped.
 */
static inline void tcp_insert_write_queue_before(struct sk_buff *new,
						  struct sk_buff *skb,
						  struct sock *sk)
{
	__skb_queue_before(&sk->sk_write_queue, skb, new);

	if (sk->sk_send_head == skb)
		sk->sk_send_head = new;
}
1723
/* Remove @skb from the write queue.  Cached pointers (send head,
 * highest_sack) are not adjusted here; see tcp_check_send_head().
 */
static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
{
	__skb_unlink(skb, &sk->sk_write_queue);
}
1728
/* True when the write queue holds no skbs. */
static inline bool tcp_write_queue_empty(struct sock *sk)
{
	return skb_queue_empty(&sk->sk_write_queue);
}
1733
1734static inline void tcp_push_pending_frames(struct sock *sk)
1735{
1736 if (tcp_send_head(sk)) {
1737 struct tcp_sock *tp = tcp_sk(sk);
1738
1739 __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle);
1740 }
1741}
1742
1743
1744
1745
1746
1747static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
1748{
1749 if (!tp->sacked_out)
1750 return tp->snd_una;
1751
1752 if (tp->highest_sack == NULL)
1753 return tp->snd_nxt;
1754
1755 return TCP_SKB_CB(tp->highest_sack)->seq;
1756}
1757
1758static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb)
1759{
1760 tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL :
1761 tcp_write_queue_next(sk, skb);
1762}
1763
/* Current highest_sack skb pointer; may be NULL. */
static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
{
	return tcp_sk(sk)->highest_sack;
}
1768
/* Point highest_sack back at the head of the write queue (NULL if the
 * queue is empty, since the head comes from skb_peek()).
 */
static inline void tcp_highest_sack_reset(struct sock *sk)
{
	tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
}
1773
1774
1775static inline void tcp_highest_sack_replace(struct sock *sk,
1776 struct sk_buff *old,
1777 struct sk_buff *new)
1778{
1779 if (old == tcp_highest_sack(sk))
1780 tcp_sk(sk)->highest_sack = new;
1781}
1782
1783
1784static inline bool inet_sk_transparent(const struct sock *sk)
1785{
1786 switch (sk->sk_state) {
1787 case TCP_TIME_WAIT:
1788 return inet_twsk(sk)->tw_transparent;
1789 case TCP_NEW_SYN_RECV:
1790 return inet_rsk(inet_reqsk(sk))->no_srccheck;
1791 }
1792 return inet_sk(sk)->transparent;
1793}
1794
1795
1796
1797
1798static inline bool tcp_stream_is_thin(struct tcp_sock *tp)
1799{
1800 return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp);
1801}
1802
1803
/* Iteration phase stored in struct tcp_iter_state.state. */
enum tcp_seq_states {
	TCP_SEQ_STATE_LISTENING,
	TCP_SEQ_STATE_ESTABLISHED,
};
1808
/* seq_file open handler; declared here, implemented elsewhere. */
int tcp_seq_open(struct inode *inode, struct file *file);
1810
/* Per-address-family description passed to tcp_proc_register() /
 * tcp_proc_unregister().
 */
struct tcp_seq_afinfo {
	char				*name;		/* NOTE(review): presumably the proc entry name - confirm */
	sa_family_t			family;		/* address family */
	const struct file_operations	*seq_fops;	/* file operations */
	struct seq_operations		seq_ops;	/* seq_file iterator callbacks */
};
1817
/* Cursor state kept while iterating TCP sockets through seq_file. */
struct tcp_iter_state {
	struct seq_net_private	p;		/* per-netns seq_file private data */
	sa_family_t		family;		/* address family being listed */
	enum tcp_seq_states	state;		/* current iteration phase */
	struct sock		*syn_wait_sk;
	int			bucket, offset, sbucket, num;	/* position bookkeeping */
	loff_t			last_pos;	/* last seq_file position seen */
};
1826
/* /proc registration for an address family; declared here, implemented
 * elsewhere.
 */
int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);
void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo);

/* Request-socket operation tables for IPv4 and IPv6, defined elsewhere. */
extern struct request_sock_ops tcp_request_sock_ops;
extern struct request_sock_ops tcp6_request_sock_ops;

void tcp_v4_destroy_sock(struct sock *sk);

/* GSO/GRO entry points; declared here, implemented elsewhere. */
struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
				netdev_features_t features);
struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb);
int tcp_gro_complete(struct sk_buff *skb);

void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
1841
1842static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
1843{
1844 struct net *net = sock_net((struct sock *)tp);
1845 return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
1846}
1847
1848static inline bool tcp_stream_memory_free(const struct sock *sk)
1849{
1850 const struct tcp_sock *tp = tcp_sk(sk);
1851 u32 notsent_bytes = tp->write_seq - tp->snd_nxt;
1852
1853 return notsent_bytes < tcp_notsent_lowat(tp);
1854}
1855
/* IPv4 /proc setup and teardown, available only with CONFIG_PROC_FS. */
#ifdef CONFIG_PROC_FS
int tcp4_proc_init(void);
void tcp4_proc_exit(void);
#endif

/* SYN-ACK retransmission and connection request handling; declared here,
 * implemented elsewhere.
 */
int tcp_rtx_synack(const struct sock *sk, struct request_sock *req);
int tcp_conn_request(struct request_sock_ops *rsk_ops,
		     const struct tcp_request_sock_ops *af_ops,
		     struct sock *sk, struct sk_buff *skb);
1865
1866
/* Address-family specific socket callbacks.  All members are MD5 related,
 * so the structure is empty without CONFIG_TCP_MD5SIG.
 */
struct tcp_sock_af_ops {
#ifdef CONFIG_TCP_MD5SIG
	/* Find the MD5 key to use between @sk and @addr_sk. */
	struct tcp_md5sig_key	*(*md5_lookup) (const struct sock *sk,
						const struct sock *addr_sk);
	/* Compute the MD5 signature of @skb into @location. */
	int		(*calc_md5_hash)(char *location,
					 const struct tcp_md5sig_key *md5,
					 const struct sock *sk,
					 const struct sk_buff *skb);
	/* Parse an MD5 socket option supplied from user space. */
	int		(*md5_parse)(struct sock *sk,
				     int optname,
				     char __user *optval,
				     int optlen);
#endif
};
1881
/* Address-family specific callbacks used while handling connection
 * requests (see tcp_conn_request()).  The MD5 and SYN-cookie members exist
 * only when the matching config option is enabled.
 */
struct tcp_request_sock_ops {
	u16 mss_clamp;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *(*req_md5_lookup)(const struct sock *sk,
						 const struct sock *addr_sk);
	int		(*calc_md5_hash) (char *location,
					  const struct tcp_md5sig_key *md5,
					  const struct sock *sk,
					  const struct sk_buff *skb);
#endif
	/* Initialise @req from the listener and the incoming @skb. */
	void (*init_req)(struct request_sock *req,
			 const struct sock *sk_listener,
			 struct sk_buff *skb);
#ifdef CONFIG_SYN_COOKIES
	/* Called by cookie_init_sequence() to produce the cookie sequence. */
	__u32 (*cookie_init_seq)(const struct sk_buff *skb,
				 __u16 *mss);
#endif
	struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
				       const struct request_sock *req);
	u32 (*init_seq)(const struct sk_buff *skb);
	u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
			   struct flowi *fl, struct request_sock *req,
			   struct tcp_fastopen_cookie *foc,
			   enum tcp_synack_type synack_type);
};
1908
/* Generate the initial sequence number for a SYN cookie.
 *
 * With CONFIG_SYN_COOKIES: records a SYN queue overflow on @sk, bumps the
 * LINUX_MIB_SYNCOOKIESSENT counter and returns the value produced by the
 * family's cookie_init_seq() callback.  Without it: a stub returning 0.
 */
#ifdef CONFIG_SYN_COOKIES
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
					 const struct sock *sk, struct sk_buff *skb,
					 __u16 *mss)
{
	tcp_synq_overflow(sk);
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
	return ops->cookie_init_seq(skb, mss);
}
#else
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
					 const struct sock *sk, struct sk_buff *skb,
					 __u16 *mss)
{
	return 0;
}
#endif
1926
/* Initialisation entry points; declared here, implemented elsewhere. */
int tcpv4_offload_init(void);

void tcp_v4_init(void);
void tcp_init(void);

/* RACK loss detection hooks; declared here, implemented elsewhere. */
extern void tcp_rack_mark_lost(struct sock *sk);
extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
			     u64 xmit_time);
extern void tcp_rack_reo_timeout(struct sock *sk);
1937
1938
1939static inline s64 tcp_rto_delta_us(const struct sock *sk)
1940{
1941 const struct sk_buff *skb = tcp_write_queue_head(sk);
1942 u32 rto = inet_csk(sk)->icsk_rto;
1943 u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);
1944
1945 return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
1946}
1947
1948
1949
1950
1951static inline struct ip_options_rcu *tcp_v4_save_options(struct net *net,
1952 struct sk_buff *skb)
1953{
1954 const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
1955 struct ip_options_rcu *dopt = NULL;
1956
1957 if (opt->optlen) {
1958 int opt_size = sizeof(*dopt) + opt->optlen;
1959
1960 dopt = kmalloc(opt_size, GFP_ATOMIC);
1961 if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) {
1962 kfree(dopt);
1963 dopt = NULL;
1964 }
1965 }
1966 return dopt;
1967}
1968
1969
1970
1971
1972
1973
/* True when @skb carries the pure-ACK marker, i.e. its truesize is the
 * magic value 2 written by skb_set_tcp_pure_ack().
 */
static inline bool skb_is_tcp_pure_ack(const struct sk_buff *skb)
{
	return skb->truesize == 2;
}
1978
/* Mark @skb as a pure ACK by overwriting truesize with the magic value 2
 * tested by skb_is_tcp_pure_ack().
 */
static inline void skb_set_tcp_pure_ack(struct sk_buff *skb)
{
	skb->truesize = 2;
}
1983
1984static inline int tcp_inq(struct sock *sk)
1985{
1986 struct tcp_sock *tp = tcp_sk(sk);
1987 int answ;
1988
1989 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
1990 answ = 0;
1991 } else if (sock_flag(sk, SOCK_URGINLINE) ||
1992 !tp->urg_data ||
1993 before(tp->urg_seq, tp->copied_seq) ||
1994 !before(tp->urg_seq, tp->rcv_nxt)) {
1995
1996 answ = tp->rcv_nxt - tp->copied_seq;
1997
1998
1999 if (answ && sock_flag(sk, SOCK_DONE))
2000 answ--;
2001 } else {
2002 answ = tp->urg_seq - tp->copied_seq;
2003 }
2004
2005 return answ;
2006}
2007
/* Declared here, implemented elsewhere. */
int tcp_peek_len(struct socket *sock);
2009
2010static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb)
2011{
2012 u16 segs_in;
2013
2014 segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
2015 tp->segs_in += segs_in;
2016 if (skb->len > tcp_hdrlen(skb))
2017 tp->data_segs_in += segs_in;
2018}
2019
2020
2021
2022
2023
2024
2025
2026
/* Record a dropped connection attempt on a listener: bump the socket's
 * sk_drops counter and the LINUX_MIB_LISTENDROPS statistic.
 * The const is cast away only to update the atomic sk_drops counter.
 */
static inline void tcp_listendrop(const struct sock *sk)
{
	atomic_inc(&((struct sock *)sk)->sk_drops);
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
}
2032
/* hrtimer callback for pacing; declared here, implemented elsewhere. */
enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer);
2034
2035
2036
2037
2038
/* Upper-layer-protocol (ULP) limits: size of the name buffer in
 * struct tcp_ulp_ops, a cap of 128 ULPs, and the product of the two as a
 * buffer size for name listings.
 */
#define TCP_ULP_NAME_MAX	16
#define TCP_ULP_MAX		128
#define TCP_ULP_BUF_MAX		(TCP_ULP_NAME_MAX*TCP_ULP_MAX)
2042
/* Operations table describing one upper-layer protocol; passed to
 * tcp_register_ulp() / tcp_unregister_ulp().
 */
struct tcp_ulp_ops {
	struct list_head	list;	/* list linkage */

	/* initialize ulp */
	int (*init)(struct sock *sk);
	/* cleanup ulp */
	void (*release)(struct sock *sk);

	char		name[TCP_ULP_NAME_MAX];	/* ULP name */
	struct module	*owner;			/* owning module */
};
/* ULP registration and per-socket attach/cleanup API; declared here,
 * implemented elsewhere.
 */
int tcp_register_ulp(struct tcp_ulp_ops *type);
void tcp_unregister_ulp(struct tcp_ulp_ops *type);
int tcp_set_ulp(struct sock *sk, const char *name);
void tcp_get_available_ulp(char *buf, size_t len);
void tcp_cleanup_ulp(struct sock *sk);
2059
2060
2061
2062
2063
2064
2065#ifdef CONFIG_BPF
2066static inline int tcp_call_bpf(struct sock *sk, int op)
2067{
2068 struct bpf_sock_ops_kern sock_ops;
2069 int ret;
2070
2071 if (sk_fullsock(sk))
2072 sock_owned_by_me(sk);
2073
2074 memset(&sock_ops, 0, sizeof(sock_ops));
2075 sock_ops.sk = sk;
2076 sock_ops.op = op;
2077
2078 ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
2079 if (ret == 0)
2080 ret = sock_ops.reply;
2081 else
2082 ret = -1;
2083 return ret;
2084}
2085#else
2086static inline int tcp_call_bpf(struct sock *sk, int op)
2087{
2088 return -EPERM;
2089}
2090#endif
2091
2092static inline u32 tcp_timeout_init(struct sock *sk)
2093{
2094 int timeout;
2095
2096 timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT);
2097
2098 if (timeout <= 0)
2099 timeout = TCP_TIMEOUT_INIT;
2100 return timeout;
2101}
2102
2103static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
2104{
2105 int rwnd;
2106
2107 rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT);
2108
2109 if (rwnd < 0)
2110 rwnd = 0;
2111 return rwnd;
2112}
2113
/* True only when the BPF_SOCK_OPS_NEEDS_ECN hook replies with exactly 1;
 * any other reply, including errors, yields false.
 */
static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
{
	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
}
2118#endif
2119