#ifndef _TCP_H
#define _TCP_H

#define FASTRETRANS_DEBUG 1

#include <linux/list.h>
#include <linux/tcp.h>
#include <linux/bug.h>
#include <linux/slab.h>
#include <linux/cache.h>
#include <linux/percpu.h>
#include <linux/skbuff.h>
#include <linux/cryptohash.h>
#include <linux/kref.h>
#include <linux/ktime.h>

#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
#include <net/inet_hashtables.h>
#include <net/checksum.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ip.h>
#include <net/tcp_states.h>
#include <net/inet_ecn.h>
#include <net/dst.h>

#include <linux/seq_file.h>
#include <linux/memcontrol.h>
#include <linux/bpf-cgroup.h>

extern struct inet_hashinfo tcp_hashinfo;

extern struct percpu_counter tcp_orphan_count;
void tcp_time_wait(struct sock *sk, int state, int timeo);

#define MAX_TCP_HEADER (128 + MAX_HEADER)
#define MAX_TCP_OPTION_SPACE 40

#define MAX_TCP_WINDOW 32767U

#define TCP_MIN_MSS 88U

#define TCP_BASE_MSS 1024

#define TCP_PROBE_INTERVAL 600

#define TCP_PROBE_THRESHOLD 8

#define TCP_FASTRETRANS_THRESH 3

#define TCP_MAX_QUICKACKS 16U

#define TCP_MAX_WSCALE 14U

#define TCP_URG_VALID 0x0100
#define TCP_URG_NOTYET 0x0200
#define TCP_URG_READ 0x0400

#define TCP_RETR1 3

#define TCP_RETR2 15

#define TCP_SYN_RETRIES 6

#define TCP_SYNACK_RETRIES 5

#define TCP_TIMEWAIT_LEN (60*HZ)

#define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN

#define TCP_DELACK_MAX ((unsigned)(HZ/5))
#if HZ >= 100
#define TCP_DELACK_MIN ((unsigned)(HZ/25))
#define TCP_ATO_MIN ((unsigned)(HZ/25))
#else
#define TCP_DELACK_MIN 4U
#define TCP_ATO_MIN 4U
#endif
#define TCP_RTO_MAX ((unsigned)(120*HZ))
#define TCP_RTO_MIN ((unsigned)(HZ/5))
#define TCP_TIMEOUT_MIN (2U)
#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))
#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))

#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U))

#define TCP_KEEPALIVE_TIME (120*60*HZ)
#define TCP_KEEPALIVE_PROBES 9
#define TCP_KEEPALIVE_INTVL (75*HZ)

#define MAX_TCP_KEEPIDLE 32767
#define MAX_TCP_KEEPINTVL 32767
#define MAX_TCP_KEEPCNT 127
#define MAX_TCP_SYNCNT 127

#define TCP_SYNQ_INTERVAL (HZ/5)

#define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)
#define TCP_PAWS_MSL 60

#define TCP_PAWS_WINDOW 1

#define TCPOPT_NOP 1
#define TCPOPT_EOL 0
#define TCPOPT_MSS 2
#define TCPOPT_WINDOW 3
#define TCPOPT_SACK_PERM 4
#define TCPOPT_SACK 5
#define TCPOPT_TIMESTAMP 8
#define TCPOPT_MD5SIG 19
#define TCPOPT_FASTOPEN 34
#define TCPOPT_EXP 254

#define TCPOPT_FASTOPEN_MAGIC 0xF989
#define TCPOPT_SMC_MAGIC 0xE2D4C3D9

#define TCPOLEN_MSS 4
#define TCPOLEN_WINDOW 3
#define TCPOLEN_SACK_PERM 2
#define TCPOLEN_TIMESTAMP 10
#define TCPOLEN_MD5SIG 18
#define TCPOLEN_FASTOPEN_BASE 2
#define TCPOLEN_EXP_FASTOPEN_BASE 4
#define TCPOLEN_EXP_SMC_BASE 6

#define TCPOLEN_TSTAMP_ALIGNED 12
#define TCPOLEN_WSCALE_ALIGNED 4
#define TCPOLEN_SACKPERM_ALIGNED 4
#define TCPOLEN_SACK_BASE 2
#define TCPOLEN_SACK_BASE_ALIGNED 4
#define TCPOLEN_SACK_PERBLOCK 8
#define TCPOLEN_MD5SIG_ALIGNED 20
#define TCPOLEN_MSS_ALIGNED 4
#define TCPOLEN_EXP_SMC_BASE_ALIGNED 8

#define TCP_NAGLE_OFF 1
#define TCP_NAGLE_CORK 2
#define TCP_NAGLE_PUSH 4

#define TCP_THIN_LINEAR_RETRIES 6

#define TCP_INIT_CWND 10

#define TFO_CLIENT_ENABLE 1
#define TFO_SERVER_ENABLE 2
#define TFO_CLIENT_NO_COOKIE 4

#define TFO_SERVER_COOKIE_NOT_REQD 0x200

#define TFO_SERVER_WO_SOCKOPT1 0x400

extern int sysctl_tcp_max_orphans;
extern long sysctl_tcp_mem[3];

#define TCP_RACK_LOSS_DETECTION 0x1
#define TCP_RACK_STATIC_REO_WND 0x2

extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
extern unsigned long tcp_memory_pressure;

static inline bool tcp_under_memory_pressure(const struct sock *sk)
{
	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
		return true;

	return tcp_memory_pressure;
}

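/*
 * TCP sequence numbers are 32 bits and wrap, so ordering is defined by
 * signed modular arithmetic: before(s1, s2) is true when s1 precedes s2
 * even across a wrap, e.g. before(0xfffffff0, 0x10) is true.
 */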
static inline bool before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1-seq2) < 0;
}
#define after(seq2, seq1) 	before(seq1, seq2)

static inline bool between(__u32 seq1, __u32 seq2, __u32 seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}

static inline bool tcp_out_of_memory(struct sock *sk)
{
	if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
	    sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2))
		return true;
	return false;
}

void sk_forced_mem_schedule(struct sock *sk, int size);

static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
{
	struct percpu_counter *ocp = sk->sk_prot->orphan_count;
	int orphans = percpu_counter_read_positive(ocp);

	if (orphans << shift > sysctl_tcp_max_orphans) {
		orphans = percpu_counter_sum_positive(ocp);
		if (orphans << shift > sysctl_tcp_max_orphans)
			return true;
	}
	return false;
}

bool tcp_check_oom(struct sock *sk, int shift);

extern struct proto tcp_prot;

#define TCP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.tcp_statistics, field)
#define __TCP_INC_STATS(net, field)	__SNMP_INC_STATS((net)->mib.tcp_statistics, field)
#define TCP_DEC_STATS(net, field)	SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
#define TCP_ADD_STATS(net, field, val)	SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)

void tcp_tasklet_init(void);

void tcp_v4_err(struct sk_buff *skb, u32);

void tcp_shutdown(struct sock *sk, int how);

int tcp_v4_early_demux(struct sk_buff *skb);
int tcp_v4_rcv(struct sk_buff *skb);

int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
		 int flags);
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
			size_t size, int flags);
ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
			 size_t size, int flags);
void tcp_release_cb(struct sock *sk);
void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
void tcp_delack_timer_handler(struct sock *sk);
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
			 const struct tcphdr *th);
void tcp_rcv_space_adjust(struct sock *sk);
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
void tcp_twsk_destructor(struct sock *sk);
ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
			struct pipe_inode_info *pipe, size_t len,
			unsigned int flags);

static inline void tcp_dec_quickack_mode(struct sock *sk,
					 const unsigned int pkts)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ack.quick) {
		if (pkts >= icsk->icsk_ack.quick) {
			icsk->icsk_ack.quick = 0;

			icsk->icsk_ack.ato = TCP_ATO_MIN;
		} else
			icsk->icsk_ack.quick -= pkts;
	}
}

#define TCP_ECN_OK 1
#define TCP_ECN_QUEUE_CWR 2
#define TCP_ECN_DEMAND_CWR 4
#define TCP_ECN_SEEN 8

enum tcp_tw_status {
	TCP_TW_SUCCESS = 0,
	TCP_TW_RST = 1,
	TCP_TW_ACK = 2,
	TCP_TW_SYN = 3
};

enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
					      struct sk_buff *skb,
					      const struct tcphdr *th);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
			   struct request_sock *req, bool fastopen);
int tcp_child_process(struct sock *parent, struct sock *child,
		      struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk);
void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag);
void tcp_clear_retrans(struct tcp_sock *tp);
void tcp_update_metrics(struct sock *sk);
void tcp_init_metrics(struct sock *sk);
void tcp_metrics_init(void);
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
void tcp_close(struct sock *sk, long timeout);
void tcp_init_sock(struct sock *sk);
void tcp_init_transfer(struct sock *sk, int bpf_op);
__poll_t tcp_poll(struct file *file, struct socket *sock,
		  struct poll_table_struct *wait);
int tcp_getsockopt(struct sock *sk, int level, int optname,
		   char __user *optval, int __user *optlen);
int tcp_setsockopt(struct sock *sk, int level, int optname,
		   char __user *optval, unsigned int optlen);
int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen);
int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, unsigned int optlen);
void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		int flags, int *addr_len);
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
		       struct tcp_options_received *opt_rx,
		       int estab, struct tcp_fastopen_cookie *foc);
const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);

void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
void tcp_v4_mtu_reduced(struct sock *sk);
void tcp_req_err(struct sock *sk, u32 seq, bool abort);
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_create_openreq_child(const struct sock *sk,
				      struct request_sock *req,
				      struct sk_buff *skb);
void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst);
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst,
				  struct request_sock *req_unhash,
				  bool *own_req);
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int tcp_connect(struct sock *sk);
enum tcp_synack_type {
	TCP_SYNACK_NORMAL,
	TCP_SYNACK_FASTOPEN,
	TCP_SYNACK_COOKIE,
};
struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
				struct request_sock *req,
				struct tcp_fastopen_cookie *foc,
				enum tcp_synack_type synack_type);
int tcp_disconnect(struct sock *sk, int flags);

void tcp_finish_connect(struct sock *sk, struct sk_buff *skb);
int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size);
void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);

struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
				 struct request_sock *req,
				 struct dst_entry *dst, u32 tsoff);
int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
		      u32 cookie);
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
#ifdef CONFIG_SYN_COOKIES

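/*
 * Syncookies use a monotonic counter that advances once per
 * TCP_SYNCOOKIE_PERIOD (60 seconds below); only cookies minted within
 * the last MAX_SYNCOOKIE_AGE periods are accepted as valid.
 */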
#define MAX_SYNCOOKIE_AGE 2
#define TCP_SYNCOOKIE_PERIOD (60 * HZ)
#define TCP_SYNCOOKIE_VALID (MAX_SYNCOOKIE_AGE * TCP_SYNCOOKIE_PERIOD)

static inline void tcp_synq_overflow(const struct sock *sk)
{
	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
	unsigned long now = jiffies;

	if (time_after(now, last_overflow + HZ))
		tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
}

static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
{
	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;

	return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID);
}

static inline u32 tcp_cookie_time(void)
{
	u64 val = get_jiffies_64();

	do_div(val, TCP_SYNCOOKIE_PERIOD);
	return val;
}

u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
			      u16 *mssp);
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
u64 cookie_init_timestamp(struct request_sock *req);
bool cookie_timestamp_decode(const struct net *net,
			     struct tcp_options_received *opt);
bool cookie_ecn_ok(const struct tcp_options_received *opt,
		   const struct net *net, const struct dst_entry *dst);

int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
		      u32 cookie);
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);

u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
			      const struct tcphdr *th, u16 *mssp);
__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss);
#endif

u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
		     int min_tso_segs);
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
			       int nonagle);
int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
void tcp_retransmit_timer(struct sock *sk);
void tcp_xmit_retransmit_queue(struct sock *);
void tcp_simple_retransmit(struct sock *);
void tcp_enter_recovery(struct sock *sk, bool ece_ack);
int tcp_trim_head(struct sock *, struct sk_buff *, u32);
enum tcp_queue {
	TCP_FRAG_IN_WRITE_QUEUE,
	TCP_FRAG_IN_RTX_QUEUE,
};
int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
		 struct sk_buff *skb, u32 len,
		 unsigned int mss_now, gfp_t gfp);

void tcp_send_probe0(struct sock *);
void tcp_send_partial(struct sock *);
int tcp_write_wakeup(struct sock *, int mib);
void tcp_send_fin(struct sock *sk);
void tcp_send_active_reset(struct sock *sk, gfp_t priority);
int tcp_send_synack(struct sock *);
void tcp_push_one(struct sock *, unsigned int mss_now);
void tcp_send_ack(struct sock *sk);
void tcp_send_delayed_ack(struct sock *sk);
void tcp_send_loss_probe(struct sock *sk);
bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto);
void tcp_skb_collapse_tstamp(struct sk_buff *skb,
			     const struct sk_buff *next_skb);

void tcp_rearm_rto(struct sock *sk);
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
void tcp_reset(struct sock *sk);
void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
void tcp_fin(struct sock *sk);

void tcp_init_xmit_timers(struct sock *);
static inline void tcp_clear_xmit_timers(struct sock *sk)
{
	hrtimer_cancel(&tcp_sk(sk)->pacing_timer);
	inet_csk_clear_xmit_timers(sk);
}

unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
unsigned int tcp_current_mss(struct sock *sk);

static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
{
	int cutoff;

	if (tp->max_window > TCP_MSS_DEFAULT)
		cutoff = (tp->max_window >> 1);
	else
		cutoff = tp->max_window;

	if (cutoff && pktsize > cutoff)
		return max_t(int, cutoff, 68U - tp->tcp_header_len);
	else
		return pktsize;
}

void tcp_get_info(struct sock *, struct tcp_info *);

int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
		  sk_read_actor_t recv_actor);

void tcp_initialize_rcv_mss(struct sock *sk);

int tcp_mtu_to_mss(struct sock *sk, int pmtu);
int tcp_mss_to_mtu(struct sock *sk, int mss);
void tcp_mtup_init(struct sock *sk);
void tcp_init_buffer_space(struct sock *sk);

static inline void tcp_bound_rto(const struct sock *sk)
{
	if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
		inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
}

static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
{
	return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
}

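/*
 * Header prediction: pred_flags caches the expected fourth 32-bit word
 * of the TCP header (data offset, ACK bit and window), letting in-order
 * segments be recognised with a single compare on the receive fast path.
 */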
static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
{
	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
			       ntohl(TCP_FLAG_ACK) |
			       snd_wnd);
}

static inline void tcp_fast_path_on(struct tcp_sock *tp)
{
	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
}

static inline void tcp_fast_path_check(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
	    tp->rcv_wnd &&
	    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
	    !tp->urg_data)
		tcp_fast_path_on(tp);
}

static inline u32 tcp_rto_min(struct sock *sk)
{
	const struct dst_entry *dst = __sk_dst_get(sk);
	u32 rto_min = TCP_RTO_MIN;

	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
		rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
	return rto_min;
}

static inline u32 tcp_rto_min_us(struct sock *sk)
{
	return jiffies_to_usecs(tcp_rto_min(sk));
}

static inline bool tcp_ca_dst_locked(const struct dst_entry *dst)
{
	return dst_metric_locked(dst, RTAX_CC_ALGO);
}

static inline u32 tcp_min_rtt(const struct tcp_sock *tp)
{
	return minmax_get(&tp->rtt_min);
}

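/*
 * Space left in the receive window we previously advertised, i.e. how
 * much the peer may still send without overrunning the announced window.
 */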
static inline u32 tcp_receive_window(const struct tcp_sock *tp)
{
	s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;

	if (win < 0)
		win = 0;
	return (u32) win;
}

u32 __tcp_select_window(struct sock *sk);

void tcp_send_window_probe(struct sock *sk);

#define tcp_jiffies32 ((u32)jiffies)

#define TCP_TS_HZ 1000

static inline u64 tcp_clock_ns(void)
{
	return local_clock();
}

static inline u64 tcp_clock_us(void)
{
	return div_u64(tcp_clock_ns(), NSEC_PER_USEC);
}

static inline u32 tcp_time_stamp(const struct tcp_sock *tp)
{
	return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
}

static inline u32 tcp_time_stamp_raw(void)
{
	return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ);
}

static inline void tcp_mstamp_refresh(struct tcp_sock *tp)
{
	u64 val = tcp_clock_us();

	if (val > tp->tcp_mstamp)
		tp->tcp_mstamp = val;
}

static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
{
	return max_t(s64, t1 - t0, 0);
}

static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
{
	return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ);
}

#define tcp_flag_byte(th) (((u_int8_t *)th)[13])

#define TCPHDR_FIN 0x01
#define TCPHDR_SYN 0x02
#define TCPHDR_RST 0x04
#define TCPHDR_PSH 0x08
#define TCPHDR_ACK 0x10
#define TCPHDR_URG 0x20
#define TCPHDR_ECE 0x40
#define TCPHDR_CWR 0x80

#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR)

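/*
 * Per-segment TCP state kept in skb->cb[].  The layout is shared with
 * the IP layer via the header union (inet_skb_parm/inet6_skb_parm), so
 * the whole structure must fit within the 48-byte control buffer.
 */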
struct tcp_skb_cb {
	__u32		seq;
	__u32		end_seq;
	union {
		__u32		tcp_tw_isn;
		struct {
			u16	tcp_gso_segs;
			u16	tcp_gso_size;
		};
	};
	__u8		tcp_flags;

	__u8		sacked;
#define TCPCB_SACKED_ACKED	0x01
#define TCPCB_SACKED_RETRANS	0x02
#define TCPCB_LOST		0x04
#define TCPCB_TAGBITS		0x07
#define TCPCB_REPAIRED		0x10
#define TCPCB_EVER_RETRANS	0x80
#define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
				TCPCB_REPAIRED)

	__u8		ip_dsfield;
	__u8		txstamp_ack:1,
			eor:1,
			has_rxtstamp:1,
			unused:5;
	__u32		ack_seq;
	union {
		struct {
			__u32 in_flight:30,
			      is_app_limited:1,
			      unused:1;
			__u32 delivered;
			u64 first_tx_mstamp;
			u64 delivered_mstamp;
		} tx;
		union {
			struct inet_skb_parm	h4;
#if IS_ENABLED(CONFIG_IPV6)
			struct inet6_skb_parm	h6;
#endif
		} header;
		struct {
			__u32 key;
			__u32 flags;
			struct bpf_map *map;
			void *data_end;
		} bpf;
	};
};

#define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))

#if IS_ENABLED(CONFIG_IPV6)

static inline int tcp_v6_iif(const struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);

	return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif;
}

static inline int tcp_v6_sdif(const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags))
		return TCP_SKB_CB(skb)->header.h6.iif;
#endif
	return 0;
}
#endif

static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
	    skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
		return true;
#endif
	return false;
}

static inline int tcp_v4_sdif(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
		return TCP_SKB_CB(skb)->header.h4.iif;
#endif
	return 0;
}

static inline int tcp_skb_pcount(const struct sk_buff *skb)
{
	return TCP_SKB_CB(skb)->tcp_gso_segs;
}

static inline void tcp_skb_pcount_set(struct sk_buff *skb, int segs)
{
	TCP_SKB_CB(skb)->tcp_gso_segs = segs;
}

static inline void tcp_skb_pcount_add(struct sk_buff *skb, int segs)
{
	TCP_SKB_CB(skb)->tcp_gso_segs += segs;
}

static inline int tcp_skb_mss(const struct sk_buff *skb)
{
	return TCP_SKB_CB(skb)->tcp_gso_size;
}

static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb)
{
	return likely(!TCP_SKB_CB(skb)->eor);
}

enum tcp_ca_event {
	CA_EVENT_TX_START,
	CA_EVENT_CWND_RESTART,
	CA_EVENT_COMPLETE_CWR,
	CA_EVENT_LOSS,
	CA_EVENT_ECN_NO_CE,
	CA_EVENT_ECN_IS_CE,
	CA_EVENT_DELAYED_ACK,
	CA_EVENT_NON_DELAYED_ACK,
};

enum tcp_ca_ack_event_flags {
	CA_ACK_SLOWPATH = (1 << 0),
	CA_ACK_WIN_UPDATE = (1 << 1),
	CA_ACK_ECE = (1 << 2),
};

#define TCP_CA_NAME_MAX 16
#define TCP_CA_MAX 128
#define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX)

#define TCP_CA_UNSPEC 0

#define TCP_CONG_NON_RESTRICTED 0x1

#define TCP_CONG_NEEDS_ECN 0x2

union tcp_cc_info;

struct ack_sample {
	u32 pkts_acked;
	s32 rtt_us;
	u32 in_flight;
};

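/*
 * A rate sample measures how much data was delivered between the
 * transmission of a packet and the ACK that (s)acked it; it is the input
 * to congestion control modules that use the cong_control() hook below.
 */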
struct rate_sample {
	u64  prior_mstamp;
	u32  prior_delivered;
	s32  delivered;
	long interval_us;
	long rtt_us;
	int  losses;
	u32  acked_sacked;
	u32  prior_in_flight;
	bool is_app_limited;
	bool is_retrans;
	bool is_ack_delayed;
};

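/*
 * Congestion control hooks.  A module is normally expected to supply
 * ssthresh(), undo_cwnd() and one of cong_avoid()/cong_control(); the
 * remaining callbacks are optional and are invoked only when non-NULL
 * (see tcp_set_ca_state() and tcp_ca_event() below).
 */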
struct tcp_congestion_ops {
	struct list_head	list;
	u32 key;
	u32 flags;

	void (*init)(struct sock *sk);
	void (*release)(struct sock *sk);

	u32 (*ssthresh)(struct sock *sk);
	void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked);
	void (*set_state)(struct sock *sk, u8 new_state);
	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
	void (*in_ack_event)(struct sock *sk, u32 flags);
	u32  (*undo_cwnd)(struct sock *sk);
	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
	u32 (*tso_segs_goal)(struct sock *sk);
	u32 (*sndbuf_expand)(struct sock *sk);
	void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
	size_t (*get_info)(struct sock *sk, u32 ext, int *attr,
			   union tcp_cc_info *info);

	char		name[TCP_CA_NAME_MAX];
	struct module	*owner;
};

int tcp_register_congestion_control(struct tcp_congestion_ops *type);
void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);

void tcp_assign_congestion_control(struct sock *sk);
void tcp_init_congestion_control(struct sock *sk);
void tcp_cleanup_congestion_control(struct sock *sk);
int tcp_set_default_congestion_control(struct net *net, const char *name);
void tcp_get_default_congestion_control(struct net *net, char *name);
void tcp_get_available_congestion_control(char *buf, size_t len);
void tcp_get_allowed_congestion_control(char *buf, size_t len);
int tcp_set_allowed_congestion_control(char *allowed);
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit);
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);

u32 tcp_reno_ssthresh(struct sock *sk);
u32 tcp_reno_undo_cwnd(struct sock *sk);
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
extern struct tcp_congestion_ops tcp_reno;

struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca);
#ifdef CONFIG_INET
char *tcp_ca_get_name_by_key(u32 key, char *buffer);
#else
static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer)
{
	return NULL;
}
#endif

static inline bool tcp_ca_needs_ecn(const struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN;
}

static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ca_ops->set_state)
		icsk->icsk_ca_ops->set_state(sk, ca_state);
	icsk->icsk_ca_state = ca_state;
}

static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ca_ops->cwnd_event)
		icsk->icsk_ca_ops->cwnd_event(sk, event);
}

void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
			    struct rate_sample *rs);
void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
		  bool is_sack_reneg, struct rate_sample *rs);
void tcp_rate_check_app_limited(struct sock *sk);

static inline int tcp_is_sack(const struct tcp_sock *tp)
{
	return tp->rx_opt.sack_ok;
}

static inline bool tcp_is_reno(const struct tcp_sock *tp)
{
	return !tcp_is_sack(tp);
}

static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
{
	return tp->sacked_out + tp->lost_out;
}

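/*
 * Packets estimated to be in flight:
 *
 *	in_flight = packets_out - (sacked_out + lost_out) + retrans_out
 *
 * i.e. segments sent but not yet cumulatively ACKed, minus those SACKed
 * or considered lost, plus retransmissions still in the network.
 */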
static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
{
	return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
}

#define TCP_INFINITE_SSTHRESH	0x7fffffff

static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
	return tp->snd_cwnd < tp->snd_ssthresh;
}

static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
{
	return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
}

static inline bool tcp_in_cwnd_reduction(const struct sock *sk)
{
	return (TCPF_CA_CWR | TCPF_CA_Recovery) &
	       (1 << inet_csk(sk)->icsk_ca_state);
}

static inline __u32 tcp_current_ssthresh(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_in_cwnd_reduction(sk))
		return tp->snd_ssthresh;
	else
		return max(tp->snd_ssthresh,
			   ((tp->snd_cwnd >> 1) +
			    (tp->snd_cwnd >> 2)));
}

#define tcp_verify_left_out(tp)	WARN_ON(tcp_left_out(tp) > tp->packets_out)

void tcp_enter_cwr(struct sock *sk);
__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst);

static inline __u32 tcp_max_tso_deferred_mss(const struct tcp_sock *tp)
{
	return 3;
}

static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
{
	return tp->snd_una + tp->snd_wnd;
}

static inline bool tcp_is_cwnd_limited(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_in_slow_start(tp))
		return tp->snd_cwnd < 2 * tp->max_packets_out;

	return tp->is_cwnd_limited;
}

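/*
 * Zero-window probe timer: the base timeout is the current RTO clamped
 * to at least TCP_RTO_MIN, scaled by 2^icsk_backoff for successive
 * probes and capped at max_when by tcp_probe0_when().
 */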
static inline unsigned long tcp_probe0_base(const struct sock *sk)
{
	return max_t(unsigned long, inet_csk(sk)->icsk_rto, TCP_RTO_MIN);
}

static inline unsigned long tcp_probe0_when(const struct sock *sk,
					    unsigned long max_when)
{
	u64 when = (u64)tcp_probe0_base(sk) << inet_csk(sk)->icsk_backoff;

	return (unsigned long)min_t(u64, when, max_when);
}

static inline void tcp_check_probe_timer(struct sock *sk)
{
	if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
					  tcp_probe0_base(sk), TCP_RTO_MAX);
}

static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
{
	tp->snd_wl1 = seq;
}

static inline void tcp_update_wl(struct tcp_sock *tp, u32 seq)
{
	tp->snd_wl1 = seq;
}

static inline __sum16 tcp_v4_check(int len, __be32 saddr,
				   __be32 daddr, __wsum base)
{
	return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
}

static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
{
	return __skb_checksum_complete(skb);
}

static inline bool tcp_checksum_complete(struct sk_buff *skb)
{
	return !skb_csum_unnecessary(skb) &&
		__tcp_checksum_complete(skb);
}

bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
int tcp_filter(struct sock *sk, struct sk_buff *skb);

#undef STATE_TRACE

#ifdef STATE_TRACE
static const char *statename[]={
	"Unused","Established","Syn Sent","Syn Recv",
	"Fin Wait 1","Fin Wait 2","Time Wait", "Close",
	"Close Wait","Last ACK","Listen","Closing"
};
#endif
void tcp_set_state(struct sock *sk, int state);

void tcp_done(struct sock *sk);

int tcp_abort(struct sock *sk, int err);

static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
{
	rx_opt->dsack = 0;
	rx_opt->num_sacks = 0;
}

u32 tcp_default_init_rwnd(u32 mss);
void tcp_cwnd_restart(struct sock *sk, s32 delta);

static inline void tcp_slow_start_after_idle_check(struct sock *sk)
{
	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
	struct tcp_sock *tp = tcp_sk(sk);
	s32 delta;

	if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
	    ca_ops->cong_control)
		return;
	delta = tcp_jiffies32 - tp->lsndtime;
	if (delta > inet_csk(sk)->icsk_rto)
		tcp_cwnd_restart(sk, delta);
}

void tcp_select_initial_window(const struct sock *sk, int __space,
			       __u32 mss, __u32 *rcv_wnd,
			       __u32 *window_clamp, int wscale_ok,
			       __u8 *rcv_wscale, __u32 init_rcv_wnd);

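/*
 * Convert free receive-buffer space into an advertisable window.  With a
 * positive tcp_adv_win_scale, space >> scale bytes are held back as
 * overhead (application latency, skb metadata); a non-positive value
 * advertises only space >> -scale.
 */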
static inline int tcp_win_from_space(const struct sock *sk, int space)
{
	int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;

	return tcp_adv_win_scale <= 0 ?
		(space>>(-tcp_adv_win_scale)) :
		space - (space>>tcp_adv_win_scale);
}

static inline int tcp_space(const struct sock *sk)
{
	return tcp_win_from_space(sk, sk->sk_rcvbuf -
				  atomic_read(&sk->sk_rmem_alloc));
}

static inline int tcp_full_space(const struct sock *sk)
{
	return tcp_win_from_space(sk, sk->sk_rcvbuf);
}

extern void tcp_openreq_init_rwin(struct request_sock *req,
				  const struct sock *sk_listener,
				  const struct dst_entry *dst);

void tcp_enter_memory_pressure(struct sock *sk);
void tcp_leave_memory_pressure(struct sock *sk);

static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
	struct net *net = sock_net((struct sock *)tp);

	return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
}

static inline int keepalive_time_when(const struct tcp_sock *tp)
{
	struct net *net = sock_net((struct sock *)tp);

	return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
}

static inline int keepalive_probes(const struct tcp_sock *tp)
{
	struct net *net = sock_net((struct sock *)tp);

	return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
}

static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
{
	const struct inet_connection_sock *icsk = &tp->inet_conn;

	return min_t(u32, tcp_jiffies32 - icsk->icsk_ack.lrcvtime,
		     tcp_jiffies32 - tp->rcv_tstamp);
}

static inline int tcp_fin_time(const struct sock *sk)
{
	int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
	const int rto = inet_csk(sk)->icsk_rto;

	if (fin_timeout < (rto << 2) - (rto >> 1))
		fin_timeout = (rto << 2) - (rto >> 1);

	return fin_timeout;
}

static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
				  int paws_win)
{
	if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
		return true;
	if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
		return true;

	if (!rx_opt->ts_recent)
		return true;
	return false;
}

static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
				   int rst)
{
	if (tcp_paws_check(rx_opt, 0))
		return false;

	if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
		return false;
	return true;
}

bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
			  int mib_idx, u32 *last_oow_ack_time);

static inline void tcp_mib_init(struct net *net)
{
	TCP_ADD_STATS(net, TCP_MIB_RTOALGORITHM, 1);
	TCP_ADD_STATS(net, TCP_MIB_RTOMIN, TCP_RTO_MIN*1000/HZ);
	TCP_ADD_STATS(net, TCP_MIB_RTOMAX, TCP_RTO_MAX*1000/HZ);
	TCP_ADD_STATS(net, TCP_MIB_MAXCONN, -1);
}

static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp)
{
	tp->lost_skb_hint = NULL;
}

static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
{
	tcp_clear_retrans_hints_partial(tp);
	tp->retransmit_skb_hint = NULL;
}

union tcp_md5_addr {
	struct in_addr  a4;
#if IS_ENABLED(CONFIG_IPV6)
	struct in6_addr	a6;
#endif
};

struct tcp_md5sig_key {
	struct hlist_node	node;
	u8			keylen;
	u8			family;
	union tcp_md5_addr	addr;
	u8			prefixlen;
	u8			key[TCP_MD5SIG_MAXKEYLEN];
	struct rcu_head		rcu;
};

struct tcp_md5sig_info {
	struct hlist_head	head;
	struct rcu_head		rcu;
};

struct tcp4_pseudohdr {
	__be32		saddr;
	__be32		daddr;
	__u8		pad;
	__u8		protocol;
	__be16		len;
};

struct tcp6_pseudohdr {
	struct in6_addr	saddr;
	struct in6_addr daddr;
	__be32		len;
	__be32		protocol;
};

union tcp_md5sum_block {
	struct tcp4_pseudohdr ip4;
#if IS_ENABLED(CONFIG_IPV6)
	struct tcp6_pseudohdr ip6;
#endif
};

struct tcp_md5sig_pool {
	struct ahash_request	*md5_req;
	void			*scratch;
};

int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk, const struct sk_buff *skb);
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
		   gfp_t gfp);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, u8 prefixlen);
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
					 const struct sock *addr_sk);

#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family);
#define tcp_twsk_md5_key(twsk)	((twsk)->tw_md5_key)
#else
static inline struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	return NULL;
}
#define tcp_twsk_md5_key(twsk)	NULL
#endif

bool tcp_alloc_md5sig_pool(void);

struct tcp_md5sig_pool *tcp_get_md5sig_pool(void);
static inline void tcp_put_md5sig_pool(void)
{
	local_bh_enable();
}

int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *,
			  unsigned int header_len);
int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
		     const struct tcp_md5sig_key *key);

void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
			    struct tcp_fastopen_cookie *cookie);
void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
			    struct tcp_fastopen_cookie *cookie, bool syn_lost,
			    u16 try_exp);
struct tcp_fastopen_request {
	struct tcp_fastopen_cookie	cookie;
	struct msghdr			*data;
	size_t				size;
	int				copied;
};
void tcp_free_fastopen_req(struct tcp_sock *tp);
void tcp_fastopen_destroy_cipher(struct sock *sk);
void tcp_fastopen_ctx_destroy(struct net *net);
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
			      void *key, unsigned int len);
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      const struct dst_entry *dst);
void tcp_fastopen_init_key_once(struct net *net);
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
			       struct tcp_fastopen_cookie *cookie);
bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
#define TCP_FASTOPEN_KEY_LENGTH 16

struct tcp_fastopen_context {
	struct crypto_cipher	*tfm;
	__u8			key[TCP_FASTOPEN_KEY_LENGTH];
	struct rcu_head		rcu;
};

extern unsigned int sysctl_tcp_fastopen_blackhole_timeout;
void tcp_fastopen_active_disable(struct sock *sk);
bool tcp_fastopen_active_should_disable(struct sock *sk);
void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);

enum tcp_chrono {
	TCP_CHRONO_UNSPEC,
	TCP_CHRONO_BUSY,
	TCP_CHRONO_RWND_LIMITED,
	TCP_CHRONO_SNDBUF_LIMITED,
	__TCP_CHRONO_MAX,
};

void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type);
void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type);

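/*
 * skb->tcp_tsorted_anchor shares storage with skb->destructor and
 * skb->_skb_refdst, so those fields are cleared before the anchor is
 * used (tcp_skb_tsorted_anchor_cleanup()), or saved and restored around
 * a block that needs a valid dst (the save/restore macros below).
 */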
static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb)
{
	skb->destructor = NULL;
	skb->_skb_refdst = 0UL;
}

#define tcp_skb_tsorted_save(skb) {		\
	unsigned long _save = skb->_skb_refdst;	\
	skb->_skb_refdst = 0UL;

#define tcp_skb_tsorted_restore(skb)		\
	skb->_skb_refdst = _save;		\
}

void tcp_write_queue_purge(struct sock *sk);

static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
{
	return skb_rb_first(&sk->tcp_rtx_queue);
}

static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
{
	return skb_peek(&sk->sk_write_queue);
}

static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk)
{
	return skb_peek_tail(&sk->sk_write_queue);
}

#define tcp_for_write_queue_from_safe(skb, tmp, sk)			\
	skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)

static inline struct sk_buff *tcp_send_head(const struct sock *sk)
{
	return skb_peek(&sk->sk_write_queue);
}

static inline bool tcp_skb_is_last(const struct sock *sk,
				   const struct sk_buff *skb)
{
	return skb_queue_is_last(&sk->sk_write_queue, skb);
}

static inline bool tcp_write_queue_empty(const struct sock *sk)
{
	return skb_queue_empty(&sk->sk_write_queue);
}

static inline bool tcp_rtx_queue_empty(const struct sock *sk)
{
	return RB_EMPTY_ROOT(&sk->tcp_rtx_queue);
}

static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk)
{
	return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
}

static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
{
	if (tcp_write_queue_empty(sk))
		tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
}

static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
{
	__skb_queue_tail(&sk->sk_write_queue, skb);
}

static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
{
	__tcp_add_write_queue_tail(sk, skb);

	if (sk->sk_write_queue.next == skb)
		tcp_chrono_start(sk, TCP_CHRONO_BUSY);
}

static inline void tcp_insert_write_queue_before(struct sk_buff *new,
						 struct sk_buff *skb,
						 struct sock *sk)
{
	__skb_queue_before(&sk->sk_write_queue, skb, new);
}

static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
{
	tcp_skb_tsorted_anchor_cleanup(skb);
	__skb_unlink(skb, &sk->sk_write_queue);
}

void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb);

static inline void tcp_rtx_queue_unlink(struct sk_buff *skb, struct sock *sk)
{
	tcp_skb_tsorted_anchor_cleanup(skb);
	rb_erase(&skb->rbnode, &sk->tcp_rtx_queue);
}

static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct sock *sk)
{
	list_del(&skb->tcp_tsorted_anchor);
	tcp_rtx_queue_unlink(skb, sk);
	sk_wmem_free_skb(sk, skb);
}

static inline void tcp_push_pending_frames(struct sock *sk)
{
	if (tcp_send_head(sk)) {
		struct tcp_sock *tp = tcp_sk(sk);

		__tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle);
	}
}

static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
{
	if (!tp->sacked_out)
		return tp->snd_una;

	if (tp->highest_sack == NULL)
		return tp->snd_nxt;

	return TCP_SKB_CB(tp->highest_sack)->seq;
}

static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb)
{
	tcp_sk(sk)->highest_sack = skb_rb_next(skb);
}

static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
{
	return tcp_sk(sk)->highest_sack;
}

static inline void tcp_highest_sack_reset(struct sock *sk)
{
	tcp_sk(sk)->highest_sack = tcp_rtx_queue_head(sk);
}

static inline void tcp_highest_sack_replace(struct sock *sk,
					    struct sk_buff *old,
					    struct sk_buff *new)
{
	if (old == tcp_highest_sack(sk))
		tcp_sk(sk)->highest_sack = new;
}

static inline bool inet_sk_transparent(const struct sock *sk)
{
	switch (sk->sk_state) {
	case TCP_TIME_WAIT:
		return inet_twsk(sk)->tw_transparent;
	case TCP_NEW_SYN_RECV:
		return inet_rsk(inet_reqsk(sk))->no_srccheck;
	}
	return inet_sk(sk)->transparent;
}

static inline bool tcp_stream_is_thin(struct tcp_sock *tp)
{
	return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp);
}

enum tcp_seq_states {
	TCP_SEQ_STATE_LISTENING,
	TCP_SEQ_STATE_ESTABLISHED,
};

int tcp_seq_open(struct inode *inode, struct file *file);

struct tcp_seq_afinfo {
	char				*name;
	sa_family_t			family;
	const struct file_operations	*seq_fops;
	struct seq_operations		seq_ops;
};

struct tcp_iter_state {
	struct seq_net_private	p;
	sa_family_t		family;
	enum tcp_seq_states	state;
	struct sock		*syn_wait_sk;
	int			bucket, offset, sbucket, num;
	loff_t			last_pos;
};

int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);
void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo);

extern struct request_sock_ops tcp_request_sock_ops;
extern struct request_sock_ops tcp6_request_sock_ops;

void tcp_v4_destroy_sock(struct sock *sk);

struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
				netdev_features_t features);
struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb);
int tcp_gro_complete(struct sk_buff *skb);

void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);

static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
	struct net *net = sock_net((struct sock *)tp);
	return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
}

static inline bool tcp_stream_memory_free(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u32 notsent_bytes = tp->write_seq - tp->snd_nxt;

	return notsent_bytes < tcp_notsent_lowat(tp);
}

#ifdef CONFIG_PROC_FS
int tcp4_proc_init(void);
void tcp4_proc_exit(void);
#endif

int tcp_rtx_synack(const struct sock *sk, struct request_sock *req);
int tcp_conn_request(struct request_sock_ops *rsk_ops,
		     const struct tcp_request_sock_ops *af_ops,
		     struct sock *sk, struct sk_buff *skb);

struct tcp_sock_af_ops {
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key	*(*md5_lookup) (const struct sock *sk,
						const struct sock *addr_sk);
	int		(*calc_md5_hash)(char *location,
					 const struct tcp_md5sig_key *md5,
					 const struct sock *sk,
					 const struct sk_buff *skb);
	int		(*md5_parse)(struct sock *sk,
				     int optname,
				     char __user *optval,
				     int optlen);
#endif
};

struct tcp_request_sock_ops {
	u16 mss_clamp;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *(*req_md5_lookup)(const struct sock *sk,
						 const struct sock *addr_sk);
	int		(*calc_md5_hash) (char *location,
					  const struct tcp_md5sig_key *md5,
					  const struct sock *sk,
					  const struct sk_buff *skb);
#endif
	void (*init_req)(struct request_sock *req,
			 const struct sock *sk_listener,
			 struct sk_buff *skb);
#ifdef CONFIG_SYN_COOKIES
	__u32 (*cookie_init_seq)(const struct sk_buff *skb,
				 __u16 *mss);
#endif
	struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
				       const struct request_sock *req);
	u32 (*init_seq)(const struct sk_buff *skb);
	u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
	int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
			   struct flowi *fl, struct request_sock *req,
			   struct tcp_fastopen_cookie *foc,
			   enum tcp_synack_type synack_type);
};

#ifdef CONFIG_SYN_COOKIES
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
					 const struct sock *sk, struct sk_buff *skb,
					 __u16 *mss)
{
	tcp_synq_overflow(sk);
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
	return ops->cookie_init_seq(skb, mss);
}
#else
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
					 const struct sock *sk, struct sk_buff *skb,
					 __u16 *mss)
{
	return 0;
}
#endif

int tcpv4_offload_init(void);

void tcp_v4_init(void);
void tcp_init(void);

extern void tcp_rack_mark_lost(struct sock *sk);
extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
			     u64 xmit_time);
extern void tcp_rack_reo_timeout(struct sock *sk);
extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs);

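/*
 * Microseconds remaining until the RTO of the earliest outstanding skb
 * (the head of the retransmit queue) fires; negative once it is overdue.
 */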
static inline s64 tcp_rto_delta_us(const struct sock *sk)
{
	const struct sk_buff *skb = tcp_rtx_queue_head(sk);
	u32 rto = inet_csk(sk)->icsk_rto;
	u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);

	return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
}

static inline struct ip_options_rcu *tcp_v4_save_options(struct net *net,
							 struct sk_buff *skb)
{
	const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
	struct ip_options_rcu *dopt = NULL;

	if (opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) {
			kfree(dopt);
			dopt = NULL;
		}
	}
	return dopt;
}

static inline bool skb_is_tcp_pure_ack(const struct sk_buff *skb)
{
	return skb->truesize == 2;
}

static inline void skb_set_tcp_pure_ack(struct sk_buff *skb)
{
	skb->truesize = 2;
}

static inline int tcp_inq(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int answ;

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		answ = 0;
	} else if (sock_flag(sk, SOCK_URGINLINE) ||
		   !tp->urg_data ||
		   before(tp->urg_seq, tp->copied_seq) ||
		   !before(tp->urg_seq, tp->rcv_nxt)) {

		answ = tp->rcv_nxt - tp->copied_seq;

		if (answ && sock_flag(sk, SOCK_DONE))
			answ--;
	} else {
		answ = tp->urg_seq - tp->copied_seq;
	}

	return answ;
}

int tcp_peek_len(struct socket *sock);

static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb)
{
	u16 segs_in;

	segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
	tp->segs_in += segs_in;
	if (skb->len > tcp_hdrlen(skb))
		tp->data_segs_in += segs_in;
}

static inline void tcp_listendrop(const struct sock *sk)
{
	atomic_inc(&((struct sock *)sk)->sk_drops);
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
}

enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer);

#define TCP_ULP_NAME_MAX	16
#define TCP_ULP_MAX		128
#define TCP_ULP_BUF_MAX		(TCP_ULP_NAME_MAX*TCP_ULP_MAX)

enum {
	TCP_ULP_TLS,
	TCP_ULP_BPF,
};

struct tcp_ulp_ops {
	struct list_head	list;

	int (*init)(struct sock *sk);
	void (*release)(struct sock *sk);

	int		uid;
	char		name[TCP_ULP_NAME_MAX];
	bool		user_visible;
	struct module	*owner;
};
int tcp_register_ulp(struct tcp_ulp_ops *type);
void tcp_unregister_ulp(struct tcp_ulp_ops *type);
int tcp_set_ulp(struct sock *sk, const char *name);
int tcp_set_ulp_id(struct sock *sk, const int ulp);
void tcp_get_available_ulp(char *buf, size_t len);
void tcp_cleanup_ulp(struct sock *sk);

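/*
 * Run a BPF_PROG_TYPE_SOCK_OPS program attached to this socket's cgroup.
 * Returns the value the program stored in sock_ops.reply when the run
 * succeeds and -1 otherwise; the !CONFIG_BPF stubs return -EPERM.
 * Typical callers pass an op such as BPF_SOCK_OPS_TIMEOUT_INIT (see
 * tcp_timeout_init() below).
 */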
#ifdef CONFIG_BPF
static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
{
	struct bpf_sock_ops_kern sock_ops;
	int ret;

	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
	if (sk_fullsock(sk)) {
		sock_ops.is_fullsock = 1;
		sock_owned_by_me(sk);
	}

	sock_ops.sk = sk;
	sock_ops.op = op;
	if (nargs > 0)
		memcpy(sock_ops.args, args, nargs * sizeof(*args));

	ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
	if (ret == 0)
		ret = sock_ops.reply;
	else
		ret = -1;
	return ret;
}

static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
{
	u32 args[2] = {arg1, arg2};

	return tcp_call_bpf(sk, op, 2, args);
}

static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
				    u32 arg3)
{
	u32 args[3] = {arg1, arg2, arg3};

	return tcp_call_bpf(sk, op, 3, args);
}

#else
static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
{
	return -EPERM;
}

static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
{
	return -EPERM;
}

static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
				    u32 arg3)
{
	return -EPERM;
}

#endif

static inline u32 tcp_timeout_init(struct sock *sk)
{
	int timeout;

	timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT, 0, NULL);

	if (timeout <= 0)
		timeout = TCP_TIMEOUT_INIT;
	return timeout;
}

static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
{
	int rwnd;

	rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT, 0, NULL);

	if (rwnd < 0)
		rwnd = 0;
	return rwnd;
}

static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
{
	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
}

#if IS_ENABLED(CONFIG_SMC)
extern struct static_key_false tcp_have_smc;
#endif
#endif /* _TCP_H */