// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#endif
#include <net/mptcp.h>
#include "protocol.h"
#include "mib.h"

static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
				  enum linux_mptcp_mib_field field)
{
	MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}

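/* Lazily set up the MPTCP-level state this subflow needs before its first
 * (re)transmission: a token for MP_CAPABLE connects, or a local nonce and
 * local address id for MP_JOIN connects; then defer to the TCP af_ops.
 */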
static int subflow_rebuild_header(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	int local_id, err = 0;

	if (subflow->request_mptcp && !subflow->token) {
		pr_debug("subflow=%p", sk);
		err = mptcp_token_new_connect(sk);
	} else if (subflow->request_join && !subflow->local_nonce) {
		struct mptcp_sock *msk = (struct mptcp_sock *)subflow->conn;

		pr_debug("subflow=%p", sk);

		do {
			get_random_bytes(&subflow->local_nonce, sizeof(u32));
		} while (!subflow->local_nonce);

		if (subflow->local_id)
			goto out;

		local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
		if (local_id < 0)
			return -EINVAL;

		subflow->local_id = local_id;
	}

out:
	if (err)
		return err;

	return subflow->icsk_af_ops->rebuild_header(sk);
}

static void subflow_req_destructor(struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	pr_debug("subflow_req=%p", subflow_req);

	if (subflow_req->msk)
		sock_put((struct sock *)subflow_req->msk);

	if (subflow_req->mp_capable)
		mptcp_token_destroy_request(subflow_req->token);
	tcp_request_sock_ops.destructor(req);
}

static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
				  void *hmac)
{
	u8 msg[8];

	put_unaligned_be32(nonce1, &msg[0]);
	put_unaligned_be32(nonce2, &msg[4]);

	mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}

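/* Look up the msk owning the token carried by an MP_JOIN SYN, pick a local
 * address id and pre-compute the truncated HMAC to send in the SYN/ACK.
 */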
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
						     const struct sk_buff *skb)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;
	int local_id;

	msk = mptcp_token_get_sock(subflow_req->token);
	if (!msk) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
		return NULL;
	}

	local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
	if (local_id < 0) {
		sock_put((struct sock *)msk);
		return NULL;
	}
	subflow_req->local_id = local_id;

	get_random_bytes(&subflow_req->local_nonce, sizeof(u32));

	subflow_generate_hmac(msk->local_key, msk->remote_key,
			      subflow_req->local_nonce,
			      subflow_req->remote_nonce, hmac);

	subflow_req->thmac = get_unaligned_be64(hmac);
	return msk;
}

static void subflow_init_req(struct request_sock *req,
			     const struct sock *sk_listener,
			     struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;

	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

	mptcp_get_options(skb, &mp_opt);

	subflow_req->mp_capable = 0;
	subflow_req->mp_join = 0;
	subflow_req->msk = NULL;

#ifdef CONFIG_TCP_MD5SIG
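	/* no MPTCP if MD5SIG is enabled on this socket: both option sets do
	 * not fit in the TCP option space.
	 */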
	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
		return;
#endif

	if (mp_opt.mp_capable) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

		if (mp_opt.mp_join)
			return;
	} else if (mp_opt.mp_join) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
	}

	if (mp_opt.mp_capable && listener->request_mptcp) {
		int err;

		err = mptcp_token_new_request(req);
		if (err == 0)
			subflow_req->mp_capable = 1;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
	} else if (mp_opt.mp_join && listener->request_mptcp) {
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
		subflow_req->mp_join = 1;
		subflow_req->backup = mp_opt.backup;
		subflow_req->remote_id = mp_opt.join_id;
		subflow_req->token = mp_opt.token;
		subflow_req->remote_nonce = mp_opt.nonce;
		subflow_req->msk = subflow_token_join_request(req, skb);
		pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
			 subflow_req->remote_nonce, subflow_req->msk);
	}
}

static void subflow_v4_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static void subflow_v6_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}
#endif

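/* Recompute the peer's truncated HMAC from the exchanged keys and nonces and
 * compare it with the value received in the MP_JOIN SYN/ACK.
 */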
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
	u8 hmac[SHA256_DIGEST_SIZE];
	u64 thmac;

	subflow_generate_hmac(subflow->remote_key, subflow->local_key,
			      subflow->remote_nonce, subflow->local_nonce,
			      hmac);

	thmac = get_unaligned_be64(hmac);
	pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
		 subflow, subflow->token,
		 (unsigned long long)thmac,
		 (unsigned long long)subflow->thmac);

	return thmac == subflow->thmac;
}

static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_options_received mp_opt;
	struct sock *parent = subflow->conn;

	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

	if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
		inet_sk_state_store(parent, TCP_ESTABLISHED);
		parent->sk_state_change(parent);
	}
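
	/* be sure no special action on any packet other than syn-ack */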
	if (subflow->conn_finished)
		return;

	subflow->conn_finished = 1;
	subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
	pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);

	mptcp_get_options(skb, &mp_opt);
	if (subflow->request_mptcp && mp_opt.mp_capable) {
		subflow->mp_capable = 1;
		subflow->can_ack = 1;
		subflow->remote_key = mp_opt.sndr_key;
		pr_debug("subflow=%p, remote_key=%llu", subflow,
			 subflow->remote_key);
	} else if (subflow->request_join && mp_opt.mp_join) {
		subflow->mp_join = 1;
		subflow->thmac = mp_opt.thmac;
		subflow->remote_nonce = mp_opt.nonce;
		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
			 subflow->thmac, subflow->remote_nonce);
	} else {
		if (subflow->request_mptcp)
			MPTCP_INC_STATS(sock_net(sk),
					MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
		mptcp_do_fallback(sk);
		pr_fallback(mptcp_sk(subflow->conn));
	}

	if (mptcp_check_fallback(sk)) {
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
		return;
	}

	if (subflow->mp_capable) {
		pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
			 subflow->remote_key);
		mptcp_finish_connect(sk);
	} else if (subflow->mp_join) {
		u8 hmac[SHA256_DIGEST_SIZE];

		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u",
			 subflow, subflow->thmac,
			 subflow->remote_nonce);
		if (!subflow_thmac_valid(subflow)) {
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
			subflow->mp_join = 0;
			goto do_reset;
		}

		subflow_generate_hmac(subflow->local_key, subflow->remote_key,
				      subflow->local_nonce,
				      subflow->remote_nonce,
				      hmac);

		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

		if (!mptcp_finish_join(sk))
			goto do_reset;

		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
	} else {
do_reset:
		tcp_send_active_reset(sk, GFP_ATOMIC);
		tcp_done(sk);
	}
}

static struct request_sock_ops subflow_request_sock_ops;
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

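	/* Never answer to SYNs sent to broadcast or multicast */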
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&subflow_request_sock_ops,
				&subflow_request_sock_ipv4_ops,
				sk, skb);
drop:
	tcp_listendrop(sk);
	return 0;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	if (skb->protocol == htons(ETH_P_IP))
		return subflow_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	return tcp_conn_request(&subflow_request_sock_ops,
				&subflow_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0;
}
#endif

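/* validate hmac received in third ACK */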
static bool subflow_hmac_valid(const struct request_sock *req,
			       const struct mptcp_options_received *mp_opt)
{
	const struct mptcp_subflow_request_sock *subflow_req;
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;

	subflow_req = mptcp_subflow_rsk(req);
	msk = subflow_req->msk;
	if (!msk)
		return false;

	subflow_generate_hmac(msk->remote_key, msk->local_key,
			      subflow_req->remote_nonce,
			      subflow_req->local_nonce, hmac);

	return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}

static void mptcp_sock_destruct(struct sock *sk)
{
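	/* if this msk was never accepted it is being released straight from
	 * the TCP listener's request queue: move it to TCP_CLOSE and orphan
	 * it so that inet_sock_destruct() does not complain about its state.
	 */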
	if (sk->sk_state == TCP_SYN_RECV) {
		sk->sk_state = TCP_CLOSE;
		WARN_ON_ONCE(sk->sk_socket);
		sock_orphan(sk);
	}

	mptcp_token_destroy(mptcp_sk(sk)->token);
	inet_sock_destruct(sk);
}

static void mptcp_force_close(struct sock *sk)
{
	inet_sk_state_store(sk, TCP_CLOSE);
	sk_common_release(sk);
}

static void subflow_ulp_fallback(struct sock *sk,
				 struct mptcp_subflow_context *old_ctx)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	mptcp_subflow_tcp_fallback(sk, old_ctx);
	icsk->icsk_ulp_ops = NULL;
	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
	tcp_sk(sk)->is_mptcp = 0;
}

static void subflow_drop_ctx(struct sock *ssk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);

	if (!ctx)
		return;

	subflow_ulp_fallback(ssk, ctx);
	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

static struct sock *subflow_syn_recv_sock(const struct sock *sk,
					  struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst,
					  struct request_sock *req_unhash,
					  bool *own_req)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
	struct mptcp_subflow_request_sock *subflow_req;
	struct mptcp_options_received mp_opt;
	bool fallback, fallback_is_fatal;
	struct sock *new_msk = NULL;
	struct sock *child;

	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);

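	/* 'mp_opt' is parsed only on some of the paths below: clear the
	 * capable flag so the checks after child creation see a defined
	 * value.
	 */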
	mp_opt.mp_capable = 0;

	subflow_req = mptcp_subflow_rsk(req);
	fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
	fallback = !tcp_rsk(req)->is_mptcp;
	if (fallback)
		goto create_child;

	if (subflow_req->mp_capable) {
		if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
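			/* an in-window, out-of-order packet can complete the
			 * handshake without carrying the MP_CAPABLE option;
			 * try to create the msk anyway.
			 */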
			goto create_msk;
		}

		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_capable) {
			fallback = true;
			goto create_child;
		}

create_msk:
		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
		if (!new_msk)
			fallback = true;
	} else if (subflow_req->mp_join) {
		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_join ||
		    !subflow_hmac_valid(req, &mp_opt)) {
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
			fallback = true;
		}
	}

create_child:
	child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
						     req_unhash, own_req);

	if (child && *own_req) {
		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

		tcp_rsk(req)->drop_req = false;

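		/* we need to fallback on ctx allocation failure and on the
		 * pre-req checks above; a fatal fallback (failed MP_JOIN)
		 * resets the child instead.
		 */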
		if (!ctx || fallback) {
			if (fallback_is_fatal)
				goto dispose_child;

			subflow_drop_ctx(child);
			goto out;
		}

		if (ctx->mp_capable) {
			new_msk->sk_destruct = mptcp_sock_destruct;
			mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
			ctx->conn = new_msk;
			new_msk = NULL;

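			/* with OoO packets we can reach here without ingress
			 * mpc option: the fields below are only meaningful
			 * when mp_capable was actually parsed.
			 */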
			ctx->remote_key = mp_opt.sndr_key;
			ctx->fully_established = mp_opt.mp_capable;
			ctx->can_ack = mp_opt.mp_capable;
		} else if (ctx->mp_join) {
			struct mptcp_sock *owner;

			owner = subflow_req->msk;
			if (!owner)
				goto dispose_child;

			subflow_req->msk = NULL;
			ctx->conn = (struct sock *)owner;
			if (!mptcp_finish_join(child))
				goto dispose_child;

			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
			tcp_rsk(req)->drop_req = true;
		}
	}

out:
	if (unlikely(new_msk))
		mptcp_force_close(new_msk);

	WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
		     (!mptcp_subflow_ctx(child) ||
		      !mptcp_subflow_ctx(child)->conn));
	return child;

dispose_child:
	subflow_drop_ctx(child);
	tcp_rsk(req)->drop_req = true;
	tcp_send_active_reset(child, GFP_ATOMIC);
	inet_csk_prepare_for_destroy_sock(child);
	tcp_done(child);

	return child;
}

static struct inet_connection_sock_af_ops subflow_specific;

enum mapping_status {
	MAPPING_OK,
	MAPPING_INVALID,
	MAPPING_EMPTY,
	MAPPING_DATA_FIN,
	MAPPING_DUMMY
};

static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
	if ((u32)seq == (u32)old_seq)
		return old_seq;

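	/* Assume map covers data not mapped yet */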
	return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}

static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
	WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
		  ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

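/* returns true if the not-yet-consumed part of the skb fits entirely inside
 * the current DSS mapping; an skb already fully consumed at the TCP level is
 * unexpected here, hence the WARN.
 */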
static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	unsigned int skb_consumed;

	skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
	if (WARN_ON_ONCE(skb_consumed >= skb->len))
		return true;

	return skb->len - skb_consumed <= subflow->map_data_len -
					  mptcp_subflow_get_map_offset(subflow);
}

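/* the current mapping is valid only if the next byte to be read on the
 * subflow falls inside it.
 */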
static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

	if (unlikely(before(ssn, subflow->map_subflow_seq))) {
		warn_bad_map(subflow, ssn);
		return false;
	}
	if (unlikely(!before(ssn, subflow->map_subflow_seq +
			     subflow->map_data_len))) {
		warn_bad_map(subflow, ssn + skb->len);
		return false;
	}
	return true;
}

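/* Parse the DSS mapping (if any) carried by the skb at the head of the
 * subflow receive queue and update/validate the subflow mapping state.
 */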
static enum mapping_status get_mapping_status(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_ext *mpext;
	struct sk_buff *skb;
	u16 data_len;
	u64 map_seq;

	skb = skb_peek(&ssk->sk_receive_queue);
	if (!skb)
		return MAPPING_EMPTY;

	if (mptcp_check_fallback(ssk))
		return MAPPING_DUMMY;

	mpext = mptcp_get_ext(skb);
	if (!mpext || !mpext->use_map) {
		if (!subflow->map_valid && !skb->len) {
			if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
				WARN_ONCE(1, "0len seq %d:%d flags %x",
					  TCP_SKB_CB(skb)->seq,
					  TCP_SKB_CB(skb)->end_seq,
					  TCP_SKB_CB(skb)->tcp_flags);
			sk_eat_skb(ssk, skb);
			return MAPPING_EMPTY;
		}

		if (!subflow->map_valid)
			return MAPPING_INVALID;

		goto validate_seq;
	}

	pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
		 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
		 mpext->data_len, mpext->data_fin);

	data_len = mpext->data_len;
	if (data_len == 0) {
		pr_err("Infinite mapping not handled");
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
		return MAPPING_INVALID;
	}

	if (mpext->data_fin == 1) {
		if (data_len == 1) {
			pr_debug("DATA_FIN with no payload");
			if (subflow->map_valid) {
				skb_ext_del(skb, SKB_EXT_MPTCP);
				return MAPPING_OK;
			} else {
				return MAPPING_DATA_FIN;
			}
		}

		data_len--;
	}

	if (!mpext->dsn64) {
		map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
				     mpext->data_seq);
		pr_debug("expanded seq=%llu", subflow->map_seq);
	} else {
		map_seq = mpext->data_seq;
	}

	if (subflow->map_valid) {
		if (subflow->map_seq == map_seq &&
		    subflow->map_subflow_seq == mpext->subflow_seq &&
		    subflow->map_data_len == data_len) {
			skb_ext_del(skb, SKB_EXT_MPTCP);
			return MAPPING_OK;
		}

		if (skb_is_fully_mapped(ssk, skb)) {
			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
			return MAPPING_INVALID;
		}

		return MAPPING_OK;
	}

	subflow->map_seq = map_seq;
	subflow->map_subflow_seq = mpext->subflow_seq;
	subflow->map_data_len = data_len;
	subflow->map_valid = 1;
	subflow->mpc_map = mpext->mpc_map;
	pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
		 subflow->map_seq, subflow->map_subflow_seq,
		 subflow->map_data_len);

validate_seq:
	if (!validate_mapping(ssk, skb))
		return MAPPING_INVALID;

	skb_ext_del(skb, SKB_EXT_MPTCP);
	return MAPPING_OK;
}

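/* tcp_read_sock() actor that simply consumes up to desc->count bytes; used
 * below to drop subflow data that the MPTCP level cannot use.
 */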
static int subflow_read_actor(read_descriptor_t *desc,
			      struct sk_buff *skb,
			      unsigned int offset, size_t len)
{
	size_t copy_len = min(desc->count, len);

	desc->count -= copy_len;

	pr_debug("flushed %zu bytes, %zu left", copy_len, desc->count);
	return copy_len;
}

static bool subflow_check_data_avail(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	enum mapping_status status;
	struct mptcp_sock *msk;
	struct sk_buff *skb;

	pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
		 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
	if (subflow->data_avail)
		return true;

	msk = mptcp_sk(subflow->conn);
	for (;;) {
		u32 map_remaining;
		size_t delta;
		u64 ack_seq;
		u64 old_ack;

		status = get_mapping_status(ssk);
		pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
		if (status == MAPPING_INVALID) {
			ssk->sk_err = EBADMSG;
			goto fatal;
		}
		if (status == MAPPING_DUMMY) {
			__mptcp_do_fallback(msk);
			skb = skb_peek(&ssk->sk_receive_queue);
			subflow->map_valid = 1;
			subflow->map_seq = READ_ONCE(msk->ack_seq);
			subflow->map_data_len = skb->len;
			subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
						   subflow->ssn_offset;
			return true;
		}

		if (status != MAPPING_OK)
			return false;

		skb = skb_peek(&ssk->sk_receive_queue);
		if (WARN_ON_ONCE(!skb))
			return false;

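		/* if msk lacks the remote key, this subflow must provide an
		 * MP_CAPABLE-based mapping
		 */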
		if (unlikely(!READ_ONCE(msk->can_ack))) {
			if (!subflow->mpc_map) {
				ssk->sk_err = EBADMSG;
				goto fatal;
			}
			WRITE_ONCE(msk->remote_key, subflow->remote_key);
			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
			WRITE_ONCE(msk->can_ack, true);
		}

		old_ack = READ_ONCE(msk->ack_seq);
		ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
			 ack_seq);
		if (ack_seq == old_ack)
			break;

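		/* the data at the head of the queue is not aligned with the
		 * msk-level ack_seq: it is either an old retransmission or
		 * data we cannot use yet; consume and drop up to 'delta'
		 * bytes from the subflow so we can make progress.
		 */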
		map_remaining = subflow->map_data_len -
				mptcp_subflow_get_map_offset(subflow);
		if (before64(ack_seq, old_ack))
			delta = min_t(size_t, old_ack - ack_seq, map_remaining);
		else
			delta = min_t(size_t, ack_seq - old_ack, map_remaining);

		pr_debug("discarding %zu bytes, current map len=%d", delta,
			 map_remaining);
		if (delta) {
			read_descriptor_t desc = {
				.count = delta,
			};
			int ret;

			ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
			if (ret < 0) {
				ssk->sk_err = -ret;
				goto fatal;
			}
			if (ret < delta)
				return false;
			if (delta == map_remaining)
				subflow->map_valid = 0;
		}
	}
	return true;

fatal:
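	/* fatal protocol error, close the socket; smp_wmb() makes sk_err
	 * visible before the error is reported to the waiters.
	 */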
	smp_wmb();
	ssk->sk_error_report(ssk);
	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	return false;
}

bool mptcp_subflow_data_available(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sk_buff *skb;

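	/* check if current mapping is still valid */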
	if (subflow->map_valid &&
	    mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
		subflow->map_valid = 0;
		subflow->data_avail = 0;

		pr_debug("Done with mapping: seq=%u data_len=%u",
			 subflow->map_subflow_seq,
			 subflow->map_data_len);
	}

	if (!subflow_check_data_avail(sk)) {
		subflow->data_avail = 0;
		return false;
	}

	skb = skb_peek(&sk->sk_receive_queue);
	subflow->data_avail = skb &&
		before(tcp_sk(sk)->copied_seq, TCP_SKB_CB(skb)->end_seq);
	return subflow->data_avail;
}

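/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
 * not the ssk one.
 *
 * In mptcp, rwin is about the mptcp-level connection data.
 *
 * Data that is still on the ssk rx queue can thus be ignored,
 * as far as mptcp peer is concerned that data is still inflight.
 * DSS ACK is updated when skb is moved to the mptcp rx queue.
 */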
void mptcp_space(const struct sock *ssk, int *space, int *full_space)
{
	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	const struct sock *sk = subflow->conn;

	*space = tcp_space(sk);
	*full_space = tcp_full_space(sk);
}

static void subflow_data_ready(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	u16 state = 1 << inet_sk_state_load(sk);
	struct sock *parent = subflow->conn;
	struct mptcp_sock *msk;

	msk = mptcp_sk(parent);
	if (state & TCPF_LISTEN) {
		set_bit(MPTCP_DATA_READY, &msk->flags);
		parent->sk_data_ready(parent);
		return;
	}

	WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
		     !subflow->mp_join && !(state & TCPF_CLOSE));

	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);
}

static void subflow_write_space(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	sk_stream_write_space(sk);
	if (sk_stream_is_writeable(sk)) {
		set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
		smp_mb__after_atomic();
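		/* set SEND_SPACE before sk_stream_write_space reads the flag */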
		sk_stream_write_space(parent);
	}
}

static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (sk->sk_family == AF_INET6)
		return &subflow_v6_specific;
#endif
	return &subflow_specific;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_connection_sock_af_ops *target;

	target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
		 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

	if (likely(icsk->icsk_af_ops == target))
		return;

	subflow->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = target;
}
#endif

static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
				struct sockaddr_storage *addr)
{
	memset(addr, 0, sizeof(*addr));
	addr->ss_family = info->family;
	if (addr->ss_family == AF_INET) {
		struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;

		in_addr->sin_addr = info->addr;
		in_addr->sin_port = info->port;
	}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->ss_family == AF_INET6) {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;

		in6_addr->sin6_addr = info->addr6;
		in6_addr->sin6_port = info->port;
	}
#endif
}

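/* Create an additional subflow for the MPTCP socket 'sk', bind it to the
 * given local address/device and start an MP_JOIN connect towards 'remote'.
 */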
int __mptcp_subflow_connect(struct sock *sk, int ifindex,
			    const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_subflow_context *subflow;
	struct sockaddr_storage addr;
	struct socket *sf;
	u32 remote_token;
	int addrlen;
	int err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	err = mptcp_subflow_create_socket(sk, &sf);
	if (err)
		return err;

	subflow = mptcp_subflow_ctx(sf->sk);
	subflow->remote_key = msk->remote_key;
	subflow->local_key = msk->local_key;
	subflow->token = msk->token;
	mptcp_info2sockaddr(loc, &addr);

	addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (loc->family == AF_INET6)
		addrlen = sizeof(struct sockaddr_in6);
#endif
	sf->sk->sk_bound_dev_if = ifindex;
	err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
	if (err)
		goto failed;

	mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
	pr_debug("msk=%p remote_token=%u", msk, remote_token);
	subflow->remote_token = remote_token;
	subflow->local_id = loc->id;
	subflow->request_join = 1;
	subflow->request_bkup = 1;
	mptcp_info2sockaddr(remote, &addr);

	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
	if (err && err != -EINPROGRESS)
		goto failed;

	spin_lock_bh(&msk->join_list_lock);
	list_add_tail(&subflow->node, &msk->join_list);
	spin_unlock_bh(&msk->join_list_lock);

	return err;

failed:
	sock_release(sf);
	return err;
}

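/* Allocate a kernel TCP socket for a new subflow and attach the "mptcp"
 * ULP to it; the caller owns the returned socket.
 */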
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
	struct mptcp_subflow_context *subflow;
	struct net *net = sock_net(sk);
	struct socket *sf;
	int err;

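	/* un-accepted server sockets can reach here: bail out early to avoid
	 * greater trouble later on.
	 */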
	if (unlikely(!sk->sk_socket))
		return -EINVAL;

	err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
			       &sf);
	if (err)
		return err;

	lock_sock(sf->sk);

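	/* kernel sockets do not by default acquire net ref, but TCP timer
	 * needs it.
	 */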
	sf->sk->sk_net_refcnt = 1;
	get_net(net);
#ifdef CONFIG_PROC_FS
	this_cpu_add(*net->core.sock_inuse, 1);
#endif
	err = tcp_set_ulp(sf->sk, "mptcp");
	release_sock(sf->sk);

	if (err) {
		sock_release(sf);
		return err;
	}

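	/* the newly created socket really belongs to the owning MPTCP master
	 * socket, even if for additional subflows the allocation is performed
	 * by a kernel workqueue. Adjust inode references, so that the
	 * procfs/diag interfaces can properly report this one.
	 */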
	SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
	SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
	SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;

	subflow = mptcp_subflow_ctx(sf->sk);
	pr_debug("subflow=%p", subflow);

	*new_sock = sf;
	sock_hold(sk);
	subflow->conn = sk;

	return 0;
}

static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
							gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;

	ctx = kzalloc(sizeof(*ctx), priority);
	if (!ctx)
		return NULL;

	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
	INIT_LIST_HEAD(&ctx->node);

	pr_debug("subflow=%p", ctx);

	ctx->tcp_sock = sk;

	return ctx;
}

static void __subflow_state_change(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static bool subflow_is_done(const struct sock *sk)
{
	return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
}

static void subflow_state_change(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	__subflow_state_change(sk);

	if (subflow_simultaneous_connect(sk)) {
		mptcp_do_fallback(sk);
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
		pr_fallback(mptcp_sk(parent));
		subflow->conn_finished = 1;
		if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
			inet_sk_state_store(parent, TCP_ESTABLISHED);
			parent->sk_state_change(parent);
		}
	}

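	/* as recvmsg() does not acquire the subflow socket for ssk selection
	 * a fin packet carrying a DSS can be unnoticed if we don't trigger
	 * the data available machinery here.
	 */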
	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);

	if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
	    !subflow->rx_eof && subflow_is_done(sk)) {
		subflow->rx_eof = 1;
		mptcp_subflow_eof(parent);
	}
}

static int subflow_ulp_init(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;
	struct tcp_sock *tp = tcp_sk(sk);
	int err = 0;

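	/* disallow attaching ULP to a socket unless it has been
	 * created with sock_create_kern()
	 */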
	if (!sk->sk_kern_sock) {
		err = -EOPNOTSUPP;
		goto out;
	}

	ctx = subflow_create_ctx(sk, GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out;
	}

	pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);

	tp->is_mptcp = 1;
	ctx->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = subflow_default_af_ops(sk);
	ctx->tcp_data_ready = sk->sk_data_ready;
	ctx->tcp_state_change = sk->sk_state_change;
	ctx->tcp_write_space = sk->sk_write_space;
	sk->sk_data_ready = subflow_data_ready;
	sk->sk_write_space = subflow_write_space;
	sk->sk_state_change = subflow_state_change;
out:
	return err;
}

static void subflow_ulp_release(struct sock *sk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);

	if (!ctx)
		return;

	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

static void subflow_ulp_clone(const struct request_sock *req,
			      struct sock *newsk,
			      const gfp_t priority)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
	struct mptcp_subflow_context *new_ctx;

	if (!tcp_rsk(req)->is_mptcp ||
	    (!subflow_req->mp_capable && !subflow_req->mp_join)) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx = subflow_create_ctx(newsk, priority);
	if (!new_ctx) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx->conn_finished = 1;
	new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
	new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
	new_ctx->tcp_write_space = old_ctx->tcp_write_space;
	new_ctx->rel_write_seq = 1;
	new_ctx->tcp_sock = newsk;

	if (subflow_req->mp_capable) {
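		/* MPC handshake state copied from the request sock; the
		 * connection becomes fully established only after the remote
		 * key is received on the third ACK.
		 */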
		new_ctx->mp_capable = 1;
		new_ctx->local_key = subflow_req->local_key;
		new_ctx->token = subflow_req->token;
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->idsn = subflow_req->idsn;
	} else if (subflow_req->mp_join) {
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->mp_join = 1;
		new_ctx->fully_established = 1;
		new_ctx->backup = subflow_req->backup;
		new_ctx->local_id = subflow_req->local_id;
		new_ctx->token = subflow_req->token;
		new_ctx->thmac = subflow_req->thmac;
	}
}

static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
	.name		= "mptcp",
	.owner		= THIS_MODULE,
	.init		= subflow_ulp_init,
	.release	= subflow_ulp_release,
	.clone		= subflow_ulp_clone,
};

static int subflow_ops_init(struct request_sock_ops *subflow_ops)
{
	subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
	subflow_ops->slab_name = "request_sock_subflow";

	subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
					      subflow_ops->obj_size, 0,
					      SLAB_ACCOUNT |
					      SLAB_TYPESAFE_BY_RCU,
					      NULL);
	if (!subflow_ops->slab)
		return -ENOMEM;

	subflow_ops->destructor = subflow_req_destructor;

	return 0;
}

void mptcp_subflow_init(void)
{
	subflow_request_sock_ops = tcp_request_sock_ops;
	if (subflow_ops_init(&subflow_request_sock_ops) != 0)
		panic("MPTCP: failed to init subflow request sock ops\n");

	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;

	subflow_specific = ipv4_specific;
	subflow_specific.conn_request = subflow_v4_conn_request;
	subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_specific.sk_rx_dst_set = subflow_finish_connect;
	subflow_specific.rebuild_header = subflow_rebuild_header;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
	subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;

	subflow_v6_specific = ipv6_specific;
	subflow_v6_specific.conn_request = subflow_v6_conn_request;
	subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
	subflow_v6_specific.rebuild_header = subflow_rebuild_header;

	subflow_v6m_specific = subflow_v6_specific;
	subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
	subflow_v6m_specific.send_check = ipv4_specific.send_check;
	subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
	subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
	subflow_v6m_specific.net_frag_header_len = 0;
#endif

	mptcp_diag_subflow_init(&subflow_ulp_ops);

	if (tcp_register_ulp(&subflow_ulp_ops) != 0)
		panic("MPTCP: failed to register subflows to ULP\n");
}