1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88#include <linux/types.h>
89#include <linux/bitops.h>
90#include <linux/cred.h>
91#include <linux/init.h>
92#include <linux/io.h>
93#include <linux/kernel.h>
94#include <linux/sched/signal.h>
95#include <linux/kmod.h>
96#include <linux/list.h>
97#include <linux/miscdevice.h>
98#include <linux/module.h>
99#include <linux/mutex.h>
100#include <linux/net.h>
101#include <linux/poll.h>
102#include <linux/random.h>
103#include <linux/skbuff.h>
104#include <linux/smp.h>
105#include <linux/socket.h>
106#include <linux/stddef.h>
107#include <linux/unistd.h>
108#include <linux/wait.h>
109#include <linux/workqueue.h>
110#include <net/sock.h>
111#include <net/af_vsock.h>
112
/* Forward declarations of helpers that are defined further down. */
static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
static void vsock_sk_destruct(struct sock *sk);
static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
116
117
118static struct proto vsock_proto = {
119 .name = "AF_VSOCK",
120 .owner = THIS_MODULE,
121 .obj_size = sizeof(struct vsock_sock),
122};
123
124
125
126
/* Connect timeout given to sockets created without a parent (2 * HZ,
 * i.e. two seconds expressed in scheduler ticks).
 */
#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)

/* Initial buffer size and its upper/lower bounds for sockets created
 * without a parent.
 */
#define VSOCK_DEFAULT_BUFFER_SIZE (256 * 1024)
#define VSOCK_DEFAULT_BUFFER_MAX_SIZE (256 * 1024)
#define VSOCK_DEFAULT_BUFFER_MIN_SIZE 128
132
133
/* Registered transports; any of them may be NULL.
 * vsock_assign_transport() picks among them as follows.
 */

/* Stream transport used when none of the other stream rules match. */
static const struct vsock_transport *transport_h2g;

/* Stream transport used when the remote CID is <= VMADDR_CID_HOST, when
 * no h2g transport exists, or when VMADDR_FLAG_TO_HOST is set.
 */
static const struct vsock_transport *transport_g2h;

/* Transport used for every SOCK_DGRAM socket. */
static const struct vsock_transport *transport_dgram;

/* Stream transport used when vsock_use_local_transport() says so. */
static const struct vsock_transport *transport_local;

/* NOTE(review): presumably serializes (un)registration of the transports
 * above; it is not taken anywhere in the part of the file visible here.
 */
static DEFINE_MUTEX(vsock_register_mutex);
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
/* Number of candidate ports tried when auto-binding a stream socket. */
#define MAX_PORT_RETRIES 24

/* Bound-socket table: bucket chosen by local port.  The table has one
 * extra slot past the hashed buckets, which holds the unbound sockets.
 */
#define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE)
#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)])
#define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE])

/* Connected-socket table: bucket chosen from the source CID xor'ed with
 * the destination port.  For a socket, "src" is its remote address and
 * "dst" its local address.
 */
#define VSOCK_CONN_HASH(src, dst) \
	(((src)->svm_cid ^ (dst)->svm_port) % VSOCK_HASH_SIZE)
#define vsock_connected_sockets(src, dst) \
	(&vsock_connected_table[VSOCK_CONN_HASH(src, dst)])
#define vsock_connected_sockets_vsk(vsk) \
	vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr)
172
173struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
174EXPORT_SYMBOL_GPL(vsock_bind_table);
175struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
176EXPORT_SYMBOL_GPL(vsock_connected_table);
177DEFINE_SPINLOCK(vsock_table_lock);
178EXPORT_SYMBOL_GPL(vsock_table_lock);
179
180
181static int vsock_auto_bind(struct vsock_sock *vsk)
182{
183 struct sock *sk = sk_vsock(vsk);
184 struct sockaddr_vm local_addr;
185
186 if (vsock_addr_bound(&vsk->local_addr))
187 return 0;
188 vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
189 return __vsock_bind(sk, &local_addr);
190}
191
192static void vsock_init_tables(void)
193{
194 int i;
195
196 for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++)
197 INIT_LIST_HEAD(&vsock_bind_table[i]);
198
199 for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
200 INIT_LIST_HEAD(&vsock_connected_table[i]);
201}
202
203static void __vsock_insert_bound(struct list_head *list,
204 struct vsock_sock *vsk)
205{
206 sock_hold(&vsk->sk);
207 list_add(&vsk->bound_table, list);
208}
209
210static void __vsock_insert_connected(struct list_head *list,
211 struct vsock_sock *vsk)
212{
213 sock_hold(&vsk->sk);
214 list_add(&vsk->connected_table, list);
215}
216
217static void __vsock_remove_bound(struct vsock_sock *vsk)
218{
219 list_del_init(&vsk->bound_table);
220 sock_put(&vsk->sk);
221}
222
223static void __vsock_remove_connected(struct vsock_sock *vsk)
224{
225 list_del_init(&vsk->connected_table);
226 sock_put(&vsk->sk);
227}
228
229static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr)
230{
231 struct vsock_sock *vsk;
232
233 list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) {
234 if (vsock_addr_equals_addr(addr, &vsk->local_addr))
235 return sk_vsock(vsk);
236
237 if (addr->svm_port == vsk->local_addr.svm_port &&
238 (vsk->local_addr.svm_cid == VMADDR_CID_ANY ||
239 addr->svm_cid == VMADDR_CID_ANY))
240 return sk_vsock(vsk);
241 }
242
243 return NULL;
244}
245
246static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
247 struct sockaddr_vm *dst)
248{
249 struct vsock_sock *vsk;
250
251 list_for_each_entry(vsk, vsock_connected_sockets(src, dst),
252 connected_table) {
253 if (vsock_addr_equals_addr(src, &vsk->remote_addr) &&
254 dst->svm_port == vsk->local_addr.svm_port) {
255 return sk_vsock(vsk);
256 }
257 }
258
259 return NULL;
260}
261
262static void vsock_insert_unbound(struct vsock_sock *vsk)
263{
264 spin_lock_bh(&vsock_table_lock);
265 __vsock_insert_bound(vsock_unbound_sockets, vsk);
266 spin_unlock_bh(&vsock_table_lock);
267}
268
269void vsock_insert_connected(struct vsock_sock *vsk)
270{
271 struct list_head *list = vsock_connected_sockets(
272 &vsk->remote_addr, &vsk->local_addr);
273
274 spin_lock_bh(&vsock_table_lock);
275 __vsock_insert_connected(list, vsk);
276 spin_unlock_bh(&vsock_table_lock);
277}
278EXPORT_SYMBOL_GPL(vsock_insert_connected);
279
280void vsock_remove_bound(struct vsock_sock *vsk)
281{
282 spin_lock_bh(&vsock_table_lock);
283 if (__vsock_in_bound_table(vsk))
284 __vsock_remove_bound(vsk);
285 spin_unlock_bh(&vsock_table_lock);
286}
287EXPORT_SYMBOL_GPL(vsock_remove_bound);
288
289void vsock_remove_connected(struct vsock_sock *vsk)
290{
291 spin_lock_bh(&vsock_table_lock);
292 if (__vsock_in_connected_table(vsk))
293 __vsock_remove_connected(vsk);
294 spin_unlock_bh(&vsock_table_lock);
295}
296EXPORT_SYMBOL_GPL(vsock_remove_connected);
297
298struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr)
299{
300 struct sock *sk;
301
302 spin_lock_bh(&vsock_table_lock);
303 sk = __vsock_find_bound_socket(addr);
304 if (sk)
305 sock_hold(sk);
306
307 spin_unlock_bh(&vsock_table_lock);
308
309 return sk;
310}
311EXPORT_SYMBOL_GPL(vsock_find_bound_socket);
312
313struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
314 struct sockaddr_vm *dst)
315{
316 struct sock *sk;
317
318 spin_lock_bh(&vsock_table_lock);
319 sk = __vsock_find_connected_socket(src, dst);
320 if (sk)
321 sock_hold(sk);
322
323 spin_unlock_bh(&vsock_table_lock);
324
325 return sk;
326}
327EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
328
/* Take @vsk out of both lookup tables: first the bound table, then the
 * connected table.  Either step is a no-op if the socket is not linked.
 */
void vsock_remove_sock(struct vsock_sock *vsk)
{
	vsock_remove_bound(vsk);

	vsock_remove_connected(vsk);
}
EXPORT_SYMBOL_GPL(vsock_remove_sock);
335
336void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
337{
338 int i;
339
340 spin_lock_bh(&vsock_table_lock);
341
342 for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
343 struct vsock_sock *vsk;
344 list_for_each_entry(vsk, &vsock_connected_table[i],
345 connected_table)
346 fn(sk_vsock(vsk));
347 }
348
349 spin_unlock_bh(&vsock_table_lock);
350}
351EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket);
352
353void vsock_add_pending(struct sock *listener, struct sock *pending)
354{
355 struct vsock_sock *vlistener;
356 struct vsock_sock *vpending;
357
358 vlistener = vsock_sk(listener);
359 vpending = vsock_sk(pending);
360
361 sock_hold(pending);
362 sock_hold(listener);
363 list_add_tail(&vpending->pending_links, &vlistener->pending_links);
364}
365EXPORT_SYMBOL_GPL(vsock_add_pending);
366
367void vsock_remove_pending(struct sock *listener, struct sock *pending)
368{
369 struct vsock_sock *vpending = vsock_sk(pending);
370
371 list_del_init(&vpending->pending_links);
372 sock_put(listener);
373 sock_put(pending);
374}
375EXPORT_SYMBOL_GPL(vsock_remove_pending);
376
377void vsock_enqueue_accept(struct sock *listener, struct sock *connected)
378{
379 struct vsock_sock *vlistener;
380 struct vsock_sock *vconnected;
381
382 vlistener = vsock_sk(listener);
383 vconnected = vsock_sk(connected);
384
385 sock_hold(connected);
386 sock_hold(listener);
387 list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue);
388}
389EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
390
391static bool vsock_use_local_transport(unsigned int remote_cid)
392{
393 if (!transport_local)
394 return false;
395
396 if (remote_cid == VMADDR_CID_LOCAL)
397 return true;
398
399 if (transport_g2h) {
400 return remote_cid == transport_g2h->get_local_cid();
401 } else {
402 return remote_cid == VMADDR_CID_HOST;
403 }
404}
405
406static void vsock_deassign_transport(struct vsock_sock *vsk)
407{
408 if (!vsk->transport)
409 return;
410
411 vsk->transport->destruct(vsk);
412 module_put(vsk->transport->module);
413 vsk->transport = NULL;
414}
415
416
417
418
419
420
421
422
423
424
425
426
427
428int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
429{
430 const struct vsock_transport *new_transport;
431 struct sock *sk = sk_vsock(vsk);
432 unsigned int remote_cid = vsk->remote_addr.svm_cid;
433 __u8 remote_flags;
434 int ret;
435
436
437
438
439
440
441
442
443
444 if (psk && vsk->local_addr.svm_cid > VMADDR_CID_HOST &&
445 vsk->remote_addr.svm_cid > VMADDR_CID_HOST)
446 vsk->remote_addr.svm_flags |= VMADDR_FLAG_TO_HOST;
447
448 remote_flags = vsk->remote_addr.svm_flags;
449
450 switch (sk->sk_type) {
451 case SOCK_DGRAM:
452 new_transport = transport_dgram;
453 break;
454 case SOCK_STREAM:
455 if (vsock_use_local_transport(remote_cid))
456 new_transport = transport_local;
457 else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g ||
458 (remote_flags & VMADDR_FLAG_TO_HOST))
459 new_transport = transport_g2h;
460 else
461 new_transport = transport_h2g;
462 break;
463 default:
464 return -ESOCKTNOSUPPORT;
465 }
466
467 if (vsk->transport) {
468 if (vsk->transport == new_transport)
469 return 0;
470
471
472
473
474
475
476
477 vsk->transport->release(vsk);
478 vsock_deassign_transport(vsk);
479 }
480
481
482
483
484 if (!new_transport || !try_module_get(new_transport->module))
485 return -ENODEV;
486
487 ret = new_transport->init(vsk, psk);
488 if (ret) {
489 module_put(new_transport->module);
490 return ret;
491 }
492
493 vsk->transport = new_transport;
494
495 return 0;
496}
497EXPORT_SYMBOL_GPL(vsock_assign_transport);
498
499bool vsock_find_cid(unsigned int cid)
500{
501 if (transport_g2h && cid == transport_g2h->get_local_cid())
502 return true;
503
504 if (transport_h2g && cid == VMADDR_CID_HOST)
505 return true;
506
507 if (transport_local && cid == VMADDR_CID_LOCAL)
508 return true;
509
510 return false;
511}
512EXPORT_SYMBOL_GPL(vsock_find_cid);
513
514static struct sock *vsock_dequeue_accept(struct sock *listener)
515{
516 struct vsock_sock *vlistener;
517 struct vsock_sock *vconnected;
518
519 vlistener = vsock_sk(listener);
520
521 if (list_empty(&vlistener->accept_queue))
522 return NULL;
523
524 vconnected = list_entry(vlistener->accept_queue.next,
525 struct vsock_sock, accept_queue);
526
527 list_del_init(&vconnected->accept_queue);
528 sock_put(listener);
529
530
531
532
533 return sk_vsock(vconnected);
534}
535
536static bool vsock_is_accept_queue_empty(struct sock *sk)
537{
538 struct vsock_sock *vsk = vsock_sk(sk);
539 return list_empty(&vsk->accept_queue);
540}
541
542static bool vsock_is_pending(struct sock *sk)
543{
544 struct vsock_sock *vsk = vsock_sk(sk);
545 return !list_empty(&vsk->pending_links);
546}
547
548static int vsock_send_shutdown(struct sock *sk, int mode)
549{
550 struct vsock_sock *vsk = vsock_sk(sk);
551
552 if (!vsk->transport)
553 return -ENODEV;
554
555 return vsk->transport->shutdown(vsk, mode);
556}
557
558static void vsock_pending_work(struct work_struct *work)
559{
560 struct sock *sk;
561 struct sock *listener;
562 struct vsock_sock *vsk;
563 bool cleanup;
564
565 vsk = container_of(work, struct vsock_sock, pending_work.work);
566 sk = sk_vsock(vsk);
567 listener = vsk->listener;
568 cleanup = true;
569
570 lock_sock(listener);
571 lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
572
573 if (vsock_is_pending(sk)) {
574 vsock_remove_pending(listener, sk);
575
576 sk_acceptq_removed(listener);
577 } else if (!vsk->rejected) {
578
579
580
581
582
583 cleanup = false;
584 goto out;
585 }
586
587
588
589
590
591 vsock_remove_connected(vsk);
592
593 sk->sk_state = TCP_CLOSE;
594
595out:
596 release_sock(sk);
597 release_sock(listener);
598 if (cleanup)
599 sock_put(sk);
600
601 sock_put(sk);
602 sock_put(listener);
603}
604
605
606
607static int __vsock_bind_stream(struct vsock_sock *vsk,
608 struct sockaddr_vm *addr)
609{
610 static u32 port;
611 struct sockaddr_vm new_addr;
612
613 if (!port)
614 port = LAST_RESERVED_PORT + 1 +
615 prandom_u32_max(U32_MAX - LAST_RESERVED_PORT);
616
617 vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port);
618
619 if (addr->svm_port == VMADDR_PORT_ANY) {
620 bool found = false;
621 unsigned int i;
622
623 for (i = 0; i < MAX_PORT_RETRIES; i++) {
624 if (port <= LAST_RESERVED_PORT)
625 port = LAST_RESERVED_PORT + 1;
626
627 new_addr.svm_port = port++;
628
629 if (!__vsock_find_bound_socket(&new_addr)) {
630 found = true;
631 break;
632 }
633 }
634
635 if (!found)
636 return -EADDRNOTAVAIL;
637 } else {
638
639
640
641 if (addr->svm_port <= LAST_RESERVED_PORT &&
642 !capable(CAP_NET_BIND_SERVICE)) {
643 return -EACCES;
644 }
645
646 if (__vsock_find_bound_socket(&new_addr))
647 return -EADDRINUSE;
648 }
649
650 vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port);
651
652
653
654
655
656 __vsock_remove_bound(vsk);
657 __vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk);
658
659 return 0;
660}
661
662static int __vsock_bind_dgram(struct vsock_sock *vsk,
663 struct sockaddr_vm *addr)
664{
665 return vsk->transport->dgram_bind(vsk, addr);
666}
667
668static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
669{
670 struct vsock_sock *vsk = vsock_sk(sk);
671 int retval;
672
673
674 if (vsock_addr_bound(&vsk->local_addr))
675 return -EINVAL;
676
677
678
679
680
681
682 if (addr->svm_cid != VMADDR_CID_ANY && !vsock_find_cid(addr->svm_cid))
683 return -EADDRNOTAVAIL;
684
685 switch (sk->sk_socket->type) {
686 case SOCK_STREAM:
687 spin_lock_bh(&vsock_table_lock);
688 retval = __vsock_bind_stream(vsk, addr);
689 spin_unlock_bh(&vsock_table_lock);
690 break;
691
692 case SOCK_DGRAM:
693 retval = __vsock_bind_dgram(vsk, addr);
694 break;
695
696 default:
697 retval = -EINVAL;
698 break;
699 }
700
701 return retval;
702}
703
704static void vsock_connect_timeout(struct work_struct *work);
705
706static struct sock *__vsock_create(struct net *net,
707 struct socket *sock,
708 struct sock *parent,
709 gfp_t priority,
710 unsigned short type,
711 int kern)
712{
713 struct sock *sk;
714 struct vsock_sock *psk;
715 struct vsock_sock *vsk;
716
717 sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto, kern);
718 if (!sk)
719 return NULL;
720
721 sock_init_data(sock, sk);
722
723
724
725
726
727 if (!sock)
728 sk->sk_type = type;
729
730 vsk = vsock_sk(sk);
731 vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
732 vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
733
734 sk->sk_destruct = vsock_sk_destruct;
735 sk->sk_backlog_rcv = vsock_queue_rcv_skb;
736 sock_reset_flag(sk, SOCK_DONE);
737
738 INIT_LIST_HEAD(&vsk->bound_table);
739 INIT_LIST_HEAD(&vsk->connected_table);
740 vsk->listener = NULL;
741 INIT_LIST_HEAD(&vsk->pending_links);
742 INIT_LIST_HEAD(&vsk->accept_queue);
743 vsk->rejected = false;
744 vsk->sent_request = false;
745 vsk->ignore_connecting_rst = false;
746 vsk->peer_shutdown = 0;
747 INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout);
748 INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work);
749
750 psk = parent ? vsock_sk(parent) : NULL;
751 if (parent) {
752 vsk->trusted = psk->trusted;
753 vsk->owner = get_cred(psk->owner);
754 vsk->connect_timeout = psk->connect_timeout;
755 vsk->buffer_size = psk->buffer_size;
756 vsk->buffer_min_size = psk->buffer_min_size;
757 vsk->buffer_max_size = psk->buffer_max_size;
758 security_sk_clone(parent, sk);
759 } else {
760 vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN);
761 vsk->owner = get_current_cred();
762 vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
763 vsk->buffer_size = VSOCK_DEFAULT_BUFFER_SIZE;
764 vsk->buffer_min_size = VSOCK_DEFAULT_BUFFER_MIN_SIZE;
765 vsk->buffer_max_size = VSOCK_DEFAULT_BUFFER_MAX_SIZE;
766 }
767
768 return sk;
769}
770
771static void __vsock_release(struct sock *sk, int level)
772{
773 if (sk) {
774 struct sock *pending;
775 struct vsock_sock *vsk;
776
777 vsk = vsock_sk(sk);
778 pending = NULL;
779
780
781
782
783
784
785 lock_sock_nested(sk, level);
786
787 if (vsk->transport)
788 vsk->transport->release(vsk);
789 else if (sk->sk_type == SOCK_STREAM)
790 vsock_remove_sock(vsk);
791
792 sock_orphan(sk);
793 sk->sk_shutdown = SHUTDOWN_MASK;
794
795 skb_queue_purge(&sk->sk_receive_queue);
796
797
798 while ((pending = vsock_dequeue_accept(sk)) != NULL) {
799 __vsock_release(pending, SINGLE_DEPTH_NESTING);
800 sock_put(pending);
801 }
802
803 release_sock(sk);
804 sock_put(sk);
805 }
806}
807
808static void vsock_sk_destruct(struct sock *sk)
809{
810 struct vsock_sock *vsk = vsock_sk(sk);
811
812 vsock_deassign_transport(vsk);
813
814
815
816
817 vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
818 vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
819
820 put_cred(vsk->owner);
821}
822
/* sk_backlog_rcv callback installed by __vsock_create(): queue @skb on
 * @sk's receive queue, freeing the buffer if queuing fails.
 *
 * Returns the result of sock_queue_rcv_skb().
 */
static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err = sock_queue_rcv_skb(sk, skb);

	if (!err)
		return 0;

	kfree_skb(skb);
	return err;
}
833
834struct sock *vsock_create_connected(struct sock *parent)
835{
836 return __vsock_create(sock_net(parent), NULL, parent, GFP_KERNEL,
837 parent->sk_type, 0);
838}
839EXPORT_SYMBOL_GPL(vsock_create_connected);
840
841s64 vsock_stream_has_data(struct vsock_sock *vsk)
842{
843 return vsk->transport->stream_has_data(vsk);
844}
845EXPORT_SYMBOL_GPL(vsock_stream_has_data);
846
847s64 vsock_stream_has_space(struct vsock_sock *vsk)
848{
849 return vsk->transport->stream_has_space(vsk);
850}
851EXPORT_SYMBOL_GPL(vsock_stream_has_space);
852
853static int vsock_release(struct socket *sock)
854{
855 __vsock_release(sock->sk, 0);
856 sock->sk = NULL;
857 sock->state = SS_FREE;
858
859 return 0;
860}
861
862static int
863vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
864{
865 int err;
866 struct sock *sk;
867 struct sockaddr_vm *vm_addr;
868
869 sk = sock->sk;
870
871 if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0)
872 return -EINVAL;
873
874 lock_sock(sk);
875 err = __vsock_bind(sk, vm_addr);
876 release_sock(sk);
877
878 return err;
879}
880
881static int vsock_getname(struct socket *sock,
882 struct sockaddr *addr, int peer)
883{
884 int err;
885 struct sock *sk;
886 struct vsock_sock *vsk;
887 struct sockaddr_vm *vm_addr;
888
889 sk = sock->sk;
890 vsk = vsock_sk(sk);
891 err = 0;
892
893 lock_sock(sk);
894
895 if (peer) {
896 if (sock->state != SS_CONNECTED) {
897 err = -ENOTCONN;
898 goto out;
899 }
900 vm_addr = &vsk->remote_addr;
901 } else {
902 vm_addr = &vsk->local_addr;
903 }
904
905 if (!vm_addr) {
906 err = -EINVAL;
907 goto out;
908 }
909
910
911
912
913
914
915 BUILD_BUG_ON(sizeof(*vm_addr) > 128);
916 memcpy(addr, vm_addr, sizeof(*vm_addr));
917 err = sizeof(*vm_addr);
918
919out:
920 release_sock(sk);
921 return err;
922}
923
924static int vsock_shutdown(struct socket *sock, int mode)
925{
926 int err;
927 struct sock *sk;
928
929
930
931
932
933
934
935 mode++;
936
937 if ((mode & ~SHUTDOWN_MASK) || !mode)
938 return -EINVAL;
939
940
941
942
943
944
945
946 sk = sock->sk;
947
948 lock_sock(sk);
949 if (sock->state == SS_UNCONNECTED) {
950 err = -ENOTCONN;
951 if (sk->sk_type == SOCK_STREAM)
952 goto out;
953 } else {
954 sock->state = SS_DISCONNECTING;
955 err = 0;
956 }
957
958
959 mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN);
960 if (mode) {
961 sk->sk_shutdown |= mode;
962 sk->sk_state_change(sk);
963
964 if (sk->sk_type == SOCK_STREAM) {
965 sock_reset_flag(sk, SOCK_DONE);
966 vsock_send_shutdown(sk, mode);
967 }
968 }
969
970out:
971 release_sock(sk);
972 return err;
973}
974
975static __poll_t vsock_poll(struct file *file, struct socket *sock,
976 poll_table *wait)
977{
978 struct sock *sk;
979 __poll_t mask;
980 struct vsock_sock *vsk;
981
982 sk = sock->sk;
983 vsk = vsock_sk(sk);
984
985 poll_wait(file, sk_sleep(sk), wait);
986 mask = 0;
987
988 if (sk->sk_err)
989
990 mask |= EPOLLERR;
991
992
993
994
995 if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
996 ((sk->sk_shutdown & SEND_SHUTDOWN) &&
997 (vsk->peer_shutdown & SEND_SHUTDOWN))) {
998 mask |= EPOLLHUP;
999 }
1000
1001 if (sk->sk_shutdown & RCV_SHUTDOWN ||
1002 vsk->peer_shutdown & SEND_SHUTDOWN) {
1003 mask |= EPOLLRDHUP;
1004 }
1005
1006 if (sock->type == SOCK_DGRAM) {
1007
1008
1009
1010
1011 if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
1012 (sk->sk_shutdown & RCV_SHUTDOWN)) {
1013 mask |= EPOLLIN | EPOLLRDNORM;
1014 }
1015
1016 if (!(sk->sk_shutdown & SEND_SHUTDOWN))
1017 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
1018
1019 } else if (sock->type == SOCK_STREAM) {
1020 const struct vsock_transport *transport;
1021
1022 lock_sock(sk);
1023
1024 transport = vsk->transport;
1025
1026
1027
1028
1029 if (sk->sk_state == TCP_LISTEN
1030 && !vsock_is_accept_queue_empty(sk))
1031 mask |= EPOLLIN | EPOLLRDNORM;
1032
1033
1034 if (transport && transport->stream_is_active(vsk) &&
1035 !(sk->sk_shutdown & RCV_SHUTDOWN)) {
1036 bool data_ready_now = false;
1037 int ret = transport->notify_poll_in(
1038 vsk, 1, &data_ready_now);
1039 if (ret < 0) {
1040 mask |= EPOLLERR;
1041 } else {
1042 if (data_ready_now)
1043 mask |= EPOLLIN | EPOLLRDNORM;
1044
1045 }
1046 }
1047
1048
1049
1050
1051
1052 if (sk->sk_shutdown & RCV_SHUTDOWN ||
1053 vsk->peer_shutdown & SEND_SHUTDOWN) {
1054 mask |= EPOLLIN | EPOLLRDNORM;
1055 }
1056
1057
1058 if (transport && sk->sk_state == TCP_ESTABLISHED) {
1059 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
1060 bool space_avail_now = false;
1061 int ret = transport->notify_poll_out(
1062 vsk, 1, &space_avail_now);
1063 if (ret < 0) {
1064 mask |= EPOLLERR;
1065 } else {
1066 if (space_avail_now)
1067
1068
1069
1070 mask |= EPOLLOUT | EPOLLWRNORM;
1071
1072 }
1073 }
1074 }
1075
1076
1077
1078
1079
1080 if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) {
1081 if (!(sk->sk_shutdown & SEND_SHUTDOWN))
1082 mask |= EPOLLOUT | EPOLLWRNORM;
1083
1084 }
1085
1086 release_sock(sk);
1087 }
1088
1089 return mask;
1090}
1091
1092static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1093 size_t len)
1094{
1095 int err;
1096 struct sock *sk;
1097 struct vsock_sock *vsk;
1098 struct sockaddr_vm *remote_addr;
1099 const struct vsock_transport *transport;
1100
1101 if (msg->msg_flags & MSG_OOB)
1102 return -EOPNOTSUPP;
1103
1104
1105 err = 0;
1106 sk = sock->sk;
1107 vsk = vsock_sk(sk);
1108
1109 lock_sock(sk);
1110
1111 transport = vsk->transport;
1112
1113 err = vsock_auto_bind(vsk);
1114 if (err)
1115 goto out;
1116
1117
1118
1119
1120
1121 if (msg->msg_name &&
1122 vsock_addr_cast(msg->msg_name, msg->msg_namelen,
1123 &remote_addr) == 0) {
1124
1125
1126
1127
1128 if (remote_addr->svm_cid == VMADDR_CID_ANY)
1129 remote_addr->svm_cid = transport->get_local_cid();
1130
1131 if (!vsock_addr_bound(remote_addr)) {
1132 err = -EINVAL;
1133 goto out;
1134 }
1135 } else if (sock->state == SS_CONNECTED) {
1136 remote_addr = &vsk->remote_addr;
1137
1138 if (remote_addr->svm_cid == VMADDR_CID_ANY)
1139 remote_addr->svm_cid = transport->get_local_cid();
1140
1141
1142
1143
1144 if (!vsock_addr_bound(&vsk->remote_addr)) {
1145 err = -EINVAL;
1146 goto out;
1147 }
1148 } else {
1149 err = -EINVAL;
1150 goto out;
1151 }
1152
1153 if (!transport->dgram_allow(remote_addr->svm_cid,
1154 remote_addr->svm_port)) {
1155 err = -EINVAL;
1156 goto out;
1157 }
1158
1159 err = transport->dgram_enqueue(vsk, remote_addr, msg, len);
1160
1161out:
1162 release_sock(sk);
1163 return err;
1164}
1165
1166static int vsock_dgram_connect(struct socket *sock,
1167 struct sockaddr *addr, int addr_len, int flags)
1168{
1169 int err;
1170 struct sock *sk;
1171 struct vsock_sock *vsk;
1172 struct sockaddr_vm *remote_addr;
1173
1174 sk = sock->sk;
1175 vsk = vsock_sk(sk);
1176
1177 err = vsock_addr_cast(addr, addr_len, &remote_addr);
1178 if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) {
1179 lock_sock(sk);
1180 vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY,
1181 VMADDR_PORT_ANY);
1182 sock->state = SS_UNCONNECTED;
1183 release_sock(sk);
1184 return 0;
1185 } else if (err != 0)
1186 return -EINVAL;
1187
1188 lock_sock(sk);
1189
1190 err = vsock_auto_bind(vsk);
1191 if (err)
1192 goto out;
1193
1194 if (!vsk->transport->dgram_allow(remote_addr->svm_cid,
1195 remote_addr->svm_port)) {
1196 err = -EINVAL;
1197 goto out;
1198 }
1199
1200 memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr));
1201 sock->state = SS_CONNECTED;
1202
1203out:
1204 release_sock(sk);
1205 return err;
1206}
1207
1208static int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
1209 size_t len, int flags)
1210{
1211 struct vsock_sock *vsk = vsock_sk(sock->sk);
1212
1213 return vsk->transport->dgram_dequeue(vsk, msg, len, flags);
1214}
1215
1216static const struct proto_ops vsock_dgram_ops = {
1217 .family = PF_VSOCK,
1218 .owner = THIS_MODULE,
1219 .release = vsock_release,
1220 .bind = vsock_bind,
1221 .connect = vsock_dgram_connect,
1222 .socketpair = sock_no_socketpair,
1223 .accept = sock_no_accept,
1224 .getname = vsock_getname,
1225 .poll = vsock_poll,
1226 .ioctl = sock_no_ioctl,
1227 .listen = sock_no_listen,
1228 .shutdown = vsock_shutdown,
1229 .setsockopt = sock_no_setsockopt,
1230 .getsockopt = sock_no_getsockopt,
1231 .sendmsg = vsock_dgram_sendmsg,
1232 .recvmsg = vsock_dgram_recvmsg,
1233 .mmap = sock_no_mmap,
1234 .sendpage = sock_no_sendpage,
1235};
1236
1237static int vsock_transport_cancel_pkt(struct vsock_sock *vsk)
1238{
1239 const struct vsock_transport *transport = vsk->transport;
1240
1241 if (!transport || !transport->cancel_pkt)
1242 return -EOPNOTSUPP;
1243
1244 return transport->cancel_pkt(vsk);
1245}
1246
1247static void vsock_connect_timeout(struct work_struct *work)
1248{
1249 struct sock *sk;
1250 struct vsock_sock *vsk;
1251
1252 vsk = container_of(work, struct vsock_sock, connect_work.work);
1253 sk = sk_vsock(vsk);
1254
1255 lock_sock(sk);
1256 if (sk->sk_state == TCP_SYN_SENT &&
1257 (sk->sk_shutdown != SHUTDOWN_MASK)) {
1258 sk->sk_state = TCP_CLOSE;
1259 sk->sk_err = ETIMEDOUT;
1260 sk->sk_error_report(sk);
1261 vsock_transport_cancel_pkt(vsk);
1262 }
1263 release_sock(sk);
1264
1265 sock_put(sk);
1266}
1267
1268static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
1269 int addr_len, int flags)
1270{
1271 int err;
1272 struct sock *sk;
1273 struct vsock_sock *vsk;
1274 const struct vsock_transport *transport;
1275 struct sockaddr_vm *remote_addr;
1276 long timeout;
1277 DEFINE_WAIT(wait);
1278
1279 err = 0;
1280 sk = sock->sk;
1281 vsk = vsock_sk(sk);
1282
1283 lock_sock(sk);
1284
1285
1286 switch (sock->state) {
1287 case SS_CONNECTED:
1288 err = -EISCONN;
1289 goto out;
1290 case SS_DISCONNECTING:
1291 err = -EINVAL;
1292 goto out;
1293 case SS_CONNECTING:
1294
1295
1296
1297
1298
1299
1300 err = -EALREADY;
1301 break;
1302 default:
1303 if ((sk->sk_state == TCP_LISTEN) ||
1304 vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
1305 err = -EINVAL;
1306 goto out;
1307 }
1308
1309
1310 memcpy(&vsk->remote_addr, remote_addr,
1311 sizeof(vsk->remote_addr));
1312
1313 err = vsock_assign_transport(vsk, NULL);
1314 if (err)
1315 goto out;
1316
1317 transport = vsk->transport;
1318
1319
1320
1321
1322 if (!transport ||
1323 !transport->stream_allow(remote_addr->svm_cid,
1324 remote_addr->svm_port)) {
1325 err = -ENETUNREACH;
1326 goto out;
1327 }
1328
1329 err = vsock_auto_bind(vsk);
1330 if (err)
1331 goto out;
1332
1333 sk->sk_state = TCP_SYN_SENT;
1334
1335 err = transport->connect(vsk);
1336 if (err < 0)
1337 goto out;
1338
1339
1340
1341
1342 sock->state = SS_CONNECTING;
1343 err = -EINPROGRESS;
1344 }
1345
1346
1347
1348
1349
1350 timeout = vsk->connect_timeout;
1351 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1352
1353 while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) {
1354 if (flags & O_NONBLOCK) {
1355
1356
1357
1358
1359
1360
1361 sock_hold(sk);
1362 schedule_delayed_work(&vsk->connect_work, timeout);
1363
1364
1365 goto out_wait;
1366 }
1367
1368 release_sock(sk);
1369 timeout = schedule_timeout(timeout);
1370 lock_sock(sk);
1371
1372 if (signal_pending(current)) {
1373 err = sock_intr_errno(timeout);
1374 sk->sk_state = TCP_CLOSE;
1375 sock->state = SS_UNCONNECTED;
1376 vsock_transport_cancel_pkt(vsk);
1377 goto out_wait;
1378 } else if (timeout == 0) {
1379 err = -ETIMEDOUT;
1380 sk->sk_state = TCP_CLOSE;
1381 sock->state = SS_UNCONNECTED;
1382 vsock_transport_cancel_pkt(vsk);
1383 goto out_wait;
1384 }
1385
1386 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1387 }
1388
1389 if (sk->sk_err) {
1390 err = -sk->sk_err;
1391 sk->sk_state = TCP_CLOSE;
1392 sock->state = SS_UNCONNECTED;
1393 } else {
1394 err = 0;
1395 }
1396
1397out_wait:
1398 finish_wait(sk_sleep(sk), &wait);
1399out:
1400 release_sock(sk);
1401 return err;
1402}
1403
1404static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
1405 bool kern)
1406{
1407 struct sock *listener;
1408 int err;
1409 struct sock *connected;
1410 struct vsock_sock *vconnected;
1411 long timeout;
1412 DEFINE_WAIT(wait);
1413
1414 err = 0;
1415 listener = sock->sk;
1416
1417 lock_sock(listener);
1418
1419 if (sock->type != SOCK_STREAM) {
1420 err = -EOPNOTSUPP;
1421 goto out;
1422 }
1423
1424 if (listener->sk_state != TCP_LISTEN) {
1425 err = -EINVAL;
1426 goto out;
1427 }
1428
1429
1430
1431
1432 timeout = sock_rcvtimeo(listener, flags & O_NONBLOCK);
1433 prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
1434
1435 while ((connected = vsock_dequeue_accept(listener)) == NULL &&
1436 listener->sk_err == 0) {
1437 release_sock(listener);
1438 timeout = schedule_timeout(timeout);
1439 finish_wait(sk_sleep(listener), &wait);
1440 lock_sock(listener);
1441
1442 if (signal_pending(current)) {
1443 err = sock_intr_errno(timeout);
1444 goto out;
1445 } else if (timeout == 0) {
1446 err = -EAGAIN;
1447 goto out;
1448 }
1449
1450 prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
1451 }
1452 finish_wait(sk_sleep(listener), &wait);
1453
1454 if (listener->sk_err)
1455 err = -listener->sk_err;
1456
1457 if (connected) {
1458 sk_acceptq_removed(listener);
1459
1460 lock_sock_nested(connected, SINGLE_DEPTH_NESTING);
1461 vconnected = vsock_sk(connected);
1462
1463
1464
1465
1466
1467
1468
1469
1470 if (err) {
1471 vconnected->rejected = true;
1472 } else {
1473 newsock->state = SS_CONNECTED;
1474 sock_graft(connected, newsock);
1475 }
1476
1477 release_sock(connected);
1478 sock_put(connected);
1479 }
1480
1481out:
1482 release_sock(listener);
1483 return err;
1484}
1485
1486static int vsock_listen(struct socket *sock, int backlog)
1487{
1488 int err;
1489 struct sock *sk;
1490 struct vsock_sock *vsk;
1491
1492 sk = sock->sk;
1493
1494 lock_sock(sk);
1495
1496 if (sock->type != SOCK_STREAM) {
1497 err = -EOPNOTSUPP;
1498 goto out;
1499 }
1500
1501 if (sock->state != SS_UNCONNECTED) {
1502 err = -EINVAL;
1503 goto out;
1504 }
1505
1506 vsk = vsock_sk(sk);
1507
1508 if (!vsock_addr_bound(&vsk->local_addr)) {
1509 err = -EINVAL;
1510 goto out;
1511 }
1512
1513 sk->sk_max_ack_backlog = backlog;
1514 sk->sk_state = TCP_LISTEN;
1515
1516 err = 0;
1517
1518out:
1519 release_sock(sk);
1520 return err;
1521}
1522
1523static void vsock_update_buffer_size(struct vsock_sock *vsk,
1524 const struct vsock_transport *transport,
1525 u64 val)
1526{
1527 if (val > vsk->buffer_max_size)
1528 val = vsk->buffer_max_size;
1529
1530 if (val < vsk->buffer_min_size)
1531 val = vsk->buffer_min_size;
1532
1533 if (val != vsk->buffer_size &&
1534 transport && transport->notify_buffer_size)
1535 transport->notify_buffer_size(vsk, &val);
1536
1537 vsk->buffer_size = val;
1538}
1539
1540static int vsock_stream_setsockopt(struct socket *sock,
1541 int level,
1542 int optname,
1543 char __user *optval,
1544 unsigned int optlen)
1545{
1546 int err;
1547 struct sock *sk;
1548 struct vsock_sock *vsk;
1549 const struct vsock_transport *transport;
1550 u64 val;
1551
1552 if (level != AF_VSOCK)
1553 return -ENOPROTOOPT;
1554
1555#define COPY_IN(_v) \
1556 do { \
1557 if (optlen < sizeof(_v)) { \
1558 err = -EINVAL; \
1559 goto exit; \
1560 } \
1561 if (copy_from_user(&_v, optval, sizeof(_v)) != 0) { \
1562 err = -EFAULT; \
1563 goto exit; \
1564 } \
1565 } while (0)
1566
1567 err = 0;
1568 sk = sock->sk;
1569 vsk = vsock_sk(sk);
1570
1571 lock_sock(sk);
1572
1573 transport = vsk->transport;
1574
1575 switch (optname) {
1576 case SO_VM_SOCKETS_BUFFER_SIZE:
1577 COPY_IN(val);
1578 vsock_update_buffer_size(vsk, transport, val);
1579 break;
1580
1581 case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
1582 COPY_IN(val);
1583 vsk->buffer_max_size = val;
1584 vsock_update_buffer_size(vsk, transport, vsk->buffer_size);
1585 break;
1586
1587 case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
1588 COPY_IN(val);
1589 vsk->buffer_min_size = val;
1590 vsock_update_buffer_size(vsk, transport, vsk->buffer_size);
1591 break;
1592
1593 case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
1594 struct __kernel_old_timeval tv;
1595 COPY_IN(tv);
1596 if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC &&
1597 tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) {
1598 vsk->connect_timeout = tv.tv_sec * HZ +
1599 DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ));
1600 if (vsk->connect_timeout == 0)
1601 vsk->connect_timeout =
1602 VSOCK_DEFAULT_CONNECT_TIMEOUT;
1603
1604 } else {
1605 err = -ERANGE;
1606 }
1607 break;
1608 }
1609
1610 default:
1611 err = -ENOPROTOOPT;
1612 break;
1613 }
1614
1615#undef COPY_IN
1616
1617exit:
1618 release_sock(sk);
1619 return err;
1620}
1621
1622static int vsock_stream_getsockopt(struct socket *sock,
1623 int level, int optname,
1624 char __user *optval,
1625 int __user *optlen)
1626{
1627 int err;
1628 int len;
1629 struct sock *sk;
1630 struct vsock_sock *vsk;
1631 u64 val;
1632
1633 if (level != AF_VSOCK)
1634 return -ENOPROTOOPT;
1635
1636 err = get_user(len, optlen);
1637 if (err != 0)
1638 return err;
1639
1640#define COPY_OUT(_v) \
1641 do { \
1642 if (len < sizeof(_v)) \
1643 return -EINVAL; \
1644 \
1645 len = sizeof(_v); \
1646 if (copy_to_user(optval, &_v, len) != 0) \
1647 return -EFAULT; \
1648 \
1649 } while (0)
1650
1651 err = 0;
1652 sk = sock->sk;
1653 vsk = vsock_sk(sk);
1654
1655 switch (optname) {
1656 case SO_VM_SOCKETS_BUFFER_SIZE:
1657 val = vsk->buffer_size;
1658 COPY_OUT(val);
1659 break;
1660
1661 case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
1662 val = vsk->buffer_max_size;
1663 COPY_OUT(val);
1664 break;
1665
1666 case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
1667 val = vsk->buffer_min_size;
1668 COPY_OUT(val);
1669 break;
1670
1671 case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
1672 struct __kernel_old_timeval tv;
1673 tv.tv_sec = vsk->connect_timeout / HZ;
1674 tv.tv_usec =
1675 (vsk->connect_timeout -
1676 tv.tv_sec * HZ) * (1000000 / HZ);
1677 COPY_OUT(tv);
1678 break;
1679 }
1680 default:
1681 return -ENOPROTOOPT;
1682 }
1683
1684 err = put_user(len, optlen);
1685 if (err != 0)
1686 return -EFAULT;
1687
1688#undef COPY_OUT
1689
1690 return 0;
1691}
1692
1693static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1694 size_t len)
1695{
1696 struct sock *sk;
1697 struct vsock_sock *vsk;
1698 const struct vsock_transport *transport;
1699 ssize_t total_written;
1700 long timeout;
1701 int err;
1702 struct vsock_transport_send_notify_data send_data;
1703 DEFINE_WAIT_FUNC(wait, woken_wake_function);
1704
1705 sk = sock->sk;
1706 vsk = vsock_sk(sk);
1707 total_written = 0;
1708 err = 0;
1709
1710 if (msg->msg_flags & MSG_OOB)
1711 return -EOPNOTSUPP;
1712
1713 lock_sock(sk);
1714
1715 transport = vsk->transport;
1716
1717
1718 if (msg->msg_namelen) {
1719 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1720 goto out;
1721 }
1722
1723
1724 if (sk->sk_shutdown & SEND_SHUTDOWN ||
1725 vsk->peer_shutdown & RCV_SHUTDOWN) {
1726 err = -EPIPE;
1727 goto out;
1728 }
1729
1730 if (!transport || sk->sk_state != TCP_ESTABLISHED ||
1731 !vsock_addr_bound(&vsk->local_addr)) {
1732 err = -ENOTCONN;
1733 goto out;
1734 }
1735
1736 if (!vsock_addr_bound(&vsk->remote_addr)) {
1737 err = -EDESTADDRREQ;
1738 goto out;
1739 }
1740
1741
1742 timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1743
1744 err = transport->notify_send_init(vsk, &send_data);
1745 if (err < 0)
1746 goto out;
1747
1748 while (total_written < len) {
1749 ssize_t written;
1750
1751 add_wait_queue(sk_sleep(sk), &wait);
1752 while (vsock_stream_has_space(vsk) == 0 &&
1753 sk->sk_err == 0 &&
1754 !(sk->sk_shutdown & SEND_SHUTDOWN) &&
1755 !(vsk->peer_shutdown & RCV_SHUTDOWN)) {
1756
1757
1758 if (timeout == 0) {
1759 err = -EAGAIN;
1760 remove_wait_queue(sk_sleep(sk), &wait);
1761 goto out_err;
1762 }
1763
1764 err = transport->notify_send_pre_block(vsk, &send_data);
1765 if (err < 0) {
1766 remove_wait_queue(sk_sleep(sk), &wait);
1767 goto out_err;
1768 }
1769
1770 release_sock(sk);
1771 timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
1772 lock_sock(sk);
1773 if (signal_pending(current)) {
1774 err = sock_intr_errno(timeout);
1775 remove_wait_queue(sk_sleep(sk), &wait);
1776 goto out_err;
1777 } else if (timeout == 0) {
1778 err = -EAGAIN;
1779 remove_wait_queue(sk_sleep(sk), &wait);
1780 goto out_err;
1781 }
1782 }
1783 remove_wait_queue(sk_sleep(sk), &wait);
1784
1785
1786
1787
1788
1789 if (sk->sk_err) {
1790 err = -sk->sk_err;
1791 goto out_err;
1792 } else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
1793 (vsk->peer_shutdown & RCV_SHUTDOWN)) {
1794 err = -EPIPE;
1795 goto out_err;
1796 }
1797
1798 err = transport->notify_send_pre_enqueue(vsk, &send_data);
1799 if (err < 0)
1800 goto out_err;
1801
1802
1803
1804
1805
1806
1807
1808 written = transport->stream_enqueue(
1809 vsk, msg,
1810 len - total_written);
1811 if (written < 0) {
1812 err = -ENOMEM;
1813 goto out_err;
1814 }
1815
1816 total_written += written;
1817
1818 err = transport->notify_send_post_enqueue(
1819 vsk, written, &send_data);
1820 if (err < 0)
1821 goto out_err;
1822
1823 }
1824
1825out_err:
1826 if (total_written > 0)
1827 err = total_written;
1828out:
1829 release_sock(sk);
1830 return err;
1831}
1832
1833
1834static int
1835vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1836 int flags)
1837{
1838 struct sock *sk;
1839 struct vsock_sock *vsk;
1840 const struct vsock_transport *transport;
1841 int err;
1842 size_t target;
1843 ssize_t copied;
1844 long timeout;
1845 struct vsock_transport_recv_notify_data recv_data;
1846
1847 DEFINE_WAIT(wait);
1848
1849 sk = sock->sk;
1850 vsk = vsock_sk(sk);
1851 err = 0;
1852
1853 lock_sock(sk);
1854
1855 transport = vsk->transport;
1856
1857 if (!transport || sk->sk_state != TCP_ESTABLISHED) {
1858
1859
1860
1861
1862
1863 if (sock_flag(sk, SOCK_DONE))
1864 err = 0;
1865 else
1866 err = -ENOTCONN;
1867
1868 goto out;
1869 }
1870
1871 if (flags & MSG_OOB) {
1872 err = -EOPNOTSUPP;
1873 goto out;
1874 }
1875
1876
1877
1878
1879
1880 if (sk->sk_shutdown & RCV_SHUTDOWN) {
1881 err = 0;
1882 goto out;
1883 }
1884
1885
1886
1887
1888 if (!len) {
1889 err = 0;
1890 goto out;
1891 }
1892
1893
1894
1895
1896
1897
1898
1899 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1900 if (target >= transport->stream_rcvhiwat(vsk)) {
1901 err = -ENOMEM;
1902 goto out;
1903 }
1904 timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1905 copied = 0;
1906
1907 err = transport->notify_recv_init(vsk, target, &recv_data);
1908 if (err < 0)
1909 goto out;
1910
1911
1912 while (1) {
1913 s64 ready;
1914
1915 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1916 ready = vsock_stream_has_data(vsk);
1917
1918 if (ready == 0) {
1919 if (sk->sk_err != 0 ||
1920 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1921 (vsk->peer_shutdown & SEND_SHUTDOWN)) {
1922 finish_wait(sk_sleep(sk), &wait);
1923 break;
1924 }
1925
1926 if (timeout == 0) {
1927 err = -EAGAIN;
1928 finish_wait(sk_sleep(sk), &wait);
1929 break;
1930 }
1931
1932 err = transport->notify_recv_pre_block(
1933 vsk, target, &recv_data);
1934 if (err < 0) {
1935 finish_wait(sk_sleep(sk), &wait);
1936 break;
1937 }
1938 release_sock(sk);
1939 timeout = schedule_timeout(timeout);
1940 lock_sock(sk);
1941
1942 if (signal_pending(current)) {
1943 err = sock_intr_errno(timeout);
1944 finish_wait(sk_sleep(sk), &wait);
1945 break;
1946 } else if (timeout == 0) {
1947 err = -EAGAIN;
1948 finish_wait(sk_sleep(sk), &wait);
1949 break;
1950 }
1951 } else {
1952 ssize_t read;
1953
1954 finish_wait(sk_sleep(sk), &wait);
1955
1956 if (ready < 0) {
1957
1958
1959
1960
1961
1962 err = -ENOMEM;
1963 goto out;
1964 }
1965
1966 err = transport->notify_recv_pre_dequeue(
1967 vsk, target, &recv_data);
1968 if (err < 0)
1969 break;
1970
1971 read = transport->stream_dequeue(
1972 vsk, msg,
1973 len - copied, flags);
1974 if (read < 0) {
1975 err = -ENOMEM;
1976 break;
1977 }
1978
1979 copied += read;
1980
1981 err = transport->notify_recv_post_dequeue(
1982 vsk, target, read,
1983 !(flags & MSG_PEEK), &recv_data);
1984 if (err < 0)
1985 goto out;
1986
1987 if (read >= target || flags & MSG_PEEK)
1988 break;
1989
1990 target -= read;
1991 }
1992 }
1993
1994 if (sk->sk_err)
1995 err = -sk->sk_err;
1996 else if (sk->sk_shutdown & RCV_SHUTDOWN)
1997 err = 0;
1998
1999 if (copied > 0)
2000 err = copied;
2001
2002out:
2003 release_sock(sk);
2004 return err;
2005}
2006
2007static const struct proto_ops vsock_stream_ops = {
2008 .family = PF_VSOCK,
2009 .owner = THIS_MODULE,
2010 .release = vsock_release,
2011 .bind = vsock_bind,
2012 .connect = vsock_stream_connect,
2013 .socketpair = sock_no_socketpair,
2014 .accept = vsock_accept,
2015 .getname = vsock_getname,
2016 .poll = vsock_poll,
2017 .ioctl = sock_no_ioctl,
2018 .listen = vsock_listen,
2019 .shutdown = vsock_shutdown,
2020 .setsockopt = vsock_stream_setsockopt,
2021 .getsockopt = vsock_stream_getsockopt,
2022 .sendmsg = vsock_stream_sendmsg,
2023 .recvmsg = vsock_stream_recvmsg,
2024 .mmap = sock_no_mmap,
2025 .sendpage = sock_no_sendpage,
2026};
2027
2028static int vsock_create(struct net *net, struct socket *sock,
2029 int protocol, int kern)
2030{
2031 struct vsock_sock *vsk;
2032 struct sock *sk;
2033 int ret;
2034
2035 if (!sock)
2036 return -EINVAL;
2037
2038 if (protocol && protocol != PF_VSOCK)
2039 return -EPROTONOSUPPORT;
2040
2041 switch (sock->type) {
2042 case SOCK_DGRAM:
2043 sock->ops = &vsock_dgram_ops;
2044 break;
2045 case SOCK_STREAM:
2046 sock->ops = &vsock_stream_ops;
2047 break;
2048 default:
2049 return -ESOCKTNOSUPPORT;
2050 }
2051
2052 sock->state = SS_UNCONNECTED;
2053
2054 sk = __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern);
2055 if (!sk)
2056 return -ENOMEM;
2057
2058 vsk = vsock_sk(sk);
2059
2060 if (sock->type == SOCK_DGRAM) {
2061 ret = vsock_assign_transport(vsk, NULL);
2062 if (ret < 0) {
2063 sock_put(sk);
2064 return ret;
2065 }
2066 }
2067
2068 vsock_insert_unbound(vsk);
2069
2070 return 0;
2071}
2072
2073static const struct net_proto_family vsock_family_ops = {
2074 .family = AF_VSOCK,
2075 .create = vsock_create,
2076 .owner = THIS_MODULE,
2077};
2078
2079static long vsock_dev_do_ioctl(struct file *filp,
2080 unsigned int cmd, void __user *ptr)
2081{
2082 u32 __user *p = ptr;
2083 u32 cid = VMADDR_CID_ANY;
2084 int retval = 0;
2085
2086 switch (cmd) {
2087 case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
2088
2089
2090
2091 if (transport_g2h)
2092 cid = transport_g2h->get_local_cid();
2093 else if (transport_h2g)
2094 cid = transport_h2g->get_local_cid();
2095
2096 if (put_user(cid, p) != 0)
2097 retval = -EFAULT;
2098 break;
2099
2100 default:
2101 retval = -ENOIOCTLCMD;
2102 }
2103
2104 return retval;
2105}
2106
2107static long vsock_dev_ioctl(struct file *filp,
2108 unsigned int cmd, unsigned long arg)
2109{
2110 return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg);
2111}
2112
#ifdef CONFIG_COMPAT
/* Compat ioctl entry point: convert @arg with compat_ptr() and dispatch. */
static long vsock_dev_compat_ioctl(struct file *filp,
				   unsigned int cmd, unsigned long arg)
{
	void __user *uarg = compat_ptr(arg);

	return vsock_dev_do_ioctl(filp, cmd, uarg);
}
#endif
2120
2121static const struct file_operations vsock_device_ops = {
2122 .owner = THIS_MODULE,
2123 .unlocked_ioctl = vsock_dev_ioctl,
2124#ifdef CONFIG_COMPAT
2125 .compat_ioctl = vsock_dev_compat_ioctl,
2126#endif
2127 .open = nonseekable_open,
2128};
2129
2130static struct miscdevice vsock_device = {
2131 .name = "vsock",
2132 .fops = &vsock_device_ops,
2133};
2134
2135static int __init vsock_init(void)
2136{
2137 int err = 0;
2138
2139 vsock_init_tables();
2140
2141 vsock_proto.owner = THIS_MODULE;
2142 vsock_device.minor = MISC_DYNAMIC_MINOR;
2143 err = misc_register(&vsock_device);
2144 if (err) {
2145 pr_err("Failed to register misc device\n");
2146 goto err_reset_transport;
2147 }
2148
2149 err = proto_register(&vsock_proto, 1);
2150 if (err) {
2151 pr_err("Cannot register vsock protocol\n");
2152 goto err_deregister_misc;
2153 }
2154
2155 err = sock_register(&vsock_family_ops);
2156 if (err) {
2157 pr_err("could not register af_vsock (%d) address family: %d\n",
2158 AF_VSOCK, err);
2159 goto err_unregister_proto;
2160 }
2161
2162 return 0;
2163
2164err_unregister_proto:
2165 proto_unregister(&vsock_proto);
2166err_deregister_misc:
2167 misc_deregister(&vsock_device);
2168err_reset_transport:
2169 return err;
2170}
2171
2172static void __exit vsock_exit(void)
2173{
2174 misc_deregister(&vsock_device);
2175 sock_unregister(AF_VSOCK);
2176 proto_unregister(&vsock_proto);
2177}
2178
2179const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk)
2180{
2181 return vsk->transport;
2182}
2183EXPORT_SYMBOL_GPL(vsock_core_get_transport);
2184
2185int vsock_core_register(const struct vsock_transport *t, int features)
2186{
2187 const struct vsock_transport *t_h2g, *t_g2h, *t_dgram, *t_local;
2188 int err = mutex_lock_interruptible(&vsock_register_mutex);
2189
2190 if (err)
2191 return err;
2192
2193 t_h2g = transport_h2g;
2194 t_g2h = transport_g2h;
2195 t_dgram = transport_dgram;
2196 t_local = transport_local;
2197
2198 if (features & VSOCK_TRANSPORT_F_H2G) {
2199 if (t_h2g) {
2200 err = -EBUSY;
2201 goto err_busy;
2202 }
2203 t_h2g = t;
2204 }
2205
2206 if (features & VSOCK_TRANSPORT_F_G2H) {
2207 if (t_g2h) {
2208 err = -EBUSY;
2209 goto err_busy;
2210 }
2211 t_g2h = t;
2212 }
2213
2214 if (features & VSOCK_TRANSPORT_F_DGRAM) {
2215 if (t_dgram) {
2216 err = -EBUSY;
2217 goto err_busy;
2218 }
2219 t_dgram = t;
2220 }
2221
2222 if (features & VSOCK_TRANSPORT_F_LOCAL) {
2223 if (t_local) {
2224 err = -EBUSY;
2225 goto err_busy;
2226 }
2227 t_local = t;
2228 }
2229
2230 transport_h2g = t_h2g;
2231 transport_g2h = t_g2h;
2232 transport_dgram = t_dgram;
2233 transport_local = t_local;
2234
2235err_busy:
2236 mutex_unlock(&vsock_register_mutex);
2237 return err;
2238}
2239EXPORT_SYMBOL_GPL(vsock_core_register);
2240
2241void vsock_core_unregister(const struct vsock_transport *t)
2242{
2243 mutex_lock(&vsock_register_mutex);
2244
2245 if (transport_h2g == t)
2246 transport_h2g = NULL;
2247
2248 if (transport_g2h == t)
2249 transport_g2h = NULL;
2250
2251 if (transport_dgram == t)
2252 transport_dgram = NULL;
2253
2254 if (transport_local == t)
2255 transport_local = NULL;
2256
2257 mutex_unlock(&vsock_register_mutex);
2258}
2259EXPORT_SYMBOL_GPL(vsock_core_unregister);
2260
2261module_init(vsock_init);
2262module_exit(vsock_exit);
2263
2264MODULE_AUTHOR("VMware, Inc.");
2265MODULE_DESCRIPTION("VMware Virtual Socket Family");
2266MODULE_VERSION("1.0.2.0-k");
2267MODULE_LICENSE("GPL v2");
2268