linux/net/vmw_vsock/vmci_transport.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * VMware vSockets Driver
   4 *
   5 * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
   6 */
   7
   8#include <linux/types.h>
   9#include <linux/bitops.h>
  10#include <linux/cred.h>
  11#include <linux/init.h>
  12#include <linux/io.h>
  13#include <linux/kernel.h>
  14#include <linux/kmod.h>
  15#include <linux/list.h>
  16#include <linux/module.h>
  17#include <linux/mutex.h>
  18#include <linux/net.h>
  19#include <linux/poll.h>
  20#include <linux/skbuff.h>
  21#include <linux/smp.h>
  22#include <linux/socket.h>
  23#include <linux/stddef.h>
  24#include <linux/unistd.h>
  25#include <linux/wait.h>
  26#include <linux/workqueue.h>
  27#include <net/sock.h>
  28#include <net/af_vsock.h>
  29
  30#include "vmci_transport_notify.h"
  31
  32static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
  33static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
  34static void vmci_transport_peer_detach_cb(u32 sub_id,
  35                                          const struct vmci_event_data *ed,
  36                                          void *client_data);
  37static void vmci_transport_recv_pkt_work(struct work_struct *work);
  38static void vmci_transport_cleanup(struct work_struct *work);
  39static int vmci_transport_recv_listen(struct sock *sk,
  40                                      struct vmci_transport_packet *pkt);
  41static int vmci_transport_recv_connecting_server(
  42                                        struct sock *sk,
  43                                        struct sock *pending,
  44                                        struct vmci_transport_packet *pkt);
  45static int vmci_transport_recv_connecting_client(
  46                                        struct sock *sk,
  47                                        struct vmci_transport_packet *pkt);
  48static int vmci_transport_recv_connecting_client_negotiate(
  49                                        struct sock *sk,
  50                                        struct vmci_transport_packet *pkt);
  51static int vmci_transport_recv_connecting_client_invalid(
  52                                        struct sock *sk,
  53                                        struct vmci_transport_packet *pkt);
  54static int vmci_transport_recv_connected(struct sock *sk,
  55                                         struct vmci_transport_packet *pkt);
  56static bool vmci_transport_old_proto_override(bool *old_pkt_proto);
  57static u16 vmci_transport_new_proto_supported_versions(void);
  58static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto,
  59                                                  bool old_pkt_proto);
  60
  61struct vmci_transport_recv_pkt_info {
  62        struct work_struct work;
  63        struct sock *sk;
  64        struct vmci_transport_packet pkt;
  65};
  66
  67static LIST_HEAD(vmci_transport_cleanup_list);
  68static DEFINE_SPINLOCK(vmci_transport_cleanup_lock);
  69static DECLARE_WORK(vmci_transport_cleanup_work, vmci_transport_cleanup);
  70
  71static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
  72                                                           VMCI_INVALID_ID };
  73static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
  74
  75static int PROTOCOL_OVERRIDE = -1;
  76
  77#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN   128
  78#define VMCI_TRANSPORT_DEFAULT_QP_SIZE       262144
  79#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX   262144
  80
  81/* The default peer timeout indicates how long we will wait for a peer response
  82 * to a control message.
  83 */
  84#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
  85
  86/* Helper function to convert from a VMCI error code to a VSock error code. */
  87
  88static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
  89{
  90        switch (vmci_error) {
  91        case VMCI_ERROR_NO_MEM:
  92                return -ENOMEM;
  93        case VMCI_ERROR_DUPLICATE_ENTRY:
  94        case VMCI_ERROR_ALREADY_EXISTS:
  95                return -EADDRINUSE;
  96        case VMCI_ERROR_NO_ACCESS:
  97                return -EPERM;
  98        case VMCI_ERROR_NO_RESOURCES:
  99                return -ENOBUFS;
 100        case VMCI_ERROR_INVALID_RESOURCE:
 101                return -EHOSTUNREACH;
 102        case VMCI_ERROR_INVALID_ARGS:
 103        default:
 104                break;
 105        }
 106        return -EINVAL;
 107}
 108
 109static u32 vmci_transport_peer_rid(u32 peer_cid)
 110{
 111        if (VMADDR_CID_HYPERVISOR == peer_cid)
 112                return VMCI_TRANSPORT_HYPERVISOR_PACKET_RID;
 113
 114        return VMCI_TRANSPORT_PACKET_RID;
 115}
 116
 117static inline void
 118vmci_transport_packet_init(struct vmci_transport_packet *pkt,
 119                           struct sockaddr_vm *src,
 120                           struct sockaddr_vm *dst,
 121                           u8 type,
 122                           u64 size,
 123                           u64 mode,
 124                           struct vmci_transport_waiting_info *wait,
 125                           u16 proto,
 126                           struct vmci_handle handle)
 127{
 128        /* We register the stream control handler as an any cid handle so we
 129         * must always send from a source address of VMADDR_CID_ANY
 130         */
 131        pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY,
 132                                       VMCI_TRANSPORT_PACKET_RID);
 133        pkt->dg.dst = vmci_make_handle(dst->svm_cid,
 134                                       vmci_transport_peer_rid(dst->svm_cid));
 135        pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg);
 136        pkt->version = VMCI_TRANSPORT_PACKET_VERSION;
 137        pkt->type = type;
 138        pkt->src_port = src->svm_port;
 139        pkt->dst_port = dst->svm_port;
 140        memset(&pkt->proto, 0, sizeof(pkt->proto));
 141        memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2));
 142
 143        switch (pkt->type) {
 144        case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
 145                pkt->u.size = 0;
 146                break;
 147
 148        case VMCI_TRANSPORT_PACKET_TYPE_REQUEST:
 149        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
 150                pkt->u.size = size;
 151                break;
 152
 153        case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
 154        case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
 155                pkt->u.handle = handle;
 156                break;
 157
 158        case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
 159        case VMCI_TRANSPORT_PACKET_TYPE_READ:
 160        case VMCI_TRANSPORT_PACKET_TYPE_RST:
 161                pkt->u.size = 0;
 162                break;
 163
 164        case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
 165                pkt->u.mode = mode;
 166                break;
 167
 168        case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
 169        case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
 170                memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait));
 171                break;
 172
 173        case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2:
 174        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
 175                pkt->u.size = size;
 176                pkt->proto = proto;
 177                break;
 178        }
 179}
 180
 181static inline void
 182vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt,
 183                                    struct sockaddr_vm *local,
 184                                    struct sockaddr_vm *remote)
 185{
 186        vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port);
 187        vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port);
 188}
 189
 190static int
 191__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt,
 192                                  struct sockaddr_vm *src,
 193                                  struct sockaddr_vm *dst,
 194                                  enum vmci_transport_packet_type type,
 195                                  u64 size,
 196                                  u64 mode,
 197                                  struct vmci_transport_waiting_info *wait,
 198                                  u16 proto,
 199                                  struct vmci_handle handle,
 200                                  bool convert_error)
 201{
 202        int err;
 203
 204        vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait,
 205                                   proto, handle);
 206        err = vmci_datagram_send(&pkt->dg);
 207        if (convert_error && (err < 0))
 208                return vmci_transport_error_to_vsock_error(err);
 209
 210        return err;
 211}
 212
 213static int
 214vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt,
 215                                      enum vmci_transport_packet_type type,
 216                                      u64 size,
 217                                      u64 mode,
 218                                      struct vmci_transport_waiting_info *wait,
 219                                      struct vmci_handle handle)
 220{
 221        struct vmci_transport_packet reply;
 222        struct sockaddr_vm src, dst;
 223
 224        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) {
 225                return 0;
 226        } else {
 227                vmci_transport_packet_get_addresses(pkt, &src, &dst);
 228                return __vmci_transport_send_control_pkt(&reply, &src, &dst,
 229                                                         type,
 230                                                         size, mode, wait,
 231                                                         VSOCK_PROTO_INVALID,
 232                                                         handle, true);
 233        }
 234}
 235
 236static int
 237vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src,
 238                                   struct sockaddr_vm *dst,
 239                                   enum vmci_transport_packet_type type,
 240                                   u64 size,
 241                                   u64 mode,
 242                                   struct vmci_transport_waiting_info *wait,
 243                                   struct vmci_handle handle)
 244{
 245        /* Note that it is safe to use a single packet across all CPUs since
 246         * two tasklets of the same type are guaranteed to not ever run
 247         * simultaneously. If that ever changes, or VMCI stops using tasklets,
 248         * we can use per-cpu packets.
 249         */
 250        static struct vmci_transport_packet pkt;
 251
 252        return __vmci_transport_send_control_pkt(&pkt, src, dst, type,
 253                                                 size, mode, wait,
 254                                                 VSOCK_PROTO_INVALID, handle,
 255                                                 false);
 256}
 257
 258static int
 259vmci_transport_alloc_send_control_pkt(struct sockaddr_vm *src,
 260                                      struct sockaddr_vm *dst,
 261                                      enum vmci_transport_packet_type type,
 262                                      u64 size,
 263                                      u64 mode,
 264                                      struct vmci_transport_waiting_info *wait,
 265                                      u16 proto,
 266                                      struct vmci_handle handle)
 267{
 268        struct vmci_transport_packet *pkt;
 269        int err;
 270
 271        pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
 272        if (!pkt)
 273                return -ENOMEM;
 274
 275        err = __vmci_transport_send_control_pkt(pkt, src, dst, type, size,
 276                                                mode, wait, proto, handle,
 277                                                true);
 278        kfree(pkt);
 279
 280        return err;
 281}
 282
 283static int
 284vmci_transport_send_control_pkt(struct sock *sk,
 285                                enum vmci_transport_packet_type type,
 286                                u64 size,
 287                                u64 mode,
 288                                struct vmci_transport_waiting_info *wait,
 289                                u16 proto,
 290                                struct vmci_handle handle)
 291{
 292        struct vsock_sock *vsk;
 293
 294        vsk = vsock_sk(sk);
 295
 296        if (!vsock_addr_bound(&vsk->local_addr))
 297                return -EINVAL;
 298
 299        if (!vsock_addr_bound(&vsk->remote_addr))
 300                return -EINVAL;
 301
 302        return vmci_transport_alloc_send_control_pkt(&vsk->local_addr,
 303                                                     &vsk->remote_addr,
 304                                                     type, size, mode,
 305                                                     wait, proto, handle);
 306}
 307
 308static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
 309                                        struct sockaddr_vm *src,
 310                                        struct vmci_transport_packet *pkt)
 311{
 312        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
 313                return 0;
 314        return vmci_transport_send_control_pkt_bh(
 315                                        dst, src,
 316                                        VMCI_TRANSPORT_PACKET_TYPE_RST, 0,
 317                                        0, NULL, VMCI_INVALID_HANDLE);
 318}
 319
 320static int vmci_transport_send_reset(struct sock *sk,
 321                                     struct vmci_transport_packet *pkt)
 322{
 323        struct sockaddr_vm *dst_ptr;
 324        struct sockaddr_vm dst;
 325        struct vsock_sock *vsk;
 326
 327        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
 328                return 0;
 329
 330        vsk = vsock_sk(sk);
 331
 332        if (!vsock_addr_bound(&vsk->local_addr))
 333                return -EINVAL;
 334
 335        if (vsock_addr_bound(&vsk->remote_addr)) {
 336                dst_ptr = &vsk->remote_addr;
 337        } else {
 338                vsock_addr_init(&dst, pkt->dg.src.context,
 339                                pkt->src_port);
 340                dst_ptr = &dst;
 341        }
 342        return vmci_transport_alloc_send_control_pkt(&vsk->local_addr, dst_ptr,
 343                                             VMCI_TRANSPORT_PACKET_TYPE_RST,
 344                                             0, 0, NULL, VSOCK_PROTO_INVALID,
 345                                             VMCI_INVALID_HANDLE);
 346}
 347
 348static int vmci_transport_send_negotiate(struct sock *sk, size_t size)
 349{
 350        return vmci_transport_send_control_pkt(
 351                                        sk,
 352                                        VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE,
 353                                        size, 0, NULL,
 354                                        VSOCK_PROTO_INVALID,
 355                                        VMCI_INVALID_HANDLE);
 356}
 357
 358static int vmci_transport_send_negotiate2(struct sock *sk, size_t size,
 359                                          u16 version)
 360{
 361        return vmci_transport_send_control_pkt(
 362                                        sk,
 363                                        VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2,
 364                                        size, 0, NULL, version,
 365                                        VMCI_INVALID_HANDLE);
 366}
 367
 368static int vmci_transport_send_qp_offer(struct sock *sk,
 369                                        struct vmci_handle handle)
 370{
 371        return vmci_transport_send_control_pkt(
 372                                        sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0,
 373                                        0, NULL,
 374                                        VSOCK_PROTO_INVALID, handle);
 375}
 376
 377static int vmci_transport_send_attach(struct sock *sk,
 378                                      struct vmci_handle handle)
 379{
 380        return vmci_transport_send_control_pkt(
 381                                        sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH,
 382                                        0, 0, NULL, VSOCK_PROTO_INVALID,
 383                                        handle);
 384}
 385
 386static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt)
 387{
 388        return vmci_transport_reply_control_pkt_fast(
 389                                                pkt,
 390                                                VMCI_TRANSPORT_PACKET_TYPE_RST,
 391                                                0, 0, NULL,
 392                                                VMCI_INVALID_HANDLE);
 393}
 394
 395static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst,
 396                                          struct sockaddr_vm *src)
 397{
 398        return vmci_transport_send_control_pkt_bh(
 399                                        dst, src,
 400                                        VMCI_TRANSPORT_PACKET_TYPE_INVALID,
 401                                        0, 0, NULL, VMCI_INVALID_HANDLE);
 402}
 403
 404int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
 405                                 struct sockaddr_vm *src)
 406{
 407        return vmci_transport_send_control_pkt_bh(
 408                                        dst, src,
 409                                        VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
 410                                        0, NULL, VMCI_INVALID_HANDLE);
 411}
 412
 413int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
 414                                struct sockaddr_vm *src)
 415{
 416        return vmci_transport_send_control_pkt_bh(
 417                                        dst, src,
 418                                        VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
 419                                        0, NULL, VMCI_INVALID_HANDLE);
 420}
 421
 422int vmci_transport_send_wrote(struct sock *sk)
 423{
 424        return vmci_transport_send_control_pkt(
 425                                        sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
 426                                        0, NULL, VSOCK_PROTO_INVALID,
 427                                        VMCI_INVALID_HANDLE);
 428}
 429
 430int vmci_transport_send_read(struct sock *sk)
 431{
 432        return vmci_transport_send_control_pkt(
 433                                        sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
 434                                        0, NULL, VSOCK_PROTO_INVALID,
 435                                        VMCI_INVALID_HANDLE);
 436}
 437
 438int vmci_transport_send_waiting_write(struct sock *sk,
 439                                      struct vmci_transport_waiting_info *wait)
 440{
 441        return vmci_transport_send_control_pkt(
 442                                sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE,
 443                                0, 0, wait, VSOCK_PROTO_INVALID,
 444                                VMCI_INVALID_HANDLE);
 445}
 446
 447int vmci_transport_send_waiting_read(struct sock *sk,
 448                                     struct vmci_transport_waiting_info *wait)
 449{
 450        return vmci_transport_send_control_pkt(
 451                                sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ,
 452                                0, 0, wait, VSOCK_PROTO_INVALID,
 453                                VMCI_INVALID_HANDLE);
 454}
 455
 456static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode)
 457{
 458        return vmci_transport_send_control_pkt(
 459                                        &vsk->sk,
 460                                        VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN,
 461                                        0, mode, NULL,
 462                                        VSOCK_PROTO_INVALID,
 463                                        VMCI_INVALID_HANDLE);
 464}
 465
 466static int vmci_transport_send_conn_request(struct sock *sk, size_t size)
 467{
 468        return vmci_transport_send_control_pkt(sk,
 469                                        VMCI_TRANSPORT_PACKET_TYPE_REQUEST,
 470                                        size, 0, NULL,
 471                                        VSOCK_PROTO_INVALID,
 472                                        VMCI_INVALID_HANDLE);
 473}
 474
 475static int vmci_transport_send_conn_request2(struct sock *sk, size_t size,
 476                                             u16 version)
 477{
 478        return vmci_transport_send_control_pkt(
 479                                        sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2,
 480                                        size, 0, NULL, version,
 481                                        VMCI_INVALID_HANDLE);
 482}
 483
 484static struct sock *vmci_transport_get_pending(
 485                                        struct sock *listener,
 486                                        struct vmci_transport_packet *pkt)
 487{
 488        struct vsock_sock *vlistener;
 489        struct vsock_sock *vpending;
 490        struct sock *pending;
 491        struct sockaddr_vm src;
 492
 493        vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
 494
 495        vlistener = vsock_sk(listener);
 496
 497        list_for_each_entry(vpending, &vlistener->pending_links,
 498                            pending_links) {
 499                if (vsock_addr_equals_addr(&src, &vpending->remote_addr) &&
 500                    pkt->dst_port == vpending->local_addr.svm_port) {
 501                        pending = sk_vsock(vpending);
 502                        sock_hold(pending);
 503                        goto found;
 504                }
 505        }
 506
 507        pending = NULL;
 508found:
 509        return pending;
 510
 511}
 512
 513static void vmci_transport_release_pending(struct sock *pending)
 514{
 515        sock_put(pending);
 516}
 517
 518/* We allow two kinds of sockets to communicate with a restricted VM: 1)
 519 * trusted sockets 2) sockets from applications running as the same user as the
 520 * VM (this is only true for the host side and only when using hosted products)
 521 */
 522
 523static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid)
 524{
 525        return vsock->trusted ||
 526               vmci_is_context_owner(peer_cid, vsock->owner->uid);
 527}
 528
 529/* We allow sending datagrams to and receiving datagrams from a restricted VM
 530 * only if it is trusted as described in vmci_transport_is_trusted.
 531 */
 532
 533static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid)
 534{
 535        if (VMADDR_CID_HYPERVISOR == peer_cid)
 536                return true;
 537
 538        if (vsock->cached_peer != peer_cid) {
 539                vsock->cached_peer = peer_cid;
 540                if (!vmci_transport_is_trusted(vsock, peer_cid) &&
 541                    (vmci_context_get_priv_flags(peer_cid) &
 542                     VMCI_PRIVILEGE_FLAG_RESTRICTED)) {
 543                        vsock->cached_peer_allow_dgram = false;
 544                } else {
 545                        vsock->cached_peer_allow_dgram = true;
 546                }
 547        }
 548
 549        return vsock->cached_peer_allow_dgram;
 550}
 551
 552static int
 553vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
 554                                struct vmci_handle *handle,
 555                                u64 produce_size,
 556                                u64 consume_size,
 557                                u32 peer, u32 flags, bool trusted)
 558{
 559        int err = 0;
 560
 561        if (trusted) {
 562                /* Try to allocate our queue pair as trusted. This will only
 563                 * work if vsock is running in the host.
 564                 */
 565
 566                err = vmci_qpair_alloc(qpair, handle, produce_size,
 567                                       consume_size,
 568                                       peer, flags,
 569                                       VMCI_PRIVILEGE_FLAG_TRUSTED);
 570                if (err != VMCI_ERROR_NO_ACCESS)
 571                        goto out;
 572
 573        }
 574
 575        err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size,
 576                               peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
 577out:
 578        if (err < 0) {
 579                pr_err("Could not attach to queue pair with %d\n",
 580                       err);
 581                err = vmci_transport_error_to_vsock_error(err);
 582        }
 583
 584        return err;
 585}
 586
 587static int
 588vmci_transport_datagram_create_hnd(u32 resource_id,
 589                                   u32 flags,
 590                                   vmci_datagram_recv_cb recv_cb,
 591                                   void *client_data,
 592                                   struct vmci_handle *out_handle)
 593{
 594        int err = 0;
 595
 596        /* Try to allocate our datagram handler as trusted. This will only work
 597         * if vsock is running in the host.
 598         */
 599
 600        err = vmci_datagram_create_handle_priv(resource_id, flags,
 601                                               VMCI_PRIVILEGE_FLAG_TRUSTED,
 602                                               recv_cb,
 603                                               client_data, out_handle);
 604
 605        if (err == VMCI_ERROR_NO_ACCESS)
 606                err = vmci_datagram_create_handle(resource_id, flags,
 607                                                  recv_cb, client_data,
 608                                                  out_handle);
 609
 610        return err;
 611}
 612
 613/* This is invoked as part of a tasklet that's scheduled when the VMCI
 614 * interrupt fires.  This is run in bottom-half context and if it ever needs to
 615 * sleep it should defer that work to a work queue.
 616 */
 617
 618static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
 619{
 620        struct sock *sk;
 621        size_t size;
 622        struct sk_buff *skb;
 623        struct vsock_sock *vsk;
 624
 625        sk = (struct sock *)data;
 626
 627        /* This handler is privileged when this module is running on the host.
 628         * We will get datagrams from all endpoints (even VMs that are in a
 629         * restricted context). If we get one from a restricted context then
 630         * the destination socket must be trusted.
 631         *
 632         * NOTE: We access the socket struct without holding the lock here.
 633         * This is ok because the field we are interested is never modified
 634         * outside of the create and destruct socket functions.
 635         */
 636        vsk = vsock_sk(sk);
 637        if (!vmci_transport_allow_dgram(vsk, dg->src.context))
 638                return VMCI_ERROR_NO_ACCESS;
 639
 640        size = VMCI_DG_SIZE(dg);
 641
 642        /* Attach the packet to the socket's receive queue as an sk_buff. */
 643        skb = alloc_skb(size, GFP_ATOMIC);
 644        if (!skb)
 645                return VMCI_ERROR_NO_MEM;
 646
 647        /* sk_receive_skb() will do a sock_put(), so hold here. */
 648        sock_hold(sk);
 649        skb_put(skb, size);
 650        memcpy(skb->data, dg, size);
 651        sk_receive_skb(sk, skb, 0);
 652
 653        return VMCI_SUCCESS;
 654}
 655
 656static bool vmci_transport_stream_allow(u32 cid, u32 port)
 657{
 658        static const u32 non_socket_contexts[] = {
 659                VMADDR_CID_RESERVED,
 660        };
 661        int i;
 662
 663        BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts));
 664
 665        for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) {
 666                if (cid == non_socket_contexts[i])
 667                        return false;
 668        }
 669
 670        return true;
 671}
 672
 673/* This is invoked as part of a tasklet that's scheduled when the VMCI
 674 * interrupt fires.  This is run in bottom-half context but it defers most of
 675 * its work to the packet handling work queue.
 676 */
 677
 678static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
 679{
 680        struct sock *sk;
 681        struct sockaddr_vm dst;
 682        struct sockaddr_vm src;
 683        struct vmci_transport_packet *pkt;
 684        struct vsock_sock *vsk;
 685        bool bh_process_pkt;
 686        int err;
 687
 688        sk = NULL;
 689        err = VMCI_SUCCESS;
 690        bh_process_pkt = false;
 691
 692        /* Ignore incoming packets from contexts without sockets, or resources
 693         * that aren't vsock implementations.
 694         */
 695
 696        if (!vmci_transport_stream_allow(dg->src.context, -1)
 697            || vmci_transport_peer_rid(dg->src.context) != dg->src.resource)
 698                return VMCI_ERROR_NO_ACCESS;
 699
 700        if (VMCI_DG_SIZE(dg) < sizeof(*pkt))
 701                /* Drop datagrams that do not contain full VSock packets. */
 702                return VMCI_ERROR_INVALID_ARGS;
 703
 704        pkt = (struct vmci_transport_packet *)dg;
 705
 706        /* Find the socket that should handle this packet.  First we look for a
 707         * connected socket and if there is none we look for a socket bound to
  708         * the destination address.
 709         */
 710        vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
 711        vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);
 712
 713        sk = vsock_find_connected_socket(&src, &dst);
 714        if (!sk) {
 715                sk = vsock_find_bound_socket(&dst);
 716                if (!sk) {
 717                        /* We could not find a socket for this specified
 718                         * address.  If this packet is a RST, we just drop it.
 719                         * If it is another packet, we send a RST.  Note that
 720                         * we do not send a RST reply to RSTs so that we do not
 721                         * continually send RSTs between two endpoints.
 722                         *
 723                         * Note that since this is a reply, dst is src and src
 724                         * is dst.
 725                         */
 726                        if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
 727                                pr_err("unable to send reset\n");
 728
 729                        err = VMCI_ERROR_NOT_FOUND;
 730                        goto out;
 731                }
 732        }
 733
 734        /* If the received packet type is beyond all types known to this
 735         * implementation, reply with an invalid message.  Hopefully this will
 736         * help when implementing backwards compatibility in the future.
 737         */
 738        if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) {
 739                vmci_transport_send_invalid_bh(&dst, &src);
 740                err = VMCI_ERROR_INVALID_ARGS;
 741                goto out;
 742        }
 743
 744        /* This handler is privileged when this module is running on the host.
 745         * We will get datagram connect requests from all endpoints (even VMs
 746         * that are in a restricted context). If we get one from a restricted
 747         * context then the destination socket must be trusted.
 748         *
 749         * NOTE: We access the socket struct without holding the lock here.
 750         * This is ok because the field we are interested is never modified
 751         * outside of the create and destruct socket functions.
 752         */
 753        vsk = vsock_sk(sk);
 754        if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) {
 755                err = VMCI_ERROR_NO_ACCESS;
 756                goto out;
 757        }
 758
 759        /* We do most everything in a work queue, but let's fast path the
 760         * notification of reads and writes to help data transfer performance.
 761         * We can only do this if there is no process context code executing
 762         * for this socket since that may change the state.
 763         */
 764        bh_lock_sock(sk);
 765
 766        if (!sock_owned_by_user(sk)) {
 767                /* The local context ID may be out of date, update it. */
 768                vsk->local_addr.svm_cid = dst.svm_cid;
 769
 770                if (sk->sk_state == TCP_ESTABLISHED)
 771                        vmci_trans(vsk)->notify_ops->handle_notify_pkt(
 772                                        sk, pkt, true, &dst, &src,
 773                                        &bh_process_pkt);
 774        }
 775
 776        bh_unlock_sock(sk);
 777
 778        if (!bh_process_pkt) {
 779                struct vmci_transport_recv_pkt_info *recv_pkt_info;
 780
 781                recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC);
 782                if (!recv_pkt_info) {
 783                        if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
 784                                pr_err("unable to send reset\n");
 785
 786                        err = VMCI_ERROR_NO_MEM;
 787                        goto out;
 788                }
 789
 790                recv_pkt_info->sk = sk;
 791                memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt));
 792                INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work);
 793
 794                schedule_work(&recv_pkt_info->work);
 795                /* Clear sk so that the reference count incremented by one of
 796                 * the Find functions above is not decremented below.  We need
 797                 * that reference count for the packet handler we've scheduled
 798                 * to run.
 799                 */
 800                sk = NULL;
 801        }
 802
 803out:
 804        if (sk)
 805                sock_put(sk);
 806
 807        return err;
 808}
 809
 810static void vmci_transport_handle_detach(struct sock *sk)
 811{
 812        struct vsock_sock *vsk;
 813
 814        vsk = vsock_sk(sk);
 815        if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
 816                sock_set_flag(sk, SOCK_DONE);
 817
 818                /* On a detach the peer will not be sending or receiving
 819                 * anymore.
 820                 */
 821                vsk->peer_shutdown = SHUTDOWN_MASK;
 822
 823                /* We should not be sending anymore since the peer won't be
 824                 * there to receive, but we can still receive if there is data
 825                 * left in our consume queue. If the local endpoint is a host,
 826                 * we can't call vsock_stream_has_data, since that may block,
 827                 * but a host endpoint can't read data once the VM has
 828                 * detached, so there is no available data in that case.
 829                 */
 830                if (vsk->local_addr.svm_cid == VMADDR_CID_HOST ||
 831                    vsock_stream_has_data(vsk) <= 0) {
 832                        if (sk->sk_state == TCP_SYN_SENT) {
 833                                /* The peer may detach from a queue pair while
 834                                 * we are still in the connecting state, i.e.,
 835                                 * if the peer VM is killed after attaching to
 836                                 * a queue pair, but before we complete the
 837                                 * handshake. In that case, we treat the detach
 838                                 * event like a reset.
 839                                 */
 840
 841                                sk->sk_state = TCP_CLOSE;
 842                                sk->sk_err = ECONNRESET;
 843                                sk->sk_error_report(sk);
 844                                return;
 845                        }
 846                        sk->sk_state = TCP_CLOSE;
 847                }
 848                sk->sk_state_change(sk);
 849        }
 850}
 851
 852static void vmci_transport_peer_detach_cb(u32 sub_id,
 853                                          const struct vmci_event_data *e_data,
 854                                          void *client_data)
 855{
 856        struct vmci_transport *trans = client_data;
 857        const struct vmci_event_payload_qp *e_payload;
 858
 859        e_payload = vmci_event_data_const_payload(e_data);
 860
 861        /* XXX This is lame, we should provide a way to lookup sockets by
 862         * qp_handle.
 863         */
 864        if (vmci_handle_is_invalid(e_payload->handle) ||
 865            !vmci_handle_is_equal(trans->qp_handle, e_payload->handle))
 866                return;
 867
 868        /* We don't ask for delayed CBs when we subscribe to this event (we
 869         * pass 0 as flags to vmci_event_subscribe()).  VMCI makes no
 870         * guarantees in that case about what context we might be running in,
 871         * so it could be BH or process, blockable or non-blockable.  So we
 872         * need to account for all possible contexts here.
 873         */
 874        spin_lock_bh(&trans->lock);
 875        if (!trans->sk)
 876                goto out;
 877
 878        /* Apart from here, trans->lock is only grabbed as part of sk destruct,
 879         * where trans->sk isn't locked.
 880         */
 881        bh_lock_sock(trans->sk);
 882
 883        vmci_transport_handle_detach(trans->sk);
 884
 885        bh_unlock_sock(trans->sk);
 886 out:
 887        spin_unlock_bh(&trans->lock);
 888}
 889
 890static void vmci_transport_qp_resumed_cb(u32 sub_id,
 891                                         const struct vmci_event_data *e_data,
 892                                         void *client_data)
 893{
 894        vsock_for_each_connected_socket(vmci_transport_handle_detach);
 895}
 896
 897static void vmci_transport_recv_pkt_work(struct work_struct *work)
 898{
 899        struct vmci_transport_recv_pkt_info *recv_pkt_info;
 900        struct vmci_transport_packet *pkt;
 901        struct sock *sk;
 902
 903        recv_pkt_info =
 904                container_of(work, struct vmci_transport_recv_pkt_info, work);
 905        sk = recv_pkt_info->sk;
 906        pkt = &recv_pkt_info->pkt;
 907
 908        lock_sock(sk);
 909
 910        /* The local context ID may be out of date. */
 911        vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;
 912
 913        switch (sk->sk_state) {
 914        case TCP_LISTEN:
 915                vmci_transport_recv_listen(sk, pkt);
 916                break;
 917        case TCP_SYN_SENT:
 918                /* Processing of pending connections for servers goes through
 919                 * the listening socket, so see vmci_transport_recv_listen()
 920                 * for that path.
 921                 */
 922                vmci_transport_recv_connecting_client(sk, pkt);
 923                break;
 924        case TCP_ESTABLISHED:
 925                vmci_transport_recv_connected(sk, pkt);
 926                break;
 927        default:
 928                /* Because this function does not run in the same context as
 929                 * vmci_transport_recv_stream_cb it is possible that the
 930                 * socket has closed. We need to let the other side know or it
 931                 * could be sitting in a connect and hang forever. Send a
 932                 * reset to prevent that.
 933                 */
 934                vmci_transport_send_reset(sk, pkt);
 935                break;
 936        }
 937
 938        release_sock(sk);
 939        kfree(recv_pkt_info);
 940        /* Release reference obtained in the stream callback when we fetched
 941         * this socket out of the bound or connected list.
 942         */
 943        sock_put(sk);
 944}
 945
 946static int vmci_transport_recv_listen(struct sock *sk,
 947                                      struct vmci_transport_packet *pkt)
 948{
 949        struct sock *pending;
 950        struct vsock_sock *vpending;
 951        int err;
 952        u64 qp_size;
 953        bool old_request = false;
 954        bool old_pkt_proto = false;
 955
 956        err = 0;
 957
 958        /* Because we are in the listen state, we could be receiving a packet
 959         * for ourself or any previous connection requests that we received.
 960         * If it's the latter, we try to find a socket in our list of pending
 961         * connections and, if we do, call the appropriate handler for the
 962         * state that that socket is in.  Otherwise we try to service the
 963         * connection request.
 964         */
 965        pending = vmci_transport_get_pending(sk, pkt);
 966        if (pending) {
 967                lock_sock(pending);
 968
 969                /* The local context ID may be out of date. */
 970                vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context;
 971
 972                switch (pending->sk_state) {
 973                case TCP_SYN_SENT:
 974                        err = vmci_transport_recv_connecting_server(sk,
 975                                                                    pending,
 976                                                                    pkt);
 977                        break;
 978                default:
 979                        vmci_transport_send_reset(pending, pkt);
 980                        err = -EINVAL;
 981                }
 982
 983                if (err < 0)
 984                        vsock_remove_pending(sk, pending);
 985
 986                release_sock(pending);
 987                vmci_transport_release_pending(pending);
 988
 989                return err;
 990        }
 991
 992        /* The listen state only accepts connection requests.  Reply with a
 993         * reset unless we received a reset.
 994         */
 995
 996        if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST ||
 997              pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) {
 998                vmci_transport_reply_reset(pkt);
 999                return -EINVAL;
1000        }
1001
1002        if (pkt->u.size == 0) {
1003                vmci_transport_reply_reset(pkt);
1004                return -EINVAL;
1005        }
1006
1007        /* If this socket can't accommodate this connection request, we send a
1008         * reset.  Otherwise we create and initialize a child socket and reply
1009         * with a connection negotiation.
1010         */
1011        if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) {
1012                vmci_transport_reply_reset(pkt);
1013                return -ECONNREFUSED;
1014        }
1015
1016        pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
1017                                 sk->sk_type, 0);
1018        if (!pending) {
1019                vmci_transport_send_reset(sk, pkt);
1020                return -ENOMEM;
1021        }
1022
1023        vpending = vsock_sk(pending);
1024
1025        vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context,
1026                        pkt->dst_port);
1027        vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context,
1028                        pkt->src_port);
1029
1030        /* If the proposed size fits within our min/max, accept it. Otherwise
1031         * propose our own size.
1032         */
1033        if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size &&
1034            pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) {
1035                qp_size = pkt->u.size;
1036        } else {
1037                qp_size = vmci_trans(vpending)->queue_pair_size;
1038        }
1039
1040        /* Figure out if we are using old or new requests based on the
1041         * overrides pkt types sent by our peer.
1042         */
1043        if (vmci_transport_old_proto_override(&old_pkt_proto)) {
1044                old_request = old_pkt_proto;
1045        } else {
1046                if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST)
1047                        old_request = true;
1048                else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)
1049                        old_request = false;
1050
1051        }
1052
1053        if (old_request) {
1054                /* Handle a REQUEST (or override) */
1055                u16 version = VSOCK_PROTO_INVALID;
1056                if (vmci_transport_proto_to_notify_struct(
1057                        pending, &version, true))
1058                        err = vmci_transport_send_negotiate(pending, qp_size);
1059                else
1060                        err = -EINVAL;
1061
1062        } else {
1063                /* Handle a REQUEST2 (or override) */
1064                int proto_int = pkt->proto;
1065                int pos;
1066                u16 active_proto_version = 0;
1067
1068                /* The list of possible protocols is the intersection of all
1069                 * protocols the client supports ... plus all the protocols we
1070                 * support.
1071                 */
1072                proto_int &= vmci_transport_new_proto_supported_versions();
1073
1074                /* We choose the highest possible protocol version and use that
1075                 * one.
1076                 */
1077                pos = fls(proto_int);
1078                if (pos) {
1079                        active_proto_version = (1 << (pos - 1));
1080                        if (vmci_transport_proto_to_notify_struct(
1081                                pending, &active_proto_version, false))
1082                                err = vmci_transport_send_negotiate2(pending,
1083                                                        qp_size,
1084                                                        active_proto_version);
1085                        else
1086                                err = -EINVAL;
1087
1088                } else {
1089                        err = -EINVAL;
1090                }
1091        }
1092
1093        if (err < 0) {
1094                vmci_transport_send_reset(sk, pkt);
1095                sock_put(pending);
1096                err = vmci_transport_error_to_vsock_error(err);
1097                goto out;
1098        }
1099
1100        vsock_add_pending(sk, pending);
1101        sk->sk_ack_backlog++;
1102
1103        pending->sk_state = TCP_SYN_SENT;
1104        vmci_trans(vpending)->produce_size =
1105                vmci_trans(vpending)->consume_size = qp_size;
1106        vmci_trans(vpending)->queue_pair_size = qp_size;
1107
1108        vmci_trans(vpending)->notify_ops->process_request(pending);
1109
1110        /* We might never receive another message for this socket and it's not
1111         * connected to any process, so we have to ensure it gets cleaned up
1112         * ourself.  Our delayed work function will take care of that.  Note
1113         * that we do not ever cancel this function since we have few
1114         * guarantees about its state when calling cancel_delayed_work().
1115         * Instead we hold a reference on the socket for that function and make
1116         * it capable of handling cases where it needs to do nothing but
1117         * release that reference.
1118         */
1119        vpending->listener = sk;
1120        sock_hold(sk);
1121        sock_hold(pending);
1122        schedule_delayed_work(&vpending->pending_work, HZ);
1123
1124out:
1125        return err;
1126}
1127
1128static int
1129vmci_transport_recv_connecting_server(struct sock *listener,
1130                                      struct sock *pending,
1131                                      struct vmci_transport_packet *pkt)
1132{
1133        struct vsock_sock *vpending;
1134        struct vmci_handle handle;
1135        struct vmci_qp *qpair;
1136        bool is_local;
1137        u32 flags;
1138        u32 detach_sub_id;
1139        int err;
1140        int skerr;
1141
1142        vpending = vsock_sk(pending);
1143        detach_sub_id = VMCI_INVALID_ID;
1144
1145        switch (pkt->type) {
1146        case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
1147                if (vmci_handle_is_invalid(pkt->u.handle)) {
1148                        vmci_transport_send_reset(pending, pkt);
1149                        skerr = EPROTO;
1150                        err = -EINVAL;
1151                        goto destroy;
1152                }
1153                break;
1154        default:
1155                /* Close and cleanup the connection. */
1156                vmci_transport_send_reset(pending, pkt);
1157                skerr = EPROTO;
1158                err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL;
1159                goto destroy;
1160        }
1161
1162        /* In order to complete the connection we need to attach to the offered
1163         * queue pair and send an attach notification.  We also subscribe to the
1164         * detach event so we know when our peer goes away, and we do that
1165         * before attaching so we don't miss an event.  If all this succeeds,
1166         * we update our state and wakeup anything waiting in accept() for a
1167         * connection.
1168         */
1169
1170        /* We don't care about attach since we ensure the other side has
1171         * attached by specifying the ATTACH_ONLY flag below.
1172         */
1173        err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
1174                                   vmci_transport_peer_detach_cb,
1175                                   vmci_trans(vpending), &detach_sub_id);
1176        if (err < VMCI_SUCCESS) {
1177                vmci_transport_send_reset(pending, pkt);
1178                err = vmci_transport_error_to_vsock_error(err);
1179                skerr = -err;
1180                goto destroy;
1181        }
1182
1183        vmci_trans(vpending)->detach_sub_id = detach_sub_id;
1184
1185        /* Now attach to the queue pair the client created. */
1186        handle = pkt->u.handle;
1187
1188        /* vpending->local_addr always has a context id so we do not need to
1189         * worry about VMADDR_CID_ANY in this case.
1190         */
1191        is_local =
1192            vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid;
1193        flags = VMCI_QPFLAG_ATTACH_ONLY;
1194        flags |= is_local ? VMCI_QPFLAG_LOCAL : 0;
1195
1196        err = vmci_transport_queue_pair_alloc(
1197                                        &qpair,
1198                                        &handle,
1199                                        vmci_trans(vpending)->produce_size,
1200                                        vmci_trans(vpending)->consume_size,
1201                                        pkt->dg.src.context,
1202                                        flags,
1203                                        vmci_transport_is_trusted(
1204                                                vpending,
1205                                                vpending->remote_addr.svm_cid));
1206        if (err < 0) {
1207                vmci_transport_send_reset(pending, pkt);
1208                skerr = -err;
1209                goto destroy;
1210        }
1211
1212        vmci_trans(vpending)->qp_handle = handle;
1213        vmci_trans(vpending)->qpair = qpair;
1214
1215        /* When we send the attach message, we must be ready to handle incoming
1216         * control messages on the newly connected socket. So we move the
1217         * pending socket to the connected state before sending the attach
1218         * message. Otherwise, an incoming packet triggered by the attach being
1219         * received by the peer may be processed concurrently with what happens
1220         * below after sending the attach message, and that incoming packet
1221         * will find the listening socket instead of the (currently) pending
1222         * socket. Note that enqueueing the socket increments the reference
1223         * count, so even if a reset comes before the connection is accepted,
1224         * the socket will be valid until it is removed from the queue.
1225         *
1226         * If we fail sending the attach below, we remove the socket from the
1227         * connected list and move the socket to TCP_CLOSE before
1228         * releasing the lock, so a pending slow path processing of an incoming
1229         * packet will not see the socket in the connected state in that case.
1230         */
1231        pending->sk_state = TCP_ESTABLISHED;
1232
1233        vsock_insert_connected(vpending);
1234
1235        /* Notify our peer of our attach. */
1236        err = vmci_transport_send_attach(pending, handle);
1237        if (err < 0) {
1238                vsock_remove_connected(vpending);
1239                pr_err("Could not send attach\n");
1240                vmci_transport_send_reset(pending, pkt);
1241                err = vmci_transport_error_to_vsock_error(err);
1242                skerr = -err;
1243                goto destroy;
1244        }
1245
1246        /* We have a connection. Move the now connected socket from the
1247         * listener's pending list to the accept queue so callers of accept()
1248         * can find it.
1249         */
1250        vsock_remove_pending(listener, pending);
1251        vsock_enqueue_accept(listener, pending);
1252
1253        /* Callers of accept() will be be waiting on the listening socket, not
1254         * the pending socket.
1255         */
1256        listener->sk_data_ready(listener);
1257
1258        return 0;
1259
1260destroy:
1261        pending->sk_err = skerr;
1262        pending->sk_state = TCP_CLOSE;
1263        /* As long as we drop our reference, all necessary cleanup will handle
1264         * when the cleanup function drops its reference and our destruct
1265         * implementation is called.  Note that since the listen handler will
1266         * remove pending from the pending list upon our failure, the cleanup
1267         * function won't drop the additional reference, which is why we do it
1268         * here.
1269         */
1270        sock_put(pending);
1271
1272        return err;
1273}
1274
1275static int
1276vmci_transport_recv_connecting_client(struct sock *sk,
1277                                      struct vmci_transport_packet *pkt)
1278{
1279        struct vsock_sock *vsk;
1280        int err;
1281        int skerr;
1282
1283        vsk = vsock_sk(sk);
1284
1285        switch (pkt->type) {
1286        case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
1287                if (vmci_handle_is_invalid(pkt->u.handle) ||
1288                    !vmci_handle_is_equal(pkt->u.handle,
1289                                          vmci_trans(vsk)->qp_handle)) {
1290                        skerr = EPROTO;
1291                        err = -EINVAL;
1292                        goto destroy;
1293                }
1294
1295                /* Signify the socket is connected and wakeup the waiter in
1296                 * connect(). Also place the socket in the connected table for
1297                 * accounting (it can already be found since it's in the bound
1298                 * table).
1299                 */
1300                sk->sk_state = TCP_ESTABLISHED;
1301                sk->sk_socket->state = SS_CONNECTED;
1302                vsock_insert_connected(vsk);
1303                sk->sk_state_change(sk);
1304
1305                break;
1306        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
1307        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
1308                if (pkt->u.size == 0
1309                    || pkt->dg.src.context != vsk->remote_addr.svm_cid
1310                    || pkt->src_port != vsk->remote_addr.svm_port
1311                    || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)
1312                    || vmci_trans(vsk)->qpair
1313                    || vmci_trans(vsk)->produce_size != 0
1314                    || vmci_trans(vsk)->consume_size != 0
1315                    || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
1316                        skerr = EPROTO;
1317                        err = -EINVAL;
1318
1319                        goto destroy;
1320                }
1321
1322                err = vmci_transport_recv_connecting_client_negotiate(sk, pkt);
1323                if (err) {
1324                        skerr = -err;
1325                        goto destroy;
1326                }
1327
1328                break;
1329        case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
1330                err = vmci_transport_recv_connecting_client_invalid(sk, pkt);
1331                if (err) {
1332                        skerr = -err;
1333                        goto destroy;
1334                }
1335
1336                break;
1337        case VMCI_TRANSPORT_PACKET_TYPE_RST:
1338                /* Older versions of the linux code (WS 6.5 / ESX 4.0) used to
1339                 * continue processing here after they sent an INVALID packet.
1340                 * This meant that we got a RST after the INVALID. We ignore a
1341                 * RST after an INVALID. The common code doesn't send the RST
1342                 * ... so we can hang if an old version of the common code
1343                 * fails between getting a REQUEST and sending an OFFER back.
1344                 * Not much we can do about it... except hope that it doesn't
1345                 * happen.
1346                 */
1347                if (vsk->ignore_connecting_rst) {
1348                        vsk->ignore_connecting_rst = false;
1349                } else {
1350                        skerr = ECONNRESET;
1351                        err = 0;
1352                        goto destroy;
1353                }
1354
1355                break;
1356        default:
1357                /* Close and cleanup the connection. */
1358                skerr = EPROTO;
1359                err = -EINVAL;
1360                goto destroy;
1361        }
1362
1363        return 0;
1364
1365destroy:
1366        vmci_transport_send_reset(sk, pkt);
1367
1368        sk->sk_state = TCP_CLOSE;
1369        sk->sk_err = skerr;
1370        sk->sk_error_report(sk);
1371        return err;
1372}
1373
1374static int vmci_transport_recv_connecting_client_negotiate(
1375                                        struct sock *sk,
1376                                        struct vmci_transport_packet *pkt)
1377{
1378        int err;
1379        struct vsock_sock *vsk;
1380        struct vmci_handle handle;
1381        struct vmci_qp *qpair;
1382        u32 detach_sub_id;
1383        bool is_local;
1384        u32 flags;
1385        bool old_proto = true;
1386        bool old_pkt_proto;
1387        u16 version;
1388
1389        vsk = vsock_sk(sk);
1390        handle = VMCI_INVALID_HANDLE;
1391        detach_sub_id = VMCI_INVALID_ID;
1392
1393        /* If we have gotten here then we should be past the point where old
1394         * linux vsock could have sent the bogus rst.
1395         */
1396        vsk->sent_request = false;
1397        vsk->ignore_connecting_rst = false;
1398
1399        /* Verify that we're OK with the proposed queue pair size */
1400        if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size ||
1401            pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) {
1402                err = -EINVAL;
1403                goto destroy;
1404        }
1405
1406        /* At this point we know the CID the peer is using to talk to us. */
1407
1408        if (vsk->local_addr.svm_cid == VMADDR_CID_ANY)
1409                vsk->local_addr.svm_cid = pkt->dg.dst.context;
1410
1411        /* Setup the notify ops to be the highest supported version that both
1412         * the server and the client support.
1413         */
1414
1415        if (vmci_transport_old_proto_override(&old_pkt_proto)) {
1416                old_proto = old_pkt_proto;
1417        } else {
1418                if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE)
1419                        old_proto = true;
1420                else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2)
1421                        old_proto = false;
1422
1423        }
1424
1425        if (old_proto)
1426                version = VSOCK_PROTO_INVALID;
1427        else
1428                version = pkt->proto;
1429
1430        if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) {
1431                err = -EINVAL;
1432                goto destroy;
1433        }
1434
1435        /* Subscribe to detach events first.
1436         *
1437         * XXX We attach once for each queue pair created for now so it is easy
1438         * to find the socket (it's provided), but later we should only
1439         * subscribe once and add a way to lookup sockets by queue pair handle.
1440         */
1441        err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
1442                                   vmci_transport_peer_detach_cb,
1443                                   vmci_trans(vsk), &detach_sub_id);
1444        if (err < VMCI_SUCCESS) {
1445                err = vmci_transport_error_to_vsock_error(err);
1446                goto destroy;
1447        }
1448
1449        /* Make VMCI select the handle for us. */
1450        handle = VMCI_INVALID_HANDLE;
1451        is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid;
1452        flags = is_local ? VMCI_QPFLAG_LOCAL : 0;
1453
1454        err = vmci_transport_queue_pair_alloc(&qpair,
1455                                              &handle,
1456                                              pkt->u.size,
1457                                              pkt->u.size,
1458                                              vsk->remote_addr.svm_cid,
1459                                              flags,
1460                                              vmci_transport_is_trusted(
1461                                                  vsk,
1462                                                  vsk->
1463                                                  remote_addr.svm_cid));
1464        if (err < 0)
1465                goto destroy;
1466
1467        err = vmci_transport_send_qp_offer(sk, handle);
1468        if (err < 0) {
1469                err = vmci_transport_error_to_vsock_error(err);
1470                goto destroy;
1471        }
1472
1473        vmci_trans(vsk)->qp_handle = handle;
1474        vmci_trans(vsk)->qpair = qpair;
1475
1476        vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size =
1477                pkt->u.size;
1478
1479        vmci_trans(vsk)->detach_sub_id = detach_sub_id;
1480
1481        vmci_trans(vsk)->notify_ops->process_negotiate(sk);
1482
1483        return 0;
1484
1485destroy:
1486        if (detach_sub_id != VMCI_INVALID_ID)
1487                vmci_event_unsubscribe(detach_sub_id);
1488
1489        if (!vmci_handle_is_invalid(handle))
1490                vmci_qpair_detach(&qpair);
1491
1492        return err;
1493}
1494
1495static int
1496vmci_transport_recv_connecting_client_invalid(struct sock *sk,
1497                                              struct vmci_transport_packet *pkt)
1498{
1499        int err = 0;
1500        struct vsock_sock *vsk = vsock_sk(sk);
1501
1502        if (vsk->sent_request) {
1503                vsk->sent_request = false;
1504                vsk->ignore_connecting_rst = true;
1505
1506                err = vmci_transport_send_conn_request(
1507                        sk, vmci_trans(vsk)->queue_pair_size);
1508                if (err < 0)
1509                        err = vmci_transport_error_to_vsock_error(err);
1510                else
1511                        err = 0;
1512
1513        }
1514
1515        return err;
1516}
1517
1518static int vmci_transport_recv_connected(struct sock *sk,
1519                                         struct vmci_transport_packet *pkt)
1520{
1521        struct vsock_sock *vsk;
1522        bool pkt_processed = false;
1523
1524        /* In cases where we are closing the connection, it's sufficient to
1525         * mark the state change (and maybe error) and wake up any waiting
1526         * threads. Since this is a connected socket, it's owned by a user
1527         * process and will be cleaned up when the failure is passed back on
1528         * the current or next system call.  Our system call implementations
1529         * must therefore check for error and state changes on entry and when
1530         * being awoken.
1531         */
1532        switch (pkt->type) {
1533        case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
1534                if (pkt->u.mode) {
1535                        vsk = vsock_sk(sk);
1536
1537                        vsk->peer_shutdown |= pkt->u.mode;
1538                        sk->sk_state_change(sk);
1539                }
1540                break;
1541
1542        case VMCI_TRANSPORT_PACKET_TYPE_RST:
1543                vsk = vsock_sk(sk);
1544                /* It is possible that we sent our peer a message (e.g a
1545                 * WAITING_READ) right before we got notified that the peer had
1546                 * detached. If that happens then we can get a RST pkt back
1547                 * from our peer even though there is data available for us to
1548                 * read. In that case, don't shutdown the socket completely but
1549                 * instead allow the local client to finish reading data off
1550                 * the queuepair. Always treat a RST pkt in connected mode like
1551                 * a clean shutdown.
1552                 */
1553                sock_set_flag(sk, SOCK_DONE);
1554                vsk->peer_shutdown = SHUTDOWN_MASK;
1555                if (vsock_stream_has_data(vsk) <= 0)
1556                        sk->sk_state = TCP_CLOSING;
1557
1558                sk->sk_state_change(sk);
1559                break;
1560
1561        default:
1562                vsk = vsock_sk(sk);
1563                vmci_trans(vsk)->notify_ops->handle_notify_pkt(
1564                                sk, pkt, false, NULL, NULL,
1565                                &pkt_processed);
1566                if (!pkt_processed)
1567                        return -EINVAL;
1568
1569                break;
1570        }
1571
1572        return 0;
1573}
1574
1575static int vmci_transport_socket_init(struct vsock_sock *vsk,
1576                                      struct vsock_sock *psk)
1577{
1578        vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL);
1579        if (!vsk->trans)
1580                return -ENOMEM;
1581
1582        vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
1583        vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
1584        vmci_trans(vsk)->qpair = NULL;
1585        vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0;
1586        vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
1587        vmci_trans(vsk)->notify_ops = NULL;
1588        INIT_LIST_HEAD(&vmci_trans(vsk)->elem);
1589        vmci_trans(vsk)->sk = &vsk->sk;
1590        spin_lock_init(&vmci_trans(vsk)->lock);
1591        if (psk) {
1592                vmci_trans(vsk)->queue_pair_size =
1593                        vmci_trans(psk)->queue_pair_size;
1594                vmci_trans(vsk)->queue_pair_min_size =
1595                        vmci_trans(psk)->queue_pair_min_size;
1596                vmci_trans(vsk)->queue_pair_max_size =
1597                        vmci_trans(psk)->queue_pair_max_size;
1598        } else {
1599                vmci_trans(vsk)->queue_pair_size =
1600                        VMCI_TRANSPORT_DEFAULT_QP_SIZE;
1601                vmci_trans(vsk)->queue_pair_min_size =
1602                         VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN;
1603                vmci_trans(vsk)->queue_pair_max_size =
1604                        VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX;
1605        }
1606
1607        return 0;
1608}
1609
1610static void vmci_transport_free_resources(struct list_head *transport_list)
1611{
1612        while (!list_empty(transport_list)) {
1613                struct vmci_transport *transport =
1614                    list_first_entry(transport_list, struct vmci_transport,
1615                                     elem);
1616                list_del(&transport->elem);
1617
1618                if (transport->detach_sub_id != VMCI_INVALID_ID) {
1619                        vmci_event_unsubscribe(transport->detach_sub_id);
1620                        transport->detach_sub_id = VMCI_INVALID_ID;
1621                }
1622
1623                if (!vmci_handle_is_invalid(transport->qp_handle)) {
1624                        vmci_qpair_detach(&transport->qpair);
1625                        transport->qp_handle = VMCI_INVALID_HANDLE;
1626                        transport->produce_size = 0;
1627                        transport->consume_size = 0;
1628                }
1629
1630                kfree(transport);
1631        }
1632}
1633
1634static void vmci_transport_cleanup(struct work_struct *work)
1635{
1636        LIST_HEAD(pending);
1637
1638        spin_lock_bh(&vmci_transport_cleanup_lock);
1639        list_replace_init(&vmci_transport_cleanup_list, &pending);
1640        spin_unlock_bh(&vmci_transport_cleanup_lock);
1641        vmci_transport_free_resources(&pending);
1642}
1643
1644static void vmci_transport_destruct(struct vsock_sock *vsk)
1645{
1646        /* transport can be NULL if we hit a failure at init() time */
1647        if (!vmci_trans(vsk))
1648                return;
1649
1650        /* Ensure that the detach callback doesn't use the sk/vsk
1651         * we are about to destruct.
1652         */
1653        spin_lock_bh(&vmci_trans(vsk)->lock);
1654        vmci_trans(vsk)->sk = NULL;
1655        spin_unlock_bh(&vmci_trans(vsk)->lock);
1656
1657        if (vmci_trans(vsk)->notify_ops)
1658                vmci_trans(vsk)->notify_ops->socket_destruct(vsk);
1659
1660        spin_lock_bh(&vmci_transport_cleanup_lock);
1661        list_add(&vmci_trans(vsk)->elem, &vmci_transport_cleanup_list);
1662        spin_unlock_bh(&vmci_transport_cleanup_lock);
1663        schedule_work(&vmci_transport_cleanup_work);
1664
1665        vsk->trans = NULL;
1666}
1667
1668static void vmci_transport_release(struct vsock_sock *vsk)
1669{
1670        vsock_remove_sock(vsk);
1671
1672        if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) {
1673                vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle);
1674                vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
1675        }
1676}
1677
1678static int vmci_transport_dgram_bind(struct vsock_sock *vsk,
1679                                     struct sockaddr_vm *addr)
1680{
1681        u32 port;
1682        u32 flags;
1683        int err;
1684
1685        /* VMCI will select a resource ID for us if we provide
1686         * VMCI_INVALID_ID.
1687         */
1688        port = addr->svm_port == VMADDR_PORT_ANY ?
1689                        VMCI_INVALID_ID : addr->svm_port;
1690
1691        if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE))
1692                return -EACCES;
1693
1694        flags = addr->svm_cid == VMADDR_CID_ANY ?
1695                                VMCI_FLAG_ANYCID_DG_HND : 0;
1696
1697        err = vmci_transport_datagram_create_hnd(port, flags,
1698                                                 vmci_transport_recv_dgram_cb,
1699                                                 &vsk->sk,
1700                                                 &vmci_trans(vsk)->dg_handle);
1701        if (err < VMCI_SUCCESS)
1702                return vmci_transport_error_to_vsock_error(err);
1703        vsock_addr_init(&vsk->local_addr, addr->svm_cid,
1704                        vmci_trans(vsk)->dg_handle.resource);
1705
1706        return 0;
1707}
1708
1709static int vmci_transport_dgram_enqueue(
1710        struct vsock_sock *vsk,
1711        struct sockaddr_vm *remote_addr,
1712        struct msghdr *msg,
1713        size_t len)
1714{
1715        int err;
1716        struct vmci_datagram *dg;
1717
1718        if (len > VMCI_MAX_DG_PAYLOAD_SIZE)
1719                return -EMSGSIZE;
1720
1721        if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid))
1722                return -EPERM;
1723
1724        /* Allocate a buffer for the user's message and our packet header. */
1725        dg = kmalloc(len + sizeof(*dg), GFP_KERNEL);
1726        if (!dg)
1727                return -ENOMEM;
1728
1729        memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len);
1730
1731        dg->dst = vmci_make_handle(remote_addr->svm_cid,
1732                                   remote_addr->svm_port);
1733        dg->src = vmci_make_handle(vsk->local_addr.svm_cid,
1734                                   vsk->local_addr.svm_port);
1735        dg->payload_size = len;
1736
1737        err = vmci_datagram_send(dg);
1738        kfree(dg);
1739        if (err < 0)
1740                return vmci_transport_error_to_vsock_error(err);
1741
1742        return err - sizeof(*dg);
1743}
1744
1745static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk,
1746                                        struct msghdr *msg, size_t len,
1747                                        int flags)
1748{
1749        int err;
1750        int noblock;
1751        struct vmci_datagram *dg;
1752        size_t payload_len;
1753        struct sk_buff *skb;
1754
1755        noblock = flags & MSG_DONTWAIT;
1756
1757        if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
1758                return -EOPNOTSUPP;
1759
1760        /* Retrieve the head sk_buff from the socket's receive queue. */
1761        err = 0;
1762        skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
1763        if (!skb)
1764                return err;
1765
1766        dg = (struct vmci_datagram *)skb->data;
1767        if (!dg)
1768                /* err is 0, meaning we read zero bytes. */
1769                goto out;
1770
1771        payload_len = dg->payload_size;
1772        /* Ensure the sk_buff matches the payload size claimed in the packet. */
1773        if (payload_len != skb->len - sizeof(*dg)) {
1774                err = -EINVAL;
1775                goto out;
1776        }
1777
1778        if (payload_len > len) {
1779                payload_len = len;
1780                msg->msg_flags |= MSG_TRUNC;
1781        }
1782
1783        /* Place the datagram payload in the user's iovec. */
1784        err = skb_copy_datagram_msg(skb, sizeof(*dg), msg, payload_len);
1785        if (err)
1786                goto out;
1787
1788        if (msg->msg_name) {
1789                /* Provide the address of the sender. */
1790                DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
1791                vsock_addr_init(vm_addr, dg->src.context, dg->src.resource);
1792                msg->msg_namelen = sizeof(*vm_addr);
1793        }
1794        err = payload_len;
1795
1796out:
1797        skb_free_datagram(&vsk->sk, skb);
1798        return err;
1799}
1800
1801static bool vmci_transport_dgram_allow(u32 cid, u32 port)
1802{
1803        if (cid == VMADDR_CID_HYPERVISOR) {
1804                /* Registrations of PBRPC Servers do not modify VMX/Hypervisor
1805                 * state and are allowed.
1806                 */
1807                return port == VMCI_UNITY_PBRPC_REGISTER;
1808        }
1809
1810        return true;
1811}
1812
1813static int vmci_transport_connect(struct vsock_sock *vsk)
1814{
1815        int err;
1816        bool old_pkt_proto = false;
1817        struct sock *sk = &vsk->sk;
1818
1819        if (vmci_transport_old_proto_override(&old_pkt_proto) &&
1820                old_pkt_proto) {
1821                err = vmci_transport_send_conn_request(
1822                        sk, vmci_trans(vsk)->queue_pair_size);
1823                if (err < 0) {
1824                        sk->sk_state = TCP_CLOSE;
1825                        return err;
1826                }
1827        } else {
1828                int supported_proto_versions =
1829                        vmci_transport_new_proto_supported_versions();
1830                err = vmci_transport_send_conn_request2(
1831                                sk, vmci_trans(vsk)->queue_pair_size,
1832                                supported_proto_versions);
1833                if (err < 0) {
1834                        sk->sk_state = TCP_CLOSE;
1835                        return err;
1836                }
1837
1838                vsk->sent_request = true;
1839        }
1840
1841        return err;
1842}
1843
1844static ssize_t vmci_transport_stream_dequeue(
1845        struct vsock_sock *vsk,
1846        struct msghdr *msg,
1847        size_t len,
1848        int flags)
1849{
1850        if (flags & MSG_PEEK)
1851                return vmci_qpair_peekv(vmci_trans(vsk)->qpair, msg, len, 0);
1852        else
1853                return vmci_qpair_dequev(vmci_trans(vsk)->qpair, msg, len, 0);
1854}
1855
1856static ssize_t vmci_transport_stream_enqueue(
1857        struct vsock_sock *vsk,
1858        struct msghdr *msg,
1859        size_t len)
1860{
1861        return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0);
1862}
1863
1864static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)
1865{
1866        return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair);
1867}
1868
1869static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk)
1870{
1871        return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair);
1872}
1873
1874static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk)
1875{
1876        return vmci_trans(vsk)->consume_size;
1877}
1878
1879static bool vmci_transport_stream_is_active(struct vsock_sock *vsk)
1880{
1881        return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle);
1882}
1883
1884static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk)
1885{
1886        return vmci_trans(vsk)->queue_pair_size;
1887}
1888
1889static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk)
1890{
1891        return vmci_trans(vsk)->queue_pair_min_size;
1892}
1893
1894static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk)
1895{
1896        return vmci_trans(vsk)->queue_pair_max_size;
1897}
1898
1899static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
1900{
1901        if (val < vmci_trans(vsk)->queue_pair_min_size)
1902                vmci_trans(vsk)->queue_pair_min_size = val;
1903        if (val > vmci_trans(vsk)->queue_pair_max_size)
1904                vmci_trans(vsk)->queue_pair_max_size = val;
1905        vmci_trans(vsk)->queue_pair_size = val;
1906}
1907
1908static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk,
1909                                               u64 val)
1910{
1911        if (val > vmci_trans(vsk)->queue_pair_size)
1912                vmci_trans(vsk)->queue_pair_size = val;
1913        vmci_trans(vsk)->queue_pair_min_size = val;
1914}
1915
1916static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk,
1917                                               u64 val)
1918{
1919        if (val < vmci_trans(vsk)->queue_pair_size)
1920                vmci_trans(vsk)->queue_pair_size = val;
1921        vmci_trans(vsk)->queue_pair_max_size = val;
1922}
1923
1924static int vmci_transport_notify_poll_in(
1925        struct vsock_sock *vsk,
1926        size_t target,
1927        bool *data_ready_now)
1928{
1929        return vmci_trans(vsk)->notify_ops->poll_in(
1930                        &vsk->sk, target, data_ready_now);
1931}
1932
1933static int vmci_transport_notify_poll_out(
1934        struct vsock_sock *vsk,
1935        size_t target,
1936        bool *space_available_now)
1937{
1938        return vmci_trans(vsk)->notify_ops->poll_out(
1939                        &vsk->sk, target, space_available_now);
1940}
1941
1942static int vmci_transport_notify_recv_init(
1943        struct vsock_sock *vsk,
1944        size_t target,
1945        struct vsock_transport_recv_notify_data *data)
1946{
1947        return vmci_trans(vsk)->notify_ops->recv_init(
1948                        &vsk->sk, target,
1949                        (struct vmci_transport_recv_notify_data *)data);
1950}
1951
1952static int vmci_transport_notify_recv_pre_block(
1953        struct vsock_sock *vsk,
1954        size_t target,
1955        struct vsock_transport_recv_notify_data *data)
1956{
1957        return vmci_trans(vsk)->notify_ops->recv_pre_block(
1958                        &vsk->sk, target,
1959                        (struct vmci_transport_recv_notify_data *)data);
1960}
1961
1962static int vmci_transport_notify_recv_pre_dequeue(
1963        struct vsock_sock *vsk,
1964        size_t target,
1965        struct vsock_transport_recv_notify_data *data)
1966{
1967        return vmci_trans(vsk)->notify_ops->recv_pre_dequeue(
1968                        &vsk->sk, target,
1969                        (struct vmci_transport_recv_notify_data *)data);
1970}
1971
1972static int vmci_transport_notify_recv_post_dequeue(
1973        struct vsock_sock *vsk,
1974        size_t target,
1975        ssize_t copied,
1976        bool data_read,
1977        struct vsock_transport_recv_notify_data *data)
1978{
1979        return vmci_trans(vsk)->notify_ops->recv_post_dequeue(
1980                        &vsk->sk, target, copied, data_read,
1981                        (struct vmci_transport_recv_notify_data *)data);
1982}
1983
1984static int vmci_transport_notify_send_init(
1985        struct vsock_sock *vsk,
1986        struct vsock_transport_send_notify_data *data)
1987{
1988        return vmci_trans(vsk)->notify_ops->send_init(
1989                        &vsk->sk,
1990                        (struct vmci_transport_send_notify_data *)data);
1991}
1992
1993static int vmci_transport_notify_send_pre_block(
1994        struct vsock_sock *vsk,
1995        struct vsock_transport_send_notify_data *data)
1996{
1997        return vmci_trans(vsk)->notify_ops->send_pre_block(
1998                        &vsk->sk,
1999                        (struct vmci_transport_send_notify_data *)data);
2000}
2001
2002static int vmci_transport_notify_send_pre_enqueue(
2003        struct vsock_sock *vsk,
2004        struct vsock_transport_send_notify_data *data)
2005{
2006        return vmci_trans(vsk)->notify_ops->send_pre_enqueue(
2007                        &vsk->sk,
2008                        (struct vmci_transport_send_notify_data *)data);
2009}
2010
2011static int vmci_transport_notify_send_post_enqueue(
2012        struct vsock_sock *vsk,
2013        ssize_t written,
2014        struct vsock_transport_send_notify_data *data)
2015{
2016        return vmci_trans(vsk)->notify_ops->send_post_enqueue(
2017                        &vsk->sk, written,
2018                        (struct vmci_transport_send_notify_data *)data);
2019}
2020
2021static bool vmci_transport_old_proto_override(bool *old_pkt_proto)
2022{
2023        if (PROTOCOL_OVERRIDE != -1) {
2024                if (PROTOCOL_OVERRIDE == 0)
2025                        *old_pkt_proto = true;
2026                else
2027                        *old_pkt_proto = false;
2028
2029                pr_info("Proto override in use\n");
2030                return true;
2031        }
2032
2033        return false;
2034}
2035
2036static bool vmci_transport_proto_to_notify_struct(struct sock *sk,
2037                                                  u16 *proto,
2038                                                  bool old_pkt_proto)
2039{
2040        struct vsock_sock *vsk = vsock_sk(sk);
2041
2042        if (old_pkt_proto) {
2043                if (*proto != VSOCK_PROTO_INVALID) {
2044                        pr_err("Can't set both an old and new protocol\n");
2045                        return false;
2046                }
2047                vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops;
2048                goto exit;
2049        }
2050
2051        switch (*proto) {
2052        case VSOCK_PROTO_PKT_ON_NOTIFY:
2053                vmci_trans(vsk)->notify_ops =
2054                        &vmci_transport_notify_pkt_q_state_ops;
2055                break;
2056        default:
2057                pr_err("Unknown notify protocol version\n");
2058                return false;
2059        }
2060
2061exit:
2062        vmci_trans(vsk)->notify_ops->socket_init(sk);
2063        return true;
2064}
2065
2066static u16 vmci_transport_new_proto_supported_versions(void)
2067{
2068        if (PROTOCOL_OVERRIDE != -1)
2069                return PROTOCOL_OVERRIDE;
2070
2071        return VSOCK_PROTO_ALL_SUPPORTED;
2072}
2073
2074static u32 vmci_transport_get_local_cid(void)
2075{
2076        return vmci_get_context_id();
2077}
2078
2079static const struct vsock_transport vmci_transport = {
2080        .init = vmci_transport_socket_init,
2081        .destruct = vmci_transport_destruct,
2082        .release = vmci_transport_release,
2083        .connect = vmci_transport_connect,
2084        .dgram_bind = vmci_transport_dgram_bind,
2085        .dgram_dequeue = vmci_transport_dgram_dequeue,
2086        .dgram_enqueue = vmci_transport_dgram_enqueue,
2087        .dgram_allow = vmci_transport_dgram_allow,
2088        .stream_dequeue = vmci_transport_stream_dequeue,
2089        .stream_enqueue = vmci_transport_stream_enqueue,
2090        .stream_has_data = vmci_transport_stream_has_data,
2091        .stream_has_space = vmci_transport_stream_has_space,
2092        .stream_rcvhiwat = vmci_transport_stream_rcvhiwat,
2093        .stream_is_active = vmci_transport_stream_is_active,
2094        .stream_allow = vmci_transport_stream_allow,
2095        .notify_poll_in = vmci_transport_notify_poll_in,
2096        .notify_poll_out = vmci_transport_notify_poll_out,
2097        .notify_recv_init = vmci_transport_notify_recv_init,
2098        .notify_recv_pre_block = vmci_transport_notify_recv_pre_block,
2099        .notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue,
2100        .notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue,
2101        .notify_send_init = vmci_transport_notify_send_init,
2102        .notify_send_pre_block = vmci_transport_notify_send_pre_block,
2103        .notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue,
2104        .notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue,
2105        .shutdown = vmci_transport_shutdown,
2106        .set_buffer_size = vmci_transport_set_buffer_size,
2107        .set_min_buffer_size = vmci_transport_set_min_buffer_size,
2108        .set_max_buffer_size = vmci_transport_set_max_buffer_size,
2109        .get_buffer_size = vmci_transport_get_buffer_size,
2110        .get_min_buffer_size = vmci_transport_get_min_buffer_size,
2111        .get_max_buffer_size = vmci_transport_get_max_buffer_size,
2112        .get_local_cid = vmci_transport_get_local_cid,
2113};
2114
2115static int __init vmci_transport_init(void)
2116{
2117        int err;
2118
2119        /* Create the datagram handle that we will use to send and receive all
2120         * VSocket control messages for this context.
2121         */
2122        err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID,
2123                                                 VMCI_FLAG_ANYCID_DG_HND,
2124                                                 vmci_transport_recv_stream_cb,
2125                                                 NULL,
2126                                                 &vmci_transport_stream_handle);
2127        if (err < VMCI_SUCCESS) {
2128                pr_err("Unable to create datagram handle. (%d)\n", err);
2129                return vmci_transport_error_to_vsock_error(err);
2130        }
2131
2132        err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED,
2133                                   vmci_transport_qp_resumed_cb,
2134                                   NULL, &vmci_transport_qp_resumed_sub_id);
2135        if (err < VMCI_SUCCESS) {
2136                pr_err("Unable to subscribe to resumed event. (%d)\n", err);
2137                err = vmci_transport_error_to_vsock_error(err);
2138                vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
2139                goto err_destroy_stream_handle;
2140        }
2141
2142        err = vsock_core_init(&vmci_transport);
2143        if (err < 0)
2144                goto err_unsubscribe;
2145
2146        return 0;
2147
2148err_unsubscribe:
2149        vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
2150err_destroy_stream_handle:
2151        vmci_datagram_destroy_handle(vmci_transport_stream_handle);
2152        return err;
2153}
2154module_init(vmci_transport_init);
2155
2156static void __exit vmci_transport_exit(void)
2157{
2158        cancel_work_sync(&vmci_transport_cleanup_work);
2159        vmci_transport_free_resources(&vmci_transport_cleanup_list);
2160
2161        if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) {
2162                if (vmci_datagram_destroy_handle(
2163                        vmci_transport_stream_handle) != VMCI_SUCCESS)
2164                        pr_err("Couldn't destroy datagram handle\n");
2165                vmci_transport_stream_handle = VMCI_INVALID_HANDLE;
2166        }
2167
2168        if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) {
2169                vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
2170                vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
2171        }
2172
2173        vsock_core_exit();
2174}
2175module_exit(vmci_transport_exit);
2176
2177MODULE_AUTHOR("VMware, Inc.");
2178MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
2179MODULE_VERSION("1.0.5.0-k");
2180MODULE_LICENSE("GPL v2");
2181MODULE_ALIAS("vmware_vsock");
2182MODULE_ALIAS_NETPROTO(PF_VSOCK);
2183