linux/net/vmw_vsock/vmci_transport.c
<<
>>
Prefs
   1/*
   2 * VMware vSockets Driver
   3 *
   4 * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License as published by the Free
   8 * Software Foundation version 2 and no later version.
   9 *
  10 * This program is distributed in the hope that it will be useful, but WITHOUT
  11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 * more details.
  14 */
  15
  16#include <linux/types.h>
  17#include <linux/bitops.h>
  18#include <linux/cred.h>
  19#include <linux/init.h>
  20#include <linux/io.h>
  21#include <linux/kernel.h>
  22#include <linux/kmod.h>
  23#include <linux/list.h>
  24#include <linux/miscdevice.h>
  25#include <linux/module.h>
  26#include <linux/mutex.h>
  27#include <linux/net.h>
  28#include <linux/poll.h>
  29#include <linux/skbuff.h>
  30#include <linux/smp.h>
  31#include <linux/socket.h>
  32#include <linux/stddef.h>
  33#include <linux/unistd.h>
  34#include <linux/wait.h>
  35#include <linux/workqueue.h>
  36#include <net/sock.h>
  37#include <net/af_vsock.h>
  38
  39#include "vmci_transport_notify.h"
  40
  41static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
  42static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
  43static void vmci_transport_peer_detach_cb(u32 sub_id,
  44                                          const struct vmci_event_data *ed,
  45                                          void *client_data);
  46static void vmci_transport_recv_pkt_work(struct work_struct *work);
  47static void vmci_transport_cleanup(struct work_struct *work);
  48static int vmci_transport_recv_listen(struct sock *sk,
  49                                      struct vmci_transport_packet *pkt);
  50static int vmci_transport_recv_connecting_server(
  51                                        struct sock *sk,
  52                                        struct sock *pending,
  53                                        struct vmci_transport_packet *pkt);
  54static int vmci_transport_recv_connecting_client(
  55                                        struct sock *sk,
  56                                        struct vmci_transport_packet *pkt);
  57static int vmci_transport_recv_connecting_client_negotiate(
  58                                        struct sock *sk,
  59                                        struct vmci_transport_packet *pkt);
  60static int vmci_transport_recv_connecting_client_invalid(
  61                                        struct sock *sk,
  62                                        struct vmci_transport_packet *pkt);
  63static int vmci_transport_recv_connected(struct sock *sk,
  64                                         struct vmci_transport_packet *pkt);
  65static bool vmci_transport_old_proto_override(bool *old_pkt_proto);
  66static u16 vmci_transport_new_proto_supported_versions(void);
  67static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto,
  68                                                  bool old_pkt_proto);
  69
  70struct vmci_transport_recv_pkt_info {
  71        struct work_struct work;
  72        struct sock *sk;
  73        struct vmci_transport_packet pkt;
  74};
  75
  76static LIST_HEAD(vmci_transport_cleanup_list);
  77static DEFINE_SPINLOCK(vmci_transport_cleanup_lock);
  78static DECLARE_WORK(vmci_transport_cleanup_work, vmci_transport_cleanup);
  79
  80static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
  81                                                           VMCI_INVALID_ID };
  82static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
  83
  84static int PROTOCOL_OVERRIDE = -1;
  85
  86#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN   128
  87#define VMCI_TRANSPORT_DEFAULT_QP_SIZE       262144
  88#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX   262144
  89
  90/* The default peer timeout indicates how long we will wait for a peer response
  91 * to a control message.
  92 */
  93#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
  94
  95/* Helper function to convert from a VMCI error code to a VSock error code. */
  96
  97static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
  98{
  99        switch (vmci_error) {
 100        case VMCI_ERROR_NO_MEM:
 101                return -ENOMEM;
 102        case VMCI_ERROR_DUPLICATE_ENTRY:
 103        case VMCI_ERROR_ALREADY_EXISTS:
 104                return -EADDRINUSE;
 105        case VMCI_ERROR_NO_ACCESS:
 106                return -EPERM;
 107        case VMCI_ERROR_NO_RESOURCES:
 108                return -ENOBUFS;
 109        case VMCI_ERROR_INVALID_RESOURCE:
 110                return -EHOSTUNREACH;
 111        case VMCI_ERROR_INVALID_ARGS:
 112        default:
 113                break;
 114        }
 115        return -EINVAL;
 116}
 117
 118static u32 vmci_transport_peer_rid(u32 peer_cid)
 119{
 120        if (VMADDR_CID_HYPERVISOR == peer_cid)
 121                return VMCI_TRANSPORT_HYPERVISOR_PACKET_RID;
 122
 123        return VMCI_TRANSPORT_PACKET_RID;
 124}
 125
 126static inline void
 127vmci_transport_packet_init(struct vmci_transport_packet *pkt,
 128                           struct sockaddr_vm *src,
 129                           struct sockaddr_vm *dst,
 130                           u8 type,
 131                           u64 size,
 132                           u64 mode,
 133                           struct vmci_transport_waiting_info *wait,
 134                           u16 proto,
 135                           struct vmci_handle handle)
 136{
 137        /* We register the stream control handler as an any cid handle so we
 138         * must always send from a source address of VMADDR_CID_ANY
 139         */
 140        pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY,
 141                                       VMCI_TRANSPORT_PACKET_RID);
 142        pkt->dg.dst = vmci_make_handle(dst->svm_cid,
 143                                       vmci_transport_peer_rid(dst->svm_cid));
 144        pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg);
 145        pkt->version = VMCI_TRANSPORT_PACKET_VERSION;
 146        pkt->type = type;
 147        pkt->src_port = src->svm_port;
 148        pkt->dst_port = dst->svm_port;
 149        memset(&pkt->proto, 0, sizeof(pkt->proto));
 150        memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2));
 151
 152        switch (pkt->type) {
 153        case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
 154                pkt->u.size = 0;
 155                break;
 156
 157        case VMCI_TRANSPORT_PACKET_TYPE_REQUEST:
 158        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
 159                pkt->u.size = size;
 160                break;
 161
 162        case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
 163        case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
 164                pkt->u.handle = handle;
 165                break;
 166
 167        case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
 168        case VMCI_TRANSPORT_PACKET_TYPE_READ:
 169        case VMCI_TRANSPORT_PACKET_TYPE_RST:
 170                pkt->u.size = 0;
 171                break;
 172
 173        case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
 174                pkt->u.mode = mode;
 175                break;
 176
 177        case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
 178        case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
 179                memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait));
 180                break;
 181
 182        case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2:
 183        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
 184                pkt->u.size = size;
 185                pkt->proto = proto;
 186                break;
 187        }
 188}
 189
 190static inline void
 191vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt,
 192                                    struct sockaddr_vm *local,
 193                                    struct sockaddr_vm *remote)
 194{
 195        vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port);
 196        vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port);
 197}
 198
 199static int
 200__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt,
 201                                  struct sockaddr_vm *src,
 202                                  struct sockaddr_vm *dst,
 203                                  enum vmci_transport_packet_type type,
 204                                  u64 size,
 205                                  u64 mode,
 206                                  struct vmci_transport_waiting_info *wait,
 207                                  u16 proto,
 208                                  struct vmci_handle handle,
 209                                  bool convert_error)
 210{
 211        int err;
 212
 213        vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait,
 214                                   proto, handle);
 215        err = vmci_datagram_send(&pkt->dg);
 216        if (convert_error && (err < 0))
 217                return vmci_transport_error_to_vsock_error(err);
 218
 219        return err;
 220}
 221
 222static int
 223vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt,
 224                                      enum vmci_transport_packet_type type,
 225                                      u64 size,
 226                                      u64 mode,
 227                                      struct vmci_transport_waiting_info *wait,
 228                                      struct vmci_handle handle)
 229{
 230        struct vmci_transport_packet reply;
 231        struct sockaddr_vm src, dst;
 232
 233        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) {
 234                return 0;
 235        } else {
 236                vmci_transport_packet_get_addresses(pkt, &src, &dst);
 237                return __vmci_transport_send_control_pkt(&reply, &src, &dst,
 238                                                         type,
 239                                                         size, mode, wait,
 240                                                         VSOCK_PROTO_INVALID,
 241                                                         handle, true);
 242        }
 243}
 244
 245static int
 246vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src,
 247                                   struct sockaddr_vm *dst,
 248                                   enum vmci_transport_packet_type type,
 249                                   u64 size,
 250                                   u64 mode,
 251                                   struct vmci_transport_waiting_info *wait,
 252                                   struct vmci_handle handle)
 253{
 254        /* Note that it is safe to use a single packet across all CPUs since
 255         * two tasklets of the same type are guaranteed to not ever run
 256         * simultaneously. If that ever changes, or VMCI stops using tasklets,
 257         * we can use per-cpu packets.
 258         */
 259        static struct vmci_transport_packet pkt;
 260
 261        return __vmci_transport_send_control_pkt(&pkt, src, dst, type,
 262                                                 size, mode, wait,
 263                                                 VSOCK_PROTO_INVALID, handle,
 264                                                 false);
 265}
 266
 267static int
 268vmci_transport_send_control_pkt(struct sock *sk,
 269                                enum vmci_transport_packet_type type,
 270                                u64 size,
 271                                u64 mode,
 272                                struct vmci_transport_waiting_info *wait,
 273                                u16 proto,
 274                                struct vmci_handle handle)
 275{
 276        struct vmci_transport_packet *pkt;
 277        struct vsock_sock *vsk;
 278        int err;
 279
 280        vsk = vsock_sk(sk);
 281
 282        if (!vsock_addr_bound(&vsk->local_addr))
 283                return -EINVAL;
 284
 285        if (!vsock_addr_bound(&vsk->remote_addr))
 286                return -EINVAL;
 287
 288        pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
 289        if (!pkt)
 290                return -ENOMEM;
 291
 292        err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr,
 293                                                &vsk->remote_addr, type, size,
 294                                                mode, wait, proto, handle,
 295                                                true);
 296        kfree(pkt);
 297
 298        return err;
 299}
 300
 301static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
 302                                        struct sockaddr_vm *src,
 303                                        struct vmci_transport_packet *pkt)
 304{
 305        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
 306                return 0;
 307        return vmci_transport_send_control_pkt_bh(
 308                                        dst, src,
 309                                        VMCI_TRANSPORT_PACKET_TYPE_RST, 0,
 310                                        0, NULL, VMCI_INVALID_HANDLE);
 311}
 312
 313static int vmci_transport_send_reset(struct sock *sk,
 314                                     struct vmci_transport_packet *pkt)
 315{
 316        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
 317                return 0;
 318        return vmci_transport_send_control_pkt(sk,
 319                                        VMCI_TRANSPORT_PACKET_TYPE_RST,
 320                                        0, 0, NULL, VSOCK_PROTO_INVALID,
 321                                        VMCI_INVALID_HANDLE);
 322}
 323
 324static int vmci_transport_send_negotiate(struct sock *sk, size_t size)
 325{
 326        return vmci_transport_send_control_pkt(
 327                                        sk,
 328                                        VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE,
 329                                        size, 0, NULL,
 330                                        VSOCK_PROTO_INVALID,
 331                                        VMCI_INVALID_HANDLE);
 332}
 333
 334static int vmci_transport_send_negotiate2(struct sock *sk, size_t size,
 335                                          u16 version)
 336{
 337        return vmci_transport_send_control_pkt(
 338                                        sk,
 339                                        VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2,
 340                                        size, 0, NULL, version,
 341                                        VMCI_INVALID_HANDLE);
 342}
 343
 344static int vmci_transport_send_qp_offer(struct sock *sk,
 345                                        struct vmci_handle handle)
 346{
 347        return vmci_transport_send_control_pkt(
 348                                        sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0,
 349                                        0, NULL,
 350                                        VSOCK_PROTO_INVALID, handle);
 351}
 352
 353static int vmci_transport_send_attach(struct sock *sk,
 354                                      struct vmci_handle handle)
 355{
 356        return vmci_transport_send_control_pkt(
 357                                        sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH,
 358                                        0, 0, NULL, VSOCK_PROTO_INVALID,
 359                                        handle);
 360}
 361
 362static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt)
 363{
 364        return vmci_transport_reply_control_pkt_fast(
 365                                                pkt,
 366                                                VMCI_TRANSPORT_PACKET_TYPE_RST,
 367                                                0, 0, NULL,
 368                                                VMCI_INVALID_HANDLE);
 369}
 370
 371static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst,
 372                                          struct sockaddr_vm *src)
 373{
 374        return vmci_transport_send_control_pkt_bh(
 375                                        dst, src,
 376                                        VMCI_TRANSPORT_PACKET_TYPE_INVALID,
 377                                        0, 0, NULL, VMCI_INVALID_HANDLE);
 378}
 379
 380int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
 381                                 struct sockaddr_vm *src)
 382{
 383        return vmci_transport_send_control_pkt_bh(
 384                                        dst, src,
 385                                        VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
 386                                        0, NULL, VMCI_INVALID_HANDLE);
 387}
 388
 389int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
 390                                struct sockaddr_vm *src)
 391{
 392        return vmci_transport_send_control_pkt_bh(
 393                                        dst, src,
 394                                        VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
 395                                        0, NULL, VMCI_INVALID_HANDLE);
 396}
 397
 398int vmci_transport_send_wrote(struct sock *sk)
 399{
 400        return vmci_transport_send_control_pkt(
 401                                        sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
 402                                        0, NULL, VSOCK_PROTO_INVALID,
 403                                        VMCI_INVALID_HANDLE);
 404}
 405
 406int vmci_transport_send_read(struct sock *sk)
 407{
 408        return vmci_transport_send_control_pkt(
 409                                        sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
 410                                        0, NULL, VSOCK_PROTO_INVALID,
 411                                        VMCI_INVALID_HANDLE);
 412}
 413
 414int vmci_transport_send_waiting_write(struct sock *sk,
 415                                      struct vmci_transport_waiting_info *wait)
 416{
 417        return vmci_transport_send_control_pkt(
 418                                sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE,
 419                                0, 0, wait, VSOCK_PROTO_INVALID,
 420                                VMCI_INVALID_HANDLE);
 421}
 422
 423int vmci_transport_send_waiting_read(struct sock *sk,
 424                                     struct vmci_transport_waiting_info *wait)
 425{
 426        return vmci_transport_send_control_pkt(
 427                                sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ,
 428                                0, 0, wait, VSOCK_PROTO_INVALID,
 429                                VMCI_INVALID_HANDLE);
 430}
 431
 432static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode)
 433{
 434        return vmci_transport_send_control_pkt(
 435                                        &vsk->sk,
 436                                        VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN,
 437                                        0, mode, NULL,
 438                                        VSOCK_PROTO_INVALID,
 439                                        VMCI_INVALID_HANDLE);
 440}
 441
 442static int vmci_transport_send_conn_request(struct sock *sk, size_t size)
 443{
 444        return vmci_transport_send_control_pkt(sk,
 445                                        VMCI_TRANSPORT_PACKET_TYPE_REQUEST,
 446                                        size, 0, NULL,
 447                                        VSOCK_PROTO_INVALID,
 448                                        VMCI_INVALID_HANDLE);
 449}
 450
 451static int vmci_transport_send_conn_request2(struct sock *sk, size_t size,
 452                                             u16 version)
 453{
 454        return vmci_transport_send_control_pkt(
 455                                        sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2,
 456                                        size, 0, NULL, version,
 457                                        VMCI_INVALID_HANDLE);
 458}
 459
 460static struct sock *vmci_transport_get_pending(
 461                                        struct sock *listener,
 462                                        struct vmci_transport_packet *pkt)
 463{
 464        struct vsock_sock *vlistener;
 465        struct vsock_sock *vpending;
 466        struct sock *pending;
 467        struct sockaddr_vm src;
 468
 469        vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
 470
 471        vlistener = vsock_sk(listener);
 472
 473        list_for_each_entry(vpending, &vlistener->pending_links,
 474                            pending_links) {
 475                if (vsock_addr_equals_addr(&src, &vpending->remote_addr) &&
 476                    pkt->dst_port == vpending->local_addr.svm_port) {
 477                        pending = sk_vsock(vpending);
 478                        sock_hold(pending);
 479                        goto found;
 480                }
 481        }
 482
 483        pending = NULL;
 484found:
 485        return pending;
 486
 487}
 488
 489static void vmci_transport_release_pending(struct sock *pending)
 490{
 491        sock_put(pending);
 492}
 493
 494/* We allow two kinds of sockets to communicate with a restricted VM: 1)
 495 * trusted sockets 2) sockets from applications running as the same user as the
 496 * VM (this is only true for the host side and only when using hosted products)
 497 */
 498
 499static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid)
 500{
 501        return vsock->trusted ||
 502               vmci_is_context_owner(peer_cid, vsock->owner->uid);
 503}
 504
 505/* We allow sending datagrams to and receiving datagrams from a restricted VM
 506 * only if it is trusted as described in vmci_transport_is_trusted.
 507 */
 508
 509static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid)
 510{
 511        if (VMADDR_CID_HYPERVISOR == peer_cid)
 512                return true;
 513
 514        if (vsock->cached_peer != peer_cid) {
 515                vsock->cached_peer = peer_cid;
 516                if (!vmci_transport_is_trusted(vsock, peer_cid) &&
 517                    (vmci_context_get_priv_flags(peer_cid) &
 518                     VMCI_PRIVILEGE_FLAG_RESTRICTED)) {
 519                        vsock->cached_peer_allow_dgram = false;
 520                } else {
 521                        vsock->cached_peer_allow_dgram = true;
 522                }
 523        }
 524
 525        return vsock->cached_peer_allow_dgram;
 526}
 527
 528static int
 529vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
 530                                struct vmci_handle *handle,
 531                                u64 produce_size,
 532                                u64 consume_size,
 533                                u32 peer, u32 flags, bool trusted)
 534{
 535        int err = 0;
 536
 537        if (trusted) {
 538                /* Try to allocate our queue pair as trusted. This will only
 539                 * work if vsock is running in the host.
 540                 */
 541
 542                err = vmci_qpair_alloc(qpair, handle, produce_size,
 543                                       consume_size,
 544                                       peer, flags,
 545                                       VMCI_PRIVILEGE_FLAG_TRUSTED);
 546                if (err != VMCI_ERROR_NO_ACCESS)
 547                        goto out;
 548
 549        }
 550
 551        err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size,
 552                               peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
 553out:
 554        if (err < 0) {
 555                pr_err("Could not attach to queue pair with %d\n",
 556                       err);
 557                err = vmci_transport_error_to_vsock_error(err);
 558        }
 559
 560        return err;
 561}
 562
 563static int
 564vmci_transport_datagram_create_hnd(u32 resource_id,
 565                                   u32 flags,
 566                                   vmci_datagram_recv_cb recv_cb,
 567                                   void *client_data,
 568                                   struct vmci_handle *out_handle)
 569{
 570        int err = 0;
 571
 572        /* Try to allocate our datagram handler as trusted. This will only work
 573         * if vsock is running in the host.
 574         */
 575
 576        err = vmci_datagram_create_handle_priv(resource_id, flags,
 577                                               VMCI_PRIVILEGE_FLAG_TRUSTED,
 578                                               recv_cb,
 579                                               client_data, out_handle);
 580
 581        if (err == VMCI_ERROR_NO_ACCESS)
 582                err = vmci_datagram_create_handle(resource_id, flags,
 583                                                  recv_cb, client_data,
 584                                                  out_handle);
 585
 586        return err;
 587}
 588
 589/* This is invoked as part of a tasklet that's scheduled when the VMCI
 590 * interrupt fires.  This is run in bottom-half context and if it ever needs to
 591 * sleep it should defer that work to a work queue.
 592 */
 593
 594static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
 595{
 596        struct sock *sk;
 597        size_t size;
 598        struct sk_buff *skb;
 599        struct vsock_sock *vsk;
 600
 601        sk = (struct sock *)data;
 602
 603        /* This handler is privileged when this module is running on the host.
 604         * We will get datagrams from all endpoints (even VMs that are in a
 605         * restricted context). If we get one from a restricted context then
 606         * the destination socket must be trusted.
 607         *
 608         * NOTE: We access the socket struct without holding the lock here.
 609         * This is ok because the field we are interested is never modified
 610         * outside of the create and destruct socket functions.
 611         */
 612        vsk = vsock_sk(sk);
 613        if (!vmci_transport_allow_dgram(vsk, dg->src.context))
 614                return VMCI_ERROR_NO_ACCESS;
 615
 616        size = VMCI_DG_SIZE(dg);
 617
 618        /* Attach the packet to the socket's receive queue as an sk_buff. */
 619        skb = alloc_skb(size, GFP_ATOMIC);
 620        if (!skb)
 621                return VMCI_ERROR_NO_MEM;
 622
 623        /* sk_receive_skb() will do a sock_put(), so hold here. */
 624        sock_hold(sk);
 625        skb_put(skb, size);
 626        memcpy(skb->data, dg, size);
 627        sk_receive_skb(sk, skb, 0);
 628
 629        return VMCI_SUCCESS;
 630}
 631
 632static bool vmci_transport_stream_allow(u32 cid, u32 port)
 633{
 634        static const u32 non_socket_contexts[] = {
 635                VMADDR_CID_RESERVED,
 636        };
 637        int i;
 638
 639        BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts));
 640
 641        for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) {
 642                if (cid == non_socket_contexts[i])
 643                        return false;
 644        }
 645
 646        return true;
 647}
 648
 649/* This is invoked as part of a tasklet that's scheduled when the VMCI
 650 * interrupt fires.  This is run in bottom-half context but it defers most of
 651 * its work to the packet handling work queue.
 652 */
 653
 654static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
 655{
 656        struct sock *sk;
 657        struct sockaddr_vm dst;
 658        struct sockaddr_vm src;
 659        struct vmci_transport_packet *pkt;
 660        struct vsock_sock *vsk;
 661        bool bh_process_pkt;
 662        int err;
 663
 664        sk = NULL;
 665        err = VMCI_SUCCESS;
 666        bh_process_pkt = false;
 667
 668        /* Ignore incoming packets from contexts without sockets, or resources
 669         * that aren't vsock implementations.
 670         */
 671
 672        if (!vmci_transport_stream_allow(dg->src.context, -1)
 673            || vmci_transport_peer_rid(dg->src.context) != dg->src.resource)
 674                return VMCI_ERROR_NO_ACCESS;
 675
 676        if (VMCI_DG_SIZE(dg) < sizeof(*pkt))
 677                /* Drop datagrams that do not contain full VSock packets. */
 678                return VMCI_ERROR_INVALID_ARGS;
 679
 680        pkt = (struct vmci_transport_packet *)dg;
 681
 682        /* Find the socket that should handle this packet.  First we look for a
 683         * connected socket and if there is none we look for a socket bound to
 684         * the destintation address.
 685         */
 686        vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
 687        vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);
 688
 689        sk = vsock_find_connected_socket(&src, &dst);
 690        if (!sk) {
 691                sk = vsock_find_bound_socket(&dst);
 692                if (!sk) {
 693                        /* We could not find a socket for this specified
 694                         * address.  If this packet is a RST, we just drop it.
 695                         * If it is another packet, we send a RST.  Note that
 696                         * we do not send a RST reply to RSTs so that we do not
 697                         * continually send RSTs between two endpoints.
 698                         *
 699                         * Note that since this is a reply, dst is src and src
 700                         * is dst.
 701                         */
 702                        if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
 703                                pr_err("unable to send reset\n");
 704
 705                        err = VMCI_ERROR_NOT_FOUND;
 706                        goto out;
 707                }
 708        }
 709
 710        /* If the received packet type is beyond all types known to this
 711         * implementation, reply with an invalid message.  Hopefully this will
 712         * help when implementing backwards compatibility in the future.
 713         */
 714        if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) {
 715                vmci_transport_send_invalid_bh(&dst, &src);
 716                err = VMCI_ERROR_INVALID_ARGS;
 717                goto out;
 718        }
 719
 720        /* This handler is privileged when this module is running on the host.
 721         * We will get datagram connect requests from all endpoints (even VMs
 722         * that are in a restricted context). If we get one from a restricted
 723         * context then the destination socket must be trusted.
 724         *
 725         * NOTE: We access the socket struct without holding the lock here.
 726         * This is ok because the field we are interested is never modified
 727         * outside of the create and destruct socket functions.
 728         */
 729        vsk = vsock_sk(sk);
 730        if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) {
 731                err = VMCI_ERROR_NO_ACCESS;
 732                goto out;
 733        }
 734
 735        /* We do most everything in a work queue, but let's fast path the
 736         * notification of reads and writes to help data transfer performance.
 737         * We can only do this if there is no process context code executing
 738         * for this socket since that may change the state.
 739         */
 740        bh_lock_sock(sk);
 741
 742        if (!sock_owned_by_user(sk)) {
 743                /* The local context ID may be out of date, update it. */
 744                vsk->local_addr.svm_cid = dst.svm_cid;
 745
 746                if (sk->sk_state == SS_CONNECTED)
 747                        vmci_trans(vsk)->notify_ops->handle_notify_pkt(
 748                                        sk, pkt, true, &dst, &src,
 749                                        &bh_process_pkt);
 750        }
 751
 752        bh_unlock_sock(sk);
 753
 754        if (!bh_process_pkt) {
 755                struct vmci_transport_recv_pkt_info *recv_pkt_info;
 756
 757                recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC);
 758                if (!recv_pkt_info) {
 759                        if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
 760                                pr_err("unable to send reset\n");
 761
 762                        err = VMCI_ERROR_NO_MEM;
 763                        goto out;
 764                }
 765
 766                recv_pkt_info->sk = sk;
 767                memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt));
 768                INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work);
 769
 770                schedule_work(&recv_pkt_info->work);
 771                /* Clear sk so that the reference count incremented by one of
 772                 * the Find functions above is not decremented below.  We need
 773                 * that reference count for the packet handler we've scheduled
 774                 * to run.
 775                 */
 776                sk = NULL;
 777        }
 778
 779out:
 780        if (sk)
 781                sock_put(sk);
 782
 783        return err;
 784}
 785
 786static void vmci_transport_handle_detach(struct sock *sk)
 787{
 788        struct vsock_sock *vsk;
 789
 790        vsk = vsock_sk(sk);
 791        if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
 792                sock_set_flag(sk, SOCK_DONE);
 793
 794                /* On a detach the peer will not be sending or receiving
 795                 * anymore.
 796                 */
 797                vsk->peer_shutdown = SHUTDOWN_MASK;
 798
 799                /* We should not be sending anymore since the peer won't be
 800                 * there to receive, but we can still receive if there is data
 801                 * left in our consume queue.
 802                 */
 803                if (vsock_stream_has_data(vsk) <= 0) {
 804                        if (sk->sk_state == SS_CONNECTING) {
 805                                /* The peer may detach from a queue pair while
 806                                 * we are still in the connecting state, i.e.,
 807                                 * if the peer VM is killed after attaching to
 808                                 * a queue pair, but before we complete the
 809                                 * handshake. In that case, we treat the detach
 810                                 * event like a reset.
 811                                 */
 812
 813                                sk->sk_state = SS_UNCONNECTED;
 814                                sk->sk_err = ECONNRESET;
 815                                sk->sk_error_report(sk);
 816                                return;
 817                        }
 818                        sk->sk_state = SS_UNCONNECTED;
 819                }
 820                sk->sk_state_change(sk);
 821        }
 822}
 823
 824static void vmci_transport_peer_detach_cb(u32 sub_id,
 825                                          const struct vmci_event_data *e_data,
 826                                          void *client_data)
 827{
 828        struct vmci_transport *trans = client_data;
 829        const struct vmci_event_payload_qp *e_payload;
 830
 831        e_payload = vmci_event_data_const_payload(e_data);
 832
 833        /* XXX This is lame, we should provide a way to lookup sockets by
 834         * qp_handle.
 835         */
 836        if (vmci_handle_is_invalid(e_payload->handle) ||
 837            !vmci_handle_is_equal(trans->qp_handle, e_payload->handle))
 838                return;
 839
 840        /* We don't ask for delayed CBs when we subscribe to this event (we
 841         * pass 0 as flags to vmci_event_subscribe()).  VMCI makes no
 842         * guarantees in that case about what context we might be running in,
 843         * so it could be BH or process, blockable or non-blockable.  So we
 844         * need to account for all possible contexts here.
 845         */
 846        spin_lock_bh(&trans->lock);
 847        if (!trans->sk)
 848                goto out;
 849
 850        /* Apart from here, trans->lock is only grabbed as part of sk destruct,
 851         * where trans->sk isn't locked.
 852         */
 853        bh_lock_sock(trans->sk);
 854
 855        vmci_transport_handle_detach(trans->sk);
 856
 857        bh_unlock_sock(trans->sk);
 858 out:
 859        spin_unlock_bh(&trans->lock);
 860}
 861
 862static void vmci_transport_qp_resumed_cb(u32 sub_id,
 863                                         const struct vmci_event_data *e_data,
 864                                         void *client_data)
 865{
 866        vsock_for_each_connected_socket(vmci_transport_handle_detach);
 867}
 868
 869static void vmci_transport_recv_pkt_work(struct work_struct *work)
 870{
 871        struct vmci_transport_recv_pkt_info *recv_pkt_info;
 872        struct vmci_transport_packet *pkt;
 873        struct sock *sk;
 874
 875        recv_pkt_info =
 876                container_of(work, struct vmci_transport_recv_pkt_info, work);
 877        sk = recv_pkt_info->sk;
 878        pkt = &recv_pkt_info->pkt;
 879
 880        lock_sock(sk);
 881
 882        /* The local context ID may be out of date. */
 883        vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;
 884
 885        switch (sk->sk_state) {
 886        case VSOCK_SS_LISTEN:
 887                vmci_transport_recv_listen(sk, pkt);
 888                break;
 889        case SS_CONNECTING:
 890                /* Processing of pending connections for servers goes through
 891                 * the listening socket, so see vmci_transport_recv_listen()
 892                 * for that path.
 893                 */
 894                vmci_transport_recv_connecting_client(sk, pkt);
 895                break;
 896        case SS_CONNECTED:
 897                vmci_transport_recv_connected(sk, pkt);
 898                break;
 899        default:
 900                /* Because this function does not run in the same context as
 901                 * vmci_transport_recv_stream_cb it is possible that the
 902                 * socket has closed. We need to let the other side know or it
 903                 * could be sitting in a connect and hang forever. Send a
 904                 * reset to prevent that.
 905                 */
 906                vmci_transport_send_reset(sk, pkt);
 907                break;
 908        }
 909
 910        release_sock(sk);
 911        kfree(recv_pkt_info);
 912        /* Release reference obtained in the stream callback when we fetched
 913         * this socket out of the bound or connected list.
 914         */
 915        sock_put(sk);
 916}
 917
 918static int vmci_transport_recv_listen(struct sock *sk,
 919                                      struct vmci_transport_packet *pkt)
 920{
 921        struct sock *pending;
 922        struct vsock_sock *vpending;
 923        int err;
 924        u64 qp_size;
 925        bool old_request = false;
 926        bool old_pkt_proto = false;
 927
 928        err = 0;
 929
 930        /* Because we are in the listen state, we could be receiving a packet
 931         * for ourself or any previous connection requests that we received.
 932         * If it's the latter, we try to find a socket in our list of pending
 933         * connections and, if we do, call the appropriate handler for the
 934         * state that that socket is in.  Otherwise we try to service the
 935         * connection request.
 936         */
 937        pending = vmci_transport_get_pending(sk, pkt);
 938        if (pending) {
 939                lock_sock(pending);
 940
 941                /* The local context ID may be out of date. */
 942                vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context;
 943
 944                switch (pending->sk_state) {
 945                case SS_CONNECTING:
 946                        err = vmci_transport_recv_connecting_server(sk,
 947                                                                    pending,
 948                                                                    pkt);
 949                        break;
 950                default:
 951                        vmci_transport_send_reset(pending, pkt);
 952                        err = -EINVAL;
 953                }
 954
 955                if (err < 0)
 956                        vsock_remove_pending(sk, pending);
 957
 958                release_sock(pending);
 959                vmci_transport_release_pending(pending);
 960
 961                return err;
 962        }
 963
 964        /* The listen state only accepts connection requests.  Reply with a
 965         * reset unless we received a reset.
 966         */
 967
 968        if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST ||
 969              pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) {
 970                vmci_transport_reply_reset(pkt);
 971                return -EINVAL;
 972        }
 973
 974        if (pkt->u.size == 0) {
 975                vmci_transport_reply_reset(pkt);
 976                return -EINVAL;
 977        }
 978
 979        /* If this socket can't accommodate this connection request, we send a
 980         * reset.  Otherwise we create and initialize a child socket and reply
 981         * with a connection negotiation.
 982         */
 983        if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) {
 984                vmci_transport_reply_reset(pkt);
 985                return -ECONNREFUSED;
 986        }
 987
 988        pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
 989                                 sk->sk_type, 0);
 990        if (!pending) {
 991                vmci_transport_send_reset(sk, pkt);
 992                return -ENOMEM;
 993        }
 994
 995        vpending = vsock_sk(pending);
 996
 997        vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context,
 998                        pkt->dst_port);
 999        vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context,
1000                        pkt->src_port);
1001
1002        /* If the proposed size fits within our min/max, accept it. Otherwise
1003         * propose our own size.
1004         */
1005        if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size &&
1006            pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) {
1007                qp_size = pkt->u.size;
1008        } else {
1009                qp_size = vmci_trans(vpending)->queue_pair_size;
1010        }
1011
1012        /* Figure out if we are using old or new requests based on the
1013         * overrides pkt types sent by our peer.
1014         */
1015        if (vmci_transport_old_proto_override(&old_pkt_proto)) {
1016                old_request = old_pkt_proto;
1017        } else {
1018                if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST)
1019                        old_request = true;
1020                else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)
1021                        old_request = false;
1022
1023        }
1024
1025        if (old_request) {
1026                /* Handle a REQUEST (or override) */
1027                u16 version = VSOCK_PROTO_INVALID;
1028                if (vmci_transport_proto_to_notify_struct(
1029                        pending, &version, true))
1030                        err = vmci_transport_send_negotiate(pending, qp_size);
1031                else
1032                        err = -EINVAL;
1033
1034        } else {
1035                /* Handle a REQUEST2 (or override) */
1036                int proto_int = pkt->proto;
1037                int pos;
1038                u16 active_proto_version = 0;
1039
1040                /* The list of possible protocols is the intersection of all
1041                 * protocols the client supports ... plus all the protocols we
1042                 * support.
1043                 */
1044                proto_int &= vmci_transport_new_proto_supported_versions();
1045
1046                /* We choose the highest possible protocol version and use that
1047                 * one.
1048                 */
1049                pos = fls(proto_int);
1050                if (pos) {
1051                        active_proto_version = (1 << (pos - 1));
1052                        if (vmci_transport_proto_to_notify_struct(
1053                                pending, &active_proto_version, false))
1054                                err = vmci_transport_send_negotiate2(pending,
1055                                                        qp_size,
1056                                                        active_proto_version);
1057                        else
1058                                err = -EINVAL;
1059
1060                } else {
1061                        err = -EINVAL;
1062                }
1063        }
1064
1065        if (err < 0) {
1066                vmci_transport_send_reset(sk, pkt);
1067                sock_put(pending);
1068                err = vmci_transport_error_to_vsock_error(err);
1069                goto out;
1070        }
1071
1072        vsock_add_pending(sk, pending);
1073        sk->sk_ack_backlog++;
1074
1075        pending->sk_state = SS_CONNECTING;
1076        vmci_trans(vpending)->produce_size =
1077                vmci_trans(vpending)->consume_size = qp_size;
1078        vmci_trans(vpending)->queue_pair_size = qp_size;
1079
1080        vmci_trans(vpending)->notify_ops->process_request(pending);
1081
1082        /* We might never receive another message for this socket and it's not
1083         * connected to any process, so we have to ensure it gets cleaned up
1084         * ourself.  Our delayed work function will take care of that.  Note
1085         * that we do not ever cancel this function since we have few
1086         * guarantees about its state when calling cancel_delayed_work().
1087         * Instead we hold a reference on the socket for that function and make
1088         * it capable of handling cases where it needs to do nothing but
1089         * release that reference.
1090         */
1091        vpending->listener = sk;
1092        sock_hold(sk);
1093        sock_hold(pending);
1094        INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work);
1095        schedule_delayed_work(&vpending->dwork, HZ);
1096
1097out:
1098        return err;
1099}
1100
1101static int
1102vmci_transport_recv_connecting_server(struct sock *listener,
1103                                      struct sock *pending,
1104                                      struct vmci_transport_packet *pkt)
1105{
1106        struct vsock_sock *vpending;
1107        struct vmci_handle handle;
1108        struct vmci_qp *qpair;
1109        bool is_local;
1110        u32 flags;
1111        u32 detach_sub_id;
1112        int err;
1113        int skerr;
1114
1115        vpending = vsock_sk(pending);
1116        detach_sub_id = VMCI_INVALID_ID;
1117
1118        switch (pkt->type) {
1119        case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
1120                if (vmci_handle_is_invalid(pkt->u.handle)) {
1121                        vmci_transport_send_reset(pending, pkt);
1122                        skerr = EPROTO;
1123                        err = -EINVAL;
1124                        goto destroy;
1125                }
1126                break;
1127        default:
1128                /* Close and cleanup the connection. */
1129                vmci_transport_send_reset(pending, pkt);
1130                skerr = EPROTO;
1131                err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL;
1132                goto destroy;
1133        }
1134
1135        /* In order to complete the connection we need to attach to the offered
1136         * queue pair and send an attach notification.  We also subscribe to the
1137         * detach event so we know when our peer goes away, and we do that
1138         * before attaching so we don't miss an event.  If all this succeeds,
1139         * we update our state and wakeup anything waiting in accept() for a
1140         * connection.
1141         */
1142
1143        /* We don't care about attach since we ensure the other side has
1144         * attached by specifying the ATTACH_ONLY flag below.
1145         */
1146        err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
1147                                   vmci_transport_peer_detach_cb,
1148                                   vmci_trans(vpending), &detach_sub_id);
1149        if (err < VMCI_SUCCESS) {
1150                vmci_transport_send_reset(pending, pkt);
1151                err = vmci_transport_error_to_vsock_error(err);
1152                skerr = -err;
1153                goto destroy;
1154        }
1155
1156        vmci_trans(vpending)->detach_sub_id = detach_sub_id;
1157
1158        /* Now attach to the queue pair the client created. */
1159        handle = pkt->u.handle;
1160
1161        /* vpending->local_addr always has a context id so we do not need to
1162         * worry about VMADDR_CID_ANY in this case.
1163         */
1164        is_local =
1165            vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid;
1166        flags = VMCI_QPFLAG_ATTACH_ONLY;
1167        flags |= is_local ? VMCI_QPFLAG_LOCAL : 0;
1168
1169        err = vmci_transport_queue_pair_alloc(
1170                                        &qpair,
1171                                        &handle,
1172                                        vmci_trans(vpending)->produce_size,
1173                                        vmci_trans(vpending)->consume_size,
1174                                        pkt->dg.src.context,
1175                                        flags,
1176                                        vmci_transport_is_trusted(
1177                                                vpending,
1178                                                vpending->remote_addr.svm_cid));
1179        if (err < 0) {
1180                vmci_transport_send_reset(pending, pkt);
1181                skerr = -err;
1182                goto destroy;
1183        }
1184
1185        vmci_trans(vpending)->qp_handle = handle;
1186        vmci_trans(vpending)->qpair = qpair;
1187
1188        /* When we send the attach message, we must be ready to handle incoming
1189         * control messages on the newly connected socket. So we move the
1190         * pending socket to the connected state before sending the attach
1191         * message. Otherwise, an incoming packet triggered by the attach being
1192         * received by the peer may be processed concurrently with what happens
1193         * below after sending the attach message, and that incoming packet
1194         * will find the listening socket instead of the (currently) pending
1195         * socket. Note that enqueueing the socket increments the reference
1196         * count, so even if a reset comes before the connection is accepted,
1197         * the socket will be valid until it is removed from the queue.
1198         *
1199         * If we fail sending the attach below, we remove the socket from the
1200         * connected list and move the socket to SS_UNCONNECTED before
1201         * releasing the lock, so a pending slow path processing of an incoming
1202         * packet will not see the socket in the connected state in that case.
1203         */
1204        pending->sk_state = SS_CONNECTED;
1205
1206        vsock_insert_connected(vpending);
1207
1208        /* Notify our peer of our attach. */
1209        err = vmci_transport_send_attach(pending, handle);
1210        if (err < 0) {
1211                vsock_remove_connected(vpending);
1212                pr_err("Could not send attach\n");
1213                vmci_transport_send_reset(pending, pkt);
1214                err = vmci_transport_error_to_vsock_error(err);
1215                skerr = -err;
1216                goto destroy;
1217        }
1218
1219        /* We have a connection. Move the now connected socket from the
1220         * listener's pending list to the accept queue so callers of accept()
1221         * can find it.
1222         */
1223        vsock_remove_pending(listener, pending);
1224        vsock_enqueue_accept(listener, pending);
1225
1226        /* Callers of accept() will be be waiting on the listening socket, not
1227         * the pending socket.
1228         */
1229        listener->sk_data_ready(listener);
1230
1231        return 0;
1232
1233destroy:
1234        pending->sk_err = skerr;
1235        pending->sk_state = SS_UNCONNECTED;
1236        /* As long as we drop our reference, all necessary cleanup will handle
1237         * when the cleanup function drops its reference and our destruct
1238         * implementation is called.  Note that since the listen handler will
1239         * remove pending from the pending list upon our failure, the cleanup
1240         * function won't drop the additional reference, which is why we do it
1241         * here.
1242         */
1243        sock_put(pending);
1244
1245        return err;
1246}
1247
1248static int
1249vmci_transport_recv_connecting_client(struct sock *sk,
1250                                      struct vmci_transport_packet *pkt)
1251{
1252        struct vsock_sock *vsk;
1253        int err;
1254        int skerr;
1255
1256        vsk = vsock_sk(sk);
1257
1258        switch (pkt->type) {
1259        case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
1260                if (vmci_handle_is_invalid(pkt->u.handle) ||
1261                    !vmci_handle_is_equal(pkt->u.handle,
1262                                          vmci_trans(vsk)->qp_handle)) {
1263                        skerr = EPROTO;
1264                        err = -EINVAL;
1265                        goto destroy;
1266                }
1267
1268                /* Signify the socket is connected and wakeup the waiter in
1269                 * connect(). Also place the socket in the connected table for
1270                 * accounting (it can already be found since it's in the bound
1271                 * table).
1272                 */
1273                sk->sk_state = SS_CONNECTED;
1274                sk->sk_socket->state = SS_CONNECTED;
1275                vsock_insert_connected(vsk);
1276                sk->sk_state_change(sk);
1277
1278                break;
1279        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
1280        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
1281                if (pkt->u.size == 0
1282                    || pkt->dg.src.context != vsk->remote_addr.svm_cid
1283                    || pkt->src_port != vsk->remote_addr.svm_port
1284                    || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)
1285                    || vmci_trans(vsk)->qpair
1286                    || vmci_trans(vsk)->produce_size != 0
1287                    || vmci_trans(vsk)->consume_size != 0
1288                    || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
1289                        skerr = EPROTO;
1290                        err = -EINVAL;
1291
1292                        goto destroy;
1293                }
1294
1295                err = vmci_transport_recv_connecting_client_negotiate(sk, pkt);
1296                if (err) {
1297                        skerr = -err;
1298                        goto destroy;
1299                }
1300
1301                break;
1302        case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
1303                err = vmci_transport_recv_connecting_client_invalid(sk, pkt);
1304                if (err) {
1305                        skerr = -err;
1306                        goto destroy;
1307                }
1308
1309                break;
1310        case VMCI_TRANSPORT_PACKET_TYPE_RST:
1311                /* Older versions of the linux code (WS 6.5 / ESX 4.0) used to
1312                 * continue processing here after they sent an INVALID packet.
1313                 * This meant that we got a RST after the INVALID. We ignore a
1314                 * RST after an INVALID. The common code doesn't send the RST
1315                 * ... so we can hang if an old version of the common code
1316                 * fails between getting a REQUEST and sending an OFFER back.
1317                 * Not much we can do about it... except hope that it doesn't
1318                 * happen.
1319                 */
1320                if (vsk->ignore_connecting_rst) {
1321                        vsk->ignore_connecting_rst = false;
1322                } else {
1323                        skerr = ECONNRESET;
1324                        err = 0;
1325                        goto destroy;
1326                }
1327
1328                break;
1329        default:
1330                /* Close and cleanup the connection. */
1331                skerr = EPROTO;
1332                err = -EINVAL;
1333                goto destroy;
1334        }
1335
1336        return 0;
1337
1338destroy:
1339        vmci_transport_send_reset(sk, pkt);
1340
1341        sk->sk_state = SS_UNCONNECTED;
1342        sk->sk_err = skerr;
1343        sk->sk_error_report(sk);
1344        return err;
1345}
1346
1347static int vmci_transport_recv_connecting_client_negotiate(
1348                                        struct sock *sk,
1349                                        struct vmci_transport_packet *pkt)
1350{
1351        int err;
1352        struct vsock_sock *vsk;
1353        struct vmci_handle handle;
1354        struct vmci_qp *qpair;
1355        u32 detach_sub_id;
1356        bool is_local;
1357        u32 flags;
1358        bool old_proto = true;
1359        bool old_pkt_proto;
1360        u16 version;
1361
1362        vsk = vsock_sk(sk);
1363        handle = VMCI_INVALID_HANDLE;
1364        detach_sub_id = VMCI_INVALID_ID;
1365
1366        /* If we have gotten here then we should be past the point where old
1367         * linux vsock could have sent the bogus rst.
1368         */
1369        vsk->sent_request = false;
1370        vsk->ignore_connecting_rst = false;
1371
1372        /* Verify that we're OK with the proposed queue pair size */
1373        if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size ||
1374            pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) {
1375                err = -EINVAL;
1376                goto destroy;
1377        }
1378
1379        /* At this point we know the CID the peer is using to talk to us. */
1380
1381        if (vsk->local_addr.svm_cid == VMADDR_CID_ANY)
1382                vsk->local_addr.svm_cid = pkt->dg.dst.context;
1383
1384        /* Setup the notify ops to be the highest supported version that both
1385         * the server and the client support.
1386         */
1387
1388        if (vmci_transport_old_proto_override(&old_pkt_proto)) {
1389                old_proto = old_pkt_proto;
1390        } else {
1391                if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE)
1392                        old_proto = true;
1393                else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2)
1394                        old_proto = false;
1395
1396        }
1397
1398        if (old_proto)
1399                version = VSOCK_PROTO_INVALID;
1400        else
1401                version = pkt->proto;
1402
1403        if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) {
1404                err = -EINVAL;
1405                goto destroy;
1406        }
1407
1408        /* Subscribe to detach events first.
1409         *
1410         * XXX We attach once for each queue pair created for now so it is easy
1411         * to find the socket (it's provided), but later we should only
1412         * subscribe once and add a way to lookup sockets by queue pair handle.
1413         */
1414        err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
1415                                   vmci_transport_peer_detach_cb,
1416                                   vmci_trans(vsk), &detach_sub_id);
1417        if (err < VMCI_SUCCESS) {
1418                err = vmci_transport_error_to_vsock_error(err);
1419                goto destroy;
1420        }
1421
1422        /* Make VMCI select the handle for us. */
1423        handle = VMCI_INVALID_HANDLE;
1424        is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid;
1425        flags = is_local ? VMCI_QPFLAG_LOCAL : 0;
1426
1427        err = vmci_transport_queue_pair_alloc(&qpair,
1428                                              &handle,
1429                                              pkt->u.size,
1430                                              pkt->u.size,
1431                                              vsk->remote_addr.svm_cid,
1432                                              flags,
1433                                              vmci_transport_is_trusted(
1434                                                  vsk,
1435                                                  vsk->
1436                                                  remote_addr.svm_cid));
1437        if (err < 0)
1438                goto destroy;
1439
1440        err = vmci_transport_send_qp_offer(sk, handle);
1441        if (err < 0) {
1442                err = vmci_transport_error_to_vsock_error(err);
1443                goto destroy;
1444        }
1445
1446        vmci_trans(vsk)->qp_handle = handle;
1447        vmci_trans(vsk)->qpair = qpair;
1448
1449        vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size =
1450                pkt->u.size;
1451
1452        vmci_trans(vsk)->detach_sub_id = detach_sub_id;
1453
1454        vmci_trans(vsk)->notify_ops->process_negotiate(sk);
1455
1456        return 0;
1457
1458destroy:
1459        if (detach_sub_id != VMCI_INVALID_ID)
1460                vmci_event_unsubscribe(detach_sub_id);
1461
1462        if (!vmci_handle_is_invalid(handle))
1463                vmci_qpair_detach(&qpair);
1464
1465        return err;
1466}
1467
1468static int
1469vmci_transport_recv_connecting_client_invalid(struct sock *sk,
1470                                              struct vmci_transport_packet *pkt)
1471{
1472        int err = 0;
1473        struct vsock_sock *vsk = vsock_sk(sk);
1474
1475        if (vsk->sent_request) {
1476                vsk->sent_request = false;
1477                vsk->ignore_connecting_rst = true;
1478
1479                err = vmci_transport_send_conn_request(
1480                        sk, vmci_trans(vsk)->queue_pair_size);
1481                if (err < 0)
1482                        err = vmci_transport_error_to_vsock_error(err);
1483                else
1484                        err = 0;
1485
1486        }
1487
1488        return err;
1489}
1490
1491static int vmci_transport_recv_connected(struct sock *sk,
1492                                         struct vmci_transport_packet *pkt)
1493{
1494        struct vsock_sock *vsk;
1495        bool pkt_processed = false;
1496
1497        /* In cases where we are closing the connection, it's sufficient to
1498         * mark the state change (and maybe error) and wake up any waiting
1499         * threads. Since this is a connected socket, it's owned by a user
1500         * process and will be cleaned up when the failure is passed back on
1501         * the current or next system call.  Our system call implementations
1502         * must therefore check for error and state changes on entry and when
1503         * being awoken.
1504         */
1505        switch (pkt->type) {
1506        case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
1507                if (pkt->u.mode) {
1508                        vsk = vsock_sk(sk);
1509
1510                        vsk->peer_shutdown |= pkt->u.mode;
1511                        sk->sk_state_change(sk);
1512                }
1513                break;
1514
1515        case VMCI_TRANSPORT_PACKET_TYPE_RST:
1516                vsk = vsock_sk(sk);
1517                /* It is possible that we sent our peer a message (e.g a
1518                 * WAITING_READ) right before we got notified that the peer had
1519                 * detached. If that happens then we can get a RST pkt back
1520                 * from our peer even though there is data available for us to
1521                 * read. In that case, don't shutdown the socket completely but
1522                 * instead allow the local client to finish reading data off
1523                 * the queuepair. Always treat a RST pkt in connected mode like
1524                 * a clean shutdown.
1525                 */
1526                sock_set_flag(sk, SOCK_DONE);
1527                vsk->peer_shutdown = SHUTDOWN_MASK;
1528                if (vsock_stream_has_data(vsk) <= 0)
1529                        sk->sk_state = SS_DISCONNECTING;
1530
1531                sk->sk_state_change(sk);
1532                break;
1533
1534        default:
1535                vsk = vsock_sk(sk);
1536                vmci_trans(vsk)->notify_ops->handle_notify_pkt(
1537                                sk, pkt, false, NULL, NULL,
1538                                &pkt_processed);
1539                if (!pkt_processed)
1540                        return -EINVAL;
1541
1542                break;
1543        }
1544
1545        return 0;
1546}
1547
1548static int vmci_transport_socket_init(struct vsock_sock *vsk,
1549                                      struct vsock_sock *psk)
1550{
1551        vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL);
1552        if (!vsk->trans)
1553                return -ENOMEM;
1554
1555        vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
1556        vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
1557        vmci_trans(vsk)->qpair = NULL;
1558        vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0;
1559        vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
1560        vmci_trans(vsk)->notify_ops = NULL;
1561        INIT_LIST_HEAD(&vmci_trans(vsk)->elem);
1562        vmci_trans(vsk)->sk = &vsk->sk;
1563        spin_lock_init(&vmci_trans(vsk)->lock);
1564        if (psk) {
1565                vmci_trans(vsk)->queue_pair_size =
1566                        vmci_trans(psk)->queue_pair_size;
1567                vmci_trans(vsk)->queue_pair_min_size =
1568                        vmci_trans(psk)->queue_pair_min_size;
1569                vmci_trans(vsk)->queue_pair_max_size =
1570                        vmci_trans(psk)->queue_pair_max_size;
1571        } else {
1572                vmci_trans(vsk)->queue_pair_size =
1573                        VMCI_TRANSPORT_DEFAULT_QP_SIZE;
1574                vmci_trans(vsk)->queue_pair_min_size =
1575                         VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN;
1576                vmci_trans(vsk)->queue_pair_max_size =
1577                        VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX;
1578        }
1579
1580        return 0;
1581}
1582
1583static void vmci_transport_free_resources(struct list_head *transport_list)
1584{
1585        while (!list_empty(transport_list)) {
1586                struct vmci_transport *transport =
1587                    list_first_entry(transport_list, struct vmci_transport,
1588                                     elem);
1589                list_del(&transport->elem);
1590
1591                if (transport->detach_sub_id != VMCI_INVALID_ID) {
1592                        vmci_event_unsubscribe(transport->detach_sub_id);
1593                        transport->detach_sub_id = VMCI_INVALID_ID;
1594                }
1595
1596                if (!vmci_handle_is_invalid(transport->qp_handle)) {
1597                        vmci_qpair_detach(&transport->qpair);
1598                        transport->qp_handle = VMCI_INVALID_HANDLE;
1599                        transport->produce_size = 0;
1600                        transport->consume_size = 0;
1601                }
1602
1603                kfree(transport);
1604        }
1605}
1606
1607static void vmci_transport_cleanup(struct work_struct *work)
1608{
1609        LIST_HEAD(pending);
1610
1611        spin_lock_bh(&vmci_transport_cleanup_lock);
1612        list_replace_init(&vmci_transport_cleanup_list, &pending);
1613        spin_unlock_bh(&vmci_transport_cleanup_lock);
1614        vmci_transport_free_resources(&pending);
1615}
1616
1617static void vmci_transport_destruct(struct vsock_sock *vsk)
1618{
1619        /* Ensure that the detach callback doesn't use the sk/vsk
1620         * we are about to destruct.
1621         */
1622        spin_lock_bh(&vmci_trans(vsk)->lock);
1623        vmci_trans(vsk)->sk = NULL;
1624        spin_unlock_bh(&vmci_trans(vsk)->lock);
1625
1626        if (vmci_trans(vsk)->notify_ops)
1627                vmci_trans(vsk)->notify_ops->socket_destruct(vsk);
1628
1629        spin_lock_bh(&vmci_transport_cleanup_lock);
1630        list_add(&vmci_trans(vsk)->elem, &vmci_transport_cleanup_list);
1631        spin_unlock_bh(&vmci_transport_cleanup_lock);
1632        schedule_work(&vmci_transport_cleanup_work);
1633
1634        vsk->trans = NULL;
1635}
1636
1637static void vmci_transport_release(struct vsock_sock *vsk)
1638{
1639        vsock_remove_sock(vsk);
1640
1641        if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) {
1642                vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle);
1643                vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
1644        }
1645}
1646
1647static int vmci_transport_dgram_bind(struct vsock_sock *vsk,
1648                                     struct sockaddr_vm *addr)
1649{
1650        u32 port;
1651        u32 flags;
1652        int err;
1653
1654        /* VMCI will select a resource ID for us if we provide
1655         * VMCI_INVALID_ID.
1656         */
1657        port = addr->svm_port == VMADDR_PORT_ANY ?
1658                        VMCI_INVALID_ID : addr->svm_port;
1659
1660        if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE))
1661                return -EACCES;
1662
1663        flags = addr->svm_cid == VMADDR_CID_ANY ?
1664                                VMCI_FLAG_ANYCID_DG_HND : 0;
1665
1666        err = vmci_transport_datagram_create_hnd(port, flags,
1667                                                 vmci_transport_recv_dgram_cb,
1668                                                 &vsk->sk,
1669                                                 &vmci_trans(vsk)->dg_handle);
1670        if (err < VMCI_SUCCESS)
1671                return vmci_transport_error_to_vsock_error(err);
1672        vsock_addr_init(&vsk->local_addr, addr->svm_cid,
1673                        vmci_trans(vsk)->dg_handle.resource);
1674
1675        return 0;
1676}
1677
1678static int vmci_transport_dgram_enqueue(
1679        struct vsock_sock *vsk,
1680        struct sockaddr_vm *remote_addr,
1681        struct msghdr *msg,
1682        size_t len)
1683{
1684        int err;
1685        struct vmci_datagram *dg;
1686
1687        if (len > VMCI_MAX_DG_PAYLOAD_SIZE)
1688                return -EMSGSIZE;
1689
1690        if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid))
1691                return -EPERM;
1692
1693        /* Allocate a buffer for the user's message and our packet header. */
1694        dg = kmalloc(len + sizeof(*dg), GFP_KERNEL);
1695        if (!dg)
1696                return -ENOMEM;
1697
1698        memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len);
1699
1700        dg->dst = vmci_make_handle(remote_addr->svm_cid,
1701                                   remote_addr->svm_port);
1702        dg->src = vmci_make_handle(vsk->local_addr.svm_cid,
1703                                   vsk->local_addr.svm_port);
1704        dg->payload_size = len;
1705
1706        err = vmci_datagram_send(dg);
1707        kfree(dg);
1708        if (err < 0)
1709                return vmci_transport_error_to_vsock_error(err);
1710
1711        return err - sizeof(*dg);
1712}
1713
1714static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk,
1715                                        struct msghdr *msg, size_t len,
1716                                        int flags)
1717{
1718        int err;
1719        int noblock;
1720        struct vmci_datagram *dg;
1721        size_t payload_len;
1722        struct sk_buff *skb;
1723
1724        noblock = flags & MSG_DONTWAIT;
1725
1726        if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
1727                return -EOPNOTSUPP;
1728
1729        /* Retrieve the head sk_buff from the socket's receive queue. */
1730        err = 0;
1731        skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
1732        if (!skb)
1733                return err;
1734
1735        dg = (struct vmci_datagram *)skb->data;
1736        if (!dg)
1737                /* err is 0, meaning we read zero bytes. */
1738                goto out;
1739
1740        payload_len = dg->payload_size;
1741        /* Ensure the sk_buff matches the payload size claimed in the packet. */
1742        if (payload_len != skb->len - sizeof(*dg)) {
1743                err = -EINVAL;
1744                goto out;
1745        }
1746
1747        if (payload_len > len) {
1748                payload_len = len;
1749                msg->msg_flags |= MSG_TRUNC;
1750        }
1751
1752        /* Place the datagram payload in the user's iovec. */
1753        err = skb_copy_datagram_msg(skb, sizeof(*dg), msg, payload_len);
1754        if (err)
1755                goto out;
1756
1757        if (msg->msg_name) {
1758                /* Provide the address of the sender. */
1759                DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
1760                vsock_addr_init(vm_addr, dg->src.context, dg->src.resource);
1761                msg->msg_namelen = sizeof(*vm_addr);
1762        }
1763        err = payload_len;
1764
1765out:
1766        skb_free_datagram(&vsk->sk, skb);
1767        return err;
1768}
1769
1770static bool vmci_transport_dgram_allow(u32 cid, u32 port)
1771{
1772        if (cid == VMADDR_CID_HYPERVISOR) {
1773                /* Registrations of PBRPC Servers do not modify VMX/Hypervisor
1774                 * state and are allowed.
1775                 */
1776                return port == VMCI_UNITY_PBRPC_REGISTER;
1777        }
1778
1779        return true;
1780}
1781
1782static int vmci_transport_connect(struct vsock_sock *vsk)
1783{
1784        int err;
1785        bool old_pkt_proto = false;
1786        struct sock *sk = &vsk->sk;
1787
1788        if (vmci_transport_old_proto_override(&old_pkt_proto) &&
1789                old_pkt_proto) {
1790                err = vmci_transport_send_conn_request(
1791                        sk, vmci_trans(vsk)->queue_pair_size);
1792                if (err < 0) {
1793                        sk->sk_state = SS_UNCONNECTED;
1794                        return err;
1795                }
1796        } else {
1797                int supported_proto_versions =
1798                        vmci_transport_new_proto_supported_versions();
1799                err = vmci_transport_send_conn_request2(
1800                                sk, vmci_trans(vsk)->queue_pair_size,
1801                                supported_proto_versions);
1802                if (err < 0) {
1803                        sk->sk_state = SS_UNCONNECTED;
1804                        return err;
1805                }
1806
1807                vsk->sent_request = true;
1808        }
1809
1810        return err;
1811}
1812
1813static ssize_t vmci_transport_stream_dequeue(
1814        struct vsock_sock *vsk,
1815        struct msghdr *msg,
1816        size_t len,
1817        int flags)
1818{
1819        if (flags & MSG_PEEK)
1820                return vmci_qpair_peekv(vmci_trans(vsk)->qpair, msg, len, 0);
1821        else
1822                return vmci_qpair_dequev(vmci_trans(vsk)->qpair, msg, len, 0);
1823}
1824
1825static ssize_t vmci_transport_stream_enqueue(
1826        struct vsock_sock *vsk,
1827        struct msghdr *msg,
1828        size_t len)
1829{
1830        return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0);
1831}
1832
1833static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)
1834{
1835        return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair);
1836}
1837
1838static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk)
1839{
1840        return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair);
1841}
1842
1843static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk)
1844{
1845        return vmci_trans(vsk)->consume_size;
1846}
1847
1848static bool vmci_transport_stream_is_active(struct vsock_sock *vsk)
1849{
1850        return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle);
1851}
1852
1853static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk)
1854{
1855        return vmci_trans(vsk)->queue_pair_size;
1856}
1857
1858static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk)
1859{
1860        return vmci_trans(vsk)->queue_pair_min_size;
1861}
1862
1863static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk)
1864{
1865        return vmci_trans(vsk)->queue_pair_max_size;
1866}
1867
1868static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
1869{
1870        if (val < vmci_trans(vsk)->queue_pair_min_size)
1871                vmci_trans(vsk)->queue_pair_min_size = val;
1872        if (val > vmci_trans(vsk)->queue_pair_max_size)
1873                vmci_trans(vsk)->queue_pair_max_size = val;
1874        vmci_trans(vsk)->queue_pair_size = val;
1875}
1876
1877static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk,
1878                                               u64 val)
1879{
1880        if (val > vmci_trans(vsk)->queue_pair_size)
1881                vmci_trans(vsk)->queue_pair_size = val;
1882        vmci_trans(vsk)->queue_pair_min_size = val;
1883}
1884
1885static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk,
1886                                               u64 val)
1887{
1888        if (val < vmci_trans(vsk)->queue_pair_size)
1889                vmci_trans(vsk)->queue_pair_size = val;
1890        vmci_trans(vsk)->queue_pair_max_size = val;
1891}
1892
1893static int vmci_transport_notify_poll_in(
1894        struct vsock_sock *vsk,
1895        size_t target,
1896        bool *data_ready_now)
1897{
1898        return vmci_trans(vsk)->notify_ops->poll_in(
1899                        &vsk->sk, target, data_ready_now);
1900}
1901
1902static int vmci_transport_notify_poll_out(
1903        struct vsock_sock *vsk,
1904        size_t target,
1905        bool *space_available_now)
1906{
1907        return vmci_trans(vsk)->notify_ops->poll_out(
1908                        &vsk->sk, target, space_available_now);
1909}
1910
1911static int vmci_transport_notify_recv_init(
1912        struct vsock_sock *vsk,
1913        size_t target,
1914        struct vsock_transport_recv_notify_data *data)
1915{
1916        return vmci_trans(vsk)->notify_ops->recv_init(
1917                        &vsk->sk, target,
1918                        (struct vmci_transport_recv_notify_data *)data);
1919}
1920
1921static int vmci_transport_notify_recv_pre_block(
1922        struct vsock_sock *vsk,
1923        size_t target,
1924        struct vsock_transport_recv_notify_data *data)
1925{
1926        return vmci_trans(vsk)->notify_ops->recv_pre_block(
1927                        &vsk->sk, target,
1928                        (struct vmci_transport_recv_notify_data *)data);
1929}
1930
1931static int vmci_transport_notify_recv_pre_dequeue(
1932        struct vsock_sock *vsk,
1933        size_t target,
1934        struct vsock_transport_recv_notify_data *data)
1935{
1936        return vmci_trans(vsk)->notify_ops->recv_pre_dequeue(
1937                        &vsk->sk, target,
1938                        (struct vmci_transport_recv_notify_data *)data);
1939}
1940
1941static int vmci_transport_notify_recv_post_dequeue(
1942        struct vsock_sock *vsk,
1943        size_t target,
1944        ssize_t copied,
1945        bool data_read,
1946        struct vsock_transport_recv_notify_data *data)
1947{
1948        return vmci_trans(vsk)->notify_ops->recv_post_dequeue(
1949                        &vsk->sk, target, copied, data_read,
1950                        (struct vmci_transport_recv_notify_data *)data);
1951}
1952
1953static int vmci_transport_notify_send_init(
1954        struct vsock_sock *vsk,
1955        struct vsock_transport_send_notify_data *data)
1956{
1957        return vmci_trans(vsk)->notify_ops->send_init(
1958                        &vsk->sk,
1959                        (struct vmci_transport_send_notify_data *)data);
1960}
1961
1962static int vmci_transport_notify_send_pre_block(
1963        struct vsock_sock *vsk,
1964        struct vsock_transport_send_notify_data *data)
1965{
1966        return vmci_trans(vsk)->notify_ops->send_pre_block(
1967                        &vsk->sk,
1968                        (struct vmci_transport_send_notify_data *)data);
1969}
1970
1971static int vmci_transport_notify_send_pre_enqueue(
1972        struct vsock_sock *vsk,
1973        struct vsock_transport_send_notify_data *data)
1974{
1975        return vmci_trans(vsk)->notify_ops->send_pre_enqueue(
1976                        &vsk->sk,
1977                        (struct vmci_transport_send_notify_data *)data);
1978}
1979
1980static int vmci_transport_notify_send_post_enqueue(
1981        struct vsock_sock *vsk,
1982        ssize_t written,
1983        struct vsock_transport_send_notify_data *data)
1984{
1985        return vmci_trans(vsk)->notify_ops->send_post_enqueue(
1986                        &vsk->sk, written,
1987                        (struct vmci_transport_send_notify_data *)data);
1988}
1989
1990static bool vmci_transport_old_proto_override(bool *old_pkt_proto)
1991{
1992        if (PROTOCOL_OVERRIDE != -1) {
1993                if (PROTOCOL_OVERRIDE == 0)
1994                        *old_pkt_proto = true;
1995                else
1996                        *old_pkt_proto = false;
1997
1998                pr_info("Proto override in use\n");
1999                return true;
2000        }
2001
2002        return false;
2003}
2004
2005static bool vmci_transport_proto_to_notify_struct(struct sock *sk,
2006                                                  u16 *proto,
2007                                                  bool old_pkt_proto)
2008{
2009        struct vsock_sock *vsk = vsock_sk(sk);
2010
2011        if (old_pkt_proto) {
2012                if (*proto != VSOCK_PROTO_INVALID) {
2013                        pr_err("Can't set both an old and new protocol\n");
2014                        return false;
2015                }
2016                vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops;
2017                goto exit;
2018        }
2019
2020        switch (*proto) {
2021        case VSOCK_PROTO_PKT_ON_NOTIFY:
2022                vmci_trans(vsk)->notify_ops =
2023                        &vmci_transport_notify_pkt_q_state_ops;
2024                break;
2025        default:
2026                pr_err("Unknown notify protocol version\n");
2027                return false;
2028        }
2029
2030exit:
2031        vmci_trans(vsk)->notify_ops->socket_init(sk);
2032        return true;
2033}
2034
2035static u16 vmci_transport_new_proto_supported_versions(void)
2036{
2037        if (PROTOCOL_OVERRIDE != -1)
2038                return PROTOCOL_OVERRIDE;
2039
2040        return VSOCK_PROTO_ALL_SUPPORTED;
2041}
2042
2043static u32 vmci_transport_get_local_cid(void)
2044{
2045        return vmci_get_context_id();
2046}
2047
2048static const struct vsock_transport vmci_transport = {
2049        .init = vmci_transport_socket_init,
2050        .destruct = vmci_transport_destruct,
2051        .release = vmci_transport_release,
2052        .connect = vmci_transport_connect,
2053        .dgram_bind = vmci_transport_dgram_bind,
2054        .dgram_dequeue = vmci_transport_dgram_dequeue,
2055        .dgram_enqueue = vmci_transport_dgram_enqueue,
2056        .dgram_allow = vmci_transport_dgram_allow,
2057        .stream_dequeue = vmci_transport_stream_dequeue,
2058        .stream_enqueue = vmci_transport_stream_enqueue,
2059        .stream_has_data = vmci_transport_stream_has_data,
2060        .stream_has_space = vmci_transport_stream_has_space,
2061        .stream_rcvhiwat = vmci_transport_stream_rcvhiwat,
2062        .stream_is_active = vmci_transport_stream_is_active,
2063        .stream_allow = vmci_transport_stream_allow,
2064        .notify_poll_in = vmci_transport_notify_poll_in,
2065        .notify_poll_out = vmci_transport_notify_poll_out,
2066        .notify_recv_init = vmci_transport_notify_recv_init,
2067        .notify_recv_pre_block = vmci_transport_notify_recv_pre_block,
2068        .notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue,
2069        .notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue,
2070        .notify_send_init = vmci_transport_notify_send_init,
2071        .notify_send_pre_block = vmci_transport_notify_send_pre_block,
2072        .notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue,
2073        .notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue,
2074        .shutdown = vmci_transport_shutdown,
2075        .set_buffer_size = vmci_transport_set_buffer_size,
2076        .set_min_buffer_size = vmci_transport_set_min_buffer_size,
2077        .set_max_buffer_size = vmci_transport_set_max_buffer_size,
2078        .get_buffer_size = vmci_transport_get_buffer_size,
2079        .get_min_buffer_size = vmci_transport_get_min_buffer_size,
2080        .get_max_buffer_size = vmci_transport_get_max_buffer_size,
2081        .get_local_cid = vmci_transport_get_local_cid,
2082};
2083
2084static int __init vmci_transport_init(void)
2085{
2086        int err;
2087
2088        /* Create the datagram handle that we will use to send and receive all
2089         * VSocket control messages for this context.
2090         */
2091        err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID,
2092                                                 VMCI_FLAG_ANYCID_DG_HND,
2093                                                 vmci_transport_recv_stream_cb,
2094                                                 NULL,
2095                                                 &vmci_transport_stream_handle);
2096        if (err < VMCI_SUCCESS) {
2097                pr_err("Unable to create datagram handle. (%d)\n", err);
2098                return vmci_transport_error_to_vsock_error(err);
2099        }
2100
2101        err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED,
2102                                   vmci_transport_qp_resumed_cb,
2103                                   NULL, &vmci_transport_qp_resumed_sub_id);
2104        if (err < VMCI_SUCCESS) {
2105                pr_err("Unable to subscribe to resumed event. (%d)\n", err);
2106                err = vmci_transport_error_to_vsock_error(err);
2107                vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
2108                goto err_destroy_stream_handle;
2109        }
2110
2111        err = vsock_core_init(&vmci_transport);
2112        if (err < 0)
2113                goto err_unsubscribe;
2114
2115        return 0;
2116
2117err_unsubscribe:
2118        vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
2119err_destroy_stream_handle:
2120        vmci_datagram_destroy_handle(vmci_transport_stream_handle);
2121        return err;
2122}
2123module_init(vmci_transport_init);
2124
2125static void __exit vmci_transport_exit(void)
2126{
2127        cancel_work_sync(&vmci_transport_cleanup_work);
2128        vmci_transport_free_resources(&vmci_transport_cleanup_list);
2129
2130        if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) {
2131                if (vmci_datagram_destroy_handle(
2132                        vmci_transport_stream_handle) != VMCI_SUCCESS)
2133                        pr_err("Couldn't destroy datagram handle\n");
2134                vmci_transport_stream_handle = VMCI_INVALID_HANDLE;
2135        }
2136
2137        if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) {
2138                vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
2139                vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
2140        }
2141
2142        vsock_core_exit();
2143}
2144module_exit(vmci_transport_exit);
2145
2146MODULE_AUTHOR("VMware, Inc.");
2147MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
2148MODULE_VERSION("1.0.4.0-k");
2149MODULE_LICENSE("GPL v2");
2150MODULE_ALIAS("vmware_vsock");
2151MODULE_ALIAS_NETPROTO(PF_VSOCK);
2152