linux/net/vmw_vsock/vmci_transport.c
<<
>>
Prefs
   1/*
   2 * VMware vSockets Driver
   3 *
   4 * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License as published by the Free
   8 * Software Foundation version 2 and no later version.
   9 *
  10 * This program is distributed in the hope that it will be useful, but WITHOUT
  11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 * more details.
  14 */
  15
  16#include <linux/types.h>
  17#include <linux/bitops.h>
  18#include <linux/cred.h>
  19#include <linux/init.h>
  20#include <linux/io.h>
  21#include <linux/kernel.h>
  22#include <linux/kmod.h>
  23#include <linux/list.h>
  24#include <linux/module.h>
  25#include <linux/mutex.h>
  26#include <linux/net.h>
  27#include <linux/poll.h>
  28#include <linux/skbuff.h>
  29#include <linux/smp.h>
  30#include <linux/socket.h>
  31#include <linux/stddef.h>
  32#include <linux/unistd.h>
  33#include <linux/wait.h>
  34#include <linux/workqueue.h>
  35#include <net/sock.h>
  36#include <net/af_vsock.h>
  37
  38#include "vmci_transport_notify.h"
  39
  40static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
  41static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
  42static void vmci_transport_peer_detach_cb(u32 sub_id,
  43                                          const struct vmci_event_data *ed,
  44                                          void *client_data);
  45static void vmci_transport_recv_pkt_work(struct work_struct *work);
  46static void vmci_transport_cleanup(struct work_struct *work);
  47static int vmci_transport_recv_listen(struct sock *sk,
  48                                      struct vmci_transport_packet *pkt);
  49static int vmci_transport_recv_connecting_server(
  50                                        struct sock *sk,
  51                                        struct sock *pending,
  52                                        struct vmci_transport_packet *pkt);
  53static int vmci_transport_recv_connecting_client(
  54                                        struct sock *sk,
  55                                        struct vmci_transport_packet *pkt);
  56static int vmci_transport_recv_connecting_client_negotiate(
  57                                        struct sock *sk,
  58                                        struct vmci_transport_packet *pkt);
  59static int vmci_transport_recv_connecting_client_invalid(
  60                                        struct sock *sk,
  61                                        struct vmci_transport_packet *pkt);
  62static int vmci_transport_recv_connected(struct sock *sk,
  63                                         struct vmci_transport_packet *pkt);
  64static bool vmci_transport_old_proto_override(bool *old_pkt_proto);
  65static u16 vmci_transport_new_proto_supported_versions(void);
  66static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto,
  67                                                  bool old_pkt_proto);
  68
  /* One received control packet queued for processing outside the datagram
   * callback.  vmci_transport_recv_stream_cb() allocates an instance, fills it
   * in and schedules @work with vmci_transport_recv_pkt_work() as its handler.
   */
  struct vmci_transport_recv_pkt_info {
          /* Work item passed to schedule_work(). */
          struct work_struct work;
          /* Socket the packet was matched to by the receive callback. */
          struct sock *sk;
          /* Copy of the received packet. */
          struct vmci_transport_packet pkt;
  };
  74
  75static LIST_HEAD(vmci_transport_cleanup_list);
  76static DEFINE_SPINLOCK(vmci_transport_cleanup_lock);
  77static DECLARE_WORK(vmci_transport_cleanup_work, vmci_transport_cleanup);
  78
  79static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
  80                                                           VMCI_INVALID_ID };
  81static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
  82
  83static int PROTOCOL_OVERRIDE = -1;
  84
  85#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN   128
  86#define VMCI_TRANSPORT_DEFAULT_QP_SIZE       262144
  87#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX   262144
  88
  89/* The default peer timeout indicates how long we will wait for a peer response
  90 * to a control message.
  91 */
  92#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
  93
  94/* Helper function to convert from a VMCI error code to a VSock error code. */
  95
  96static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
  97{
  98        switch (vmci_error) {
  99        case VMCI_ERROR_NO_MEM:
 100                return -ENOMEM;
 101        case VMCI_ERROR_DUPLICATE_ENTRY:
 102        case VMCI_ERROR_ALREADY_EXISTS:
 103                return -EADDRINUSE;
 104        case VMCI_ERROR_NO_ACCESS:
 105                return -EPERM;
 106        case VMCI_ERROR_NO_RESOURCES:
 107                return -ENOBUFS;
 108        case VMCI_ERROR_INVALID_RESOURCE:
 109                return -EHOSTUNREACH;
 110        case VMCI_ERROR_INVALID_ARGS:
 111        default:
 112                break;
 113        }
 114        return -EINVAL;
 115}
 116
 117static u32 vmci_transport_peer_rid(u32 peer_cid)
 118{
 119        if (VMADDR_CID_HYPERVISOR == peer_cid)
 120                return VMCI_TRANSPORT_HYPERVISOR_PACKET_RID;
 121
 122        return VMCI_TRANSPORT_PACKET_RID;
 123}
 124
 125static inline void
 126vmci_transport_packet_init(struct vmci_transport_packet *pkt,
 127                           struct sockaddr_vm *src,
 128                           struct sockaddr_vm *dst,
 129                           u8 type,
 130                           u64 size,
 131                           u64 mode,
 132                           struct vmci_transport_waiting_info *wait,
 133                           u16 proto,
 134                           struct vmci_handle handle)
 135{
 136        /* We register the stream control handler as an any cid handle so we
 137         * must always send from a source address of VMADDR_CID_ANY
 138         */
 139        pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY,
 140                                       VMCI_TRANSPORT_PACKET_RID);
 141        pkt->dg.dst = vmci_make_handle(dst->svm_cid,
 142                                       vmci_transport_peer_rid(dst->svm_cid));
 143        pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg);
 144        pkt->version = VMCI_TRANSPORT_PACKET_VERSION;
 145        pkt->type = type;
 146        pkt->src_port = src->svm_port;
 147        pkt->dst_port = dst->svm_port;
 148        memset(&pkt->proto, 0, sizeof(pkt->proto));
 149        memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2));
 150
 151        switch (pkt->type) {
 152        case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
 153                pkt->u.size = 0;
 154                break;
 155
 156        case VMCI_TRANSPORT_PACKET_TYPE_REQUEST:
 157        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
 158                pkt->u.size = size;
 159                break;
 160
 161        case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
 162        case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
 163                pkt->u.handle = handle;
 164                break;
 165
 166        case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
 167        case VMCI_TRANSPORT_PACKET_TYPE_READ:
 168        case VMCI_TRANSPORT_PACKET_TYPE_RST:
 169                pkt->u.size = 0;
 170                break;
 171
 172        case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
 173                pkt->u.mode = mode;
 174                break;
 175
 176        case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
 177        case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
 178                memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait));
 179                break;
 180
 181        case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2:
 182        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
 183                pkt->u.size = size;
 184                pkt->proto = proto;
 185                break;
 186        }
 187}
 188
 189static inline void
 190vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt,
 191                                    struct sockaddr_vm *local,
 192                                    struct sockaddr_vm *remote)
 193{
 194        vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port);
 195        vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port);
 196}
 197
 198static int
 199__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt,
 200                                  struct sockaddr_vm *src,
 201                                  struct sockaddr_vm *dst,
 202                                  enum vmci_transport_packet_type type,
 203                                  u64 size,
 204                                  u64 mode,
 205                                  struct vmci_transport_waiting_info *wait,
 206                                  u16 proto,
 207                                  struct vmci_handle handle,
 208                                  bool convert_error)
 209{
 210        int err;
 211
 212        vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait,
 213                                   proto, handle);
 214        err = vmci_datagram_send(&pkt->dg);
 215        if (convert_error && (err < 0))
 216                return vmci_transport_error_to_vsock_error(err);
 217
 218        return err;
 219}
 220
 221static int
 222vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt,
 223                                      enum vmci_transport_packet_type type,
 224                                      u64 size,
 225                                      u64 mode,
 226                                      struct vmci_transport_waiting_info *wait,
 227                                      struct vmci_handle handle)
 228{
 229        struct vmci_transport_packet reply;
 230        struct sockaddr_vm src, dst;
 231
 232        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) {
 233                return 0;
 234        } else {
 235                vmci_transport_packet_get_addresses(pkt, &src, &dst);
 236                return __vmci_transport_send_control_pkt(&reply, &src, &dst,
 237                                                         type,
 238                                                         size, mode, wait,
 239                                                         VSOCK_PROTO_INVALID,
 240                                                         handle, true);
 241        }
 242}
 243
 244static int
 245vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src,
 246                                   struct sockaddr_vm *dst,
 247                                   enum vmci_transport_packet_type type,
 248                                   u64 size,
 249                                   u64 mode,
 250                                   struct vmci_transport_waiting_info *wait,
 251                                   struct vmci_handle handle)
 252{
 253        /* Note that it is safe to use a single packet across all CPUs since
 254         * two tasklets of the same type are guaranteed to not ever run
 255         * simultaneously. If that ever changes, or VMCI stops using tasklets,
 256         * we can use per-cpu packets.
 257         */
 258        static struct vmci_transport_packet pkt;
 259
 260        return __vmci_transport_send_control_pkt(&pkt, src, dst, type,
 261                                                 size, mode, wait,
 262                                                 VSOCK_PROTO_INVALID, handle,
 263                                                 false);
 264}
 265
 266static int
 267vmci_transport_send_control_pkt(struct sock *sk,
 268                                enum vmci_transport_packet_type type,
 269                                u64 size,
 270                                u64 mode,
 271                                struct vmci_transport_waiting_info *wait,
 272                                u16 proto,
 273                                struct vmci_handle handle)
 274{
 275        struct vmci_transport_packet *pkt;
 276        struct vsock_sock *vsk;
 277        int err;
 278
 279        vsk = vsock_sk(sk);
 280
 281        if (!vsock_addr_bound(&vsk->local_addr))
 282                return -EINVAL;
 283
 284        if (!vsock_addr_bound(&vsk->remote_addr))
 285                return -EINVAL;
 286
 287        pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
 288        if (!pkt)
 289                return -ENOMEM;
 290
 291        err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr,
 292                                                &vsk->remote_addr, type, size,
 293                                                mode, wait, proto, handle,
 294                                                true);
 295        kfree(pkt);
 296
 297        return err;
 298}
 299
 300static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
 301                                        struct sockaddr_vm *src,
 302                                        struct vmci_transport_packet *pkt)
 303{
 304        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
 305                return 0;
 306        return vmci_transport_send_control_pkt_bh(
 307                                        dst, src,
 308                                        VMCI_TRANSPORT_PACKET_TYPE_RST, 0,
 309                                        0, NULL, VMCI_INVALID_HANDLE);
 310}
 311
 312static int vmci_transport_send_reset(struct sock *sk,
 313                                     struct vmci_transport_packet *pkt)
 314{
 315        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
 316                return 0;
 317        return vmci_transport_send_control_pkt(sk,
 318                                        VMCI_TRANSPORT_PACKET_TYPE_RST,
 319                                        0, 0, NULL, VSOCK_PROTO_INVALID,
 320                                        VMCI_INVALID_HANDLE);
 321}
 322
 323static int vmci_transport_send_negotiate(struct sock *sk, size_t size)
 324{
 325        return vmci_transport_send_control_pkt(
 326                                        sk,
 327                                        VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE,
 328                                        size, 0, NULL,
 329                                        VSOCK_PROTO_INVALID,
 330                                        VMCI_INVALID_HANDLE);
 331}
 332
 333static int vmci_transport_send_negotiate2(struct sock *sk, size_t size,
 334                                          u16 version)
 335{
 336        return vmci_transport_send_control_pkt(
 337                                        sk,
 338                                        VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2,
 339                                        size, 0, NULL, version,
 340                                        VMCI_INVALID_HANDLE);
 341}
 342
 343static int vmci_transport_send_qp_offer(struct sock *sk,
 344                                        struct vmci_handle handle)
 345{
 346        return vmci_transport_send_control_pkt(
 347                                        sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0,
 348                                        0, NULL,
 349                                        VSOCK_PROTO_INVALID, handle);
 350}
 351
 352static int vmci_transport_send_attach(struct sock *sk,
 353                                      struct vmci_handle handle)
 354{
 355        return vmci_transport_send_control_pkt(
 356                                        sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH,
 357                                        0, 0, NULL, VSOCK_PROTO_INVALID,
 358                                        handle);
 359}
 360
 361static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt)
 362{
 363        return vmci_transport_reply_control_pkt_fast(
 364                                                pkt,
 365                                                VMCI_TRANSPORT_PACKET_TYPE_RST,
 366                                                0, 0, NULL,
 367                                                VMCI_INVALID_HANDLE);
 368}
 369
 370static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst,
 371                                          struct sockaddr_vm *src)
 372{
 373        return vmci_transport_send_control_pkt_bh(
 374                                        dst, src,
 375                                        VMCI_TRANSPORT_PACKET_TYPE_INVALID,
 376                                        0, 0, NULL, VMCI_INVALID_HANDLE);
 377}
 378
 379int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
 380                                 struct sockaddr_vm *src)
 381{
 382        return vmci_transport_send_control_pkt_bh(
 383                                        dst, src,
 384                                        VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
 385                                        0, NULL, VMCI_INVALID_HANDLE);
 386}
 387
 388int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
 389                                struct sockaddr_vm *src)
 390{
 391        return vmci_transport_send_control_pkt_bh(
 392                                        dst, src,
 393                                        VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
 394                                        0, NULL, VMCI_INVALID_HANDLE);
 395}
 396
 397int vmci_transport_send_wrote(struct sock *sk)
 398{
 399        return vmci_transport_send_control_pkt(
 400                                        sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
 401                                        0, NULL, VSOCK_PROTO_INVALID,
 402                                        VMCI_INVALID_HANDLE);
 403}
 404
 405int vmci_transport_send_read(struct sock *sk)
 406{
 407        return vmci_transport_send_control_pkt(
 408                                        sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
 409                                        0, NULL, VSOCK_PROTO_INVALID,
 410                                        VMCI_INVALID_HANDLE);
 411}
 412
 413int vmci_transport_send_waiting_write(struct sock *sk,
 414                                      struct vmci_transport_waiting_info *wait)
 415{
 416        return vmci_transport_send_control_pkt(
 417                                sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE,
 418                                0, 0, wait, VSOCK_PROTO_INVALID,
 419                                VMCI_INVALID_HANDLE);
 420}
 421
 422int vmci_transport_send_waiting_read(struct sock *sk,
 423                                     struct vmci_transport_waiting_info *wait)
 424{
 425        return vmci_transport_send_control_pkt(
 426                                sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ,
 427                                0, 0, wait, VSOCK_PROTO_INVALID,
 428                                VMCI_INVALID_HANDLE);
 429}
 430
 431static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode)
 432{
 433        return vmci_transport_send_control_pkt(
 434                                        &vsk->sk,
 435                                        VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN,
 436                                        0, mode, NULL,
 437                                        VSOCK_PROTO_INVALID,
 438                                        VMCI_INVALID_HANDLE);
 439}
 440
 441static int vmci_transport_send_conn_request(struct sock *sk, size_t size)
 442{
 443        return vmci_transport_send_control_pkt(sk,
 444                                        VMCI_TRANSPORT_PACKET_TYPE_REQUEST,
 445                                        size, 0, NULL,
 446                                        VSOCK_PROTO_INVALID,
 447                                        VMCI_INVALID_HANDLE);
 448}
 449
 450static int vmci_transport_send_conn_request2(struct sock *sk, size_t size,
 451                                             u16 version)
 452{
 453        return vmci_transport_send_control_pkt(
 454                                        sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2,
 455                                        size, 0, NULL, version,
 456                                        VMCI_INVALID_HANDLE);
 457}
 458
 459static struct sock *vmci_transport_get_pending(
 460                                        struct sock *listener,
 461                                        struct vmci_transport_packet *pkt)
 462{
 463        struct vsock_sock *vlistener;
 464        struct vsock_sock *vpending;
 465        struct sock *pending;
 466        struct sockaddr_vm src;
 467
 468        vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
 469
 470        vlistener = vsock_sk(listener);
 471
 472        list_for_each_entry(vpending, &vlistener->pending_links,
 473                            pending_links) {
 474                if (vsock_addr_equals_addr(&src, &vpending->remote_addr) &&
 475                    pkt->dst_port == vpending->local_addr.svm_port) {
 476                        pending = sk_vsock(vpending);
 477                        sock_hold(pending);
 478                        goto found;
 479                }
 480        }
 481
 482        pending = NULL;
 483found:
 484        return pending;
 485
 486}
 487
 /* Drop the socket reference taken by vmci_transport_get_pending(). */
 static void vmci_transport_release_pending(struct sock *pending)
 {
         sock_put(pending);
 }
 492
 493/* We allow two kinds of sockets to communicate with a restricted VM: 1)
 494 * trusted sockets 2) sockets from applications running as the same user as the
 495 * VM (this is only true for the host side and only when using hosted products)
 496 */
 497
 498static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid)
 499{
 500        return vsock->trusted ||
 501               vmci_is_context_owner(peer_cid, vsock->owner->uid);
 502}
 503
 504/* We allow sending datagrams to and receiving datagrams from a restricted VM
 505 * only if it is trusted as described in vmci_transport_is_trusted.
 506 */
 507
 508static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid)
 509{
 510        if (VMADDR_CID_HYPERVISOR == peer_cid)
 511                return true;
 512
 513        if (vsock->cached_peer != peer_cid) {
 514                vsock->cached_peer = peer_cid;
 515                if (!vmci_transport_is_trusted(vsock, peer_cid) &&
 516                    (vmci_context_get_priv_flags(peer_cid) &
 517                     VMCI_PRIVILEGE_FLAG_RESTRICTED)) {
 518                        vsock->cached_peer_allow_dgram = false;
 519                } else {
 520                        vsock->cached_peer_allow_dgram = true;
 521                }
 522        }
 523
 524        return vsock->cached_peer_allow_dgram;
 525}
 526
 527static int
 528vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
 529                                struct vmci_handle *handle,
 530                                u64 produce_size,
 531                                u64 consume_size,
 532                                u32 peer, u32 flags, bool trusted)
 533{
 534        int err = 0;
 535
 536        if (trusted) {
 537                /* Try to allocate our queue pair as trusted. This will only
 538                 * work if vsock is running in the host.
 539                 */
 540
 541                err = vmci_qpair_alloc(qpair, handle, produce_size,
 542                                       consume_size,
 543                                       peer, flags,
 544                                       VMCI_PRIVILEGE_FLAG_TRUSTED);
 545                if (err != VMCI_ERROR_NO_ACCESS)
 546                        goto out;
 547
 548        }
 549
 550        err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size,
 551                               peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
 552out:
 553        if (err < 0) {
 554                pr_err("Could not attach to queue pair with %d\n",
 555                       err);
 556                err = vmci_transport_error_to_vsock_error(err);
 557        }
 558
 559        return err;
 560}
 561
 562static int
 563vmci_transport_datagram_create_hnd(u32 resource_id,
 564                                   u32 flags,
 565                                   vmci_datagram_recv_cb recv_cb,
 566                                   void *client_data,
 567                                   struct vmci_handle *out_handle)
 568{
 569        int err = 0;
 570
 571        /* Try to allocate our datagram handler as trusted. This will only work
 572         * if vsock is running in the host.
 573         */
 574
 575        err = vmci_datagram_create_handle_priv(resource_id, flags,
 576                                               VMCI_PRIVILEGE_FLAG_TRUSTED,
 577                                               recv_cb,
 578                                               client_data, out_handle);
 579
 580        if (err == VMCI_ERROR_NO_ACCESS)
 581                err = vmci_datagram_create_handle(resource_id, flags,
 582                                                  recv_cb, client_data,
 583                                                  out_handle);
 584
 585        return err;
 586}
 587
 588/* This is invoked as part of a tasklet that's scheduled when the VMCI
 589 * interrupt fires.  This is run in bottom-half context and if it ever needs to
 590 * sleep it should defer that work to a work queue.
 591 */
 592
 593static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
 594{
 595        struct sock *sk;
 596        size_t size;
 597        struct sk_buff *skb;
 598        struct vsock_sock *vsk;
 599
 600        sk = (struct sock *)data;
 601
 602        /* This handler is privileged when this module is running on the host.
 603         * We will get datagrams from all endpoints (even VMs that are in a
 604         * restricted context). If we get one from a restricted context then
 605         * the destination socket must be trusted.
 606         *
 607         * NOTE: We access the socket struct without holding the lock here.
 608         * This is ok because the field we are interested is never modified
 609         * outside of the create and destruct socket functions.
 610         */
 611        vsk = vsock_sk(sk);
 612        if (!vmci_transport_allow_dgram(vsk, dg->src.context))
 613                return VMCI_ERROR_NO_ACCESS;
 614
 615        size = VMCI_DG_SIZE(dg);
 616
 617        /* Attach the packet to the socket's receive queue as an sk_buff. */
 618        skb = alloc_skb(size, GFP_ATOMIC);
 619        if (!skb)
 620                return VMCI_ERROR_NO_MEM;
 621
 622        /* sk_receive_skb() will do a sock_put(), so hold here. */
 623        sock_hold(sk);
 624        skb_put(skb, size);
 625        memcpy(skb->data, dg, size);
 626        sk_receive_skb(sk, skb, 0);
 627
 628        return VMCI_SUCCESS;
 629}
 630
 631static bool vmci_transport_stream_allow(u32 cid, u32 port)
 632{
 633        static const u32 non_socket_contexts[] = {
 634                VMADDR_CID_RESERVED,
 635        };
 636        int i;
 637
 638        BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts));
 639
 640        for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) {
 641                if (cid == non_socket_contexts[i])
 642                        return false;
 643        }
 644
 645        return true;
 646}
 647
 648/* This is invoked as part of a tasklet that's scheduled when the VMCI
 649 * interrupt fires.  This is run in bottom-half context but it defers most of
 650 * its work to the packet handling work queue.
 651 */
 652
 653static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
 654{
 655        struct sock *sk;
 656        struct sockaddr_vm dst;
 657        struct sockaddr_vm src;
 658        struct vmci_transport_packet *pkt;
 659        struct vsock_sock *vsk;
 660        bool bh_process_pkt;
 661        int err;
 662
 663        sk = NULL;
 664        err = VMCI_SUCCESS;
 665        bh_process_pkt = false;
 666
 667        /* Ignore incoming packets from contexts without sockets, or resources
 668         * that aren't vsock implementations.
 669         */
 670
 671        if (!vmci_transport_stream_allow(dg->src.context, -1)
 672            || vmci_transport_peer_rid(dg->src.context) != dg->src.resource)
 673                return VMCI_ERROR_NO_ACCESS;
 674
 675        if (VMCI_DG_SIZE(dg) < sizeof(*pkt))
 676                /* Drop datagrams that do not contain full VSock packets. */
 677                return VMCI_ERROR_INVALID_ARGS;
 678
 679        pkt = (struct vmci_transport_packet *)dg;
 680
 681        /* Find the socket that should handle this packet.  First we look for a
 682         * connected socket and if there is none we look for a socket bound to
 683         * the destintation address.
 684         */
 685        vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
 686        vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);
 687
 688        sk = vsock_find_connected_socket(&src, &dst);
 689        if (!sk) {
 690                sk = vsock_find_bound_socket(&dst);
 691                if (!sk) {
 692                        /* We could not find a socket for this specified
 693                         * address.  If this packet is a RST, we just drop it.
 694                         * If it is another packet, we send a RST.  Note that
 695                         * we do not send a RST reply to RSTs so that we do not
 696                         * continually send RSTs between two endpoints.
 697                         *
 698                         * Note that since this is a reply, dst is src and src
 699                         * is dst.
 700                         */
 701                        if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
 702                                pr_err("unable to send reset\n");
 703
 704                        err = VMCI_ERROR_NOT_FOUND;
 705                        goto out;
 706                }
 707        }
 708
 709        /* If the received packet type is beyond all types known to this
 710         * implementation, reply with an invalid message.  Hopefully this will
 711         * help when implementing backwards compatibility in the future.
 712         */
 713        if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) {
 714                vmci_transport_send_invalid_bh(&dst, &src);
 715                err = VMCI_ERROR_INVALID_ARGS;
 716                goto out;
 717        }
 718
 719        /* This handler is privileged when this module is running on the host.
 720         * We will get datagram connect requests from all endpoints (even VMs
 721         * that are in a restricted context). If we get one from a restricted
 722         * context then the destination socket must be trusted.
 723         *
 724         * NOTE: We access the socket struct without holding the lock here.
 725         * This is ok because the field we are interested is never modified
 726         * outside of the create and destruct socket functions.
 727         */
 728        vsk = vsock_sk(sk);
 729        if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) {
 730                err = VMCI_ERROR_NO_ACCESS;
 731                goto out;
 732        }
 733
 734        /* We do most everything in a work queue, but let's fast path the
 735         * notification of reads and writes to help data transfer performance.
 736         * We can only do this if there is no process context code executing
 737         * for this socket since that may change the state.
 738         */
 739        bh_lock_sock(sk);
 740
 741        if (!sock_owned_by_user(sk)) {
 742                /* The local context ID may be out of date, update it. */
 743                vsk->local_addr.svm_cid = dst.svm_cid;
 744
 745                if (sk->sk_state == TCP_ESTABLISHED)
 746                        vmci_trans(vsk)->notify_ops->handle_notify_pkt(
 747                                        sk, pkt, true, &dst, &src,
 748                                        &bh_process_pkt);
 749        }
 750
 751        bh_unlock_sock(sk);
 752
 753        if (!bh_process_pkt) {
 754                struct vmci_transport_recv_pkt_info *recv_pkt_info;
 755
 756                recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC);
 757                if (!recv_pkt_info) {
 758                        if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
 759                                pr_err("unable to send reset\n");
 760
 761                        err = VMCI_ERROR_NO_MEM;
 762                        goto out;
 763                }
 764
 765                recv_pkt_info->sk = sk;
 766                memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt));
 767                INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work);
 768
 769                schedule_work(&recv_pkt_info->work);
 770                /* Clear sk so that the reference count incremented by one of
 771                 * the Find functions above is not decremented below.  We need
 772                 * that reference count for the packet handler we've scheduled
 773                 * to run.
 774                 */
 775                sk = NULL;
 776        }
 777
 778out:
 779        if (sk)
 780                sock_put(sk);
 781
 782        return err;
 783}
 784
 785static void vmci_transport_handle_detach(struct sock *sk)
 786{
 787        struct vsock_sock *vsk;
 788
 789        vsk = vsock_sk(sk);
 790        if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
 791                sock_set_flag(sk, SOCK_DONE);
 792
 793                /* On a detach the peer will not be sending or receiving
 794                 * anymore.
 795                 */
 796                vsk->peer_shutdown = SHUTDOWN_MASK;
 797
 798                /* We should not be sending anymore since the peer won't be
 799                 * there to receive, but we can still receive if there is data
 800                 * left in our consume queue. If the local endpoint is a host,
 801                 * we can't call vsock_stream_has_data, since that may block,
 802                 * but a host endpoint can't read data once the VM has
 803                 * detached, so there is no available data in that case.
 804                 */
 805                if (vsk->local_addr.svm_cid == VMADDR_CID_HOST ||
 806                    vsock_stream_has_data(vsk) <= 0) {
 807                        if (sk->sk_state == TCP_SYN_SENT) {
 808                                /* The peer may detach from a queue pair while
 809                                 * we are still in the connecting state, i.e.,
 810                                 * if the peer VM is killed after attaching to
 811                                 * a queue pair, but before we complete the
 812                                 * handshake. In that case, we treat the detach
 813                                 * event like a reset.
 814                                 */
 815
 816                                sk->sk_state = TCP_CLOSE;
 817                                sk->sk_err = ECONNRESET;
 818                                sk->sk_error_report(sk);
 819                                return;
 820                        }
 821                        sk->sk_state = TCP_CLOSE;
 822                }
 823                sk->sk_state_change(sk);
 824        }
 825}
 826
 827static void vmci_transport_peer_detach_cb(u32 sub_id,
 828                                          const struct vmci_event_data *e_data,
 829                                          void *client_data)
 830{
 831        struct vmci_transport *trans = client_data;
 832        const struct vmci_event_payload_qp *e_payload;
 833
 834        e_payload = vmci_event_data_const_payload(e_data);
 835
 836        /* XXX This is lame, we should provide a way to lookup sockets by
 837         * qp_handle.
 838         */
 839        if (vmci_handle_is_invalid(e_payload->handle) ||
 840            !vmci_handle_is_equal(trans->qp_handle, e_payload->handle))
 841                return;
 842
 843        /* We don't ask for delayed CBs when we subscribe to this event (we
 844         * pass 0 as flags to vmci_event_subscribe()).  VMCI makes no
 845         * guarantees in that case about what context we might be running in,
 846         * so it could be BH or process, blockable or non-blockable.  So we
 847         * need to account for all possible contexts here.
 848         */
 849        spin_lock_bh(&trans->lock);
 850        if (!trans->sk)
 851                goto out;
 852
 853        /* Apart from here, trans->lock is only grabbed as part of sk destruct,
 854         * where trans->sk isn't locked.
 855         */
 856        bh_lock_sock(trans->sk);
 857
 858        vmci_transport_handle_detach(trans->sk);
 859
 860        bh_unlock_sock(trans->sk);
 861 out:
 862        spin_unlock_bh(&trans->lock);
 863}
 864
 865static void vmci_transport_qp_resumed_cb(u32 sub_id,
 866                                         const struct vmci_event_data *e_data,
 867                                         void *client_data)
 868{
 869        vsock_for_each_connected_socket(vmci_transport_handle_detach);
 870}
 871
 872static void vmci_transport_recv_pkt_work(struct work_struct *work)
 873{
 874        struct vmci_transport_recv_pkt_info *recv_pkt_info;
 875        struct vmci_transport_packet *pkt;
 876        struct sock *sk;
 877
 878        recv_pkt_info =
 879                container_of(work, struct vmci_transport_recv_pkt_info, work);
 880        sk = recv_pkt_info->sk;
 881        pkt = &recv_pkt_info->pkt;
 882
 883        lock_sock(sk);
 884
 885        /* The local context ID may be out of date. */
 886        vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;
 887
 888        switch (sk->sk_state) {
 889        case TCP_LISTEN:
 890                vmci_transport_recv_listen(sk, pkt);
 891                break;
 892        case TCP_SYN_SENT:
 893                /* Processing of pending connections for servers goes through
 894                 * the listening socket, so see vmci_transport_recv_listen()
 895                 * for that path.
 896                 */
 897                vmci_transport_recv_connecting_client(sk, pkt);
 898                break;
 899        case TCP_ESTABLISHED:
 900                vmci_transport_recv_connected(sk, pkt);
 901                break;
 902        default:
 903                /* Because this function does not run in the same context as
 904                 * vmci_transport_recv_stream_cb it is possible that the
 905                 * socket has closed. We need to let the other side know or it
 906                 * could be sitting in a connect and hang forever. Send a
 907                 * reset to prevent that.
 908                 */
 909                vmci_transport_send_reset(sk, pkt);
 910                break;
 911        }
 912
 913        release_sock(sk);
 914        kfree(recv_pkt_info);
 915        /* Release reference obtained in the stream callback when we fetched
 916         * this socket out of the bound or connected list.
 917         */
 918        sock_put(sk);
 919}
 920
 921static int vmci_transport_recv_listen(struct sock *sk,
 922                                      struct vmci_transport_packet *pkt)
 923{
 924        struct sock *pending;
 925        struct vsock_sock *vpending;
 926        int err;
 927        u64 qp_size;
 928        bool old_request = false;
 929        bool old_pkt_proto = false;
 930
 931        err = 0;
 932
 933        /* Because we are in the listen state, we could be receiving a packet
 934         * for ourself or any previous connection requests that we received.
 935         * If it's the latter, we try to find a socket in our list of pending
 936         * connections and, if we do, call the appropriate handler for the
 937         * state that that socket is in.  Otherwise we try to service the
 938         * connection request.
 939         */
 940        pending = vmci_transport_get_pending(sk, pkt);
 941        if (pending) {
 942                lock_sock(pending);
 943
 944                /* The local context ID may be out of date. */
 945                vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context;
 946
 947                switch (pending->sk_state) {
 948                case TCP_SYN_SENT:
 949                        err = vmci_transport_recv_connecting_server(sk,
 950                                                                    pending,
 951                                                                    pkt);
 952                        break;
 953                default:
 954                        vmci_transport_send_reset(pending, pkt);
 955                        err = -EINVAL;
 956                }
 957
 958                if (err < 0)
 959                        vsock_remove_pending(sk, pending);
 960
 961                release_sock(pending);
 962                vmci_transport_release_pending(pending);
 963
 964                return err;
 965        }
 966
 967        /* The listen state only accepts connection requests.  Reply with a
 968         * reset unless we received a reset.
 969         */
 970
 971        if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST ||
 972              pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) {
 973                vmci_transport_reply_reset(pkt);
 974                return -EINVAL;
 975        }
 976
 977        if (pkt->u.size == 0) {
 978                vmci_transport_reply_reset(pkt);
 979                return -EINVAL;
 980        }
 981
 982        /* If this socket can't accommodate this connection request, we send a
 983         * reset.  Otherwise we create and initialize a child socket and reply
 984         * with a connection negotiation.
 985         */
 986        if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) {
 987                vmci_transport_reply_reset(pkt);
 988                return -ECONNREFUSED;
 989        }
 990
 991        pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
 992                                 sk->sk_type, 0);
 993        if (!pending) {
 994                vmci_transport_send_reset(sk, pkt);
 995                return -ENOMEM;
 996        }
 997
 998        vpending = vsock_sk(pending);
 999
1000        vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context,
1001                        pkt->dst_port);
1002        vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context,
1003                        pkt->src_port);
1004
1005        /* If the proposed size fits within our min/max, accept it. Otherwise
1006         * propose our own size.
1007         */
1008        if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size &&
1009            pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) {
1010                qp_size = pkt->u.size;
1011        } else {
1012                qp_size = vmci_trans(vpending)->queue_pair_size;
1013        }
1014
1015        /* Figure out if we are using old or new requests based on the
1016         * overrides pkt types sent by our peer.
1017         */
1018        if (vmci_transport_old_proto_override(&old_pkt_proto)) {
1019                old_request = old_pkt_proto;
1020        } else {
1021                if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST)
1022                        old_request = true;
1023                else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)
1024                        old_request = false;
1025
1026        }
1027
1028        if (old_request) {
1029                /* Handle a REQUEST (or override) */
1030                u16 version = VSOCK_PROTO_INVALID;
1031                if (vmci_transport_proto_to_notify_struct(
1032                        pending, &version, true))
1033                        err = vmci_transport_send_negotiate(pending, qp_size);
1034                else
1035                        err = -EINVAL;
1036
1037        } else {
1038                /* Handle a REQUEST2 (or override) */
1039                int proto_int = pkt->proto;
1040                int pos;
1041                u16 active_proto_version = 0;
1042
1043                /* The list of possible protocols is the intersection of all
1044                 * protocols the client supports ... plus all the protocols we
1045                 * support.
1046                 */
1047                proto_int &= vmci_transport_new_proto_supported_versions();
1048
1049                /* We choose the highest possible protocol version and use that
1050                 * one.
1051                 */
1052                pos = fls(proto_int);
1053                if (pos) {
1054                        active_proto_version = (1 << (pos - 1));
1055                        if (vmci_transport_proto_to_notify_struct(
1056                                pending, &active_proto_version, false))
1057                                err = vmci_transport_send_negotiate2(pending,
1058                                                        qp_size,
1059                                                        active_proto_version);
1060                        else
1061                                err = -EINVAL;
1062
1063                } else {
1064                        err = -EINVAL;
1065                }
1066        }
1067
1068        if (err < 0) {
1069                vmci_transport_send_reset(sk, pkt);
1070                sock_put(pending);
1071                err = vmci_transport_error_to_vsock_error(err);
1072                goto out;
1073        }
1074
1075        vsock_add_pending(sk, pending);
1076        sk->sk_ack_backlog++;
1077
1078        pending->sk_state = TCP_SYN_SENT;
1079        vmci_trans(vpending)->produce_size =
1080                vmci_trans(vpending)->consume_size = qp_size;
1081        vmci_trans(vpending)->queue_pair_size = qp_size;
1082
1083        vmci_trans(vpending)->notify_ops->process_request(pending);
1084
1085        /* We might never receive another message for this socket and it's not
1086         * connected to any process, so we have to ensure it gets cleaned up
1087         * ourself.  Our delayed work function will take care of that.  Note
1088         * that we do not ever cancel this function since we have few
1089         * guarantees about its state when calling cancel_delayed_work().
1090         * Instead we hold a reference on the socket for that function and make
1091         * it capable of handling cases where it needs to do nothing but
1092         * release that reference.
1093         */
1094        vpending->listener = sk;
1095        sock_hold(sk);
1096        sock_hold(pending);
1097        INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work);
1098        schedule_delayed_work(&vpending->dwork, HZ);
1099
1100out:
1101        return err;
1102}
1103
1104static int
1105vmci_transport_recv_connecting_server(struct sock *listener,
1106                                      struct sock *pending,
1107                                      struct vmci_transport_packet *pkt)
1108{
1109        struct vsock_sock *vpending;
1110        struct vmci_handle handle;
1111        struct vmci_qp *qpair;
1112        bool is_local;
1113        u32 flags;
1114        u32 detach_sub_id;
1115        int err;
1116        int skerr;
1117
1118        vpending = vsock_sk(pending);
1119        detach_sub_id = VMCI_INVALID_ID;
1120
1121        switch (pkt->type) {
1122        case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
1123                if (vmci_handle_is_invalid(pkt->u.handle)) {
1124                        vmci_transport_send_reset(pending, pkt);
1125                        skerr = EPROTO;
1126                        err = -EINVAL;
1127                        goto destroy;
1128                }
1129                break;
1130        default:
1131                /* Close and cleanup the connection. */
1132                vmci_transport_send_reset(pending, pkt);
1133                skerr = EPROTO;
1134                err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL;
1135                goto destroy;
1136        }
1137
1138        /* In order to complete the connection we need to attach to the offered
1139         * queue pair and send an attach notification.  We also subscribe to the
1140         * detach event so we know when our peer goes away, and we do that
1141         * before attaching so we don't miss an event.  If all this succeeds,
1142         * we update our state and wakeup anything waiting in accept() for a
1143         * connection.
1144         */
1145
1146        /* We don't care about attach since we ensure the other side has
1147         * attached by specifying the ATTACH_ONLY flag below.
1148         */
1149        err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
1150                                   vmci_transport_peer_detach_cb,
1151                                   vmci_trans(vpending), &detach_sub_id);
1152        if (err < VMCI_SUCCESS) {
1153                vmci_transport_send_reset(pending, pkt);
1154                err = vmci_transport_error_to_vsock_error(err);
1155                skerr = -err;
1156                goto destroy;
1157        }
1158
1159        vmci_trans(vpending)->detach_sub_id = detach_sub_id;
1160
1161        /* Now attach to the queue pair the client created. */
1162        handle = pkt->u.handle;
1163
1164        /* vpending->local_addr always has a context id so we do not need to
1165         * worry about VMADDR_CID_ANY in this case.
1166         */
1167        is_local =
1168            vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid;
1169        flags = VMCI_QPFLAG_ATTACH_ONLY;
1170        flags |= is_local ? VMCI_QPFLAG_LOCAL : 0;
1171
1172        err = vmci_transport_queue_pair_alloc(
1173                                        &qpair,
1174                                        &handle,
1175                                        vmci_trans(vpending)->produce_size,
1176                                        vmci_trans(vpending)->consume_size,
1177                                        pkt->dg.src.context,
1178                                        flags,
1179                                        vmci_transport_is_trusted(
1180                                                vpending,
1181                                                vpending->remote_addr.svm_cid));
1182        if (err < 0) {
1183                vmci_transport_send_reset(pending, pkt);
1184                skerr = -err;
1185                goto destroy;
1186        }
1187
1188        vmci_trans(vpending)->qp_handle = handle;
1189        vmci_trans(vpending)->qpair = qpair;
1190
1191        /* When we send the attach message, we must be ready to handle incoming
1192         * control messages on the newly connected socket. So we move the
1193         * pending socket to the connected state before sending the attach
1194         * message. Otherwise, an incoming packet triggered by the attach being
1195         * received by the peer may be processed concurrently with what happens
1196         * below after sending the attach message, and that incoming packet
1197         * will find the listening socket instead of the (currently) pending
1198         * socket. Note that enqueueing the socket increments the reference
1199         * count, so even if a reset comes before the connection is accepted,
1200         * the socket will be valid until it is removed from the queue.
1201         *
1202         * If we fail sending the attach below, we remove the socket from the
1203         * connected list and move the socket to TCP_CLOSE before
1204         * releasing the lock, so a pending slow path processing of an incoming
1205         * packet will not see the socket in the connected state in that case.
1206         */
1207        pending->sk_state = TCP_ESTABLISHED;
1208
1209        vsock_insert_connected(vpending);
1210
1211        /* Notify our peer of our attach. */
1212        err = vmci_transport_send_attach(pending, handle);
1213        if (err < 0) {
1214                vsock_remove_connected(vpending);
1215                pr_err("Could not send attach\n");
1216                vmci_transport_send_reset(pending, pkt);
1217                err = vmci_transport_error_to_vsock_error(err);
1218                skerr = -err;
1219                goto destroy;
1220        }
1221
1222        /* We have a connection. Move the now connected socket from the
1223         * listener's pending list to the accept queue so callers of accept()
1224         * can find it.
1225         */
1226        vsock_remove_pending(listener, pending);
1227        vsock_enqueue_accept(listener, pending);
1228
1229        /* Callers of accept() will be be waiting on the listening socket, not
1230         * the pending socket.
1231         */
1232        listener->sk_data_ready(listener);
1233
1234        return 0;
1235
1236destroy:
1237        pending->sk_err = skerr;
1238        pending->sk_state = TCP_CLOSE;
1239        /* As long as we drop our reference, all necessary cleanup will handle
1240         * when the cleanup function drops its reference and our destruct
1241         * implementation is called.  Note that since the listen handler will
1242         * remove pending from the pending list upon our failure, the cleanup
1243         * function won't drop the additional reference, which is why we do it
1244         * here.
1245         */
1246        sock_put(pending);
1247
1248        return err;
1249}
1250
1251static int
1252vmci_transport_recv_connecting_client(struct sock *sk,
1253                                      struct vmci_transport_packet *pkt)
1254{
1255        struct vsock_sock *vsk;
1256        int err;
1257        int skerr;
1258
1259        vsk = vsock_sk(sk);
1260
1261        switch (pkt->type) {
1262        case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
1263                if (vmci_handle_is_invalid(pkt->u.handle) ||
1264                    !vmci_handle_is_equal(pkt->u.handle,
1265                                          vmci_trans(vsk)->qp_handle)) {
1266                        skerr = EPROTO;
1267                        err = -EINVAL;
1268                        goto destroy;
1269                }
1270
1271                /* Signify the socket is connected and wakeup the waiter in
1272                 * connect(). Also place the socket in the connected table for
1273                 * accounting (it can already be found since it's in the bound
1274                 * table).
1275                 */
1276                sk->sk_state = TCP_ESTABLISHED;
1277                sk->sk_socket->state = SS_CONNECTED;
1278                vsock_insert_connected(vsk);
1279                sk->sk_state_change(sk);
1280
1281                break;
1282        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
1283        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
1284                if (pkt->u.size == 0
1285                    || pkt->dg.src.context != vsk->remote_addr.svm_cid
1286                    || pkt->src_port != vsk->remote_addr.svm_port
1287                    || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)
1288                    || vmci_trans(vsk)->qpair
1289                    || vmci_trans(vsk)->produce_size != 0
1290                    || vmci_trans(vsk)->consume_size != 0
1291                    || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
1292                        skerr = EPROTO;
1293                        err = -EINVAL;
1294
1295                        goto destroy;
1296                }
1297
1298                err = vmci_transport_recv_connecting_client_negotiate(sk, pkt);
1299                if (err) {
1300                        skerr = -err;
1301                        goto destroy;
1302                }
1303
1304                break;
1305        case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
1306                err = vmci_transport_recv_connecting_client_invalid(sk, pkt);
1307                if (err) {
1308                        skerr = -err;
1309                        goto destroy;
1310                }
1311
1312                break;
1313        case VMCI_TRANSPORT_PACKET_TYPE_RST:
1314                /* Older versions of the linux code (WS 6.5 / ESX 4.0) used to
1315                 * continue processing here after they sent an INVALID packet.
1316                 * This meant that we got a RST after the INVALID. We ignore a
1317                 * RST after an INVALID. The common code doesn't send the RST
1318                 * ... so we can hang if an old version of the common code
1319                 * fails between getting a REQUEST and sending an OFFER back.
1320                 * Not much we can do about it... except hope that it doesn't
1321                 * happen.
1322                 */
1323                if (vsk->ignore_connecting_rst) {
1324                        vsk->ignore_connecting_rst = false;
1325                } else {
1326                        skerr = ECONNRESET;
1327                        err = 0;
1328                        goto destroy;
1329                }
1330
1331                break;
1332        default:
1333                /* Close and cleanup the connection. */
1334                skerr = EPROTO;
1335                err = -EINVAL;
1336                goto destroy;
1337        }
1338
1339        return 0;
1340
1341destroy:
1342        vmci_transport_send_reset(sk, pkt);
1343
1344        sk->sk_state = TCP_CLOSE;
1345        sk->sk_err = skerr;
1346        sk->sk_error_report(sk);
1347        return err;
1348}
1349
1350static int vmci_transport_recv_connecting_client_negotiate(
1351                                        struct sock *sk,
1352                                        struct vmci_transport_packet *pkt)
1353{
1354        int err;
1355        struct vsock_sock *vsk;
1356        struct vmci_handle handle;
1357        struct vmci_qp *qpair;
1358        u32 detach_sub_id;
1359        bool is_local;
1360        u32 flags;
1361        bool old_proto = true;
1362        bool old_pkt_proto;
1363        u16 version;
1364
1365        vsk = vsock_sk(sk);
1366        handle = VMCI_INVALID_HANDLE;
1367        detach_sub_id = VMCI_INVALID_ID;
1368
1369        /* If we have gotten here then we should be past the point where old
1370         * linux vsock could have sent the bogus rst.
1371         */
1372        vsk->sent_request = false;
1373        vsk->ignore_connecting_rst = false;
1374
1375        /* Verify that we're OK with the proposed queue pair size */
1376        if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size ||
1377            pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) {
1378                err = -EINVAL;
1379                goto destroy;
1380        }
1381
1382        /* At this point we know the CID the peer is using to talk to us. */
1383
1384        if (vsk->local_addr.svm_cid == VMADDR_CID_ANY)
1385                vsk->local_addr.svm_cid = pkt->dg.dst.context;
1386
1387        /* Setup the notify ops to be the highest supported version that both
1388         * the server and the client support.
1389         */
1390
1391        if (vmci_transport_old_proto_override(&old_pkt_proto)) {
1392                old_proto = old_pkt_proto;
1393        } else {
1394                if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE)
1395                        old_proto = true;
1396                else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2)
1397                        old_proto = false;
1398
1399        }
1400
1401        if (old_proto)
1402                version = VSOCK_PROTO_INVALID;
1403        else
1404                version = pkt->proto;
1405
1406        if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) {
1407                err = -EINVAL;
1408                goto destroy;
1409        }
1410
1411        /* Subscribe to detach events first.
1412         *
1413         * XXX We attach once for each queue pair created for now so it is easy
1414         * to find the socket (it's provided), but later we should only
1415         * subscribe once and add a way to lookup sockets by queue pair handle.
1416         */
1417        err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
1418                                   vmci_transport_peer_detach_cb,
1419                                   vmci_trans(vsk), &detach_sub_id);
1420        if (err < VMCI_SUCCESS) {
1421                err = vmci_transport_error_to_vsock_error(err);
1422                goto destroy;
1423        }
1424
1425        /* Make VMCI select the handle for us. */
1426        handle = VMCI_INVALID_HANDLE;
1427        is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid;
1428        flags = is_local ? VMCI_QPFLAG_LOCAL : 0;
1429
1430        err = vmci_transport_queue_pair_alloc(&qpair,
1431                                              &handle,
1432                                              pkt->u.size,
1433                                              pkt->u.size,
1434                                              vsk->remote_addr.svm_cid,
1435                                              flags,
1436                                              vmci_transport_is_trusted(
1437                                                  vsk,
1438                                                  vsk->
1439                                                  remote_addr.svm_cid));
1440        if (err < 0)
1441                goto destroy;
1442
1443        err = vmci_transport_send_qp_offer(sk, handle);
1444        if (err < 0) {
1445                err = vmci_transport_error_to_vsock_error(err);
1446                goto destroy;
1447        }
1448
1449        vmci_trans(vsk)->qp_handle = handle;
1450        vmci_trans(vsk)->qpair = qpair;
1451
1452        vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size =
1453                pkt->u.size;
1454
1455        vmci_trans(vsk)->detach_sub_id = detach_sub_id;
1456
1457        vmci_trans(vsk)->notify_ops->process_negotiate(sk);
1458
1459        return 0;
1460
1461destroy:
1462        if (detach_sub_id != VMCI_INVALID_ID)
1463                vmci_event_unsubscribe(detach_sub_id);
1464
1465        if (!vmci_handle_is_invalid(handle))
1466                vmci_qpair_detach(&qpair);
1467
1468        return err;
1469}
1470
1471static int
1472vmci_transport_recv_connecting_client_invalid(struct sock *sk,
1473                                              struct vmci_transport_packet *pkt)
1474{
1475        int err = 0;
1476        struct vsock_sock *vsk = vsock_sk(sk);
1477
1478        if (vsk->sent_request) {
1479                vsk->sent_request = false;
1480                vsk->ignore_connecting_rst = true;
1481
1482                err = vmci_transport_send_conn_request(
1483                        sk, vmci_trans(vsk)->queue_pair_size);
1484                if (err < 0)
1485                        err = vmci_transport_error_to_vsock_error(err);
1486                else
1487                        err = 0;
1488
1489        }
1490
1491        return err;
1492}
1493
1494static int vmci_transport_recv_connected(struct sock *sk,
1495                                         struct vmci_transport_packet *pkt)
1496{
1497        struct vsock_sock *vsk;
1498        bool pkt_processed = false;
1499
1500        /* In cases where we are closing the connection, it's sufficient to
1501         * mark the state change (and maybe error) and wake up any waiting
1502         * threads. Since this is a connected socket, it's owned by a user
1503         * process and will be cleaned up when the failure is passed back on
1504         * the current or next system call.  Our system call implementations
1505         * must therefore check for error and state changes on entry and when
1506         * being awoken.
1507         */
1508        switch (pkt->type) {
1509        case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
1510                if (pkt->u.mode) {
1511                        vsk = vsock_sk(sk);
1512
1513                        vsk->peer_shutdown |= pkt->u.mode;
1514                        sk->sk_state_change(sk);
1515                }
1516                break;
1517
1518        case VMCI_TRANSPORT_PACKET_TYPE_RST:
1519                vsk = vsock_sk(sk);
1520                /* It is possible that we sent our peer a message (e.g a
1521                 * WAITING_READ) right before we got notified that the peer had
1522                 * detached. If that happens then we can get a RST pkt back
1523                 * from our peer even though there is data available for us to
1524                 * read. In that case, don't shutdown the socket completely but
1525                 * instead allow the local client to finish reading data off
1526                 * the queuepair. Always treat a RST pkt in connected mode like
1527                 * a clean shutdown.
1528                 */
1529                sock_set_flag(sk, SOCK_DONE);
1530                vsk->peer_shutdown = SHUTDOWN_MASK;
1531                if (vsock_stream_has_data(vsk) <= 0)
1532                        sk->sk_state = TCP_CLOSING;
1533
1534                sk->sk_state_change(sk);
1535                break;
1536
1537        default:
1538                vsk = vsock_sk(sk);
1539                vmci_trans(vsk)->notify_ops->handle_notify_pkt(
1540                                sk, pkt, false, NULL, NULL,
1541                                &pkt_processed);
1542                if (!pkt_processed)
1543                        return -EINVAL;
1544
1545                break;
1546        }
1547
1548        return 0;
1549}
1550
1551static int vmci_transport_socket_init(struct vsock_sock *vsk,
1552                                      struct vsock_sock *psk)
1553{
1554        vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL);
1555        if (!vsk->trans)
1556                return -ENOMEM;
1557
1558        vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
1559        vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
1560        vmci_trans(vsk)->qpair = NULL;
1561        vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0;
1562        vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
1563        vmci_trans(vsk)->notify_ops = NULL;
1564        INIT_LIST_HEAD(&vmci_trans(vsk)->elem);
1565        vmci_trans(vsk)->sk = &vsk->sk;
1566        spin_lock_init(&vmci_trans(vsk)->lock);
1567        if (psk) {
1568                vmci_trans(vsk)->queue_pair_size =
1569                        vmci_trans(psk)->queue_pair_size;
1570                vmci_trans(vsk)->queue_pair_min_size =
1571                        vmci_trans(psk)->queue_pair_min_size;
1572                vmci_trans(vsk)->queue_pair_max_size =
1573                        vmci_trans(psk)->queue_pair_max_size;
1574        } else {
1575                vmci_trans(vsk)->queue_pair_size =
1576                        VMCI_TRANSPORT_DEFAULT_QP_SIZE;
1577                vmci_trans(vsk)->queue_pair_min_size =
1578                         VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN;
1579                vmci_trans(vsk)->queue_pair_max_size =
1580                        VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX;
1581        }
1582
1583        return 0;
1584}
1585
1586static void vmci_transport_free_resources(struct list_head *transport_list)
1587{
1588        while (!list_empty(transport_list)) {
1589                struct vmci_transport *transport =
1590                    list_first_entry(transport_list, struct vmci_transport,
1591                                     elem);
1592                list_del(&transport->elem);
1593
1594                if (transport->detach_sub_id != VMCI_INVALID_ID) {
1595                        vmci_event_unsubscribe(transport->detach_sub_id);
1596                        transport->detach_sub_id = VMCI_INVALID_ID;
1597                }
1598
1599                if (!vmci_handle_is_invalid(transport->qp_handle)) {
1600                        vmci_qpair_detach(&transport->qpair);
1601                        transport->qp_handle = VMCI_INVALID_HANDLE;
1602                        transport->produce_size = 0;
1603                        transport->consume_size = 0;
1604                }
1605
1606                kfree(transport);
1607        }
1608}
1609
1610static void vmci_transport_cleanup(struct work_struct *work)
1611{
1612        LIST_HEAD(pending);
1613
1614        spin_lock_bh(&vmci_transport_cleanup_lock);
1615        list_replace_init(&vmci_transport_cleanup_list, &pending);
1616        spin_unlock_bh(&vmci_transport_cleanup_lock);
1617        vmci_transport_free_resources(&pending);
1618}
1619
1620static void vmci_transport_destruct(struct vsock_sock *vsk)
1621{
1622        /* Ensure that the detach callback doesn't use the sk/vsk
1623         * we are about to destruct.
1624         */
1625        spin_lock_bh(&vmci_trans(vsk)->lock);
1626        vmci_trans(vsk)->sk = NULL;
1627        spin_unlock_bh(&vmci_trans(vsk)->lock);
1628
1629        if (vmci_trans(vsk)->notify_ops)
1630                vmci_trans(vsk)->notify_ops->socket_destruct(vsk);
1631
1632        spin_lock_bh(&vmci_transport_cleanup_lock);
1633        list_add(&vmci_trans(vsk)->elem, &vmci_transport_cleanup_list);
1634        spin_unlock_bh(&vmci_transport_cleanup_lock);
1635        schedule_work(&vmci_transport_cleanup_work);
1636
1637        vsk->trans = NULL;
1638}
1639
1640static void vmci_transport_release(struct vsock_sock *vsk)
1641{
1642        vsock_remove_sock(vsk);
1643
1644        if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) {
1645                vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle);
1646                vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
1647        }
1648}
1649
1650static int vmci_transport_dgram_bind(struct vsock_sock *vsk,
1651                                     struct sockaddr_vm *addr)
1652{
1653        u32 port;
1654        u32 flags;
1655        int err;
1656
1657        /* VMCI will select a resource ID for us if we provide
1658         * VMCI_INVALID_ID.
1659         */
1660        port = addr->svm_port == VMADDR_PORT_ANY ?
1661                        VMCI_INVALID_ID : addr->svm_port;
1662
1663        if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE))
1664                return -EACCES;
1665
1666        flags = addr->svm_cid == VMADDR_CID_ANY ?
1667                                VMCI_FLAG_ANYCID_DG_HND : 0;
1668
1669        err = vmci_transport_datagram_create_hnd(port, flags,
1670                                                 vmci_transport_recv_dgram_cb,
1671                                                 &vsk->sk,
1672                                                 &vmci_trans(vsk)->dg_handle);
1673        if (err < VMCI_SUCCESS)
1674                return vmci_transport_error_to_vsock_error(err);
1675        vsock_addr_init(&vsk->local_addr, addr->svm_cid,
1676                        vmci_trans(vsk)->dg_handle.resource);
1677
1678        return 0;
1679}
1680
1681static int vmci_transport_dgram_enqueue(
1682        struct vsock_sock *vsk,
1683        struct sockaddr_vm *remote_addr,
1684        struct msghdr *msg,
1685        size_t len)
1686{
1687        int err;
1688        struct vmci_datagram *dg;
1689
1690        if (len > VMCI_MAX_DG_PAYLOAD_SIZE)
1691                return -EMSGSIZE;
1692
1693        if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid))
1694                return -EPERM;
1695
1696        /* Allocate a buffer for the user's message and our packet header. */
1697        dg = kmalloc(len + sizeof(*dg), GFP_KERNEL);
1698        if (!dg)
1699                return -ENOMEM;
1700
1701        memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len);
1702
1703        dg->dst = vmci_make_handle(remote_addr->svm_cid,
1704                                   remote_addr->svm_port);
1705        dg->src = vmci_make_handle(vsk->local_addr.svm_cid,
1706                                   vsk->local_addr.svm_port);
1707        dg->payload_size = len;
1708
1709        err = vmci_datagram_send(dg);
1710        kfree(dg);
1711        if (err < 0)
1712                return vmci_transport_error_to_vsock_error(err);
1713
1714        return err - sizeof(*dg);
1715}
1716
1717static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk,
1718                                        struct msghdr *msg, size_t len,
1719                                        int flags)
1720{
1721        int err;
1722        int noblock;
1723        struct vmci_datagram *dg;
1724        size_t payload_len;
1725        struct sk_buff *skb;
1726
1727        noblock = flags & MSG_DONTWAIT;
1728
1729        if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
1730                return -EOPNOTSUPP;
1731
1732        /* Retrieve the head sk_buff from the socket's receive queue. */
1733        err = 0;
1734        skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
1735        if (!skb)
1736                return err;
1737
1738        dg = (struct vmci_datagram *)skb->data;
1739        if (!dg)
1740                /* err is 0, meaning we read zero bytes. */
1741                goto out;
1742
1743        payload_len = dg->payload_size;
1744        /* Ensure the sk_buff matches the payload size claimed in the packet. */
1745        if (payload_len != skb->len - sizeof(*dg)) {
1746                err = -EINVAL;
1747                goto out;
1748        }
1749
1750        if (payload_len > len) {
1751                payload_len = len;
1752                msg->msg_flags |= MSG_TRUNC;
1753        }
1754
1755        /* Place the datagram payload in the user's iovec. */
1756        err = skb_copy_datagram_msg(skb, sizeof(*dg), msg, payload_len);
1757        if (err)
1758                goto out;
1759
1760        if (msg->msg_name) {
1761                /* Provide the address of the sender. */
1762                DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
1763                vsock_addr_init(vm_addr, dg->src.context, dg->src.resource);
1764                msg->msg_namelen = sizeof(*vm_addr);
1765        }
1766        err = payload_len;
1767
1768out:
1769        skb_free_datagram(&vsk->sk, skb);
1770        return err;
1771}
1772
1773static bool vmci_transport_dgram_allow(u32 cid, u32 port)
1774{
1775        if (cid == VMADDR_CID_HYPERVISOR) {
1776                /* Registrations of PBRPC Servers do not modify VMX/Hypervisor
1777                 * state and are allowed.
1778                 */
1779                return port == VMCI_UNITY_PBRPC_REGISTER;
1780        }
1781
1782        return true;
1783}
1784
1785static int vmci_transport_connect(struct vsock_sock *vsk)
1786{
1787        int err;
1788        bool old_pkt_proto = false;
1789        struct sock *sk = &vsk->sk;
1790
1791        if (vmci_transport_old_proto_override(&old_pkt_proto) &&
1792                old_pkt_proto) {
1793                err = vmci_transport_send_conn_request(
1794                        sk, vmci_trans(vsk)->queue_pair_size);
1795                if (err < 0) {
1796                        sk->sk_state = TCP_CLOSE;
1797                        return err;
1798                }
1799        } else {
1800                int supported_proto_versions =
1801                        vmci_transport_new_proto_supported_versions();
1802                err = vmci_transport_send_conn_request2(
1803                                sk, vmci_trans(vsk)->queue_pair_size,
1804                                supported_proto_versions);
1805                if (err < 0) {
1806                        sk->sk_state = TCP_CLOSE;
1807                        return err;
1808                }
1809
1810                vsk->sent_request = true;
1811        }
1812
1813        return err;
1814}
1815
1816static ssize_t vmci_transport_stream_dequeue(
1817        struct vsock_sock *vsk,
1818        struct msghdr *msg,
1819        size_t len,
1820        int flags)
1821{
1822        if (flags & MSG_PEEK)
1823                return vmci_qpair_peekv(vmci_trans(vsk)->qpair, msg, len, 0);
1824        else
1825                return vmci_qpair_dequev(vmci_trans(vsk)->qpair, msg, len, 0);
1826}
1827
1828static ssize_t vmci_transport_stream_enqueue(
1829        struct vsock_sock *vsk,
1830        struct msghdr *msg,
1831        size_t len)
1832{
1833        return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0);
1834}
1835
1836static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)
1837{
1838        return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair);
1839}
1840
1841static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk)
1842{
1843        return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair);
1844}
1845
1846static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk)
1847{
1848        return vmci_trans(vsk)->consume_size;
1849}
1850
1851static bool vmci_transport_stream_is_active(struct vsock_sock *vsk)
1852{
1853        return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle);
1854}
1855
1856static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk)
1857{
1858        return vmci_trans(vsk)->queue_pair_size;
1859}
1860
1861static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk)
1862{
1863        return vmci_trans(vsk)->queue_pair_min_size;
1864}
1865
1866static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk)
1867{
1868        return vmci_trans(vsk)->queue_pair_max_size;
1869}
1870
1871static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
1872{
1873        if (val < vmci_trans(vsk)->queue_pair_min_size)
1874                vmci_trans(vsk)->queue_pair_min_size = val;
1875        if (val > vmci_trans(vsk)->queue_pair_max_size)
1876                vmci_trans(vsk)->queue_pair_max_size = val;
1877        vmci_trans(vsk)->queue_pair_size = val;
1878}
1879
1880static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk,
1881                                               u64 val)
1882{
1883        if (val > vmci_trans(vsk)->queue_pair_size)
1884                vmci_trans(vsk)->queue_pair_size = val;
1885        vmci_trans(vsk)->queue_pair_min_size = val;
1886}
1887
1888static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk,
1889                                               u64 val)
1890{
1891        if (val < vmci_trans(vsk)->queue_pair_size)
1892                vmci_trans(vsk)->queue_pair_size = val;
1893        vmci_trans(vsk)->queue_pair_max_size = val;
1894}
1895
1896static int vmci_transport_notify_poll_in(
1897        struct vsock_sock *vsk,
1898        size_t target,
1899        bool *data_ready_now)
1900{
1901        return vmci_trans(vsk)->notify_ops->poll_in(
1902                        &vsk->sk, target, data_ready_now);
1903}
1904
1905static int vmci_transport_notify_poll_out(
1906        struct vsock_sock *vsk,
1907        size_t target,
1908        bool *space_available_now)
1909{
1910        return vmci_trans(vsk)->notify_ops->poll_out(
1911                        &vsk->sk, target, space_available_now);
1912}
1913
1914static int vmci_transport_notify_recv_init(
1915        struct vsock_sock *vsk,
1916        size_t target,
1917        struct vsock_transport_recv_notify_data *data)
1918{
1919        return vmci_trans(vsk)->notify_ops->recv_init(
1920                        &vsk->sk, target,
1921                        (struct vmci_transport_recv_notify_data *)data);
1922}
1923
1924static int vmci_transport_notify_recv_pre_block(
1925        struct vsock_sock *vsk,
1926        size_t target,
1927        struct vsock_transport_recv_notify_data *data)
1928{
1929        return vmci_trans(vsk)->notify_ops->recv_pre_block(
1930                        &vsk->sk, target,
1931                        (struct vmci_transport_recv_notify_data *)data);
1932}
1933
1934static int vmci_transport_notify_recv_pre_dequeue(
1935        struct vsock_sock *vsk,
1936        size_t target,
1937        struct vsock_transport_recv_notify_data *data)
1938{
1939        return vmci_trans(vsk)->notify_ops->recv_pre_dequeue(
1940                        &vsk->sk, target,
1941                        (struct vmci_transport_recv_notify_data *)data);
1942}
1943
1944static int vmci_transport_notify_recv_post_dequeue(
1945        struct vsock_sock *vsk,
1946        size_t target,
1947        ssize_t copied,
1948        bool data_read,
1949        struct vsock_transport_recv_notify_data *data)
1950{
1951        return vmci_trans(vsk)->notify_ops->recv_post_dequeue(
1952                        &vsk->sk, target, copied, data_read,
1953                        (struct vmci_transport_recv_notify_data *)data);
1954}
1955
1956static int vmci_transport_notify_send_init(
1957        struct vsock_sock *vsk,
1958        struct vsock_transport_send_notify_data *data)
1959{
1960        return vmci_trans(vsk)->notify_ops->send_init(
1961                        &vsk->sk,
1962                        (struct vmci_transport_send_notify_data *)data);
1963}
1964
1965static int vmci_transport_notify_send_pre_block(
1966        struct vsock_sock *vsk,
1967        struct vsock_transport_send_notify_data *data)
1968{
1969        return vmci_trans(vsk)->notify_ops->send_pre_block(
1970                        &vsk->sk,
1971                        (struct vmci_transport_send_notify_data *)data);
1972}
1973
1974static int vmci_transport_notify_send_pre_enqueue(
1975        struct vsock_sock *vsk,
1976        struct vsock_transport_send_notify_data *data)
1977{
1978        return vmci_trans(vsk)->notify_ops->send_pre_enqueue(
1979                        &vsk->sk,
1980                        (struct vmci_transport_send_notify_data *)data);
1981}
1982
1983static int vmci_transport_notify_send_post_enqueue(
1984        struct vsock_sock *vsk,
1985        ssize_t written,
1986        struct vsock_transport_send_notify_data *data)
1987{
1988        return vmci_trans(vsk)->notify_ops->send_post_enqueue(
1989                        &vsk->sk, written,
1990                        (struct vmci_transport_send_notify_data *)data);
1991}
1992
1993static bool vmci_transport_old_proto_override(bool *old_pkt_proto)
1994{
1995        if (PROTOCOL_OVERRIDE != -1) {
1996                if (PROTOCOL_OVERRIDE == 0)
1997                        *old_pkt_proto = true;
1998                else
1999                        *old_pkt_proto = false;
2000
2001                pr_info("Proto override in use\n");
2002                return true;
2003        }
2004
2005        return false;
2006}
2007
2008static bool vmci_transport_proto_to_notify_struct(struct sock *sk,
2009                                                  u16 *proto,
2010                                                  bool old_pkt_proto)
2011{
2012        struct vsock_sock *vsk = vsock_sk(sk);
2013
2014        if (old_pkt_proto) {
2015                if (*proto != VSOCK_PROTO_INVALID) {
2016                        pr_err("Can't set both an old and new protocol\n");
2017                        return false;
2018                }
2019                vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops;
2020                goto exit;
2021        }
2022
2023        switch (*proto) {
2024        case VSOCK_PROTO_PKT_ON_NOTIFY:
2025                vmci_trans(vsk)->notify_ops =
2026                        &vmci_transport_notify_pkt_q_state_ops;
2027                break;
2028        default:
2029                pr_err("Unknown notify protocol version\n");
2030                return false;
2031        }
2032
2033exit:
2034        vmci_trans(vsk)->notify_ops->socket_init(sk);
2035        return true;
2036}
2037
2038static u16 vmci_transport_new_proto_supported_versions(void)
2039{
2040        if (PROTOCOL_OVERRIDE != -1)
2041                return PROTOCOL_OVERRIDE;
2042
2043        return VSOCK_PROTO_ALL_SUPPORTED;
2044}
2045
2046static u32 vmci_transport_get_local_cid(void)
2047{
2048        return vmci_get_context_id();
2049}
2050
2051static const struct vsock_transport vmci_transport = {
2052        .init = vmci_transport_socket_init,
2053        .destruct = vmci_transport_destruct,
2054        .release = vmci_transport_release,
2055        .connect = vmci_transport_connect,
2056        .dgram_bind = vmci_transport_dgram_bind,
2057        .dgram_dequeue = vmci_transport_dgram_dequeue,
2058        .dgram_enqueue = vmci_transport_dgram_enqueue,
2059        .dgram_allow = vmci_transport_dgram_allow,
2060        .stream_dequeue = vmci_transport_stream_dequeue,
2061        .stream_enqueue = vmci_transport_stream_enqueue,
2062        .stream_has_data = vmci_transport_stream_has_data,
2063        .stream_has_space = vmci_transport_stream_has_space,
2064        .stream_rcvhiwat = vmci_transport_stream_rcvhiwat,
2065        .stream_is_active = vmci_transport_stream_is_active,
2066        .stream_allow = vmci_transport_stream_allow,
2067        .notify_poll_in = vmci_transport_notify_poll_in,
2068        .notify_poll_out = vmci_transport_notify_poll_out,
2069        .notify_recv_init = vmci_transport_notify_recv_init,
2070        .notify_recv_pre_block = vmci_transport_notify_recv_pre_block,
2071        .notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue,
2072        .notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue,
2073        .notify_send_init = vmci_transport_notify_send_init,
2074        .notify_send_pre_block = vmci_transport_notify_send_pre_block,
2075        .notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue,
2076        .notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue,
2077        .shutdown = vmci_transport_shutdown,
2078        .set_buffer_size = vmci_transport_set_buffer_size,
2079        .set_min_buffer_size = vmci_transport_set_min_buffer_size,
2080        .set_max_buffer_size = vmci_transport_set_max_buffer_size,
2081        .get_buffer_size = vmci_transport_get_buffer_size,
2082        .get_min_buffer_size = vmci_transport_get_min_buffer_size,
2083        .get_max_buffer_size = vmci_transport_get_max_buffer_size,
2084        .get_local_cid = vmci_transport_get_local_cid,
2085};
2086
2087static int __init vmci_transport_init(void)
2088{
2089        int err;
2090
2091        /* Create the datagram handle that we will use to send and receive all
2092         * VSocket control messages for this context.
2093         */
2094        err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID,
2095                                                 VMCI_FLAG_ANYCID_DG_HND,
2096                                                 vmci_transport_recv_stream_cb,
2097                                                 NULL,
2098                                                 &vmci_transport_stream_handle);
2099        if (err < VMCI_SUCCESS) {
2100                pr_err("Unable to create datagram handle. (%d)\n", err);
2101                return vmci_transport_error_to_vsock_error(err);
2102        }
2103
2104        err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED,
2105                                   vmci_transport_qp_resumed_cb,
2106                                   NULL, &vmci_transport_qp_resumed_sub_id);
2107        if (err < VMCI_SUCCESS) {
2108                pr_err("Unable to subscribe to resumed event. (%d)\n", err);
2109                err = vmci_transport_error_to_vsock_error(err);
2110                vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
2111                goto err_destroy_stream_handle;
2112        }
2113
2114        err = vsock_core_init(&vmci_transport);
2115        if (err < 0)
2116                goto err_unsubscribe;
2117
2118        return 0;
2119
2120err_unsubscribe:
2121        vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
2122err_destroy_stream_handle:
2123        vmci_datagram_destroy_handle(vmci_transport_stream_handle);
2124        return err;
2125}
2126module_init(vmci_transport_init);
2127
2128static void __exit vmci_transport_exit(void)
2129{
2130        cancel_work_sync(&vmci_transport_cleanup_work);
2131        vmci_transport_free_resources(&vmci_transport_cleanup_list);
2132
2133        if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) {
2134                if (vmci_datagram_destroy_handle(
2135                        vmci_transport_stream_handle) != VMCI_SUCCESS)
2136                        pr_err("Couldn't destroy datagram handle\n");
2137                vmci_transport_stream_handle = VMCI_INVALID_HANDLE;
2138        }
2139
2140        if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) {
2141                vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
2142                vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
2143        }
2144
2145        vsock_core_exit();
2146}
2147module_exit(vmci_transport_exit);
2148
2149MODULE_AUTHOR("VMware, Inc.");
2150MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
2151MODULE_VERSION("1.0.5.0-k");
2152MODULE_LICENSE("GPL v2");
2153MODULE_ALIAS("vmware_vsock");
2154MODULE_ALIAS_NETPROTO(PF_VSOCK);
2155