linux/net/vmw_vsock/vmci_transport.c
/*
 * VMware vSockets Driver
 *
 * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/cred.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/net.h>
#include <linux/poll.h>
#include <linux/skbuff.h>
#include <linux/smp.h>
#include <linux/socket.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <net/sock.h>

#include "af_vsock.h"
#include "vmci_transport_notify.h"

static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
static void vmci_transport_peer_attach_cb(u32 sub_id,
                                          const struct vmci_event_data *ed,
                                          void *client_data);
static void vmci_transport_peer_detach_cb(u32 sub_id,
                                          const struct vmci_event_data *ed,
                                          void *client_data);
static void vmci_transport_recv_pkt_work(struct work_struct *work);
static int vmci_transport_recv_listen(struct sock *sk,
                                      struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connecting_server(
                                        struct sock *sk,
                                        struct sock *pending,
                                        struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connecting_client(
                                        struct sock *sk,
                                        struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connecting_client_negotiate(
                                        struct sock *sk,
                                        struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connecting_client_invalid(
                                        struct sock *sk,
                                        struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connected(struct sock *sk,
                                         struct vmci_transport_packet *pkt);
static bool vmci_transport_old_proto_override(bool *old_pkt_proto);
static u16 vmci_transport_new_proto_supported_versions(void);
static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto,
                                                  bool old_pkt_proto);

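/* Work item used to defer packet handling from the VMCI datagram callback,
 * which runs in bottom-half context, to a work queue running in process
 * context.  The packet is copied and a reference to the socket is held until
 * vmci_transport_recv_pkt_work() runs.
 */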
struct vmci_transport_recv_pkt_info {
        struct work_struct work;
        struct sock *sk;
        struct vmci_transport_packet pkt;
};

static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
                                                           VMCI_INVALID_ID };
static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;

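/* Module-wide override of the protocol negotiation behaviour, consulted by
 * vmci_transport_old_proto_override(); -1 (the default) means no override is
 * in effect.
 */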
static int PROTOCOL_OVERRIDE = -1;

#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN   128
#define VMCI_TRANSPORT_DEFAULT_QP_SIZE       262144
#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX   262144

/* The default peer timeout indicates how long we will wait for a peer response
 * to a control message.
 */
#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)

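/* Private socket state for listening sockets; the value is deliberately
 * outside the range of the standard SS_* socket states (SS_FREE through
 * SS_DISCONNECTING).
 */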
#define SS_LISTEN 255

/* Helper function to convert from a VMCI error code to a VSock error code. */

static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
{
        int err;

        switch (vmci_error) {
        case VMCI_ERROR_NO_MEM:
                err = ENOMEM;
                break;
        case VMCI_ERROR_DUPLICATE_ENTRY:
        case VMCI_ERROR_ALREADY_EXISTS:
                err = EADDRINUSE;
                break;
        case VMCI_ERROR_NO_ACCESS:
                err = EPERM;
                break;
        case VMCI_ERROR_NO_RESOURCES:
                err = ENOBUFS;
                break;
        case VMCI_ERROR_INVALID_RESOURCE:
                err = EHOSTUNREACH;
                break;
        case VMCI_ERROR_INVALID_ARGS:
        default:
                err = EINVAL;
        }

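        /* The socket layer expects negative errno values, while the switch
         * above assigns positive ones, so negate on the way out.
         */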
        return err > 0 ? -err : err;
}

static u32 vmci_transport_peer_rid(u32 peer_cid)
{
        if (VMADDR_CID_HYPERVISOR == peer_cid)
                return VMCI_TRANSPORT_HYPERVISOR_PACKET_RID;

        return VMCI_TRANSPORT_PACKET_RID;
}

static inline void
vmci_transport_packet_init(struct vmci_transport_packet *pkt,
                           struct sockaddr_vm *src,
                           struct sockaddr_vm *dst,
                           u8 type,
                           u64 size,
                           u64 mode,
                           struct vmci_transport_waiting_info *wait,
                           u16 proto,
                           struct vmci_handle handle)
{
        /* We register the stream control handler as an any-CID handle, so we
         * must always send from a source address of VMADDR_CID_ANY.
         */
        pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY,
                                       VMCI_TRANSPORT_PACKET_RID);
        pkt->dg.dst = vmci_make_handle(dst->svm_cid,
                                       vmci_transport_peer_rid(dst->svm_cid));
        pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg);
        pkt->version = VMCI_TRANSPORT_PACKET_VERSION;
        pkt->type = type;
        pkt->src_port = src->svm_port;
        pkt->dst_port = dst->svm_port;
        memset(&pkt->proto, 0, sizeof(pkt->proto));
        memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2));

        switch (pkt->type) {
        case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
                pkt->u.size = 0;
                break;

        case VMCI_TRANSPORT_PACKET_TYPE_REQUEST:
        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
                pkt->u.size = size;
                break;

        case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
        case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
                pkt->u.handle = handle;
                break;

        case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
        case VMCI_TRANSPORT_PACKET_TYPE_READ:
        case VMCI_TRANSPORT_PACKET_TYPE_RST:
                pkt->u.size = 0;
                break;

        case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
                pkt->u.mode = mode;
                break;

        case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
        case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
                memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait));
                break;

        case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2:
        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
                pkt->u.size = size;
                pkt->proto = proto;
                break;
        }
}

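/* Derive the socket addresses from an incoming packet: the datagram's
 * destination is our local address and its source is the peer's address.
 */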
static inline void
vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt,
                                    struct sockaddr_vm *local,
                                    struct sockaddr_vm *remote)
{
        vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port);
        vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port);
}

static int
__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt,
                                  struct sockaddr_vm *src,
                                  struct sockaddr_vm *dst,
                                  enum vmci_transport_packet_type type,
                                  u64 size,
                                  u64 mode,
                                  struct vmci_transport_waiting_info *wait,
                                  u16 proto,
                                  struct vmci_handle handle,
                                  bool convert_error)
{
        int err;

        vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait,
                                   proto, handle);
        err = vmci_datagram_send(&pkt->dg);
        if (convert_error && (err < 0))
                return vmci_transport_error_to_vsock_error(err);

        return err;
}

static int
vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt,
                                      enum vmci_transport_packet_type type,
                                      u64 size,
                                      u64 mode,
                                      struct vmci_transport_waiting_info *wait,
                                      struct vmci_handle handle)
{
        struct vmci_transport_packet reply;
        struct sockaddr_vm src, dst;

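        /* Never reply to a RST, so two endpoints cannot end up exchanging
         * RSTs forever (see vmci_transport_recv_stream_cb()).
         */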
        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) {
                return 0;
        } else {
                vmci_transport_packet_get_addresses(pkt, &src, &dst);
                return __vmci_transport_send_control_pkt(&reply, &src, &dst,
                                                         type,
                                                         size, mode, wait,
                                                         VSOCK_PROTO_INVALID,
                                                         handle, true);
        }
}

static int
vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src,
                                   struct sockaddr_vm *dst,
                                   enum vmci_transport_packet_type type,
                                   u64 size,
                                   u64 mode,
                                   struct vmci_transport_waiting_info *wait,
                                   struct vmci_handle handle)
{
        /* Note that it is safe to use a single packet across all CPUs since
         * two tasklets of the same type are guaranteed to not ever run
         * simultaneously. If that ever changes, or VMCI stops using tasklets,
         * we can use per-cpu packets.
         */
        static struct vmci_transport_packet pkt;

        return __vmci_transport_send_control_pkt(&pkt, src, dst, type,
                                                 size, mode, wait,
                                                 VSOCK_PROTO_INVALID, handle,
                                                 false);
}

static int
vmci_transport_send_control_pkt(struct sock *sk,
                                enum vmci_transport_packet_type type,
                                u64 size,
                                u64 mode,
                                struct vmci_transport_waiting_info *wait,
                                u16 proto,
                                struct vmci_handle handle)
{
        struct vmci_transport_packet *pkt;
        struct vsock_sock *vsk;
        int err;

        vsk = vsock_sk(sk);

        if (!vsock_addr_bound(&vsk->local_addr))
                return -EINVAL;

        if (!vsock_addr_bound(&vsk->remote_addr))
                return -EINVAL;

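        /* Unlike the bottom-half send path above, we are in process context
         * here, so a sleeping GFP_KERNEL allocation is fine.
         */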
        pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
        if (!pkt)
                return -ENOMEM;

        err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr,
                                                &vsk->remote_addr, type, size,
                                                mode, wait, proto, handle,
                                                true);
        kfree(pkt);

        return err;
}

static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
                                        struct sockaddr_vm *src,
                                        struct vmci_transport_packet *pkt)
{
        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
                return 0;
        return vmci_transport_send_control_pkt_bh(
                                        dst, src,
                                        VMCI_TRANSPORT_PACKET_TYPE_RST, 0,
                                        0, NULL, VMCI_INVALID_HANDLE);
}

static int vmci_transport_send_reset(struct sock *sk,
                                     struct vmci_transport_packet *pkt)
{
        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
                return 0;
        return vmci_transport_send_control_pkt(sk,
                                        VMCI_TRANSPORT_PACKET_TYPE_RST,
                                        0, 0, NULL, VSOCK_PROTO_INVALID,
                                        VMCI_INVALID_HANDLE);
}

static int vmci_transport_send_negotiate(struct sock *sk, size_t size)
{
        return vmci_transport_send_control_pkt(
                                        sk,
                                        VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE,
                                        size, 0, NULL,
                                        VSOCK_PROTO_INVALID,
                                        VMCI_INVALID_HANDLE);
}

static int vmci_transport_send_negotiate2(struct sock *sk, size_t size,
                                          u16 version)
{
        return vmci_transport_send_control_pkt(
                                        sk,
                                        VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2,
                                        size, 0, NULL, version,
                                        VMCI_INVALID_HANDLE);
}

static int vmci_transport_send_qp_offer(struct sock *sk,
                                        struct vmci_handle handle)
{
        return vmci_transport_send_control_pkt(
                                        sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0,
                                        0, NULL,
                                        VSOCK_PROTO_INVALID, handle);
}

static int vmci_transport_send_attach(struct sock *sk,
                                      struct vmci_handle handle)
{
        return vmci_transport_send_control_pkt(
                                        sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH,
                                        0, 0, NULL, VSOCK_PROTO_INVALID,
                                        handle);
}

static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt)
{
        return vmci_transport_reply_control_pkt_fast(
                                                pkt,
                                                VMCI_TRANSPORT_PACKET_TYPE_RST,
                                                0, 0, NULL,
                                                VMCI_INVALID_HANDLE);
}

static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst,
                                          struct sockaddr_vm *src)
{
        return vmci_transport_send_control_pkt_bh(
                                        dst, src,
                                        VMCI_TRANSPORT_PACKET_TYPE_INVALID,
                                        0, 0, NULL, VMCI_INVALID_HANDLE);
}

int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
                                 struct sockaddr_vm *src)
{
        return vmci_transport_send_control_pkt_bh(
                                        dst, src,
                                        VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
                                        0, NULL, VMCI_INVALID_HANDLE);
}

int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
                                struct sockaddr_vm *src)
{
        return vmci_transport_send_control_pkt_bh(
                                        dst, src,
                                        VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
                                        0, NULL, VMCI_INVALID_HANDLE);
}

int vmci_transport_send_wrote(struct sock *sk)
{
        return vmci_transport_send_control_pkt(
                                        sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
                                        0, NULL, VSOCK_PROTO_INVALID,
                                        VMCI_INVALID_HANDLE);
}

int vmci_transport_send_read(struct sock *sk)
{
        return vmci_transport_send_control_pkt(
                                        sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
                                        0, NULL, VSOCK_PROTO_INVALID,
                                        VMCI_INVALID_HANDLE);
}

int vmci_transport_send_waiting_write(struct sock *sk,
                                      struct vmci_transport_waiting_info *wait)
{
        return vmci_transport_send_control_pkt(
                                sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE,
                                0, 0, wait, VSOCK_PROTO_INVALID,
                                VMCI_INVALID_HANDLE);
}

int vmci_transport_send_waiting_read(struct sock *sk,
                                     struct vmci_transport_waiting_info *wait)
{
        return vmci_transport_send_control_pkt(
                                sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ,
                                0, 0, wait, VSOCK_PROTO_INVALID,
                                VMCI_INVALID_HANDLE);
}

static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode)
{
        return vmci_transport_send_control_pkt(
                                        &vsk->sk,
                                        VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN,
                                        0, mode, NULL,
                                        VSOCK_PROTO_INVALID,
                                        VMCI_INVALID_HANDLE);
}

static int vmci_transport_send_conn_request(struct sock *sk, size_t size)
{
        return vmci_transport_send_control_pkt(sk,
                                        VMCI_TRANSPORT_PACKET_TYPE_REQUEST,
                                        size, 0, NULL,
                                        VSOCK_PROTO_INVALID,
                                        VMCI_INVALID_HANDLE);
}

static int vmci_transport_send_conn_request2(struct sock *sk, size_t size,
                                             u16 version)
{
        return vmci_transport_send_control_pkt(
                                        sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2,
                                        size, 0, NULL, version,
                                        VMCI_INVALID_HANDLE);
}

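/* Find the pending (not yet accepted) socket on the listener that matches the
 * packet's source address and destination port.  On success the returned
 * socket carries an extra reference; callers drop it with
 * vmci_transport_release_pending().
 */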
static struct sock *vmci_transport_get_pending(
                                        struct sock *listener,
                                        struct vmci_transport_packet *pkt)
{
        struct vsock_sock *vlistener;
        struct vsock_sock *vpending;
        struct sock *pending;
        struct sockaddr_vm src;

        vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);

        vlistener = vsock_sk(listener);

        list_for_each_entry(vpending, &vlistener->pending_links,
                            pending_links) {
                if (vsock_addr_equals_addr(&src, &vpending->remote_addr) &&
                    pkt->dst_port == vpending->local_addr.svm_port) {
                        pending = sk_vsock(vpending);
                        sock_hold(pending);
                        goto found;
                }
        }

        pending = NULL;
found:
        return pending;
}

static void vmci_transport_release_pending(struct sock *pending)
{
        sock_put(pending);
}

/* We allow two kinds of sockets to communicate with a restricted VM: 1)
 * trusted sockets, and 2) sockets from applications running as the same user
 * as the VM (the latter is only true for the host side and only when using
 * hosted products).
 */

static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid)
{
        return vsock->trusted ||
               vmci_is_context_owner(peer_cid, vsock->owner->uid);
}

/* We allow sending datagrams to and receiving datagrams from a restricted VM
 * only if it is trusted as described in vmci_transport_is_trusted.
 */

static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid)
{
        if (VMADDR_CID_HYPERVISOR == peer_cid)
                return true;

        if (vsock->cached_peer != peer_cid) {
                vsock->cached_peer = peer_cid;
                if (!vmci_transport_is_trusted(vsock, peer_cid) &&
                    (vmci_context_get_priv_flags(peer_cid) &
                     VMCI_PRIVILEGE_FLAG_RESTRICTED)) {
                        vsock->cached_peer_allow_dgram = false;
                } else {
                        vsock->cached_peer_allow_dgram = true;
                }
        }

        return vsock->cached_peer_allow_dgram;
}

static int
vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
                                struct vmci_handle *handle,
                                u64 produce_size,
                                u64 consume_size,
                                u32 peer, u32 flags, bool trusted)
{
        int err = 0;

        if (trusted) {
                /* Try to allocate our queue pair as trusted. This will only
                 * work if vsock is running in the host.
                 */

                err = vmci_qpair_alloc(qpair, handle, produce_size,
                                       consume_size,
                                       peer, flags,
                                       VMCI_PRIVILEGE_FLAG_TRUSTED);
                if (err != VMCI_ERROR_NO_ACCESS)
                        goto out;
        }

        err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size,
                               peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
out:
        if (err < 0) {
                pr_err("Could not attach to queue pair with %d\n", err);
                err = vmci_transport_error_to_vsock_error(err);
        }

        return err;
}

static int
vmci_transport_datagram_create_hnd(u32 resource_id,
                                   u32 flags,
                                   vmci_datagram_recv_cb recv_cb,
                                   void *client_data,
                                   struct vmci_handle *out_handle)
{
        int err = 0;

        /* Try to allocate our datagram handler as trusted. This will only work
         * if vsock is running in the host.
         */

        err = vmci_datagram_create_handle_priv(resource_id, flags,
                                               VMCI_PRIVILEGE_FLAG_TRUSTED,
                                               recv_cb,
                                               client_data, out_handle);

        if (err == VMCI_ERROR_NO_ACCESS)
                err = vmci_datagram_create_handle(resource_id, flags,
                                                  recv_cb, client_data,
                                                  out_handle);

        return err;
}

/* This is invoked as part of a tasklet that's scheduled when the VMCI
 * interrupt fires.  This is run in bottom-half context and if it ever needs to
 * sleep it should defer that work to a work queue.
 */

static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
{
        struct sock *sk;
        size_t size;
        struct sk_buff *skb;
        struct vsock_sock *vsk;

        sk = (struct sock *)data;

        /* This handler is privileged when this module is running on the host.
         * We will get datagrams from all endpoints (even VMs that are in a
         * restricted context). If we get one from a restricted context then
         * the destination socket must be trusted.
         *
         * NOTE: We access the socket struct without holding the lock here.
         * This is ok because the field we are interested in is never modified
         * outside of the create and destruct socket functions.
         */
        vsk = vsock_sk(sk);
        if (!vmci_transport_allow_dgram(vsk, dg->src.context))
                return VMCI_ERROR_NO_ACCESS;

        size = VMCI_DG_SIZE(dg);

        /* Attach the packet to the socket's receive queue as an sk_buff. */
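        /* Note that the skb carries the entire datagram, VMCI header
         * included; the datagram receive path is expected to strip that
         * header before handing the payload to userspace.
         */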
        skb = alloc_skb(size, GFP_ATOMIC);
        if (skb) {
                /* sk_receive_skb() will do a sock_put(), so hold here. */
                sock_hold(sk);
                skb_put(skb, size);
                memcpy(skb->data, dg, size);
                sk_receive_skb(sk, skb, 0);
        }

        return VMCI_SUCCESS;
}

static bool vmci_transport_stream_allow(u32 cid, u32 port)
{
        static const u32 non_socket_contexts[] = {
                VMADDR_CID_RESERVED,
        };
        int i;

        BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts));

        for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) {
                if (cid == non_socket_contexts[i])
                        return false;
        }

        return true;
}

/* This is invoked as part of a tasklet that's scheduled when the VMCI
 * interrupt fires.  This is run in bottom-half context but it defers most of
 * its work to the packet handling work queue.
 */

static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
{
        struct sock *sk;
        struct sockaddr_vm dst;
        struct sockaddr_vm src;
        struct vmci_transport_packet *pkt;
        struct vsock_sock *vsk;
        bool bh_process_pkt;
        int err;

        sk = NULL;
        err = VMCI_SUCCESS;
        bh_process_pkt = false;

        /* Ignore incoming packets from contexts without sockets, or resources
         * that aren't vsock implementations.
         */

        if (!vmci_transport_stream_allow(dg->src.context, -1)
            || vmci_transport_peer_rid(dg->src.context) != dg->src.resource)
                return VMCI_ERROR_NO_ACCESS;

        if (VMCI_DG_SIZE(dg) < sizeof(*pkt))
                /* Drop datagrams that do not contain full VSock packets. */
                return VMCI_ERROR_INVALID_ARGS;

        pkt = (struct vmci_transport_packet *)dg;

        /* Find the socket that should handle this packet.  First we look for a
         * connected socket and if there is none we look for a socket bound to
         * the destination address.
         */
        vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
        vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);

        sk = vsock_find_connected_socket(&src, &dst);
        if (!sk) {
                sk = vsock_find_bound_socket(&dst);
                if (!sk) {
                        /* We could not find a socket for this specified
                         * address.  If this packet is a RST, we just drop it.
                         * If it is another packet, we send a RST.  Note that
                         * we do not send a RST reply to RSTs so that we do not
                         * continually send RSTs between two endpoints.
                         *
                         * Note that since this is a reply, dst is src and src
                         * is dst.
                         */
                        if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
                                pr_err("unable to send reset\n");

                        err = VMCI_ERROR_NOT_FOUND;
                        goto out;
                }
        }

        /* If the received packet type is beyond all types known to this
         * implementation, reply with an invalid message.  Hopefully this will
         * help when implementing backwards compatibility in the future.
         */
        if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) {
                vmci_transport_send_invalid_bh(&dst, &src);
                err = VMCI_ERROR_INVALID_ARGS;
                goto out;
        }

        /* This handler is privileged when this module is running on the host.
         * We will get datagram connect requests from all endpoints (even VMs
         * that are in a restricted context). If we get one from a restricted
         * context then the destination socket must be trusted.
         *
         * NOTE: We access the socket struct without holding the lock here.
         * This is ok because the field we are interested in is never modified
         * outside of the create and destruct socket functions.
         */
        vsk = vsock_sk(sk);
        if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) {
                err = VMCI_ERROR_NO_ACCESS;
                goto out;
        }

        /* We do almost everything in a work queue, but let's fast-path the
         * notification of reads and writes to help data transfer performance.
         * We can only do this if there is no process-context code executing
         * for this socket since that may change the state.
         */
        bh_lock_sock(sk);

        if (!sock_owned_by_user(sk)) {
                /* The local context ID may be out of date, update it. */
                vsk->local_addr.svm_cid = dst.svm_cid;

                if (sk->sk_state == SS_CONNECTED)
                        vmci_trans(vsk)->notify_ops->handle_notify_pkt(
                                        sk, pkt, true, &dst, &src,
                                        &bh_process_pkt);
        }

        bh_unlock_sock(sk);

        if (!bh_process_pkt) {
                struct vmci_transport_recv_pkt_info *recv_pkt_info;

                recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC);
                if (!recv_pkt_info) {
                        if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
                                pr_err("unable to send reset\n");

                        err = VMCI_ERROR_NO_MEM;
                        goto out;
                }

                recv_pkt_info->sk = sk;
                memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt));
                INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work);

                schedule_work(&recv_pkt_info->work);
                /* Clear sk so that the reference count incremented by one of
                 * the find functions above is not decremented below.  We need
                 * that reference count for the packet handler we've scheduled
                 * to run.
                 */
                sk = NULL;
        }

out:
        if (sk)
                sock_put(sk);

        return err;
}

static void vmci_transport_peer_attach_cb(u32 sub_id,
                                          const struct vmci_event_data *e_data,
                                          void *client_data)
{
        struct sock *sk = client_data;
        const struct vmci_event_payload_qp *e_payload;
        struct vsock_sock *vsk;

        e_payload = vmci_event_data_const_payload(e_data);

        vsk = vsock_sk(sk);

        /* We don't ask for delayed CBs when we subscribe to this event (we
         * pass 0 as flags to vmci_event_subscribe()).  VMCI makes no
         * guarantees in that case about what context we might be running in,
         * so it could be BH or process, blockable or non-blockable.  So we
         * need to account for all possible contexts here.
         */
        local_bh_disable();
        bh_lock_sock(sk);

        /* XXX This is lame, we should provide a way to look up sockets by
         * qp_handle.
         */
        if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
                                 e_payload->handle)) {
                /* XXX This doesn't do anything, but in the future we may want
                 * to set a flag here to verify the attach really did occur and
                 * we weren't just sent a datagram claiming it was.
                 */
                goto out;
        }

out:
        bh_unlock_sock(sk);
        local_bh_enable();
}

static void vmci_transport_handle_detach(struct sock *sk)
{
        struct vsock_sock *vsk;

        vsk = vsock_sk(sk);
        if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
                sock_set_flag(sk, SOCK_DONE);

                /* On a detach the peer will not be sending or receiving
                 * anymore.
                 */
                vsk->peer_shutdown = SHUTDOWN_MASK;

                /* We should not be sending anymore since the peer won't be
                 * there to receive, but we can still receive if there is data
                 * left in our consume queue.
                 */
                if (vsock_stream_has_data(vsk) <= 0) {
                        if (sk->sk_state == SS_CONNECTING) {
                                /* The peer may detach from a queue pair while
                                 * we are still in the connecting state, i.e.,
                                 * if the peer VM is killed after attaching to
                                 * a queue pair, but before we complete the
                                 * handshake. In that case, we treat the detach
                                 * event like a reset.
                                 */

                                sk->sk_state = SS_UNCONNECTED;
                                sk->sk_err = ECONNRESET;
                                sk->sk_error_report(sk);
                                return;
                        }
                        sk->sk_state = SS_UNCONNECTED;
                }
                sk->sk_state_change(sk);
        }
}

static void vmci_transport_peer_detach_cb(u32 sub_id,
                                          const struct vmci_event_data *e_data,
                                          void *client_data)
{
        struct sock *sk = client_data;
        const struct vmci_event_payload_qp *e_payload;
        struct vsock_sock *vsk;

        e_payload = vmci_event_data_const_payload(e_data);
        vsk = vsock_sk(sk);
        if (vmci_handle_is_invalid(e_payload->handle))
                return;

        /* Same rules for locking as for peer_attach_cb(). */
        local_bh_disable();
        bh_lock_sock(sk);

        /* XXX This is lame, we should provide a way to look up sockets by
         * qp_handle.
         */
        if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
                                 e_payload->handle))
                vmci_transport_handle_detach(sk);

        bh_unlock_sock(sk);
        local_bh_enable();
}

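/* A queue pair resume event means this context (e.g. a VM that was suspended
 * or migrated) has been given new VMCI resources, so any queue pairs attached
 * before the event are gone.  Treat every connected socket as if its peer had
 * detached.
 */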
static void vmci_transport_qp_resumed_cb(u32 sub_id,
                                         const struct vmci_event_data *e_data,
                                         void *client_data)
{
        vsock_for_each_connected_socket(vmci_transport_handle_detach);
}

static void vmci_transport_recv_pkt_work(struct work_struct *work)
{
        struct vmci_transport_recv_pkt_info *recv_pkt_info;
        struct vmci_transport_packet *pkt;
        struct sock *sk;

        recv_pkt_info =
                container_of(work, struct vmci_transport_recv_pkt_info, work);
        sk = recv_pkt_info->sk;
        pkt = &recv_pkt_info->pkt;

        lock_sock(sk);

        /* The local context ID may be out of date. */
        vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;

        switch (sk->sk_state) {
        case SS_LISTEN:
                vmci_transport_recv_listen(sk, pkt);
                break;
        case SS_CONNECTING:
                /* Processing of pending connections for servers goes through
                 * the listening socket, so see vmci_transport_recv_listen()
                 * for that path.
                 */
                vmci_transport_recv_connecting_client(sk, pkt);
                break;
        case SS_CONNECTED:
                vmci_transport_recv_connected(sk, pkt);
                break;
        default:
                /* Because this function does not run in the same context as
                 * vmci_transport_recv_stream_cb it is possible that the
                 * socket has closed. We need to let the other side know or it
                 * could be sitting in a connect and hang forever. Send a
                 * reset to prevent that.
                 */
                vmci_transport_send_reset(sk, pkt);
                goto out;
        }

out:
        release_sock(sk);
        kfree(recv_pkt_info);
        /* Release reference obtained in the stream callback when we fetched
         * this socket out of the bound or connected list.
         */
        sock_put(sk);
}

static int vmci_transport_recv_listen(struct sock *sk,
                                      struct vmci_transport_packet *pkt)
{
        struct sock *pending;
        struct vsock_sock *vpending;
        int err;
        u64 qp_size;
        bool old_request = false;
        bool old_pkt_proto = false;

        err = 0;

        /* Because we are in the listen state, we could be receiving a packet
         * for ourselves or for any previous connection requests that we
         * received.  If it's the latter, we try to find a socket in our list
         * of pending connections and, if we do, call the appropriate handler
         * for the state that socket is in.  Otherwise we try to service the
         * connection request.
         */
        pending = vmci_transport_get_pending(sk, pkt);
        if (pending) {
                lock_sock(pending);

                /* The local context ID may be out of date. */
                vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context;

                switch (pending->sk_state) {
                case SS_CONNECTING:
                        err = vmci_transport_recv_connecting_server(sk,
                                                                    pending,
                                                                    pkt);
                        break;
                default:
                        vmci_transport_send_reset(pending, pkt);
                        err = -EINVAL;
                }

                if (err < 0)
                        vsock_remove_pending(sk, pending);

                release_sock(pending);
                vmci_transport_release_pending(pending);

                return err;
        }

        /* The listen state only accepts connection requests.  Reply with a
         * reset unless we received a reset.
         */
        if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST ||
              pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) {
                vmci_transport_reply_reset(pkt);
                return -EINVAL;
        }

        if (pkt->u.size == 0) {
                vmci_transport_reply_reset(pkt);
                return -EINVAL;
        }

        /* If this socket can't accommodate this connection request, we send a
         * reset.  Otherwise we create and initialize a child socket and reply
         * with a connection negotiation.
         */
        if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) {
                vmci_transport_reply_reset(pkt);
                return -ECONNREFUSED;
        }

        pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
                                 sk->sk_type);
        if (!pending) {
                vmci_transport_send_reset(sk, pkt);
                return -ENOMEM;
        }

        vpending = vsock_sk(pending);

        vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context,
                        pkt->dst_port);
        vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context,
                        pkt->src_port);

        /* If the proposed size fits within our min/max, accept it. Otherwise
         * propose our own size.
         */
        if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size &&
            pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) {
                qp_size = pkt->u.size;
        } else {
                qp_size = vmci_trans(vpending)->queue_pair_size;
        }

        /* Figure out if we are using old or new requests based on any
         * override and on the packet types sent by our peer.
         */
        if (vmci_transport_old_proto_override(&old_pkt_proto)) {
                old_request = old_pkt_proto;
        } else {
                if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST)
                        old_request = true;
                else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)
                        old_request = false;
        }

        if (old_request) {
                /* Handle a REQUEST (or override) */
                u16 version = VSOCK_PROTO_INVALID;
                if (vmci_transport_proto_to_notify_struct(
                        pending, &version, true))
                        err = vmci_transport_send_negotiate(pending, qp_size);
                else
                        err = -EINVAL;
        } else {
                /* Handle a REQUEST2 (or override) */
                int proto_int = pkt->proto;
                int pos;
                u16 active_proto_version = 0;

                /* The list of possible protocols is the intersection of the
                 * protocols the client supports and the protocols we support.
                 */
                proto_int &= vmci_transport_new_proto_supported_versions();

                /* We choose the highest possible protocol version and use that
                 * one.
                 */
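                /* fls() returns the 1-based index of the highest set bit,
                 * e.g. fls(0b0110) == 3, so the version chosen below would
                 * be 1 << 2 == 0b0100.
                 */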
                pos = fls(proto_int);
                if (pos) {
                        active_proto_version = (1 << (pos - 1));
                        if (vmci_transport_proto_to_notify_struct(
                                pending, &active_proto_version, false))
                                err = vmci_transport_send_negotiate2(pending,
                                                        qp_size,
                                                        active_proto_version);
                        else
                                err = -EINVAL;
                } else {
                        err = -EINVAL;
                }
        }

        if (err < 0) {
                vmci_transport_send_reset(sk, pkt);
                sock_put(pending);
                err = vmci_transport_error_to_vsock_error(err);
                goto out;
        }

        vsock_add_pending(sk, pending);
        sk->sk_ack_backlog++;

        pending->sk_state = SS_CONNECTING;
        vmci_trans(vpending)->produce_size =
                vmci_trans(vpending)->consume_size = qp_size;
        vmci_trans(vpending)->queue_pair_size = qp_size;

        vmci_trans(vpending)->notify_ops->process_request(pending);

        /* We might never receive another message for this socket and it's not
         * connected to any process, so we have to ensure it gets cleaned up
         * ourselves.  Our delayed work function will take care of that.  Note
         * that we do not ever cancel this function since we have few
         * guarantees about its state when calling cancel_delayed_work().
         * Instead we hold a reference on the socket for that function and make
         * it capable of handling cases where it needs to do nothing but
         * release that reference.
         */
        vpending->listener = sk;
        sock_hold(sk);
        sock_hold(pending);
        INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work);
        schedule_delayed_work(&vpending->dwork, HZ);

out:
        return err;
}

static int
vmci_transport_recv_connecting_server(struct sock *listener,
                                      struct sock *pending,
                                      struct vmci_transport_packet *pkt)
{
        struct vsock_sock *vpending;
        struct vmci_handle handle;
        struct vmci_qp *qpair;
        bool is_local;
        u32 flags;
        u32 detach_sub_id;
        int err;
        int skerr;

        vpending = vsock_sk(pending);
        detach_sub_id = VMCI_INVALID_ID;

        switch (pkt->type) {
        case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
                if (vmci_handle_is_invalid(pkt->u.handle)) {
                        vmci_transport_send_reset(pending, pkt);
                        skerr = EPROTO;
                        err = -EINVAL;
                        goto destroy;
                }
                break;
        default:
                /* Close and clean up the connection. */
                vmci_transport_send_reset(pending, pkt);
                skerr = EPROTO;
                err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL;
                goto destroy;
        }

        /* In order to complete the connection we need to attach to the offered
         * queue pair and send an attach notification.  We also subscribe to the
         * detach event so we know when our peer goes away, and we do that
         * before attaching so we don't miss an event.  If all this succeeds,
         * we update our state and wake up anything waiting in accept() for a
         * connection.
         */

        /* We don't care about attach since we ensure the other side has
         * attached by specifying the ATTACH_ONLY flag below.
         */
        err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
                                   vmci_transport_peer_detach_cb,
                                   pending, &detach_sub_id);
        if (err < VMCI_SUCCESS) {
                vmci_transport_send_reset(pending, pkt);
                err = vmci_transport_error_to_vsock_error(err);
                skerr = -err;
                goto destroy;
        }

        vmci_trans(vpending)->detach_sub_id = detach_sub_id;

        /* Now attach to the queue pair the client created. */
        handle = pkt->u.handle;

        /* vpending->local_addr always has a context id so we do not need to
         * worry about VMADDR_CID_ANY in this case.
         */
        is_local =
            vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid;
        flags = VMCI_QPFLAG_ATTACH_ONLY;
        flags |= is_local ? VMCI_QPFLAG_LOCAL : 0;

        err = vmci_transport_queue_pair_alloc(
                                        &qpair,
                                        &handle,
                                        vmci_trans(vpending)->produce_size,
                                        vmci_trans(vpending)->consume_size,
                                        pkt->dg.src.context,
                                        flags,
                                        vmci_transport_is_trusted(
                                                vpending,
                                                vpending->remote_addr.svm_cid));
        if (err < 0) {
                vmci_transport_send_reset(pending, pkt);
                skerr = -err;
                goto destroy;
        }

        vmci_trans(vpending)->qp_handle = handle;
        vmci_trans(vpending)->qpair = qpair;

        /* When we send the attach message, we must be ready to handle incoming
         * control messages on the newly connected socket. So we move the
         * pending socket to the connected state before sending the attach
         * message. Otherwise, an incoming packet triggered by the attach being
         * received by the peer may be processed concurrently with what happens
         * below after sending the attach message, and that incoming packet
         * will find the listening socket instead of the (currently) pending
         * socket. Note that enqueueing the socket increments the reference
         * count, so even if a reset comes before the connection is accepted,
         * the socket will be valid until it is removed from the queue.
         *
         * If we fail sending the attach below, we remove the socket from the
         * connected list and move the socket to SS_UNCONNECTED before
         * releasing the lock, so a pending slow path processing of an incoming
         * packet will not see the socket in the connected state in that case.
         */
        pending->sk_state = SS_CONNECTED;

        vsock_insert_connected(vpending);

        /* Notify our peer of our attach. */
        err = vmci_transport_send_attach(pending, handle);
        if (err < 0) {
                vsock_remove_connected(vpending);
                pr_err("Could not send attach\n");
                vmci_transport_send_reset(pending, pkt);
                err = vmci_transport_error_to_vsock_error(err);
                skerr = -err;
                goto destroy;
        }

        /* We have a connection. Move the now connected socket from the
         * listener's pending list to the accept queue so callers of accept()
         * can find it.
         */
        vsock_remove_pending(listener, pending);
        vsock_enqueue_accept(listener, pending);

        /* Callers of accept() will be waiting on the listening socket, not
         * the pending socket.
         */
        listener->sk_state_change(listener);

        return 0;

destroy:
        pending->sk_err = skerr;
        pending->sk_state = SS_UNCONNECTED;
        /* As long as we drop our reference, all necessary cleanup will happen
         * when the cleanup function drops its reference and our destruct
         * implementation is called.  Note that since the listen handler will
         * remove pending from the pending list upon our failure, the cleanup
         * function won't drop the additional reference, which is why we do it
         * here.
         */
        sock_put(pending);

        return err;
}

static int
vmci_transport_recv_connecting_client(struct sock *sk,
                                      struct vmci_transport_packet *pkt)
{
        struct vsock_sock *vsk;
        int err;
        int skerr;

        vsk = vsock_sk(sk);

        switch (pkt->type) {
        case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
                if (vmci_handle_is_invalid(pkt->u.handle) ||
                    !vmci_handle_is_equal(pkt->u.handle,
                                          vmci_trans(vsk)->qp_handle)) {
                        skerr = EPROTO;
                        err = -EINVAL;
                        goto destroy;
                }

                /* Signify the socket is connected and wake up the waiter in
                 * connect(). Also place the socket in the connected table for
                 * accounting (it can already be found since it's in the bound
                 * table).
                 */
                sk->sk_state = SS_CONNECTED;
                sk->sk_socket->state = SS_CONNECTED;
                vsock_insert_connected(vsk);
                sk->sk_state_change(sk);

                break;
        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
                if (pkt->u.size == 0
                    || pkt->dg.src.context != vsk->remote_addr.svm_cid
                    || pkt->src_port != vsk->remote_addr.svm_port
                    || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)
                    || vmci_trans(vsk)->qpair
                    || vmci_trans(vsk)->produce_size != 0
                    || vmci_trans(vsk)->consume_size != 0
                    || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID
                    || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
                        skerr = EPROTO;
                        err = -EINVAL;
                        goto destroy;
                }

                err = vmci_transport_recv_connecting_client_negotiate(sk, pkt);
                if (err) {
                        skerr = -err;
                        goto destroy;
                }

                break;
        case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
                err = vmci_transport_recv_connecting_client_invalid(sk, pkt);
                if (err) {
                        skerr = -err;
                        goto destroy;
                }

                break;
        case VMCI_TRANSPORT_PACKET_TYPE_RST:
1348                /* Older versions of the linux code (WS 6.5 / ESX 4.0) used to
1349                 * continue processing here after they sent an INVALID packet.
1350                 * This meant that we got a RST after the INVALID. We ignore a
1351                 * RST after an INVALID. The common code doesn't send the RST
1352                 * ... so we can hang if an old version of the common code
1353                 * fails between getting a REQUEST and sending an OFFER back.
1354                 * Not much we can do about it... except hope that it doesn't
1355                 * happen.
1356                 */
1357                if (vsk->ignore_connecting_rst) {
1358                        vsk->ignore_connecting_rst = false;
1359                } else {
1360                        skerr = ECONNRESET;
1361                        err = 0;
1362                        goto destroy;
1363                }
1364
1365                break;
1366        default:
1367                /* Close and cleanup the connection. */
1368                skerr = EPROTO;
1369                err = -EINVAL;
1370                goto destroy;
1371        }
1372
1373        return 0;
1374
1375destroy:
1376        vmci_transport_send_reset(sk, pkt);
1377
1378        sk->sk_state = SS_UNCONNECTED;
1379        sk->sk_err = skerr;
1380        sk->sk_error_report(sk);
1381        return err;
1382}
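
/* Illustration added in editing (not part of the driver): the ATTACH case
 * above is what finally wakes a user-space connect(). A minimal sketch,
 * assuming <linux/vm_sockets.h> and a peer whose context id is 3 and which
 * listens on port 1234 (both values arbitrary):
 *
 *	struct sockaddr_vm remote = {
 *		.svm_family = AF_VSOCK,
 *		.svm_cid    = 3,
 *		.svm_port   = 1234,
 *	};
 *	int fd = socket(AF_VSOCK, SOCK_STREAM, 0);
 *
 *	if (connect(fd, (struct sockaddr *)&remote, sizeof(remote)) < 0)
 *		...the sk_err set by the destroy: path above surfaces here
 */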

static int vmci_transport_recv_connecting_client_negotiate(
					struct sock *sk,
					struct vmci_transport_packet *pkt)
{
	int err;
	struct vsock_sock *vsk;
	struct vmci_handle handle;
	struct vmci_qp *qpair;
	u32 attach_sub_id;
	u32 detach_sub_id;
	bool is_local;
	u32 flags;
	bool old_proto = true;
	bool old_pkt_proto;
	u16 version;

	vsk = vsock_sk(sk);
	handle = VMCI_INVALID_HANDLE;
	attach_sub_id = VMCI_INVALID_ID;
	detach_sub_id = VMCI_INVALID_ID;

	/* If we have gotten here then we should be past the point where old
	 * Linux vsock could have sent the bogus RST.
	 */
	vsk->sent_request = false;
	vsk->ignore_connecting_rst = false;

	/* Verify that we're OK with the proposed queue pair size. */
	if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size ||
	    pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) {
		err = -EINVAL;
		goto destroy;
	}

	/* At this point we know the CID the peer is using to talk to us. */
	if (vsk->local_addr.svm_cid == VMADDR_CID_ANY)
		vsk->local_addr.svm_cid = pkt->dg.dst.context;

	/* Set up the notify ops to be the highest supported version that both
	 * the server and the client support.
	 */
	if (vmci_transport_old_proto_override(&old_pkt_proto)) {
		old_proto = old_pkt_proto;
	} else {
		if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE)
			old_proto = true;
		else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2)
			old_proto = false;
	}

	if (old_proto)
		version = VSOCK_PROTO_INVALID;
	else
		version = pkt->proto;

	if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) {
		err = -EINVAL;
		goto destroy;
	}

	/* Subscribe to attach and detach events first.
	 *
	 * XXX We attach once for each queue pair created for now so it is
	 * easy to find the socket (it's provided), but later we should only
	 * subscribe once and add a way to look up sockets by queue pair
	 * handle.
	 */
	err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH,
				   vmci_transport_peer_attach_cb,
				   sk, &attach_sub_id);
	if (err < VMCI_SUCCESS) {
		err = vmci_transport_error_to_vsock_error(err);
		goto destroy;
	}

	err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
				   vmci_transport_peer_detach_cb,
				   sk, &detach_sub_id);
	if (err < VMCI_SUCCESS) {
		err = vmci_transport_error_to_vsock_error(err);
		goto destroy;
	}

	/* Make VMCI select the handle for us. */
	handle = VMCI_INVALID_HANDLE;
	is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid;
	flags = is_local ? VMCI_QPFLAG_LOCAL : 0;

	err = vmci_transport_queue_pair_alloc(&qpair,
					      &handle,
					      pkt->u.size,
					      pkt->u.size,
					      vsk->remote_addr.svm_cid,
					      flags,
					      vmci_transport_is_trusted(
						      vsk,
						      vsk->remote_addr.svm_cid));
	if (err < 0)
		goto destroy;

	err = vmci_transport_send_qp_offer(sk, handle);
	if (err < 0) {
		err = vmci_transport_error_to_vsock_error(err);
		goto destroy;
	}

	vmci_trans(vsk)->qp_handle = handle;
	vmci_trans(vsk)->qpair = qpair;

	vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size =
		pkt->u.size;

	vmci_trans(vsk)->attach_sub_id = attach_sub_id;
	vmci_trans(vsk)->detach_sub_id = detach_sub_id;

	vmci_trans(vsk)->notify_ops->process_negotiate(sk);

	return 0;

destroy:
	if (attach_sub_id != VMCI_INVALID_ID)
		vmci_event_unsubscribe(attach_sub_id);

	if (detach_sub_id != VMCI_INVALID_ID)
		vmci_event_unsubscribe(detach_sub_id);

	if (!vmci_handle_is_invalid(handle))
		vmci_qpair_detach(&qpair);

	return err;
}
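
/* Editor's note, summarizing the client-side handshake implemented above
 * (derived from this file; packet names are the VMCI_TRANSPORT_PACKET_TYPE_*
 * constants):
 *
 *	client				server
 *	------				------
 *	CONN_REQUEST/REQUEST2  ---->	(listen handler)
 *	                       <----	NEGOTIATE/NEGOTIATE2
 *	(allocate queue pair)
 *	OFFER                  ---->	(attach to queue pair)
 *	                       <----	ATTACH
 *	connect() returns
 */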

static int
vmci_transport_recv_connecting_client_invalid(struct sock *sk,
					      struct vmci_transport_packet *pkt)
{
	int err = 0;
	struct vsock_sock *vsk = vsock_sk(sk);

	if (vsk->sent_request) {
		vsk->sent_request = false;
		vsk->ignore_connecting_rst = true;

		err = vmci_transport_send_conn_request(
			sk, vmci_trans(vsk)->queue_pair_size);
		if (err < 0)
			err = vmci_transport_error_to_vsock_error(err);
		else
			err = 0;
	}

	return err;
}

static int vmci_transport_recv_connected(struct sock *sk,
					 struct vmci_transport_packet *pkt)
{
	struct vsock_sock *vsk;
	bool pkt_processed = false;

	/* In cases where we are closing the connection, it's sufficient to
	 * mark the state change (and maybe error) and wake up any waiting
	 * threads. Since this is a connected socket, it's owned by a user
	 * process and will be cleaned up when the failure is passed back on
	 * the current or next system call.  Our system call implementations
	 * must therefore check for error and state changes on entry and when
	 * being awoken.
	 */
	switch (pkt->type) {
	case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
		if (pkt->u.mode) {
			vsk = vsock_sk(sk);

			vsk->peer_shutdown |= pkt->u.mode;
			sk->sk_state_change(sk);
		}
		break;

	case VMCI_TRANSPORT_PACKET_TYPE_RST:
		vsk = vsock_sk(sk);
		/* It is possible that we sent our peer a message (e.g. a
		 * WAITING_READ) right before we got notified that the peer had
		 * detached. If that happens then we can get a RST pkt back
		 * from our peer even though there is data available for us to
		 * read. In that case, don't shut down the socket completely
		 * but instead allow the local client to finish reading data
		 * off the queue pair. Always treat a RST pkt in connected mode
		 * like a clean shutdown.
		 */
		sock_set_flag(sk, SOCK_DONE);
		vsk->peer_shutdown = SHUTDOWN_MASK;
		if (vsock_stream_has_data(vsk) <= 0)
			sk->sk_state = SS_DISCONNECTING;

		sk->sk_state_change(sk);
		break;

	default:
		vsk = vsock_sk(sk);
		vmci_trans(vsk)->notify_ops->handle_notify_pkt(
				sk, pkt, false, NULL, NULL,
				&pkt_processed);
		if (!pkt_processed)
			return -EINVAL;

		break;
	}

	return 0;
}
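
/* Illustration added in editing (not part of the driver): because a RST in
 * connected mode leaves queued data readable (see above), a user-space
 * reader should drain until read() returns 0 rather than bail on the first
 * state change. A minimal sketch, with fd an already-connected AF_VSOCK
 * stream socket and consume() a hypothetical placeholder:
 *
 *	char buf[4096];
 *	ssize_t n;
 *
 *	while ((n = read(fd, buf, sizeof(buf))) > 0)
 *		consume(buf, n);
 *	if (n == 0)
 *		...peer closed; any remaining data was delivered first
 */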

static int vmci_transport_socket_init(struct vsock_sock *vsk,
				      struct vsock_sock *psk)
{
	vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL);
	if (!vsk->trans)
		return -ENOMEM;

	vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
	vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
	vmci_trans(vsk)->qpair = NULL;
	vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0;
	vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id =
		VMCI_INVALID_ID;
	vmci_trans(vsk)->notify_ops = NULL;

	/* If this socket was cloned from a listening parent, inherit the
	 * parent's queue pair sizing; otherwise fall back to the defaults.
	 */
	if (psk) {
		vmci_trans(vsk)->queue_pair_size =
			vmci_trans(psk)->queue_pair_size;
		vmci_trans(vsk)->queue_pair_min_size =
			vmci_trans(psk)->queue_pair_min_size;
		vmci_trans(vsk)->queue_pair_max_size =
			vmci_trans(psk)->queue_pair_max_size;
	} else {
		vmci_trans(vsk)->queue_pair_size =
			VMCI_TRANSPORT_DEFAULT_QP_SIZE;
		vmci_trans(vsk)->queue_pair_min_size =
			VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN;
		vmci_trans(vsk)->queue_pair_max_size =
			VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX;
	}

	return 0;
}

static void vmci_transport_destruct(struct vsock_sock *vsk)
{
	if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) {
		vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id);
		vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID;
	}

	if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
		vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id);
		vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
	}

	if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
		vmci_qpair_detach(&vmci_trans(vsk)->qpair);
		vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
		vmci_trans(vsk)->produce_size = 0;
		vmci_trans(vsk)->consume_size = 0;
	}

	if (vmci_trans(vsk)->notify_ops)
		vmci_trans(vsk)->notify_ops->socket_destruct(vsk);

	kfree(vsk->trans);
	vsk->trans = NULL;
}

static void vmci_transport_release(struct vsock_sock *vsk)
{
	if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) {
		vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle);
		vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
	}
}

static int vmci_transport_dgram_bind(struct vsock_sock *vsk,
				     struct sockaddr_vm *addr)
{
	u32 port;
	u32 flags;
	int err;

	/* VMCI will select a resource ID for us if we provide
	 * VMCI_INVALID_ID.
	 */
	port = addr->svm_port == VMADDR_PORT_ANY ?
			VMCI_INVALID_ID : addr->svm_port;

	if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE))
		return -EACCES;

	flags = addr->svm_cid == VMADDR_CID_ANY ?
				VMCI_FLAG_ANYCID_DG_HND : 0;

	err = vmci_transport_datagram_create_hnd(port, flags,
						 vmci_transport_recv_dgram_cb,
						 &vsk->sk,
						 &vmci_trans(vsk)->dg_handle);
	if (err < VMCI_SUCCESS)
		return vmci_transport_error_to_vsock_error(err);

	vsock_addr_init(&vsk->local_addr, addr->svm_cid,
			vmci_trans(vsk)->dg_handle.resource);

	return 0;
}
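
/* Illustration added in editing (not part of the driver): binding an
 * AF_VSOCK datagram socket. With VMADDR_PORT_ANY the VMCI resource ID
 * chosen above becomes the local port. A minimal sketch, assuming
 * <linux/vm_sockets.h>:
 *
 *	struct sockaddr_vm local = {
 *		.svm_family = AF_VSOCK,
 *		.svm_cid    = VMADDR_CID_ANY,
 *		.svm_port   = VMADDR_PORT_ANY,	...kernel picks the port
 *	};
 *	int fd = socket(AF_VSOCK, SOCK_DGRAM, 0);
 *
 *	bind(fd, (struct sockaddr *)&local, sizeof(local));
 */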

static int vmci_transport_dgram_enqueue(
	struct vsock_sock *vsk,
	struct sockaddr_vm *remote_addr,
	struct iovec *iov,
	size_t len)
{
	int err;
	struct vmci_datagram *dg;

	if (len > VMCI_MAX_DG_PAYLOAD_SIZE)
		return -EMSGSIZE;

	if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid))
		return -EPERM;

	/* Allocate a buffer for the user's message and our packet header. */
	dg = kmalloc(len + sizeof(*dg), GFP_KERNEL);
	if (!dg)
		return -ENOMEM;

	/* memcpy_fromiovec() can fault on the user buffer; don't send a
	 * partially filled datagram if it does.
	 */
	err = memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), iov, len);
	if (err) {
		kfree(dg);
		return err;
	}

	dg->dst = vmci_make_handle(remote_addr->svm_cid,
				   remote_addr->svm_port);
	dg->src = vmci_make_handle(vsk->local_addr.svm_cid,
				   vsk->local_addr.svm_port);
	dg->payload_size = len;

	err = vmci_datagram_send(dg);
	kfree(dg);
	if (err < 0)
		return vmci_transport_error_to_vsock_error(err);

	/* vmci_datagram_send() counts the header; report only the payload. */
	return err - sizeof(*dg);
}
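
/* Illustration added in editing (not part of the driver): sending a
 * datagram to a peer; the cid/port values are arbitrary. Payloads larger
 * than VMCI_MAX_DG_PAYLOAD_SIZE fail with EMSGSIZE, as enforced above:
 *
 *	struct sockaddr_vm to = {
 *		.svm_family = AF_VSOCK,
 *		.svm_cid    = 3,
 *		.svm_port   = 9999,
 *	};
 *	const char msg[] = "ping";
 *
 *	sendto(fd, msg, sizeof(msg), 0,
 *	       (struct sockaddr *)&to, sizeof(to));
 */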

static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
					struct vsock_sock *vsk,
					struct msghdr *msg, size_t len,
					int flags)
{
	int err;
	int noblock;
	struct vmci_datagram *dg;
	size_t payload_len;
	struct sk_buff *skb;

	noblock = flags & MSG_DONTWAIT;

	if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
		return -EOPNOTSUPP;

	msg->msg_namelen = 0;

	/* Retrieve the head sk_buff from the socket's receive queue. */
	err = 0;
	skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
	if (err)
		return err;

	if (!skb)
		return -EAGAIN;

	dg = (struct vmci_datagram *)skb->data;
	if (!dg)
		/* err is 0, meaning we read zero bytes. */
		goto out;

	payload_len = dg->payload_size;

	/* Ensure the sk_buff matches the payload size claimed in the
	 * packet.
	 */
	if (payload_len != skb->len - sizeof(*dg)) {
		err = -EINVAL;
		goto out;
	}

	if (payload_len > len) {
		payload_len = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	/* Place the datagram payload in the user's iovec. */
	err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov,
				      payload_len);
	if (err)
		goto out;

	if (msg->msg_name) {
		struct sockaddr_vm *vm_addr;

		/* Provide the address of the sender. */
		vm_addr = (struct sockaddr_vm *)msg->msg_name;
		vsock_addr_init(vm_addr, dg->src.context, dg->src.resource);
		msg->msg_namelen = sizeof(*vm_addr);
	}
	err = payload_len;

out:
	skb_free_datagram(&vsk->sk, skb);
	return err;
}
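
/* Illustration added in editing (not part of the driver): receiving a
 * datagram and detecting truncation, which the code above signals via
 * MSG_TRUNC when the payload exceeds the caller's buffer:
 *
 *	struct sockaddr_vm from;
 *	char buf[512];
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	struct msghdr mh = {
 *		.msg_name    = &from,
 *		.msg_namelen = sizeof(from),
 *		.msg_iov     = &iov,
 *		.msg_iovlen  = 1,
 *	};
 *	ssize_t n = recvmsg(fd, &mh, 0);
 *
 *	if (n >= 0 && (mh.msg_flags & MSG_TRUNC))
 *		...datagram was larger than buf; the tail was discarded
 */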

static bool vmci_transport_dgram_allow(u32 cid, u32 port)
{
	if (cid == VMADDR_CID_HYPERVISOR) {
		/* Registrations of PBRPC Servers do not modify VMX/Hypervisor
		 * state and are allowed.
		 */
		return port == VMCI_UNITY_PBRPC_REGISTER;
	}

	return true;
}

static int vmci_transport_connect(struct vsock_sock *vsk)
{
	int err;
	bool old_pkt_proto = false;
	struct sock *sk = &vsk->sk;

	if (vmci_transport_old_proto_override(&old_pkt_proto) &&
	    old_pkt_proto) {
		err = vmci_transport_send_conn_request(
			sk, vmci_trans(vsk)->queue_pair_size);
		if (err < 0) {
			sk->sk_state = SS_UNCONNECTED;
			return err;
		}
	} else {
		int supported_proto_versions =
			vmci_transport_new_proto_supported_versions();
		err = vmci_transport_send_conn_request2(
				sk, vmci_trans(vsk)->queue_pair_size,
				supported_proto_versions);
		if (err < 0) {
			sk->sk_state = SS_UNCONNECTED;
			return err;
		}

		vsk->sent_request = true;
	}

	return err;
}

static ssize_t vmci_transport_stream_dequeue(
	struct vsock_sock *vsk,
	struct iovec *iov,
	size_t len,
	int flags)
{
	if (flags & MSG_PEEK)
		return vmci_qpair_peekv(vmci_trans(vsk)->qpair, iov, len, 0);
	else
		return vmci_qpair_dequev(vmci_trans(vsk)->qpair, iov, len, 0);
}

static ssize_t vmci_transport_stream_enqueue(
	struct vsock_sock *vsk,
	struct iovec *iov,
	size_t len)
{
	return vmci_qpair_enquev(vmci_trans(vsk)->qpair, iov, len, 0);
}
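
/* Illustration added in editing (not part of the driver): the MSG_PEEK
 * branch above lets user space inspect stream data without consuming it
 * from the queue pair:
 *
 *	char hdr[4];
 *
 *	recv(fd, hdr, sizeof(hdr), MSG_PEEK);	...hdr stays queued
 *	recv(fd, hdr, sizeof(hdr), 0);		...now it is consumed
 */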

static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)
{
	return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair);
}

static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk)
{
	return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair);
}

static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk)
{
	return vmci_trans(vsk)->consume_size;
}

static bool vmci_transport_stream_is_active(struct vsock_sock *vsk)
{
	return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle);
}

static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk)
{
	return vmci_trans(vsk)->queue_pair_size;
}

static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk)
{
	return vmci_trans(vsk)->queue_pair_min_size;
}

static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk)
{
	return vmci_trans(vsk)->queue_pair_max_size;
}

static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
{
	/* Widen the min/max bounds if needed so the requested size always
	 * stays within them.
	 */
	if (val < vmci_trans(vsk)->queue_pair_min_size)
		vmci_trans(vsk)->queue_pair_min_size = val;
	if (val > vmci_trans(vsk)->queue_pair_max_size)
		vmci_trans(vsk)->queue_pair_max_size = val;
	vmci_trans(vsk)->queue_pair_size = val;
}

static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk,
					       u64 val)
{
	/* Raising the minimum drags the current size up with it. */
	if (val > vmci_trans(vsk)->queue_pair_size)
		vmci_trans(vsk)->queue_pair_size = val;
	vmci_trans(vsk)->queue_pair_min_size = val;
}

static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk,
					       u64 val)
{
	/* Lowering the maximum drags the current size down with it. */
	if (val < vmci_trans(vsk)->queue_pair_size)
		vmci_trans(vsk)->queue_pair_size = val;
	vmci_trans(vsk)->queue_pair_max_size = val;
}
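
/* Illustration added in editing (not part of the driver): these setters are
 * reached from the af_vsock core via setsockopt(). A minimal sketch,
 * assuming the SO_VM_SOCKETS_BUFFER_SIZE option from <linux/vm_sockets.h>:
 *
 *	unsigned long long size = 128 * 1024;
 *
 *	setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
 *		   &size, sizeof(size));	...set before connect()
 */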

/* The callbacks below thunk from the generic vsock transport interface to
 * whichever versioned notify_ops were negotiated for this socket.
 */
static int vmci_transport_notify_poll_in(
	struct vsock_sock *vsk,
	size_t target,
	bool *data_ready_now)
{
	return vmci_trans(vsk)->notify_ops->poll_in(
			&vsk->sk, target, data_ready_now);
}

static int vmci_transport_notify_poll_out(
	struct vsock_sock *vsk,
	size_t target,
	bool *space_available_now)
{
	return vmci_trans(vsk)->notify_ops->poll_out(
			&vsk->sk, target, space_available_now);
}

static int vmci_transport_notify_recv_init(
	struct vsock_sock *vsk,
	size_t target,
	struct vsock_transport_recv_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->recv_init(
			&vsk->sk, target,
			(struct vmci_transport_recv_notify_data *)data);
}

static int vmci_transport_notify_recv_pre_block(
	struct vsock_sock *vsk,
	size_t target,
	struct vsock_transport_recv_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->recv_pre_block(
			&vsk->sk, target,
			(struct vmci_transport_recv_notify_data *)data);
}

static int vmci_transport_notify_recv_pre_dequeue(
	struct vsock_sock *vsk,
	size_t target,
	struct vsock_transport_recv_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->recv_pre_dequeue(
			&vsk->sk, target,
			(struct vmci_transport_recv_notify_data *)data);
}

static int vmci_transport_notify_recv_post_dequeue(
	struct vsock_sock *vsk,
	size_t target,
	ssize_t copied,
	bool data_read,
	struct vsock_transport_recv_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->recv_post_dequeue(
			&vsk->sk, target, copied, data_read,
			(struct vmci_transport_recv_notify_data *)data);
}

static int vmci_transport_notify_send_init(
	struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->send_init(
			&vsk->sk,
			(struct vmci_transport_send_notify_data *)data);
}

static int vmci_transport_notify_send_pre_block(
	struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->send_pre_block(
			&vsk->sk,
			(struct vmci_transport_send_notify_data *)data);
}

static int vmci_transport_notify_send_pre_enqueue(
	struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->send_pre_enqueue(
			&vsk->sk,
			(struct vmci_transport_send_notify_data *)data);
}

static int vmci_transport_notify_send_post_enqueue(
	struct vsock_sock *vsk,
	ssize_t written,
	struct vsock_transport_send_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->send_post_enqueue(
			&vsk->sk, written,
			(struct vmci_transport_send_notify_data *)data);
}

static bool vmci_transport_old_proto_override(bool *old_pkt_proto)
{
	if (PROTOCOL_OVERRIDE != -1) {
		if (PROTOCOL_OVERRIDE == 0)
			*old_pkt_proto = true;
		else
			*old_pkt_proto = false;

		pr_info("Proto override in use\n");
		return true;
	}

	return false;
}

static bool vmci_transport_proto_to_notify_struct(struct sock *sk,
						  u16 *proto,
						  bool old_pkt_proto)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	if (old_pkt_proto) {
		if (*proto != VSOCK_PROTO_INVALID) {
			pr_err("Can't set both an old and new protocol\n");
			return false;
		}
		vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops;
		goto exit;
	}

	switch (*proto) {
	case VSOCK_PROTO_PKT_ON_NOTIFY:
		vmci_trans(vsk)->notify_ops =
			&vmci_transport_notify_pkt_q_state_ops;
		break;
	default:
		pr_err("Unknown notify protocol version\n");
		return false;
	}

exit:
	vmci_trans(vsk)->notify_ops->socket_init(sk);
	return true;
}
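
/* Editor's note, summarizing the protocol-to-notify mapping implemented
 * above (derived from this file):
 *
 *	negotiated protocol		notify_ops used
 *	-------------------		---------------
 *	old proto (NEGOTIATE)		vmci_transport_notify_pkt_ops
 *	VSOCK_PROTO_PKT_ON_NOTIFY	vmci_transport_notify_pkt_q_state_ops
 *	anything else			rejected; the caller resets the
 *					connection
 */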

static u16 vmci_transport_new_proto_supported_versions(void)
{
	if (PROTOCOL_OVERRIDE != -1)
		return PROTOCOL_OVERRIDE;

	return VSOCK_PROTO_ALL_SUPPORTED;
}

static u32 vmci_transport_get_local_cid(void)
{
	return vmci_get_context_id();
}
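
/* Illustration added in editing (not part of the driver): user space can
 * retrieve the same context id through the vsock misc device. A minimal
 * sketch, assuming IOCTL_VM_SOCKETS_GET_LOCAL_CID from
 * <linux/vm_sockets.h>:
 *
 *	unsigned int cid;
 *	int dev = open("/dev/vsock", O_RDONLY);
 *
 *	ioctl(dev, IOCTL_VM_SOCKETS_GET_LOCAL_CID, &cid);
 *	...cid now holds the value vmci_get_context_id() returned
 */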

static struct vsock_transport vmci_transport = {
	.init = vmci_transport_socket_init,
	.destruct = vmci_transport_destruct,
	.release = vmci_transport_release,
	.connect = vmci_transport_connect,
	.dgram_bind = vmci_transport_dgram_bind,
	.dgram_dequeue = vmci_transport_dgram_dequeue,
	.dgram_enqueue = vmci_transport_dgram_enqueue,
	.dgram_allow = vmci_transport_dgram_allow,
	.stream_dequeue = vmci_transport_stream_dequeue,
	.stream_enqueue = vmci_transport_stream_enqueue,
	.stream_has_data = vmci_transport_stream_has_data,
	.stream_has_space = vmci_transport_stream_has_space,
	.stream_rcvhiwat = vmci_transport_stream_rcvhiwat,
	.stream_is_active = vmci_transport_stream_is_active,
	.stream_allow = vmci_transport_stream_allow,
	.notify_poll_in = vmci_transport_notify_poll_in,
	.notify_poll_out = vmci_transport_notify_poll_out,
	.notify_recv_init = vmci_transport_notify_recv_init,
	.notify_recv_pre_block = vmci_transport_notify_recv_pre_block,
	.notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue,
	.notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue,
	.notify_send_init = vmci_transport_notify_send_init,
	.notify_send_pre_block = vmci_transport_notify_send_pre_block,
	.notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue,
	.notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue,
	.shutdown = vmci_transport_shutdown,
	.set_buffer_size = vmci_transport_set_buffer_size,
	.set_min_buffer_size = vmci_transport_set_min_buffer_size,
	.set_max_buffer_size = vmci_transport_set_max_buffer_size,
	.get_buffer_size = vmci_transport_get_buffer_size,
	.get_min_buffer_size = vmci_transport_get_min_buffer_size,
	.get_max_buffer_size = vmci_transport_get_max_buffer_size,
	.get_local_cid = vmci_transport_get_local_cid,
};

static int __init vmci_transport_init(void)
{
	int err;

	/* Create the datagram handle that we will use to send and receive all
	 * VSocket control messages for this context.
	 */
	err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID,
						 VMCI_FLAG_ANYCID_DG_HND,
						 vmci_transport_recv_stream_cb,
						 NULL,
						 &vmci_transport_stream_handle);
	if (err < VMCI_SUCCESS) {
		pr_err("Unable to create datagram handle. (%d)\n", err);
		return vmci_transport_error_to_vsock_error(err);
	}

	err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED,
				   vmci_transport_qp_resumed_cb,
				   NULL, &vmci_transport_qp_resumed_sub_id);
	if (err < VMCI_SUCCESS) {
		pr_err("Unable to subscribe to resumed event. (%d)\n", err);
		err = vmci_transport_error_to_vsock_error(err);
		vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
		goto err_destroy_stream_handle;
	}

	err = vsock_core_init(&vmci_transport);
	if (err < 0)
		goto err_unsubscribe;

	return 0;

err_unsubscribe:
	vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
err_destroy_stream_handle:
	vmci_datagram_destroy_handle(vmci_transport_stream_handle);
	return err;
}
module_init(vmci_transport_init);

static void __exit vmci_transport_exit(void)
{
	if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) {
		if (vmci_datagram_destroy_handle(
			vmci_transport_stream_handle) != VMCI_SUCCESS)
			pr_err("Couldn't destroy datagram handle\n");
		vmci_transport_stream_handle = VMCI_INVALID_HANDLE;
	}

	if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) {
		vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
		vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
	}

	vsock_core_exit();
}
module_exit(vmci_transport_exit);

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("vmware_vsock");
MODULE_ALIAS_NETPROTO(PF_VSOCK);