linux/net/vmw_vsock/hyperv_transport.c
/*
 * Hyper-V transport for vsock
 *
 * Hyper-V Sockets supplies a byte-stream based communication mechanism
 * between the host and the VM. This driver implements the necessary
 * support in the VM by introducing the new vsock transport.
 *
 * Copyright (c) 2017, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <net/sock.h>
#include <net/af_vsock.h>

/* The host side's design of the feature requires 6 pages of exactly 4KB
 * each for the recv/send rings respectively. This is suboptimal in terms
 * of memory consumption, but we have to live with it until the host comes
 * up with a better design.
 */
#define PAGE_SIZE_4K            4096
#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)

/* The MTU is 16KB per the host side's design */
#define HVS_MTU_SIZE            (1024 * 16)

struct vmpipe_proto_header {
        u32 pkt_type;
        u32 data_size;
};

/* For recv, we use the VMBus in-place packet iterator APIs to directly copy
 * data from the ringbuffer into the userspace buffer.
 */
struct hvs_recv_buf {
        /* The header before the payload data */
        struct vmpipe_proto_header hdr;

        /* The payload */
        u8 data[HVS_MTU_SIZE];
};

/* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
 * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated
 * buffer, because tests show there is no significant performance difference.
 *
 * Note: the buffer can be eliminated in the future when we add new VMBus
 * ringbuffer APIs that allow us to directly copy data from userspace buffer
 * to VMBus ringbuffer.
 */
#define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header))

struct hvs_send_buf {
        /* The header before the payload data */
        struct vmpipe_proto_header hdr;

        /* The payload */
        u8 data[HVS_SEND_BUF_SIZE];
};

#define HVS_HEADER_LEN  (sizeof(struct vmpacket_descriptor) + \
                         sizeof(struct vmpipe_proto_header))

/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write(), and
 * __hv_pkt_iter_next().
 */
#define VMBUS_PKT_TRAILER_SIZE  (sizeof(u64))

#define HVS_PKT_LEN(payload_len)        (HVS_HEADER_LEN + \
                                         ALIGN((payload_len), 8) + \
                                         VMBUS_PKT_TRAILER_SIZE)

union hvs_service_id {
        uuid_le srv_id;

        struct {
                unsigned int svm_port;
                unsigned char b[sizeof(uuid_le) - sizeof(unsigned int)];
        };
};

/* Per-socket state (accessed via vsk->trans) */
struct hvsock {
        struct vsock_sock *vsk;

        uuid_le vm_srv_id;
        uuid_le host_srv_id;

        struct vmbus_channel *chan;
        struct vmpacket_descriptor *recv_desc;

        /* The length of the payload not delivered to userland yet */
        u32 recv_data_len;
        /* The offset of the payload */
        u32 recv_data_off;

        /* Have we sent the zero-length packet (FIN)? */
        bool fin_sent;
};

/* In the VM, we support Hyper-V Sockets with AF_VSOCK, and the endpoint is
 * <cid, port> (see struct sockaddr_vm). Note: cid is not really used here:
 * when we write apps to connect to the host, we can only use VMADDR_CID_ANY
 * or VMADDR_CID_HOST (both are equivalent) as the remote cid, and when we
 * write apps to bind() & listen() in the VM, we can only use VMADDR_CID_ANY
 * as the local cid.
 *
 * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
 * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
 * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
 * the sockaddr below:
 *
 * struct SOCKADDR_HV
 * {
 *    ADDRESS_FAMILY Family;
 *    USHORT Reserved;
 *    GUID VmId;
 *    GUID ServiceId;
 * };
 *
 * Note: the VmId is not used by the Linux VM, and it isn't even transmitted
 * over VMBus, because the host and the VM can identify each other without
 * it. The VmId is useful on the host, especially for Windows containers,
 * but the Linux VM doesn't need it at all.
 *
 * To make use of the AF_VSOCK infrastructure in the Linux VM, we have to
 * limit the available GUID space of SOCKADDR_HV so that we can create a
 * mapping between an AF_VSOCK port and a SOCKADDR_HV Service GUID. The rule
 * for writing Hyper-V Sockets apps on the host and in the Linux VM is:
 *
 ****************************************************************************
 * The only valid Service GUIDs, from the perspectives of both the host and *
 * Linux VM, that can be connected by the other end, must conform to this   *
 * format: <port>-facb-11e6-bd58-64006a7986d3, and the "port" must be in    *
 * this range [0, 0x7FFFFFFF].                                              *
 ****************************************************************************
 *
 * When we write apps on the host to connect(), the GUID ServiceID is used.
 * When we write apps in the Linux VM to connect(), we only need to specify
 * the port, and the driver will form the GUID and use it to request the
 * host.
 *
 * From the perspective of the Linux VM:
 * 1. the local ephemeral port (i.e. the local auto-bound port when we call
 * connect() without an explicit bind()) is generated by
 * __vsock_bind_stream(), and the range is [1024, 0xFFFFFFFF).
 * 2. the remote ephemeral port (i.e. the auto-generated remote port for
 * a connect request initiated by the host's connect()) is generated by
 * hvs_remote_addr_init(), and the range is [0x80000000, 0xFFFFFFFF).
 */

#define MAX_LISTEN_PORT                 ((u32)0x7FFFFFFF)
#define MAX_VM_LISTEN_PORT              MAX_LISTEN_PORT
#define MAX_HOST_LISTEN_PORT            MAX_LISTEN_PORT
#define MIN_HOST_EPHEMERAL_PORT         (MAX_HOST_LISTEN_PORT + 1)

/* 00000000-facb-11e6-bd58-64006a7986d3 */
static const uuid_le srv_id_template =
        UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
                0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);

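/* A valid Hyper-V Sockets service GUID matches srv_id_template in all but
 * the first 32 bits, which carry the AF_VSOCK port number.
 */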
static bool is_valid_srv_id(const uuid_le *id)
{
        return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(uuid_le) - 4);
}

static unsigned int get_port_by_srv_id(const uuid_le *svr_id)
{
        return *((unsigned int *)svr_id);
}

static void hvs_addr_init(struct sockaddr_vm *addr, const uuid_le *svr_id)
{
        unsigned int port = get_port_by_srv_id(svr_id);

        vsock_addr_init(addr, VMADDR_CID_ANY, port);
}

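/* Pick an unused "remote" port in the host's ephemeral range
 * [MIN_HOST_EPHEMERAL_PORT, VMADDR_PORT_ANY) for a connection that was
 * initiated by the host.
 */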
static void hvs_remote_addr_init(struct sockaddr_vm *remote,
                                 struct sockaddr_vm *local)
{
        static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
        struct sock *sk;

        vsock_addr_init(remote, VMADDR_CID_ANY, VMADDR_PORT_ANY);

        while (1) {
                /* Wrap around ? */
                if (host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT ||
                    host_ephemeral_port == VMADDR_PORT_ANY)
                        host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;

                remote->svm_port = host_ephemeral_port++;

                sk = vsock_find_connected_socket(remote, local);
                if (!sk) {
                        /* Found an available ephemeral port */
                        return;
                }

                /* Release refcnt got in vsock_find_connected_socket */
                sock_put(sk);
        }
}

static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan)
{
        set_channel_pending_send_size(chan,
                                      HVS_PKT_LEN(HVS_SEND_BUF_SIZE));

        /* See hvs_stream_has_space(): we must make sure the host has seen
         * the new pending send size, before we can re-check the writable
         * bytes.
         */
        virt_mb();
}

static void hvs_clear_channel_pending_send_size(struct vmbus_channel *chan)
{
        set_channel_pending_send_size(chan, 0);

        /* Ditto */
        virt_mb();
}

static bool hvs_channel_readable(struct vmbus_channel *chan)
{
        u32 readable = hv_get_bytes_to_read(&chan->inbound);

        /* 0-size payload means FIN */
        return readable >= HVS_PKT_LEN(0);
}

static int hvs_channel_readable_payload(struct vmbus_channel *chan)
{
        u32 readable = hv_get_bytes_to_read(&chan->inbound);

        if (readable > HVS_PKT_LEN(0)) {
                /* At least we have 1 byte to read. We don't need to return
                 * the exact readable bytes: see vsock_stream_recvmsg() ->
                 * vsock_stream_has_data().
                 */
                return 1;
        }

        if (readable == HVS_PKT_LEN(0)) {
                /* 0-size payload means FIN */
                return 0;
        }

        /* No payload or FIN */
        return -1;
}

static size_t hvs_channel_writable_bytes(struct vmbus_channel *chan)
{
        u32 writeable = hv_get_bytes_to_write(&chan->outbound);
        size_t ret;

        /* The ringbuffer mustn't be 100% full, and we should reserve a
         * zero-length-payload packet for the FIN: see hv_ringbuffer_write()
         * and hvs_shutdown().
         */
        if (writeable <= HVS_PKT_LEN(1) + HVS_PKT_LEN(0))
                return 0;

        ret = writeable - HVS_PKT_LEN(1) - HVS_PKT_LEN(0);

        return round_down(ret, 8);
}

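/* Prepend the vmpipe protocol header (pkt_type 1 == data) and send the
 * payload to the host as an in-band VMBus packet.
 */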
static int hvs_send_data(struct vmbus_channel *chan,
                         struct hvs_send_buf *send_buf, size_t to_write)
{
        send_buf->hdr.pkt_type = 1;
        send_buf->hdr.data_size = to_write;
        return vmbus_sendpacket(chan, &send_buf->hdr,
                                sizeof(send_buf->hdr) + to_write,
                                0, VM_PKT_DATA_INBAND, 0);
}

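/* The per-channel callback registered via vmbus_open(): it runs when the
 * host signals the channel, and wakes up the reader and/or the writer.
 */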
static void hvs_channel_cb(void *ctx)
{
        struct sock *sk = (struct sock *)ctx;
        struct vsock_sock *vsk = vsock_sk(sk);
        struct hvsock *hvs = vsk->trans;
        struct vmbus_channel *chan = hvs->chan;

        if (hvs_channel_readable(chan))
                sk->sk_data_ready(sk);

        /* See hvs_stream_has_space(): when we reach here, the writable bytes
         * may be already less than HVS_PKT_LEN(HVS_SEND_BUF_SIZE).
         */
        if (hv_get_bytes_to_write(&chan->outbound) > 0)
                sk->sk_write_space(sk);
}

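/* The rescind callback: the host has closed the channel, so mark the
 * socket as done and wake up any waiters.
 */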
static void hvs_close_connection(struct vmbus_channel *chan)
{
        struct sock *sk = get_per_channel_state(chan);
        struct vsock_sock *vsk = vsock_sk(sk);

        lock_sock(sk);

        sk->sk_state = TCP_CLOSE;
        sock_set_flag(sk, SOCK_DONE);
        vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;

        sk->sk_state_change(sk);

        release_sock(sk);
}

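/* Handle a new hv_sock channel offer from the host. The offer is either a
 * host-initiated connection to a local listening socket (conn_from_host),
 * or the host's reply to a connect() previously issued by the guest.
 */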
static void hvs_open_connection(struct vmbus_channel *chan)
{
        uuid_le *if_instance, *if_type;
        unsigned char conn_from_host;

        struct sockaddr_vm addr;
        struct sock *sk, *new = NULL;
        struct vsock_sock *vnew;
        struct hvsock *hvs, *hvs_new;
        int ret;

        if_type = &chan->offermsg.offer.if_type;
        if_instance = &chan->offermsg.offer.if_instance;
        conn_from_host = chan->offermsg.offer.u.pipe.user_def[0];

        /* The host or the VM should only listen on a port in
         * [0, MAX_LISTEN_PORT]
         */
        if (!is_valid_srv_id(if_type) ||
            get_port_by_srv_id(if_type) > MAX_LISTEN_PORT)
                return;

        hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
        sk = vsock_find_bound_socket(&addr);
        if (!sk)
                return;

        lock_sock(sk);
        if ((conn_from_host && sk->sk_state != TCP_LISTEN) ||
            (!conn_from_host && sk->sk_state != TCP_SYN_SENT))
                goto out;

        if (conn_from_host) {
                if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog)
                        goto out;

                new = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
                                     sk->sk_type, 0);
                if (!new)
                        goto out;

                new->sk_state = TCP_SYN_SENT;
                vnew = vsock_sk(new);
                hvs_new = vnew->trans;
                hvs_new->chan = chan;
        } else {
                hvs = vsock_sk(sk)->trans;
                hvs->chan = chan;
        }

        set_channel_read_mode(chan, HV_CALL_DIRECT);
        ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
                         RINGBUFFER_HVS_RCV_SIZE, NULL, 0,
                         hvs_channel_cb, conn_from_host ? new : sk);
        if (ret != 0) {
                if (conn_from_host) {
                        hvs_new->chan = NULL;
                        sock_put(new);
                } else {
                        hvs->chan = NULL;
                }
                goto out;
        }

        set_per_channel_state(chan, conn_from_host ? new : sk);
        vmbus_set_chn_rescind_callback(chan, hvs_close_connection);

        if (conn_from_host) {
                new->sk_state = TCP_ESTABLISHED;
                sk->sk_ack_backlog++;

                hvs_addr_init(&vnew->local_addr, if_type);
                hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);

                hvs_new->vm_srv_id = *if_type;
                hvs_new->host_srv_id = *if_instance;

                vsock_insert_connected(vnew);

                vsock_enqueue_accept(sk, new);
        } else {
                sk->sk_state = TCP_ESTABLISHED;
                sk->sk_socket->state = SS_CONNECTED;

                vsock_insert_connected(vsock_sk(sk));
        }

        sk->sk_state_change(sk);

out:
        /* Release refcnt obtained when we called vsock_find_bound_socket() */
        sock_put(sk);

        release_sock(sk);
}

static u32 hvs_get_local_cid(void)
{
        return VMADDR_CID_ANY;
}

static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
{
        struct hvsock *hvs;

        hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
        if (!hvs)
                return -ENOMEM;

        vsk->trans = hvs;
        hvs->vsk = vsk;

        return 0;
}

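/* Build the guest/host service GUIDs from the local/remote ports and ask
 * the host to establish the connection.
 */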
static int hvs_connect(struct vsock_sock *vsk)
{
        union hvs_service_id vm, host;
        struct hvsock *h = vsk->trans;

        vm.srv_id = srv_id_template;
        vm.svm_port = vsk->local_addr.svm_port;
        h->vm_srv_id = vm.srv_id;

        host.srv_id = srv_id_template;
        host.svm_port = vsk->remote_addr.svm_port;
        h->host_srv_id = host.srv_id;

        return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
}

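/* SEND_SHUTDOWN is signalled to the host by a zero-length data packet,
 * which acts as the FIN; it is sent at most once per socket.
 */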
static int hvs_shutdown(struct vsock_sock *vsk, int mode)
{
        struct sock *sk = sk_vsock(vsk);
        struct vmpipe_proto_header hdr;
        struct hvs_send_buf *send_buf;
        struct hvsock *hvs;

        if (!(mode & SEND_SHUTDOWN))
                return 0;

        lock_sock(sk);

        hvs = vsk->trans;
        if (hvs->fin_sent)
                goto out;

        send_buf = (struct hvs_send_buf *)&hdr;

        /* It can't fail: see hvs_channel_writable_bytes(). */
        (void)hvs_send_data(hvs->chan, send_buf, 0);

        hvs->fin_sent = true;
out:
        release_sock(sk);
        return 0;
}

static void hvs_release(struct vsock_sock *vsk)
{
        struct sock *sk = sk_vsock(vsk);
        struct hvsock *hvs = vsk->trans;
        struct vmbus_channel *chan;

        lock_sock(sk);

        sk->sk_state = TCP_CLOSING;
        vsock_remove_sock(vsk);

        release_sock(sk);

        chan = hvs->chan;
        if (chan)
                hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
}

static void hvs_destruct(struct vsock_sock *vsk)
{
        struct hvsock *hvs = vsk->trans;
        struct vmbus_channel *chan = hvs->chan;

        if (chan)
                vmbus_hvsock_device_unregister(chan);

        kfree(hvs);
}

static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr)
{
        return -EOPNOTSUPP;
}

static int hvs_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
                             size_t len, int flags)
{
        return -EOPNOTSUPP;
}

static int hvs_dgram_enqueue(struct vsock_sock *vsk,
                             struct sockaddr_vm *remote, struct msghdr *msg,
                             size_t dgram_len)
{
        return -EOPNOTSUPP;
}

static bool hvs_dgram_allow(u32 cid, u32 port)
{
        return false;
}

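/* Refresh the receive bookkeeping from the header of the packet currently
 * referenced by hvs->recv_desc. A zero-length payload is the peer's FIN.
 */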
static int hvs_update_recv_data(struct hvsock *hvs)
{
        struct hvs_recv_buf *recv_buf;
        u32 payload_len;

        recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
        payload_len = recv_buf->hdr.data_size;

        if (payload_len > HVS_MTU_SIZE)
                return -EIO;

        if (payload_len == 0)
                hvs->vsk->peer_shutdown |= SEND_SHUTDOWN;

        hvs->recv_data_len = payload_len;
        hvs->recv_data_off = 0;

        return 0;
}

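/* Copy up to 'len' bytes of the current packet's remaining payload into the
 * user buffer, advancing the in-place packet iterator once a packet has
 * been fully consumed.
 */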
static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
                                  size_t len, int flags)
{
        struct hvsock *hvs = vsk->trans;
        bool need_refill = !hvs->recv_desc;
        struct hvs_recv_buf *recv_buf;
        u32 to_read;
        int ret;

        if (flags & MSG_PEEK)
                return -EOPNOTSUPP;

        if (need_refill) {
                hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
                ret = hvs_update_recv_data(hvs);
                if (ret)
                        return ret;
        }

        recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
        to_read = min_t(u32, len, hvs->recv_data_len);
        ret = memcpy_to_msg(msg, recv_buf->data + hvs->recv_data_off, to_read);
        if (ret != 0)
                return ret;

        hvs->recv_data_len -= to_read;
        if (hvs->recv_data_len == 0) {
                hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
                if (hvs->recv_desc) {
                        ret = hvs_update_recv_data(hvs);
                        if (ret)
                                return ret;
                }
        } else {
                hvs->recv_data_off += to_read;
        }

        return to_read;
}

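/* Copy at most HVS_SEND_BUF_SIZE bytes (and no more than the ring can
 * currently hold) from the user buffer into a temporary bounce buffer and
 * send them to the host as a single packet.
 */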
static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
                                  size_t len)
{
        struct hvsock *hvs = vsk->trans;
        struct vmbus_channel *chan = hvs->chan;
        struct hvs_send_buf *send_buf;
        ssize_t to_write, max_writable, ret;

        BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);

        send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL);
        if (!send_buf)
                return -ENOMEM;

        max_writable = hvs_channel_writable_bytes(chan);
        to_write = min_t(ssize_t, len, max_writable);
        to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);

        ret = memcpy_from_msg(send_buf->data, msg, to_write);
        if (ret < 0)
                goto out;

        ret = hvs_send_data(hvs->chan, send_buf, to_write);
        if (ret < 0)
                goto out;

        ret = to_write;
out:
        kfree(send_buf);
        return ret;
}

static s64 hvs_stream_has_data(struct vsock_sock *vsk)
{
        struct hvsock *hvs = vsk->trans;
        s64 ret;

        if (hvs->recv_data_len > 0)
                return 1;

        switch (hvs_channel_readable_payload(hvs->chan)) {
        case 1:
                ret = 1;
                break;
        case 0:
                vsk->peer_shutdown |= SEND_SHUTDOWN;
                ret = 0;
                break;
        default: /* -1 */
                ret = 0;
                break;
        }

        return ret;
}

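/* Report the writable bytes. When the ring is full, arm the pending send
 * size so the host signals us once enough space is available, then re-check
 * to close the race with the host draining the ring in the meantime.
 */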
static s64 hvs_stream_has_space(struct vsock_sock *vsk)
{
        struct hvsock *hvs = vsk->trans;
        struct vmbus_channel *chan = hvs->chan;
        s64 ret;

        ret = hvs_channel_writable_bytes(chan);
        if (ret > 0) {
                hvs_clear_channel_pending_send_size(chan);
        } else {
                /* See hvs_channel_cb() */
                hvs_set_channel_pending_send_size(chan);

                /* Re-check the writable bytes to avoid race */
                ret = hvs_channel_writable_bytes(chan);
                if (ret > 0)
                        hvs_clear_channel_pending_send_size(chan);
        }

        return ret;
}

static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)
{
        return HVS_MTU_SIZE + 1;
}

static bool hvs_stream_is_active(struct vsock_sock *vsk)
{
        struct hvsock *hvs = vsk->trans;

        return hvs->chan != NULL;
}

static bool hvs_stream_allow(u32 cid, u32 port)
{
        /* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is
         * reserved as ephemeral ports, which are used as the host's ports
         * when the host initiates connections.
         *
         * Perform this check in the guest so an immediate error is produced
         * instead of a timeout.
         */
        if (port > MAX_HOST_LISTEN_PORT)
                return false;

        if (cid == VMADDR_CID_HOST)
                return true;

        return false;
}

static
int hvs_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *readable)
{
        struct hvsock *hvs = vsk->trans;

        *readable = hvs_channel_readable(hvs->chan);
        return 0;
}

static
int hvs_notify_poll_out(struct vsock_sock *vsk, size_t target, bool *writable)
{
        *writable = hvs_stream_has_space(vsk) > 0;

        return 0;
}

static
int hvs_notify_recv_init(struct vsock_sock *vsk, size_t target,
                         struct vsock_transport_recv_notify_data *d)
{
        return 0;
}

static
int hvs_notify_recv_pre_block(struct vsock_sock *vsk, size_t target,
                              struct vsock_transport_recv_notify_data *d)
{
        return 0;
}

static
int hvs_notify_recv_pre_dequeue(struct vsock_sock *vsk, size_t target,
                                struct vsock_transport_recv_notify_data *d)
{
        return 0;
}

static
int hvs_notify_recv_post_dequeue(struct vsock_sock *vsk, size_t target,
                                 ssize_t copied, bool data_read,
                                 struct vsock_transport_recv_notify_data *d)
{
        return 0;
}

static
int hvs_notify_send_init(struct vsock_sock *vsk,
                         struct vsock_transport_send_notify_data *d)
{
        return 0;
}

static
int hvs_notify_send_pre_block(struct vsock_sock *vsk,
                              struct vsock_transport_send_notify_data *d)
{
        return 0;
}

static
int hvs_notify_send_pre_enqueue(struct vsock_sock *vsk,
                                struct vsock_transport_send_notify_data *d)
{
        return 0;
}

static
int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
                                 struct vsock_transport_send_notify_data *d)
{
        return 0;
}

static void hvs_set_buffer_size(struct vsock_sock *vsk, u64 val)
{
        /* Ignored. */
}

static void hvs_set_min_buffer_size(struct vsock_sock *vsk, u64 val)
{
        /* Ignored. */
}

static void hvs_set_max_buffer_size(struct vsock_sock *vsk, u64 val)
{
        /* Ignored. */
}

static u64 hvs_get_buffer_size(struct vsock_sock *vsk)
{
        return -ENOPROTOOPT;
}

static u64 hvs_get_min_buffer_size(struct vsock_sock *vsk)
{
        return -ENOPROTOOPT;
}

static u64 hvs_get_max_buffer_size(struct vsock_sock *vsk)
{
        return -ENOPROTOOPT;
}

static struct vsock_transport hvs_transport = {
        .get_local_cid            = hvs_get_local_cid,

        .init                     = hvs_sock_init,
        .destruct                 = hvs_destruct,
        .release                  = hvs_release,
        .connect                  = hvs_connect,
        .shutdown                 = hvs_shutdown,

        .dgram_bind               = hvs_dgram_bind,
        .dgram_dequeue            = hvs_dgram_dequeue,
        .dgram_enqueue            = hvs_dgram_enqueue,
        .dgram_allow              = hvs_dgram_allow,

        .stream_dequeue           = hvs_stream_dequeue,
        .stream_enqueue           = hvs_stream_enqueue,
        .stream_has_data          = hvs_stream_has_data,
        .stream_has_space         = hvs_stream_has_space,
        .stream_rcvhiwat          = hvs_stream_rcvhiwat,
        .stream_is_active         = hvs_stream_is_active,
        .stream_allow             = hvs_stream_allow,

        .notify_poll_in           = hvs_notify_poll_in,
        .notify_poll_out          = hvs_notify_poll_out,
        .notify_recv_init         = hvs_notify_recv_init,
        .notify_recv_pre_block    = hvs_notify_recv_pre_block,
        .notify_recv_pre_dequeue  = hvs_notify_recv_pre_dequeue,
        .notify_recv_post_dequeue = hvs_notify_recv_post_dequeue,
        .notify_send_init         = hvs_notify_send_init,
        .notify_send_pre_block    = hvs_notify_send_pre_block,
        .notify_send_pre_enqueue  = hvs_notify_send_pre_enqueue,
        .notify_send_post_enqueue = hvs_notify_send_post_enqueue,

        .set_buffer_size          = hvs_set_buffer_size,
        .set_min_buffer_size      = hvs_set_min_buffer_size,
        .set_max_buffer_size      = hvs_set_max_buffer_size,
        .get_buffer_size          = hvs_get_buffer_size,
        .get_min_buffer_size      = hvs_get_min_buffer_size,
        .get_max_buffer_size      = hvs_get_max_buffer_size,
};

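/* Called by the VMBus core for every new hv_sock channel offer: the driver
 * sets .hvsock = true, so vmbus_match() matches it against such offers.
 */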
static int hvs_probe(struct hv_device *hdev,
                     const struct hv_vmbus_device_id *dev_id)
{
        struct vmbus_channel *chan = hdev->channel;

        hvs_open_connection(chan);

        /* Always return success to suppress the unnecessary error message
         * in vmbus_probe(): on error the host will rescind the device in
         * 30 seconds and we can do cleanup at that time in
         * vmbus_onoffer_rescind().
         */
        return 0;
}

static int hvs_remove(struct hv_device *hdev)
{
        struct vmbus_channel *chan = hdev->channel;

        vmbus_close(chan);

        return 0;
}

/* This isn't really used. See vmbus_match() and vmbus_probe() */
static const struct hv_vmbus_device_id id_table[] = {
        {},
};

static struct hv_driver hvs_drv = {
        .name           = "hv_sock",
        .hvsock         = true,
        .id_table       = id_table,
        .probe          = hvs_probe,
        .remove         = hvs_remove,
};

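/* hv_sock is only available when the negotiated VMBus protocol version is
 * at least VERSION_WIN10, i.e. on a Windows 10 / Windows Server 2016 era
 * host or newer.
 */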
static int __init hvs_init(void)
{
        int ret;

        if (vmbus_proto_version < VERSION_WIN10)
                return -ENODEV;

        ret = vmbus_driver_register(&hvs_drv);
        if (ret != 0)
                return ret;

        ret = vsock_core_init(&hvs_transport);
        if (ret) {
                vmbus_driver_unregister(&hvs_drv);
                return ret;
        }

        return 0;
}

static void __exit hvs_exit(void)
{
        vsock_core_exit();
        vmbus_driver_unregister(&hvs_drv);
}

module_init(hvs_init);
module_exit(hvs_exit);

MODULE_DESCRIPTION("Hyper-V Sockets");
MODULE_VERSION("1.0.0");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_VSOCK);