linux/net/vmw_vsock/vmci_transport_notify_qstate.c
<<
>>
Prefs
   1/*
   2 * VMware vSockets Driver
   3 *
   4 * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License as published by the Free
   8 * Software Foundation version 2 and no later version.
   9 *
  10 * This program is distributed in the hope that it will be useful, but WITHOUT
  11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 * more details.
  14 */
  15
  16#include <linux/types.h>
  17#include <linux/socket.h>
  18#include <linux/stddef.h>
  19#include <net/sock.h>
  20
  21#include "vmci_transport_notify.h"
  22
  23#define PKT_FIELD(vsk, field_name) \
  24        (vmci_trans(vsk)->notify.pkt_q_state.field_name)
  25
/* Decide whether the peer, which announced it is blocked waiting for queue
 * space, should be sent a READ notification now.  Returns true when enough
 * of the consume queue has been drained to drop below the (adaptively
 * shrunk) write_notify_window.
 */
static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
{
	bool retval;
	u64 notify_limit;

	/* No notification is due unless the peer told us it is waiting. */
	if (!PKT_FIELD(vsk, peer_waiting_write))
		return false;

	/* When the sender blocks, we take that as a sign that the sender is
	 * faster than the receiver. To reduce the transmit rate of the sender,
	 * we delay the sending of the read notification by decreasing the
	 * write_notify_window. The notification is delayed until the number of
	 * bytes used in the queue drops below the write_notify_window.
	 */

	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
		PKT_FIELD(vsk, peer_waiting_write_detected) = true;
		if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
			/* Window already below one page; pin it to the
			 * configured minimum.
			 */
			PKT_FIELD(vsk, write_notify_window) =
			    PKT_FIELD(vsk, write_notify_min_window);
		} else {
			/* Shrink by one page, clamped from below by the
			 * minimum window size.
			 */
			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
			if (PKT_FIELD(vsk, write_notify_window) <
			    PKT_FIELD(vsk, write_notify_min_window))
				PKT_FIELD(vsk, write_notify_window) =
				    PKT_FIELD(vsk, write_notify_min_window);

		}
	}
	notify_limit = vmci_trans(vsk)->consume_size -
		PKT_FIELD(vsk, write_notify_window);

	/* The notify_limit is used to delay notifications in the case where
	 * flow control is enabled. Below the test is expressed in terms of
	 * free space in the queue: if free_space > ConsumeSize -
	 * write_notify_window then notify An alternate way of expressing this
	 * is to rewrite the expression to use the data ready in the receive
	 * queue: if write_notify_window > bufferReady then notify as
	 * free_space == ConsumeSize - bufferReady.
	 */

	retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
		notify_limit;

	if (retval) {
		/* Once we notify the peer, we reset the detected flag so the
		 * next wait will again cause a decrease in the window size.
		 */

		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
	}
	return retval;
}
  79
/* Handle a READ control packet: the peer consumed data from its queue, so
 * there may now be space for us to write; wake any blocked writers.
 */
static void
vmci_transport_handle_read(struct sock *sk,
			   struct vmci_transport_packet *pkt,
			   bool bottom_half,
			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
	/* pkt, bottom_half, dst and src are unused; the arrival of the
	 * notification itself is all the information needed here.
	 */
	sk->sk_write_space(sk);
}
  88
/* Handle a WROTE control packet: the peer placed data in our consume queue,
 * so wake any readers waiting for data.
 */
static void
vmci_transport_handle_wrote(struct sock *sk,
			    struct vmci_transport_packet *pkt,
			    bool bottom_half,
			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
	/* pkt, bottom_half, dst and src are unused; the arrival of the
	 * notification itself is all the information needed here.
	 */
	sk->sk_data_ready(sk);
}
  97
  98static void vsock_block_update_write_window(struct sock *sk)
  99{
 100        struct vsock_sock *vsk = vsock_sk(sk);
 101
 102        if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size)
 103                PKT_FIELD(vsk, write_notify_window) =
 104                    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
 105                        vmci_trans(vsk)->consume_size);
 106}
 107
 108static int vmci_transport_send_read_notification(struct sock *sk)
 109{
 110        struct vsock_sock *vsk;
 111        bool sent_read;
 112        unsigned int retries;
 113        int err;
 114
 115        vsk = vsock_sk(sk);
 116        sent_read = false;
 117        retries = 0;
 118        err = 0;
 119
 120        if (vmci_transport_notify_waiting_write(vsk)) {
 121                /* Notify the peer that we have read, retrying the send on
 122                 * failure up to our maximum value.  XXX For now we just log
 123                 * the failure, but later we should schedule a work item to
 124                 * handle the resend until it succeeds.  That would require
 125                 * keeping track of work items in the vsk and cleaning them up
 126                 * upon socket close.
 127                 */
 128                while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
 129                       !sent_read &&
 130                       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
 131                        err = vmci_transport_send_read(sk);
 132                        if (err >= 0)
 133                                sent_read = true;
 134
 135                        retries++;
 136                }
 137
 138                if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read)
 139                        pr_err("%p unable to send read notification to peer\n",
 140                               sk);
 141                else
 142                        PKT_FIELD(vsk, peer_waiting_write) = false;
 143
 144        }
 145        return err;
 146}
 147
 148static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
 149{
 150        struct vsock_sock *vsk = vsock_sk(sk);
 151
 152        PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
 153        PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
 154        PKT_FIELD(vsk, peer_waiting_write) = false;
 155        PKT_FIELD(vsk, peer_waiting_write_detected) = false;
 156}
 157
 158static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
 159{
 160        PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
 161        PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
 162        PKT_FIELD(vsk, peer_waiting_write) = false;
 163        PKT_FIELD(vsk, peer_waiting_write_detected) = false;
 164}
 165
 166static int
 167vmci_transport_notify_pkt_poll_in(struct sock *sk,
 168                                  size_t target, bool *data_ready_now)
 169{
 170        struct vsock_sock *vsk = vsock_sk(sk);
 171
 172        if (vsock_stream_has_data(vsk)) {
 173                *data_ready_now = true;
 174        } else {
 175                /* We can't read right now because there is nothing in the
 176                 * queue. Ask for notifications when there is something to
 177                 * read.
 178                 */
 179                if (sk->sk_state == SS_CONNECTED)
 180                        vsock_block_update_write_window(sk);
 181                *data_ready_now = false;
 182        }
 183
 184        return 0;
 185}
 186
 187static int
 188vmci_transport_notify_pkt_poll_out(struct sock *sk,
 189                                   size_t target, bool *space_avail_now)
 190{
 191        s64 produce_q_free_space;
 192        struct vsock_sock *vsk = vsock_sk(sk);
 193
 194        produce_q_free_space = vsock_stream_has_space(vsk);
 195        if (produce_q_free_space > 0) {
 196                *space_avail_now = true;
 197                return 0;
 198        } else if (produce_q_free_space == 0) {
 199                /* This is a connected socket but we can't currently send data.
 200                 * Nothing else to do.
 201                 */
 202                *space_avail_now = false;
 203        }
 204
 205        return 0;
 206}
 207
 208static int
 209vmci_transport_notify_pkt_recv_init(
 210                                struct sock *sk,
 211                                size_t target,
 212                                struct vmci_transport_recv_notify_data *data)
 213{
 214        struct vsock_sock *vsk = vsock_sk(sk);
 215
 216        data->consume_head = 0;
 217        data->produce_tail = 0;
 218        data->notify_on_block = false;
 219
 220        if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
 221                PKT_FIELD(vsk, write_notify_min_window) = target + 1;
 222                if (PKT_FIELD(vsk, write_notify_window) <
 223                    PKT_FIELD(vsk, write_notify_min_window)) {
 224                        /* If the current window is smaller than the new
 225                         * minimal window size, we need to reevaluate whether
 226                         * we need to notify the sender. If the number of ready
 227                         * bytes are smaller than the new window, we need to
 228                         * send a notification to the sender before we block.
 229                         */
 230
 231                        PKT_FIELD(vsk, write_notify_window) =
 232                            PKT_FIELD(vsk, write_notify_min_window);
 233                        data->notify_on_block = true;
 234                }
 235        }
 236
 237        return 0;
 238}
 239
 240static int
 241vmci_transport_notify_pkt_recv_pre_block(
 242                                struct sock *sk,
 243                                size_t target,
 244                                struct vmci_transport_recv_notify_data *data)
 245{
 246        int err = 0;
 247
 248        vsock_block_update_write_window(sk);
 249
 250        if (data->notify_on_block) {
 251                err = vmci_transport_send_read_notification(sk);
 252                if (err < 0)
 253                        return err;
 254                data->notify_on_block = false;
 255        }
 256
 257        return err;
 258}
 259
/* Called after data has been dequeued from the consume queue.  If the queue
 * was completely full before this read, mark the peer as waiting for space
 * and try to send it a READ notification.
 */
static int
vmci_transport_notify_pkt_recv_post_dequeue(
				struct sock *sk,
				size_t target,
				ssize_t copied,
				bool data_read,
				struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk;
	int err;
	bool was_full = false;
	u64 free_space;

	vsk = vsock_sk(sk);
	err = 0;

	if (data_read) {
		/* Order the free-space read after the dequeue that preceded
		 * this call.
		 */
		smp_mb();

		/* If free space now equals exactly what we copied out, the
		 * queue must have been full before the read.
		 */
		free_space =
			vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair);
		was_full = free_space == copied;

		if (was_full)
			PKT_FIELD(vsk, peer_waiting_write) = true;

		err = vmci_transport_send_read_notification(sk);
		if (err < 0)
			return err;

		/* See the comment in
		 * vmci_transport_notify_pkt_send_post_enqueue().
		 */
		sk->sk_data_ready(sk);
	}

	return err;
}
 298
 299static int
 300vmci_transport_notify_pkt_send_init(
 301                                struct sock *sk,
 302                                struct vmci_transport_send_notify_data *data)
 303{
 304        data->consume_head = 0;
 305        data->produce_tail = 0;
 306
 307        return 0;
 308}
 309
/* Called after data has been enqueued to the produce queue.  If the queue
 * was empty before this write, the peer may be blocked waiting for data, so
 * send it a WROTE notification, retrying a bounded number of times on
 * datagram send failure.
 */
static int
vmci_transport_notify_pkt_send_post_enqueue(
				struct sock *sk,
				ssize_t written,
				struct vmci_transport_send_notify_data *data)
{
	int err = 0;
	struct vsock_sock *vsk;
	bool sent_wrote = false;
	bool was_empty;
	int retries = 0;

	vsk = vsock_sk(sk);

	/* Order the buf-ready read after the enqueue that preceded this
	 * call.
	 */
	smp_mb();

	/* If the bytes now ready equal exactly what we just wrote, the
	 * queue must have been empty before the write.
	 */
	was_empty =
		vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written;
	if (was_empty) {
		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
		       !sent_wrote &&
		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
			err = vmci_transport_send_wrote(sk);
			if (err >= 0)
				sent_wrote = true;

			retries++;
		}
	}

	/* Only reachable with retries == MAX when the loop above exhausted
	 * all attempts without success.
	 */
	if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) {
		pr_err("%p unable to send wrote notification to peer\n",
		       sk);
		return err;
	}

	return err;
}
 348
 349static void
 350vmci_transport_notify_pkt_handle_pkt(
 351                                struct sock *sk,
 352                                struct vmci_transport_packet *pkt,
 353                                bool bottom_half,
 354                                struct sockaddr_vm *dst,
 355                                struct sockaddr_vm *src, bool *pkt_processed)
 356{
 357        bool processed = false;
 358
 359        switch (pkt->type) {
 360        case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
 361                vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
 362                processed = true;
 363                break;
 364        case VMCI_TRANSPORT_PACKET_TYPE_READ:
 365                vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
 366                processed = true;
 367                break;
 368        }
 369
 370        if (pkt_processed)
 371                *pkt_processed = processed;
 372}
 373
 374static void vmci_transport_notify_pkt_process_request(struct sock *sk)
 375{
 376        struct vsock_sock *vsk = vsock_sk(sk);
 377
 378        PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
 379        if (vmci_trans(vsk)->consume_size <
 380                PKT_FIELD(vsk, write_notify_min_window))
 381                PKT_FIELD(vsk, write_notify_min_window) =
 382                        vmci_trans(vsk)->consume_size;
 383}
 384
 385static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
 386{
 387        struct vsock_sock *vsk = vsock_sk(sk);
 388
 389        PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
 390        if (vmci_trans(vsk)->consume_size <
 391                PKT_FIELD(vsk, write_notify_min_window))
 392                PKT_FIELD(vsk, write_notify_min_window) =
 393                        vmci_trans(vsk)->consume_size;
 394}
 395
/* No pre-dequeue work is needed for the queue-state protocol; present only
 * to satisfy the vmci_transport_notify_ops interface.
 */
static int
vmci_transport_notify_pkt_recv_pre_dequeue(
				struct sock *sk,
				size_t target,
				struct vmci_transport_recv_notify_data *data)
{
	return 0; /* NOP for QState. */
}
 404
/* No pre-block work is needed on the send path for the queue-state
 * protocol; present only to satisfy the vmci_transport_notify_ops
 * interface.
 */
static int
vmci_transport_notify_pkt_send_pre_block(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	return 0; /* NOP for QState. */
}
 412
/* No pre-enqueue work is needed on the send path for the queue-state
 * protocol; present only to satisfy the vmci_transport_notify_ops
 * interface.
 */
static int
vmci_transport_notify_pkt_send_pre_enqueue(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	return 0; /* NOP for QState. */
}
 420
/* Notification ops for the queue-state protocol: peers are always notified
 * via control packets, with the decision to notify driven by queue fill
 * state (was-empty / was-full) rather than explicit waiting-info packets.
 */
const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
	.socket_init = vmci_transport_notify_pkt_socket_init,
	.socket_destruct = vmci_transport_notify_pkt_socket_destruct,
	.poll_in = vmci_transport_notify_pkt_poll_in,
	.poll_out = vmci_transport_notify_pkt_poll_out,
	.handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
	.recv_init = vmci_transport_notify_pkt_recv_init,
	.recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
	.recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
	.recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
	.send_init = vmci_transport_notify_pkt_send_init,
	.send_pre_block = vmci_transport_notify_pkt_send_pre_block,
	.send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
	.send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
	.process_request = vmci_transport_notify_pkt_process_request,
	.process_negotiate = vmci_transport_notify_pkt_process_negotiate,
};
 439