/* linux/net/vmw_vsock/vmci_transport_notify_qstate.c */
// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware vSockets Driver
 *
 * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
 */

#include <linux/types.h>
#include <linux/socket.h>
#include <linux/stddef.h>
#include <net/sock.h>

#include "vmci_transport_notify.h"

/* Shorthand for the queue-state ("QState") notification bookkeeping kept in
 * the transport-private area of a vsock socket.
 */
#define PKT_FIELD(vsk, field_name) \
	(vmci_trans(vsk)->notify.pkt_q_state.field_name)
  17
  18static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
  19{
  20        bool retval;
  21        u64 notify_limit;
  22
  23        if (!PKT_FIELD(vsk, peer_waiting_write))
  24                return false;
  25
  26        /* When the sender blocks, we take that as a sign that the sender is
  27         * faster than the receiver. To reduce the transmit rate of the sender,
  28         * we delay the sending of the read notification by decreasing the
  29         * write_notify_window. The notification is delayed until the number of
  30         * bytes used in the queue drops below the write_notify_window.
  31         */
  32
  33        if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
  34                PKT_FIELD(vsk, peer_waiting_write_detected) = true;
  35                if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
  36                        PKT_FIELD(vsk, write_notify_window) =
  37                            PKT_FIELD(vsk, write_notify_min_window);
  38                } else {
  39                        PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
  40                        if (PKT_FIELD(vsk, write_notify_window) <
  41                            PKT_FIELD(vsk, write_notify_min_window))
  42                                PKT_FIELD(vsk, write_notify_window) =
  43                                    PKT_FIELD(vsk, write_notify_min_window);
  44
  45                }
  46        }
  47        notify_limit = vmci_trans(vsk)->consume_size -
  48                PKT_FIELD(vsk, write_notify_window);
  49
  50        /* The notify_limit is used to delay notifications in the case where
  51         * flow control is enabled. Below the test is expressed in terms of
  52         * free space in the queue: if free_space > ConsumeSize -
  53         * write_notify_window then notify An alternate way of expressing this
  54         * is to rewrite the expression to use the data ready in the receive
  55         * queue: if write_notify_window > bufferReady then notify as
  56         * free_space == ConsumeSize - bufferReady.
  57         */
  58
  59        retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
  60                notify_limit;
  61
  62        if (retval) {
  63                /* Once we notify the peer, we reset the detected flag so the
  64                 * next wait will again cause a decrease in the window size.
  65                 */
  66
  67                PKT_FIELD(vsk, peer_waiting_write_detected) = false;
  68        }
  69        return retval;
  70}
  71
  72static void
  73vmci_transport_handle_read(struct sock *sk,
  74                           struct vmci_transport_packet *pkt,
  75                           bool bottom_half,
  76                           struct sockaddr_vm *dst, struct sockaddr_vm *src)
  77{
  78        sk->sk_write_space(sk);
  79}
  80
  81static void
  82vmci_transport_handle_wrote(struct sock *sk,
  83                            struct vmci_transport_packet *pkt,
  84                            bool bottom_half,
  85                            struct sockaddr_vm *dst, struct sockaddr_vm *src)
  86{
  87        sk->sk_data_ready(sk);
  88}
  89
  90static void vsock_block_update_write_window(struct sock *sk)
  91{
  92        struct vsock_sock *vsk = vsock_sk(sk);
  93
  94        if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size)
  95                PKT_FIELD(vsk, write_notify_window) =
  96                    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
  97                        vmci_trans(vsk)->consume_size);
  98}
  99
 100static int vmci_transport_send_read_notification(struct sock *sk)
 101{
 102        struct vsock_sock *vsk;
 103        bool sent_read;
 104        unsigned int retries;
 105        int err;
 106
 107        vsk = vsock_sk(sk);
 108        sent_read = false;
 109        retries = 0;
 110        err = 0;
 111
 112        if (vmci_transport_notify_waiting_write(vsk)) {
 113                /* Notify the peer that we have read, retrying the send on
 114                 * failure up to our maximum value.  XXX For now we just log
 115                 * the failure, but later we should schedule a work item to
 116                 * handle the resend until it succeeds.  That would require
 117                 * keeping track of work items in the vsk and cleaning them up
 118                 * upon socket close.
 119                 */
 120                while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
 121                       !sent_read &&
 122                       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
 123                        err = vmci_transport_send_read(sk);
 124                        if (err >= 0)
 125                                sent_read = true;
 126
 127                        retries++;
 128                }
 129
 130                if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read)
 131                        pr_err("%p unable to send read notification to peer\n",
 132                               sk);
 133                else
 134                        PKT_FIELD(vsk, peer_waiting_write) = false;
 135
 136        }
 137        return err;
 138}
 139
 140static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
 141{
 142        struct vsock_sock *vsk = vsock_sk(sk);
 143
 144        PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
 145        PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
 146        PKT_FIELD(vsk, peer_waiting_write) = false;
 147        PKT_FIELD(vsk, peer_waiting_write_detected) = false;
 148}
 149
 150static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
 151{
 152        PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
 153        PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
 154        PKT_FIELD(vsk, peer_waiting_write) = false;
 155        PKT_FIELD(vsk, peer_waiting_write_detected) = false;
 156}
 157
 158static int
 159vmci_transport_notify_pkt_poll_in(struct sock *sk,
 160                                  size_t target, bool *data_ready_now)
 161{
 162        struct vsock_sock *vsk = vsock_sk(sk);
 163
 164        if (vsock_stream_has_data(vsk)) {
 165                *data_ready_now = true;
 166        } else {
 167                /* We can't read right now because there is nothing in the
 168                 * queue. Ask for notifications when there is something to
 169                 * read.
 170                 */
 171                if (sk->sk_state == TCP_ESTABLISHED)
 172                        vsock_block_update_write_window(sk);
 173                *data_ready_now = false;
 174        }
 175
 176        return 0;
 177}
 178
 179static int
 180vmci_transport_notify_pkt_poll_out(struct sock *sk,
 181                                   size_t target, bool *space_avail_now)
 182{
 183        s64 produce_q_free_space;
 184        struct vsock_sock *vsk = vsock_sk(sk);
 185
 186        produce_q_free_space = vsock_stream_has_space(vsk);
 187        if (produce_q_free_space > 0) {
 188                *space_avail_now = true;
 189                return 0;
 190        } else if (produce_q_free_space == 0) {
 191                /* This is a connected socket but we can't currently send data.
 192                 * Nothing else to do.
 193                 */
 194                *space_avail_now = false;
 195        }
 196
 197        return 0;
 198}
 199
 200static int
 201vmci_transport_notify_pkt_recv_init(
 202                                struct sock *sk,
 203                                size_t target,
 204                                struct vmci_transport_recv_notify_data *data)
 205{
 206        struct vsock_sock *vsk = vsock_sk(sk);
 207
 208        data->consume_head = 0;
 209        data->produce_tail = 0;
 210        data->notify_on_block = false;
 211
 212        if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
 213                PKT_FIELD(vsk, write_notify_min_window) = target + 1;
 214                if (PKT_FIELD(vsk, write_notify_window) <
 215                    PKT_FIELD(vsk, write_notify_min_window)) {
 216                        /* If the current window is smaller than the new
 217                         * minimal window size, we need to reevaluate whether
 218                         * we need to notify the sender. If the number of ready
 219                         * bytes are smaller than the new window, we need to
 220                         * send a notification to the sender before we block.
 221                         */
 222
 223                        PKT_FIELD(vsk, write_notify_window) =
 224                            PKT_FIELD(vsk, write_notify_min_window);
 225                        data->notify_on_block = true;
 226                }
 227        }
 228
 229        return 0;
 230}
 231
 232static int
 233vmci_transport_notify_pkt_recv_pre_block(
 234                                struct sock *sk,
 235                                size_t target,
 236                                struct vmci_transport_recv_notify_data *data)
 237{
 238        int err = 0;
 239
 240        vsock_block_update_write_window(sk);
 241
 242        if (data->notify_on_block) {
 243                err = vmci_transport_send_read_notification(sk);
 244                if (err < 0)
 245                        return err;
 246                data->notify_on_block = false;
 247        }
 248
 249        return err;
 250}
 251
 252static int
 253vmci_transport_notify_pkt_recv_post_dequeue(
 254                                struct sock *sk,
 255                                size_t target,
 256                                ssize_t copied,
 257                                bool data_read,
 258                                struct vmci_transport_recv_notify_data *data)
 259{
 260        struct vsock_sock *vsk;
 261        int err;
 262        bool was_full = false;
 263        u64 free_space;
 264
 265        vsk = vsock_sk(sk);
 266        err = 0;
 267
 268        if (data_read) {
 269                smp_mb();
 270
 271                free_space =
 272                        vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair);
 273                was_full = free_space == copied;
 274
 275                if (was_full)
 276                        PKT_FIELD(vsk, peer_waiting_write) = true;
 277
 278                err = vmci_transport_send_read_notification(sk);
 279                if (err < 0)
 280                        return err;
 281
 282                /* See the comment in
 283                 * vmci_transport_notify_pkt_send_post_enqueue().
 284                 */
 285                sk->sk_data_ready(sk);
 286        }
 287
 288        return err;
 289}
 290
 291static int
 292vmci_transport_notify_pkt_send_init(
 293                                struct sock *sk,
 294                                struct vmci_transport_send_notify_data *data)
 295{
 296        data->consume_head = 0;
 297        data->produce_tail = 0;
 298
 299        return 0;
 300}
 301
 302static int
 303vmci_transport_notify_pkt_send_post_enqueue(
 304                                struct sock *sk,
 305                                ssize_t written,
 306                                struct vmci_transport_send_notify_data *data)
 307{
 308        int err = 0;
 309        struct vsock_sock *vsk;
 310        bool sent_wrote = false;
 311        bool was_empty;
 312        int retries = 0;
 313
 314        vsk = vsock_sk(sk);
 315
 316        smp_mb();
 317
 318        was_empty =
 319                vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written;
 320        if (was_empty) {
 321                while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
 322                       !sent_wrote &&
 323                       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
 324                        err = vmci_transport_send_wrote(sk);
 325                        if (err >= 0)
 326                                sent_wrote = true;
 327
 328                        retries++;
 329                }
 330        }
 331
 332        if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) {
 333                pr_err("%p unable to send wrote notification to peer\n",
 334                       sk);
 335                return err;
 336        }
 337
 338        return err;
 339}
 340
 341static void
 342vmci_transport_notify_pkt_handle_pkt(
 343                                struct sock *sk,
 344                                struct vmci_transport_packet *pkt,
 345                                bool bottom_half,
 346                                struct sockaddr_vm *dst,
 347                                struct sockaddr_vm *src, bool *pkt_processed)
 348{
 349        bool processed = false;
 350
 351        switch (pkt->type) {
 352        case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
 353                vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
 354                processed = true;
 355                break;
 356        case VMCI_TRANSPORT_PACKET_TYPE_READ:
 357                vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
 358                processed = true;
 359                break;
 360        }
 361
 362        if (pkt_processed)
 363                *pkt_processed = processed;
 364}
 365
 366static void vmci_transport_notify_pkt_process_request(struct sock *sk)
 367{
 368        struct vsock_sock *vsk = vsock_sk(sk);
 369
 370        PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
 371        if (vmci_trans(vsk)->consume_size <
 372                PKT_FIELD(vsk, write_notify_min_window))
 373                PKT_FIELD(vsk, write_notify_min_window) =
 374                        vmci_trans(vsk)->consume_size;
 375}
 376
 377static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
 378{
 379        struct vsock_sock *vsk = vsock_sk(sk);
 380
 381        PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
 382        if (vmci_trans(vsk)->consume_size <
 383                PKT_FIELD(vsk, write_notify_min_window))
 384                PKT_FIELD(vsk, write_notify_min_window) =
 385                        vmci_trans(vsk)->consume_size;
 386}
 387
static int
vmci_transport_notify_pkt_recv_pre_dequeue(
				struct sock *sk,
				size_t target,
				struct vmci_transport_recv_notify_data *data)
{
	return 0; /* NOP for QState: no pre-dequeue bookkeeping needed. */
}
 396
static int
vmci_transport_notify_pkt_send_pre_block(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	return 0; /* NOP for QState: nothing to do before blocking a send. */
}
 404
static int
vmci_transport_notify_pkt_send_pre_enqueue(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	return 0; /* NOP for QState: nothing to do before enqueueing data. */
}
 412
/* Queue-state notification ops table: sockets using this protocol signal
 * each other exclusively with READ/WROTE control packets.
 */
const struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
	.socket_init = vmci_transport_notify_pkt_socket_init,
	.socket_destruct = vmci_transport_notify_pkt_socket_destruct,
	.poll_in = vmci_transport_notify_pkt_poll_in,
	.poll_out = vmci_transport_notify_pkt_poll_out,
	.handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
	.recv_init = vmci_transport_notify_pkt_recv_init,
	.recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
	.recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
	.recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
	.send_init = vmci_transport_notify_pkt_send_init,
	.send_pre_block = vmci_transport_notify_pkt_send_pre_block,
	.send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
	.send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
	.process_request = vmci_transport_notify_pkt_process_request,
	.process_negotiate = vmci_transport_notify_pkt_process_negotiate,
};
 431