linux/drivers/net/hyperv/netvsc.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (c) 2009, Microsoft Corporation.
   4 *
   5 * Authors:
   6 *   Haiyang Zhang <haiyangz@microsoft.com>
   7 *   Hank Janssen  <hjanssen@microsoft.com>
   8 */
   9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  10
  11#include <linux/kernel.h>
  12#include <linux/sched.h>
  13#include <linux/wait.h>
  14#include <linux/mm.h>
  15#include <linux/delay.h>
  16#include <linux/io.h>
  17#include <linux/slab.h>
  18#include <linux/netdevice.h>
  19#include <linux/if_ether.h>
  20#include <linux/vmalloc.h>
  21#include <linux/rtnetlink.h>
  22#include <linux/prefetch.h>
  23
  24#include <asm/sync_bitops.h>
  25#include <asm/mshyperv.h>
  26
  27#include "hyperv_net.h"
  28#include "netvsc_trace.h"
  29
  30/*
  31 * Switch the data path from the synthetic interface to the VF
  32 * interface.
  33 */
  34int netvsc_switch_datapath(struct net_device *ndev, bool vf)
  35{
  36        struct net_device_context *net_device_ctx = netdev_priv(ndev);
  37        struct hv_device *dev = net_device_ctx->device_ctx;
  38        struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
  39        struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
  40        int ret, retry = 0;
  41
   42        /* Block sending traffic to VF if it's about to be removed */
  43        if (!vf)
  44                net_device_ctx->data_path_is_vf = vf;
  45
  46        memset(init_pkt, 0, sizeof(struct nvsp_message));
  47        init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
  48        if (vf)
  49                init_pkt->msg.v4_msg.active_dp.active_datapath =
  50                        NVSP_DATAPATH_VF;
  51        else
  52                init_pkt->msg.v4_msg.active_dp.active_datapath =
  53                        NVSP_DATAPATH_SYNTHETIC;
  54
  55again:
  56        trace_nvsp_send(ndev, init_pkt);
  57
  58        ret = vmbus_sendpacket(dev->channel, init_pkt,
  59                               sizeof(struct nvsp_message),
  60                               (unsigned long)init_pkt, VM_PKT_DATA_INBAND,
  61                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
  62
   63        /* If we failed to switch to/from the VF, let data_path_is_vf stay
   64         * false, so we use the synthetic path to send data.
   65         */
  66        if (ret) {
  67                if (ret != -EAGAIN) {
  68                        netdev_err(ndev,
  69                                   "Unable to send sw datapath msg, err: %d\n",
  70                                   ret);
  71                        return ret;
  72                }
  73
  74                if (retry++ < RETRY_MAX) {
  75                        usleep_range(RETRY_US_LO, RETRY_US_HI);
  76                        goto again;
  77                } else {
  78                        netdev_err(
  79                                ndev,
  80                                "Retry failed to send sw datapath msg, err: %d\n",
  81                                ret);
  82                        return ret;
  83                }
  84        }
  85
  86        wait_for_completion(&nv_dev->channel_init_wait);
  87        net_device_ctx->data_path_is_vf = vf;
  88
  89        return 0;
  90}
  91
   92/* Worker to set up subchannels on initial setup.
   93 * The initial hotplug event occurs in softirq context
   94 * and cannot wait for the channels.
   95 */
  96static void netvsc_subchan_work(struct work_struct *w)
  97{
  98        struct netvsc_device *nvdev =
  99                container_of(w, struct netvsc_device, subchan_work);
 100        struct rndis_device *rdev;
 101        int i, ret;
 102
 103        /* Avoid deadlock with device removal already under RTNL */
 104        if (!rtnl_trylock()) {
 105                schedule_work(w);
 106                return;
 107        }
 108
 109        rdev = nvdev->extension;
 110        if (rdev) {
 111                ret = rndis_set_subchannel(rdev->ndev, nvdev, NULL);
 112                if (ret == 0) {
 113                        netif_device_attach(rdev->ndev);
 114                } else {
 115                        /* fallback to only primary channel */
 116                        for (i = 1; i < nvdev->num_chn; i++)
 117                                netif_napi_del(&nvdev->chan_table[i].napi);
 118
 119                        nvdev->max_chn = 1;
 120                        nvdev->num_chn = 1;
 121                }
 122        }
 123
 124        rtnl_unlock();
 125}
 126
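     /* Allocate a netvsc_device and initialize its wait queues, channel-init
      * completion, default RNDIS packet limits, and subchannel worker.
      * TX stays disabled until setup completes.
      */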
 127static struct netvsc_device *alloc_net_device(void)
 128{
 129        struct netvsc_device *net_device;
 130
 131        net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
 132        if (!net_device)
 133                return NULL;
 134
 135        init_waitqueue_head(&net_device->wait_drain);
 136        net_device->destroy = false;
 137        net_device->tx_disable = true;
 138
 139        net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
 140        net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
 141
 142        init_completion(&net_device->channel_init_wait);
 143        init_waitqueue_head(&net_device->subchan_open);
 144        INIT_WORK(&net_device->subchan_work, netvsc_subchan_work);
 145
 146        return net_device;
 147}
 148
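     /* RCU callback: free the receive/send buffers, the send section map,
      * the per-channel receive buffers and completion slots, and the
      * netvsc_device itself.
      */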
 149static void free_netvsc_device(struct rcu_head *head)
 150{
 151        struct netvsc_device *nvdev
 152                = container_of(head, struct netvsc_device, rcu);
 153        int i;
 154
 155        kfree(nvdev->extension);
 156        vfree(nvdev->recv_buf);
 157        vfree(nvdev->send_buf);
 158        kfree(nvdev->send_section_map);
 159
 160        for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
 161                xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
 162                kfree(nvdev->chan_table[i].recv_buf);
 163                vfree(nvdev->chan_table[i].mrc.slots);
 164        }
 165
 166        kfree(nvdev);
 167}
 168
 169static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
 170{
 171        call_rcu(&nvdev->rcu, free_netvsc_device);
 172}
 173
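     /* Ask the host to stop using the receive buffer (NVSP revoke message). */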
 174static void netvsc_revoke_recv_buf(struct hv_device *device,
 175                                   struct netvsc_device *net_device,
 176                                   struct net_device *ndev)
 177{
 178        struct nvsp_message *revoke_packet;
 179        int ret;
 180
  181        /*
  182         * If we got a section count, it means we received a
  183         * SendReceiveBufferComplete msg (i.e. we sent a
  184         * NvspMessage1TypeSendReceiveBuffer msg); therefore, we need
  185         * to send a revoke msg here.
  186         */
 187        if (net_device->recv_section_cnt) {
 188                /* Send the revoke receive buffer */
 189                revoke_packet = &net_device->revoke_packet;
 190                memset(revoke_packet, 0, sizeof(struct nvsp_message));
 191
 192                revoke_packet->hdr.msg_type =
 193                        NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
 194                revoke_packet->msg.v1_msg.
 195                revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
 196
 197                trace_nvsp_send(ndev, revoke_packet);
 198
 199                ret = vmbus_sendpacket(device->channel,
 200                                       revoke_packet,
 201                                       sizeof(struct nvsp_message),
 202                                       VMBUS_RQST_ID_NO_RESPONSE,
 203                                       VM_PKT_DATA_INBAND, 0);
  204                /* If the failure is because the channel has been rescinded,
  205                 * ignore it since we cannot send on a rescinded channel.
  206                 * This allows us to clean up properly even when the
  207                 * channel is rescinded.
  208                 */
 209                if (device->channel->rescind)
 210                        ret = 0;
  211                /*
  212                 * If we failed here, we might as well return and accept
  213                 * a leak rather than continue and risk a bugcheck.
  214                 */
 215                if (ret != 0) {
 216                        netdev_err(ndev, "unable to send "
 217                                "revoke receive buffer to netvsp\n");
 218                        return;
 219                }
 220                net_device->recv_section_cnt = 0;
 221        }
 222}
 223
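     /* Ask the host to stop using the send buffer (NVSP revoke message). */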
 224static void netvsc_revoke_send_buf(struct hv_device *device,
 225                                   struct netvsc_device *net_device,
 226                                   struct net_device *ndev)
 227{
 228        struct nvsp_message *revoke_packet;
 229        int ret;
 230
  231        /* Deal with the send buffer we may have set up.
  232         * If we got a send section size, it means we received a
  233         * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (i.e. we sent a
  234         * NVSP_MSG1_TYPE_SEND_SEND_BUF msg); therefore, we need
  235         * to send a revoke msg here.
  236         */
 237        if (net_device->send_section_cnt) {
  238                /* Send the revoke message for the send buffer */
 239                revoke_packet = &net_device->revoke_packet;
 240                memset(revoke_packet, 0, sizeof(struct nvsp_message));
 241
 242                revoke_packet->hdr.msg_type =
 243                        NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
 244                revoke_packet->msg.v1_msg.revoke_send_buf.id =
 245                        NETVSC_SEND_BUFFER_ID;
 246
 247                trace_nvsp_send(ndev, revoke_packet);
 248
 249                ret = vmbus_sendpacket(device->channel,
 250                                       revoke_packet,
 251                                       sizeof(struct nvsp_message),
 252                                       VMBUS_RQST_ID_NO_RESPONSE,
 253                                       VM_PKT_DATA_INBAND, 0);
 254
  255                /* If the failure is because the channel has been rescinded,
  256                 * ignore it since we cannot send on a rescinded channel.
  257                 * This allows us to clean up properly even when the
  258                 * channel is rescinded.
  259                 */
 260                if (device->channel->rescind)
 261                        ret = 0;
 262
  263                /* If we failed here, we might as well return and accept
  264                 * a leak rather than continue and risk a bugcheck.
  265                 */
 266                if (ret != 0) {
 267                        netdev_err(ndev, "unable to send "
 268                                   "revoke send buffer to netvsp\n");
 269                        return;
 270                }
 271                net_device->send_section_cnt = 0;
 272        }
 273}
 274
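     /* Tear down the GPADL backing the receive buffer, if one was established. */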
 275static void netvsc_teardown_recv_gpadl(struct hv_device *device,
 276                                       struct netvsc_device *net_device,
 277                                       struct net_device *ndev)
 278{
 279        int ret;
 280
 281        if (net_device->recv_buf_gpadl_handle.gpadl_handle) {
 282                ret = vmbus_teardown_gpadl(device->channel,
 283                                           &net_device->recv_buf_gpadl_handle);
 284
  285                /* If we failed here, we might as well return and have a leak
  286                 * rather than continue and risk a bugcheck.
  287                 */
 288                if (ret != 0) {
 289                        netdev_err(ndev,
 290                                   "unable to teardown receive buffer's gpadl\n");
 291                        return;
 292                }
 293        }
 294}
 295
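     /* Tear down the GPADL backing the send buffer, if one was established. */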
 296static void netvsc_teardown_send_gpadl(struct hv_device *device,
 297                                       struct netvsc_device *net_device,
 298                                       struct net_device *ndev)
 299{
 300        int ret;
 301
 302        if (net_device->send_buf_gpadl_handle.gpadl_handle) {
 303                ret = vmbus_teardown_gpadl(device->channel,
 304                                           &net_device->send_buf_gpadl_handle);
 305
  306                /* If we failed here, we might as well return and have a leak
  307                 * rather than continue and risk a bugcheck.
  308                 */
 309                if (ret != 0) {
 310                        netdev_err(ndev,
 311                                   "unable to teardown send buffer's gpadl\n");
 312                        return;
 313                }
 314        }
 315}
 316
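     /* Allocate the receive completion slot ring for a queue, preferring the
      * NUMA node of the channel's target CPU.
      */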
 317int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
 318{
 319        struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
 320        int node = cpu_to_node(nvchan->channel->target_cpu);
 321        size_t size;
 322
 323        size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
 324        nvchan->mrc.slots = vzalloc_node(size, node);
 325        if (!nvchan->mrc.slots)
 326                nvchan->mrc.slots = vzalloc(size);
 327
 328        return nvchan->mrc.slots ? 0 : -ENOMEM;
 329}
 330
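     /* Allocate the receive and send buffers, establish their GPADLs, notify
      * the host, and record the section sizes and counts it reports.
      */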
 331static int netvsc_init_buf(struct hv_device *device,
 332                           struct netvsc_device *net_device,
 333                           const struct netvsc_device_info *device_info)
 334{
 335        struct nvsp_1_message_send_receive_buffer_complete *resp;
 336        struct net_device *ndev = hv_get_drvdata(device);
 337        struct nvsp_message *init_packet;
 338        unsigned int buf_size;
 339        size_t map_words;
 340        int i, ret = 0;
 341
 342        /* Get receive buffer area. */
 343        buf_size = device_info->recv_sections * device_info->recv_section_size;
 344        buf_size = roundup(buf_size, PAGE_SIZE);
 345
  346        /* Legacy hosts only allow a smaller receive buffer */
 347        if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
 348                buf_size = min_t(unsigned int, buf_size,
 349                                 NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);
 350
 351        net_device->recv_buf = vzalloc(buf_size);
 352        if (!net_device->recv_buf) {
 353                netdev_err(ndev,
 354                           "unable to allocate receive buffer of size %u\n",
 355                           buf_size);
 356                ret = -ENOMEM;
 357                goto cleanup;
 358        }
 359
 360        net_device->recv_buf_size = buf_size;
 361
 362        /*
 363         * Establish the gpadl handle for this buffer on this
 364         * channel.  Note: This call uses the vmbus connection rather
 365         * than the channel to establish the gpadl handle.
 366         */
 367        ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
 368                                    buf_size,
 369                                    &net_device->recv_buf_gpadl_handle);
 370        if (ret != 0) {
 371                netdev_err(ndev,
 372                        "unable to establish receive buffer's gpadl\n");
 373                goto cleanup;
 374        }
 375
 376        /* Notify the NetVsp of the gpadl handle */
 377        init_packet = &net_device->channel_init_pkt;
 378        memset(init_packet, 0, sizeof(struct nvsp_message));
 379        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
 380        init_packet->msg.v1_msg.send_recv_buf.
 381                gpadl_handle = net_device->recv_buf_gpadl_handle.gpadl_handle;
 382        init_packet->msg.v1_msg.
 383                send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
 384
 385        trace_nvsp_send(ndev, init_packet);
 386
 387        /* Send the gpadl notification request */
 388        ret = vmbus_sendpacket(device->channel, init_packet,
 389                               sizeof(struct nvsp_message),
 390                               (unsigned long)init_packet,
 391                               VM_PKT_DATA_INBAND,
 392                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 393        if (ret != 0) {
 394                netdev_err(ndev,
 395                        "unable to send receive buffer's gpadl to netvsp\n");
 396                goto cleanup;
 397        }
 398
 399        wait_for_completion(&net_device->channel_init_wait);
 400
 401        /* Check the response */
 402        resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
 403        if (resp->status != NVSP_STAT_SUCCESS) {
 404                netdev_err(ndev,
 405                           "Unable to complete receive buffer initialization with NetVsp - status %d\n",
 406                           resp->status);
 407                ret = -EINVAL;
 408                goto cleanup;
 409        }
 410
 411        /* Parse the response */
 412        netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
 413                   resp->num_sections, resp->sections[0].sub_alloc_size,
 414                   resp->sections[0].num_sub_allocs);
 415
 416        /* There should only be one section for the entire receive buffer */
 417        if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
 418                ret = -EINVAL;
 419                goto cleanup;
 420        }
 421
 422        net_device->recv_section_size = resp->sections[0].sub_alloc_size;
 423        net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;
 424
 425        /* Ensure buffer will not overflow */
 426        if (net_device->recv_section_size < NETVSC_MTU_MIN || (u64)net_device->recv_section_size *
 427            (u64)net_device->recv_section_cnt > (u64)buf_size) {
 428                netdev_err(ndev, "invalid recv_section_size %u\n",
 429                           net_device->recv_section_size);
 430                ret = -EINVAL;
 431                goto cleanup;
 432        }
 433
 434        for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
 435                struct netvsc_channel *nvchan = &net_device->chan_table[i];
 436
 437                nvchan->recv_buf = kzalloc(net_device->recv_section_size, GFP_KERNEL);
 438                if (nvchan->recv_buf == NULL) {
 439                        ret = -ENOMEM;
 440                        goto cleanup;
 441                }
 442        }
 443
  444        /* Set up the receive completion ring.
  445         * Add 1 to recv_section_cnt because at least one entry in a
  446         * ring buffer has to be empty.
  447         */
 448        net_device->recv_completion_cnt = net_device->recv_section_cnt + 1;
 449        ret = netvsc_alloc_recv_comp_ring(net_device, 0);
 450        if (ret)
 451                goto cleanup;
 452
  453        /* Now set up the send buffer. */
 454        buf_size = device_info->send_sections * device_info->send_section_size;
 455        buf_size = round_up(buf_size, PAGE_SIZE);
 456
 457        net_device->send_buf = vzalloc(buf_size);
 458        if (!net_device->send_buf) {
 459                netdev_err(ndev, "unable to allocate send buffer of size %u\n",
 460                           buf_size);
 461                ret = -ENOMEM;
 462                goto cleanup;
 463        }
 464        net_device->send_buf_size = buf_size;
 465
 466        /* Establish the gpadl handle for this buffer on this
 467         * channel.  Note: This call uses the vmbus connection rather
 468         * than the channel to establish the gpadl handle.
 469         */
 470        ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
 471                                    buf_size,
 472                                    &net_device->send_buf_gpadl_handle);
 473        if (ret != 0) {
 474                netdev_err(ndev,
 475                           "unable to establish send buffer's gpadl\n");
 476                goto cleanup;
 477        }
 478
 479        /* Notify the NetVsp of the gpadl handle */
 480        init_packet = &net_device->channel_init_pkt;
 481        memset(init_packet, 0, sizeof(struct nvsp_message));
 482        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
 483        init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
 484                net_device->send_buf_gpadl_handle.gpadl_handle;
 485        init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
 486
 487        trace_nvsp_send(ndev, init_packet);
 488
 489        /* Send the gpadl notification request */
 490        ret = vmbus_sendpacket(device->channel, init_packet,
 491                               sizeof(struct nvsp_message),
 492                               (unsigned long)init_packet,
 493                               VM_PKT_DATA_INBAND,
 494                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 495        if (ret != 0) {
 496                netdev_err(ndev,
 497                           "unable to send send buffer's gpadl to netvsp\n");
 498                goto cleanup;
 499        }
 500
 501        wait_for_completion(&net_device->channel_init_wait);
 502
 503        /* Check the response */
 504        if (init_packet->msg.v1_msg.
 505            send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
 506                netdev_err(ndev, "Unable to complete send buffer "
 507                           "initialization with NetVsp - status %d\n",
 508                           init_packet->msg.v1_msg.
 509                           send_send_buf_complete.status);
 510                ret = -EINVAL;
 511                goto cleanup;
 512        }
 513
 514        /* Parse the response */
 515        net_device->send_section_size = init_packet->msg.
 516                                v1_msg.send_send_buf_complete.section_size;
 517        if (net_device->send_section_size < NETVSC_MTU_MIN) {
 518                netdev_err(ndev, "invalid send_section_size %u\n",
 519                           net_device->send_section_size);
 520                ret = -EINVAL;
 521                goto cleanup;
 522        }
 523
 524        /* Section count is simply the size divided by the section size. */
 525        net_device->send_section_cnt = buf_size / net_device->send_section_size;
 526
 527        netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
 528                   net_device->send_section_size, net_device->send_section_cnt);
 529
  530        /* Set up state for managing the send buffer. */
 531        map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);
 532
 533        net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
 534        if (net_device->send_section_map == NULL) {
 535                ret = -ENOMEM;
 536                goto cleanup;
 537        }
 538
 539        goto exit;
 540
 541cleanup:
 542        netvsc_revoke_recv_buf(device, net_device, ndev);
 543        netvsc_revoke_send_buf(device, net_device, ndev);
 544        netvsc_teardown_recv_gpadl(device, net_device, ndev);
 545        netvsc_teardown_send_gpadl(device, net_device, ndev);
 546
 547exit:
 548        return ret;
 549}
 550
 551/* Negotiate NVSP protocol version */
 552static int negotiate_nvsp_ver(struct hv_device *device,
 553                              struct netvsc_device *net_device,
 554                              struct nvsp_message *init_packet,
 555                              u32 nvsp_ver)
 556{
 557        struct net_device *ndev = hv_get_drvdata(device);
 558        int ret;
 559
 560        memset(init_packet, 0, sizeof(struct nvsp_message));
 561        init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
 562        init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
 563        init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
 564        trace_nvsp_send(ndev, init_packet);
 565
 566        /* Send the init request */
 567        ret = vmbus_sendpacket(device->channel, init_packet,
 568                               sizeof(struct nvsp_message),
 569                               (unsigned long)init_packet,
 570                               VM_PKT_DATA_INBAND,
 571                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 572
 573        if (ret != 0)
 574                return ret;
 575
 576        wait_for_completion(&net_device->channel_init_wait);
 577
 578        if (init_packet->msg.init_msg.init_complete.status !=
 579            NVSP_STAT_SUCCESS)
 580                return -EINVAL;
 581
 582        if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
 583                return 0;
 584
 585        /* NVSPv2 or later: Send NDIS config */
 586        memset(init_packet, 0, sizeof(struct nvsp_message));
 587        init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
 588        init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
 589        init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;
 590
 591        if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) {
 592                if (hv_is_isolation_supported())
 593                        netdev_info(ndev, "SR-IOV not advertised by guests on the host supporting isolation\n");
 594                else
 595                        init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;
 596
 597                /* Teaming bit is needed to receive link speed updates */
 598                init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
 599        }
 600
 601        if (nvsp_ver >= NVSP_PROTOCOL_VERSION_61)
 602                init_packet->msg.v2_msg.send_ndis_config.capability.rsc = 1;
 603
 604        trace_nvsp_send(ndev, init_packet);
 605
 606        ret = vmbus_sendpacket(device->channel, init_packet,
 607                                sizeof(struct nvsp_message),
 608                                VMBUS_RQST_ID_NO_RESPONSE,
 609                                VM_PKT_DATA_INBAND, 0);
 610
 611        return ret;
 612}
 613
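     /* Negotiate the NVSP protocol version, send the NDIS version, and set up
      * the receive and send buffers.
      */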
 614static int netvsc_connect_vsp(struct hv_device *device,
 615                              struct netvsc_device *net_device,
 616                              const struct netvsc_device_info *device_info)
 617{
 618        struct net_device *ndev = hv_get_drvdata(device);
 619        static const u32 ver_list[] = {
 620                NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
 621                NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5,
 622                NVSP_PROTOCOL_VERSION_6, NVSP_PROTOCOL_VERSION_61
 623        };
 624        struct nvsp_message *init_packet;
 625        int ndis_version, i, ret;
 626
 627        init_packet = &net_device->channel_init_pkt;
 628
 629        /* Negotiate the latest NVSP protocol supported */
 630        for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--)
 631                if (negotiate_nvsp_ver(device, net_device, init_packet,
 632                                       ver_list[i])  == 0) {
 633                        net_device->nvsp_version = ver_list[i];
 634                        break;
 635                }
 636
 637        if (i < 0) {
 638                ret = -EPROTO;
 639                goto cleanup;
 640        }
 641
 642        if (hv_is_isolation_supported() && net_device->nvsp_version < NVSP_PROTOCOL_VERSION_61) {
 643                netdev_err(ndev, "Invalid NVSP version 0x%x (expected >= 0x%x) from the host supporting isolation\n",
 644                           net_device->nvsp_version, NVSP_PROTOCOL_VERSION_61);
 645                ret = -EPROTO;
 646                goto cleanup;
 647        }
 648
 649        pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);
 650
 651        /* Send the ndis version */
 652        memset(init_packet, 0, sizeof(struct nvsp_message));
 653
 654        if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
 655                ndis_version = 0x00060001;
 656        else
 657                ndis_version = 0x0006001e;
 658
 659        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
 660        init_packet->msg.v1_msg.
 661                send_ndis_ver.ndis_major_ver =
 662                                (ndis_version & 0xFFFF0000) >> 16;
 663        init_packet->msg.v1_msg.
 664                send_ndis_ver.ndis_minor_ver =
 665                                ndis_version & 0xFFFF;
 666
 667        trace_nvsp_send(ndev, init_packet);
 668
 669        /* Send the init request */
 670        ret = vmbus_sendpacket(device->channel, init_packet,
 671                                sizeof(struct nvsp_message),
 672                                VMBUS_RQST_ID_NO_RESPONSE,
 673                                VM_PKT_DATA_INBAND, 0);
 674        if (ret != 0)
 675                goto cleanup;
 676
 677
 678        ret = netvsc_init_buf(device, net_device, device_info);
 679
 680cleanup:
 681        return ret;
 682}
 683
 684/*
 685 * netvsc_device_remove - Callback when the root bus device is removed
 686 */
 687void netvsc_device_remove(struct hv_device *device)
 688{
 689        struct net_device *ndev = hv_get_drvdata(device);
 690        struct net_device_context *net_device_ctx = netdev_priv(ndev);
 691        struct netvsc_device *net_device
 692                = rtnl_dereference(net_device_ctx->nvdev);
 693        int i;
 694
 695        /*
 696         * Revoke receive buffer. If host is pre-Win2016 then tear down
 697         * receive buffer GPADL. Do the same for send buffer.
 698         */
 699        netvsc_revoke_recv_buf(device, net_device, ndev);
 700        if (vmbus_proto_version < VERSION_WIN10)
 701                netvsc_teardown_recv_gpadl(device, net_device, ndev);
 702
 703        netvsc_revoke_send_buf(device, net_device, ndev);
 704        if (vmbus_proto_version < VERSION_WIN10)
 705                netvsc_teardown_send_gpadl(device, net_device, ndev);
 706
 707        RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
 708
 709        /* Disable NAPI and disassociate its context from the device. */
 710        for (i = 0; i < net_device->num_chn; i++) {
 711                /* See also vmbus_reset_channel_cb(). */
 712                napi_disable(&net_device->chan_table[i].napi);
 713                netif_napi_del(&net_device->chan_table[i].napi);
 714        }
 715
 716        /*
 717         * At this point, no one should be accessing net_device
 718         * except in here
 719         */
 720        netdev_dbg(ndev, "net device safe to remove\n");
 721
 722        /* Now, we can close the channel safely */
 723        vmbus_close(device->channel);
 724
  725        /*
  726         * If the host is Win2016 or higher, then we do the GPADL teardown
  727         * here after VMBus is closed.
  728         */
 729        if (vmbus_proto_version >= VERSION_WIN10) {
 730                netvsc_teardown_recv_gpadl(device, net_device, ndev);
 731                netvsc_teardown_send_gpadl(device, net_device, ndev);
 732        }
 733
 734        /* Release all resources */
 735        free_netvsc_device_rcu(net_device);
 736}
 737
 738#define RING_AVAIL_PERCENT_HIWATER 20
 739#define RING_AVAIL_PERCENT_LOWATER 10
 740
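     /* Return a send buffer section to the free map. */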
 741static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
 742                                         u32 index)
 743{
 744        sync_change_bit(index, net_device->send_section_map);
 745}
 746
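     /* Completion handler for a transmitted RNDIS packet: release the send
      * buffer section, update per-queue stats, free the skb, and wake the
      * transmit queue once enough ring space is available.
      */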
 747static void netvsc_send_tx_complete(struct net_device *ndev,
 748                                    struct netvsc_device *net_device,
 749                                    struct vmbus_channel *channel,
 750                                    const struct vmpacket_descriptor *desc,
 751                                    int budget)
 752{
 753        struct net_device_context *ndev_ctx = netdev_priv(ndev);
 754        struct sk_buff *skb;
 755        u16 q_idx = 0;
 756        int queue_sends;
 757        u64 cmd_rqst;
 758
 759        cmd_rqst = channel->request_addr_callback(channel, (u64)desc->trans_id);
 760        if (cmd_rqst == VMBUS_RQST_ERROR) {
 761                netdev_err(ndev, "Incorrect transaction id\n");
 762                return;
 763        }
 764
 765        skb = (struct sk_buff *)(unsigned long)cmd_rqst;
 766
 767        /* Notify the layer above us */
 768        if (likely(skb)) {
 769                const struct hv_netvsc_packet *packet
 770                        = (struct hv_netvsc_packet *)skb->cb;
 771                u32 send_index = packet->send_buf_index;
 772                struct netvsc_stats *tx_stats;
 773
 774                if (send_index != NETVSC_INVALID_INDEX)
 775                        netvsc_free_send_slot(net_device, send_index);
 776                q_idx = packet->q_idx;
 777
 778                tx_stats = &net_device->chan_table[q_idx].tx_stats;
 779
 780                u64_stats_update_begin(&tx_stats->syncp);
 781                tx_stats->packets += packet->total_packets;
 782                tx_stats->bytes += packet->total_bytes;
 783                u64_stats_update_end(&tx_stats->syncp);
 784
 785                napi_consume_skb(skb, budget);
 786        }
 787
 788        queue_sends =
 789                atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
 790
 791        if (unlikely(net_device->destroy)) {
 792                if (queue_sends == 0)
 793                        wake_up(&net_device->wait_drain);
 794        } else {
 795                struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);
 796
 797                if (netif_tx_queue_stopped(txq) && !net_device->tx_disable &&
 798                    (hv_get_avail_to_write_percent(&channel->outbound) >
 799                     RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) {
 800                        netif_tx_wake_queue(txq);
 801                        ndev_ctx->eth_stats.wake_queue++;
 802                }
 803        }
 804}
 805
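     /* Dispatch a completion from the host: wake the channel-init waiter for
      * control responses, or finish a transmitted RNDIS data packet.
      */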
 806static void netvsc_send_completion(struct net_device *ndev,
 807                                   struct netvsc_device *net_device,
 808                                   struct vmbus_channel *incoming_channel,
 809                                   const struct vmpacket_descriptor *desc,
 810                                   int budget)
 811{
 812        const struct nvsp_message *nvsp_packet;
 813        u32 msglen = hv_pkt_datalen(desc);
 814        struct nvsp_message *pkt_rqst;
 815        u64 cmd_rqst;
 816
 817        /* First check if this is a VMBUS completion without data payload */
 818        if (!msglen) {
 819                cmd_rqst = incoming_channel->request_addr_callback(incoming_channel,
 820                                                                   (u64)desc->trans_id);
 821                if (cmd_rqst == VMBUS_RQST_ERROR) {
 822                        netdev_err(ndev, "Invalid transaction id\n");
 823                        return;
 824                }
 825
 826                pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst;
 827                switch (pkt_rqst->hdr.msg_type) {
 828                case NVSP_MSG4_TYPE_SWITCH_DATA_PATH:
 829                        complete(&net_device->channel_init_wait);
 830                        break;
 831
 832                default:
 833                        netdev_err(ndev, "Unexpected VMBUS completion!!\n");
 834                }
 835                return;
 836        }
 837
 838        /* Ensure packet is big enough to read header fields */
 839        if (msglen < sizeof(struct nvsp_message_header)) {
 840                netdev_err(ndev, "nvsp_message length too small: %u\n", msglen);
 841                return;
 842        }
 843
 844        nvsp_packet = hv_pkt_data(desc);
 845        switch (nvsp_packet->hdr.msg_type) {
 846        case NVSP_MSG_TYPE_INIT_COMPLETE:
 847                if (msglen < sizeof(struct nvsp_message_header) +
 848                                sizeof(struct nvsp_message_init_complete)) {
 849                        netdev_err(ndev, "nvsp_msg length too small: %u\n",
 850                                   msglen);
 851                        return;
 852                }
 853                fallthrough;
 854
 855        case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
 856                if (msglen < sizeof(struct nvsp_message_header) +
 857                                sizeof(struct nvsp_1_message_send_receive_buffer_complete)) {
 858                        netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
 859                                   msglen);
 860                        return;
 861                }
 862                fallthrough;
 863
 864        case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
 865                if (msglen < sizeof(struct nvsp_message_header) +
 866                                sizeof(struct nvsp_1_message_send_send_buffer_complete)) {
 867                        netdev_err(ndev, "nvsp_msg1 length too small: %u\n",
 868                                   msglen);
 869                        return;
 870                }
 871                fallthrough;
 872
 873        case NVSP_MSG5_TYPE_SUBCHANNEL:
 874                if (msglen < sizeof(struct nvsp_message_header) +
 875                                sizeof(struct nvsp_5_subchannel_complete)) {
 876                        netdev_err(ndev, "nvsp_msg5 length too small: %u\n",
 877                                   msglen);
 878                        return;
 879                }
 880                /* Copy the response back */
 881                memcpy(&net_device->channel_init_pkt, nvsp_packet,
 882                       sizeof(struct nvsp_message));
 883                complete(&net_device->channel_init_wait);
 884                break;
 885
 886        case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
 887                netvsc_send_tx_complete(ndev, net_device, incoming_channel,
 888                                        desc, budget);
 889                break;
 890
 891        default:
 892                netdev_err(ndev,
 893                           "Unknown send completion type %d received!!\n",
 894                           nvsp_packet->hdr.msg_type);
 895        }
 896}
 897
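     /* Find and claim a free send buffer section; returns NETVSC_INVALID_INDEX
      * when none are available.
      */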
 898static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
 899{
 900        unsigned long *map_addr = net_device->send_section_map;
 901        unsigned int i;
 902
 903        for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
 904                if (sync_test_and_set_bit(i, map_addr) == 0)
 905                        return i;
 906        }
 907
 908        return NETVSC_INVALID_INDEX;
 909}
 910
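     /* Copy the packet's page buffers into the chosen send buffer section,
      * adding alignment padding when more packets are expected to follow.
      */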
 911static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
 912                                    unsigned int section_index,
 913                                    u32 pend_size,
 914                                    struct hv_netvsc_packet *packet,
 915                                    struct rndis_message *rndis_msg,
 916                                    struct hv_page_buffer *pb,
 917                                    bool xmit_more)
 918{
 919        char *start = net_device->send_buf;
 920        char *dest = start + (section_index * net_device->send_section_size)
 921                     + pend_size;
 922        int i;
 923        u32 padding = 0;
 924        u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
 925                packet->page_buf_cnt;
 926        u32 remain;
 927
 928        /* Add padding */
 929        remain = packet->total_data_buflen & (net_device->pkt_align - 1);
 930        if (xmit_more && remain) {
 931                padding = net_device->pkt_align - remain;
 932                rndis_msg->msg_len += padding;
 933                packet->total_data_buflen += padding;
 934        }
 935
 936        for (i = 0; i < page_count; i++) {
 937                char *src = phys_to_virt(pb[i].pfn << HV_HYP_PAGE_SHIFT);
 938                u32 offset = pb[i].offset;
 939                u32 len = pb[i].len;
 940
 941                memcpy(dest, (src + offset), len);
 942                dest += len;
 943        }
 944
 945        if (padding)
 946                memset(dest, 0, padding);
 947}
 948
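     /* Build the NVSP SEND_RNDIS_PKT message and submit it on the channel,
      * stopping the transmit queue when the outbound ring is nearly full.
      */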
 949static inline int netvsc_send_pkt(
 950        struct hv_device *device,
 951        struct hv_netvsc_packet *packet,
 952        struct netvsc_device *net_device,
 953        struct hv_page_buffer *pb,
 954        struct sk_buff *skb)
 955{
 956        struct nvsp_message nvmsg;
 957        struct nvsp_1_message_send_rndis_packet *rpkt =
 958                &nvmsg.msg.v1_msg.send_rndis_pkt;
 959        struct netvsc_channel * const nvchan =
 960                &net_device->chan_table[packet->q_idx];
 961        struct vmbus_channel *out_channel = nvchan->channel;
 962        struct net_device *ndev = hv_get_drvdata(device);
 963        struct net_device_context *ndev_ctx = netdev_priv(ndev);
 964        struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
 965        u64 req_id;
 966        int ret;
 967        u32 ring_avail = hv_get_avail_to_write_percent(&out_channel->outbound);
 968
 969        memset(&nvmsg, 0, sizeof(struct nvsp_message));
 970        nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
 971        if (skb)
 972                rpkt->channel_type = 0;         /* 0 is RMC_DATA */
 973        else
 974                rpkt->channel_type = 1;         /* 1 is RMC_CONTROL */
 975
 976        rpkt->send_buf_section_index = packet->send_buf_index;
 977        if (packet->send_buf_index == NETVSC_INVALID_INDEX)
 978                rpkt->send_buf_section_size = 0;
 979        else
 980                rpkt->send_buf_section_size = packet->total_data_buflen;
 981
 982        req_id = (ulong)skb;
 983
 984        if (out_channel->rescind)
 985                return -ENODEV;
 986
 987        trace_nvsp_send_pkt(ndev, out_channel, rpkt);
 988
 989        if (packet->page_buf_cnt) {
 990                if (packet->cp_partial)
 991                        pb += packet->rmsg_pgcnt;
 992
 993                ret = vmbus_sendpacket_pagebuffer(out_channel,
 994                                                  pb, packet->page_buf_cnt,
 995                                                  &nvmsg, sizeof(nvmsg),
 996                                                  req_id);
 997        } else {
 998                ret = vmbus_sendpacket(out_channel,
 999                                       &nvmsg, sizeof(nvmsg),
1000                                       req_id, VM_PKT_DATA_INBAND,
1001                                       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
1002        }
1003
1004        if (ret == 0) {
1005                atomic_inc_return(&nvchan->queue_sends);
1006
1007                if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
1008                        netif_tx_stop_queue(txq);
1009                        ndev_ctx->eth_stats.stop_queue++;
1010                }
1011        } else if (ret == -EAGAIN) {
1012                netif_tx_stop_queue(txq);
1013                ndev_ctx->eth_stats.stop_queue++;
1014        } else {
1015                netdev_err(ndev,
1016                           "Unable to send packet pages %u len %u, ret %d\n",
1017                           packet->page_buf_cnt, packet->total_data_buflen,
1018                           ret);
1019        }
1020
1021        if (netif_tx_queue_stopped(txq) &&
1022            atomic_read(&nvchan->queue_sends) < 1 &&
1023            !net_device->tx_disable) {
1024                netif_tx_wake_queue(txq);
1025                ndev_ctx->eth_stats.wake_queue++;
1026                if (ret == -EAGAIN)
1027                        ret = -ENOSPC;
1028        }
1029
1030        return ret;
1031}
1032
1033/* Move packet out of multi send data (msd), and clear msd */
1034static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
1035                                struct sk_buff **msd_skb,
1036                                struct multi_send_data *msdp)
1037{
1038        *msd_skb = msdp->skb;
1039        *msd_send = msdp->pkt;
1040        msdp->skb = NULL;
1041        msdp->pkt = NULL;
1042        msdp->count = 0;
1043}
1044
1045/* RCU already held by caller */
 1046/* The batching/bouncing logic attempts to optimize
1047 * performance.
1048 *
1049 * For small, non-LSO packets we copy the packet to a send buffer
1050 * which is pre-registered with the Hyper-V side. This enables the
1051 * hypervisor to avoid remapping the aperture to access the packet
1052 * descriptor and data.
1053 *
1054 * If we already started using a buffer and the netdev is transmitting
1055 * a burst of packets, keep on copying into the buffer until it is
1056 * full or we are done collecting a burst. If there is an existing
1057 * buffer with space for the RNDIS descriptor but not the packet, copy
1058 * the RNDIS descriptor to the buffer, keeping the packet in place.
1059 *
1060 * If we do batching and send more than one packet using a single
1061 * NetVSC message, free the SKBs of the packets copied, except for the
1062 * last packet. This is done to streamline the handling of the case
1063 * where the last packet only had the RNDIS descriptor copied to the
1064 * send buffer, with the data pointers included in the NetVSC message.
1065 */
1066int netvsc_send(struct net_device *ndev,
1067                struct hv_netvsc_packet *packet,
1068                struct rndis_message *rndis_msg,
1069                struct hv_page_buffer *pb,
1070                struct sk_buff *skb,
1071                bool xdp_tx)
1072{
1073        struct net_device_context *ndev_ctx = netdev_priv(ndev);
1074        struct netvsc_device *net_device
1075                = rcu_dereference_bh(ndev_ctx->nvdev);
1076        struct hv_device *device = ndev_ctx->device_ctx;
1077        int ret = 0;
1078        struct netvsc_channel *nvchan;
1079        u32 pktlen = packet->total_data_buflen, msd_len = 0;
1080        unsigned int section_index = NETVSC_INVALID_INDEX;
1081        struct multi_send_data *msdp;
1082        struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
1083        struct sk_buff *msd_skb = NULL;
1084        bool try_batch, xmit_more;
1085
 1086        /* If the device is rescinded, return an error and the packet will be dropped. */
1087        if (unlikely(!net_device || net_device->destroy))
1088                return -ENODEV;
1089
1090        nvchan = &net_device->chan_table[packet->q_idx];
1091        packet->send_buf_index = NETVSC_INVALID_INDEX;
1092        packet->cp_partial = false;
1093
1094        /* Send a control message or XDP packet directly without accessing
1095         * msd (Multi-Send Data) field which may be changed during data packet
1096         * processing.
1097         */
1098        if (!skb || xdp_tx)
1099                return netvsc_send_pkt(device, packet, net_device, pb, skb);
1100
1101        /* batch packets in send buffer if possible */
1102        msdp = &nvchan->msd;
1103        if (msdp->pkt)
1104                msd_len = msdp->pkt->total_data_buflen;
1105
 1106        try_batch = msd_len > 0 && msdp->count < net_device->max_pkt;
1107        if (try_batch && msd_len + pktlen + net_device->pkt_align <
1108            net_device->send_section_size) {
1109                section_index = msdp->pkt->send_buf_index;
1110
1111        } else if (try_batch && msd_len + packet->rmsg_size <
1112                   net_device->send_section_size) {
1113                section_index = msdp->pkt->send_buf_index;
1114                packet->cp_partial = true;
1115
1116        } else if (pktlen + net_device->pkt_align <
1117                   net_device->send_section_size) {
1118                section_index = netvsc_get_next_send_section(net_device);
1119                if (unlikely(section_index == NETVSC_INVALID_INDEX)) {
1120                        ++ndev_ctx->eth_stats.tx_send_full;
1121                } else {
1122                        move_pkt_msd(&msd_send, &msd_skb, msdp);
1123                        msd_len = 0;
1124                }
1125        }
1126
 1127        /* Keep aggregating only if the stack says more data is coming,
 1128         * we are not doing a mixed-mode send, and the queue is not flow blocked.
 1129         */
1130        xmit_more = netdev_xmit_more() &&
1131                !packet->cp_partial &&
1132                !netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));
1133
1134        if (section_index != NETVSC_INVALID_INDEX) {
1135                netvsc_copy_to_send_buf(net_device,
1136                                        section_index, msd_len,
1137                                        packet, rndis_msg, pb, xmit_more);
1138
1139                packet->send_buf_index = section_index;
1140
1141                if (packet->cp_partial) {
1142                        packet->page_buf_cnt -= packet->rmsg_pgcnt;
1143                        packet->total_data_buflen = msd_len + packet->rmsg_size;
1144                } else {
1145                        packet->page_buf_cnt = 0;
1146                        packet->total_data_buflen += msd_len;
1147                }
1148
1149                if (msdp->pkt) {
1150                        packet->total_packets += msdp->pkt->total_packets;
1151                        packet->total_bytes += msdp->pkt->total_bytes;
1152                }
1153
1154                if (msdp->skb)
1155                        dev_consume_skb_any(msdp->skb);
1156
1157                if (xmit_more) {
1158                        msdp->skb = skb;
1159                        msdp->pkt = packet;
1160                        msdp->count++;
1161                } else {
1162                        cur_send = packet;
1163                        msdp->skb = NULL;
1164                        msdp->pkt = NULL;
1165                        msdp->count = 0;
1166                }
1167        } else {
1168                move_pkt_msd(&msd_send, &msd_skb, msdp);
1169                cur_send = packet;
1170        }
1171
1172        if (msd_send) {
1173                int m_ret = netvsc_send_pkt(device, msd_send, net_device,
1174                                            NULL, msd_skb);
1175
1176                if (m_ret != 0) {
1177                        netvsc_free_send_slot(net_device,
1178                                              msd_send->send_buf_index);
1179                        dev_kfree_skb_any(msd_skb);
1180                }
1181        }
1182
1183        if (cur_send)
1184                ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);
1185
1186        if (ret != 0 && section_index != NETVSC_INVALID_INDEX)
1187                netvsc_free_send_slot(net_device, section_index);
1188
1189        return ret;
1190}
1191
1192/* Send pending recv completions */
1193static int send_recv_completions(struct net_device *ndev,
1194                                 struct netvsc_device *nvdev,
1195                                 struct netvsc_channel *nvchan)
1196{
1197        struct multi_recv_comp *mrc = &nvchan->mrc;
1198        struct recv_comp_msg {
1199                struct nvsp_message_header hdr;
1200                u32 status;
 1201        } __packed;
1202        struct recv_comp_msg msg = {
1203                .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
1204        };
1205        int ret;
1206
1207        while (mrc->first != mrc->next) {
1208                const struct recv_comp_data *rcd
1209                        = mrc->slots + mrc->first;
1210
1211                msg.status = rcd->status;
1212                ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
1213                                       rcd->tid, VM_PKT_COMP, 0);
1214                if (unlikely(ret)) {
1215                        struct net_device_context *ndev_ctx = netdev_priv(ndev);
1216
1217                        ++ndev_ctx->eth_stats.rx_comp_busy;
1218                        return ret;
1219                }
1220
1221                if (++mrc->first == nvdev->recv_completion_cnt)
1222                        mrc->first = 0;
1223        }
1224
1225        /* receive completion ring has been emptied */
1226        if (unlikely(nvdev->destroy))
1227                wake_up(&nvdev->wait_drain);
1228
1229        return 0;
1230}
1231
 1232/* Count how many receive completion slots are filled and how many are free */
1233static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
1234                                 const struct multi_recv_comp *mrc,
1235                                 u32 *filled, u32 *avail)
1236{
1237        u32 count = nvdev->recv_completion_cnt;
1238
1239        if (mrc->next >= mrc->first)
1240                *filled = mrc->next - mrc->first;
1241        else
1242                *filled = (count - mrc->first) + mrc->next;
1243
1244        *avail = count - *filled - 1;
1245}
1246
 1247/* Add a receive completion to the ring to send to the host. */
1248static void enq_receive_complete(struct net_device *ndev,
1249                                 struct netvsc_device *nvdev, u16 q_idx,
1250                                 u64 tid, u32 status)
1251{
1252        struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
1253        struct multi_recv_comp *mrc = &nvchan->mrc;
1254        struct recv_comp_data *rcd;
1255        u32 filled, avail;
1256
1257        recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
1258
1259        if (unlikely(filled > NAPI_POLL_WEIGHT)) {
1260                send_recv_completions(ndev, nvdev, nvchan);
1261                recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
1262        }
1263
1264        if (unlikely(!avail)) {
1265                netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
1266                           q_idx, tid);
1267                return;
1268        }
1269
1270        rcd = mrc->slots + mrc->next;
1271        rcd->tid = tid;
1272        rcd->status = status;
1273
1274        if (++mrc->next == nvdev->recv_completion_cnt)
1275                mrc->next = 0;
1276}
1277
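     /* Process one transfer-page packet from the host: validate it, pass each
      * contained RNDIS packet to the filter layer, and queue a receive
      * completion for the transaction.
      */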
1278static int netvsc_receive(struct net_device *ndev,
1279                          struct netvsc_device *net_device,
1280                          struct netvsc_channel *nvchan,
1281                          const struct vmpacket_descriptor *desc)
1282{
1283        struct net_device_context *net_device_ctx = netdev_priv(ndev);
1284        struct vmbus_channel *channel = nvchan->channel;
1285        const struct vmtransfer_page_packet_header *vmxferpage_packet
1286                = container_of(desc, const struct vmtransfer_page_packet_header, d);
1287        const struct nvsp_message *nvsp = hv_pkt_data(desc);
1288        u32 msglen = hv_pkt_datalen(desc);
1289        u16 q_idx = channel->offermsg.offer.sub_channel_index;
1290        char *recv_buf = net_device->recv_buf;
1291        u32 status = NVSP_STAT_SUCCESS;
1292        int i;
1293        int count = 0;
1294
1295        /* Ensure packet is big enough to read header fields */
1296        if (msglen < sizeof(struct nvsp_message_header)) {
1297                netif_err(net_device_ctx, rx_err, ndev,
1298                          "invalid nvsp header, length too small: %u\n",
1299                          msglen);
1300                return 0;
1301        }
1302
1303        /* Make sure this is a valid nvsp packet */
1304        if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
1305                netif_err(net_device_ctx, rx_err, ndev,
1306                          "Unknown nvsp packet type received %u\n",
1307                          nvsp->hdr.msg_type);
1308                return 0;
1309        }
1310
1311        /* Validate xfer page pkt header */
1312        if ((desc->offset8 << 3) < sizeof(struct vmtransfer_page_packet_header)) {
1313                netif_err(net_device_ctx, rx_err, ndev,
1314                          "Invalid xfer page pkt, offset too small: %u\n",
1315                          desc->offset8 << 3);
1316                return 0;
1317        }
1318
1319        if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
1320                netif_err(net_device_ctx, rx_err, ndev,
1321                          "Invalid xfer page set id - expecting %x got %x\n",
1322                          NETVSC_RECEIVE_BUFFER_ID,
1323                          vmxferpage_packet->xfer_pageset_id);
1324                return 0;
1325        }
1326
1327        count = vmxferpage_packet->range_cnt;
1328
1329        /* Check count for a valid value */
1330        if (NETVSC_XFER_HEADER_SIZE(count) > desc->offset8 << 3) {
1331                netif_err(net_device_ctx, rx_err, ndev,
1332                          "Range count is not valid: %d\n",
1333                          count);
1334                return 0;
1335        }
1336
 1337        /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
1338        for (i = 0; i < count; i++) {
1339                u32 offset = vmxferpage_packet->ranges[i].byte_offset;
1340                u32 buflen = vmxferpage_packet->ranges[i].byte_count;
1341                void *data;
1342                int ret;
1343
1344                if (unlikely(offset > net_device->recv_buf_size ||
1345                             buflen > net_device->recv_buf_size - offset)) {
1346                        nvchan->rsc.cnt = 0;
1347                        status = NVSP_STAT_FAIL;
1348                        netif_err(net_device_ctx, rx_err, ndev,
1349                                  "Packet offset:%u + len:%u too big\n",
1350                                  offset, buflen);
1351
1352                        continue;
1353                }
1354
1355                /* We're going to copy (sections of) the packet into nvchan->recv_buf;
1356                 * make sure that nvchan->recv_buf is large enough to hold the packet.
1357                 */
1358                if (unlikely(buflen > net_device->recv_section_size)) {
1359                        nvchan->rsc.cnt = 0;
1360                        status = NVSP_STAT_FAIL;
1361                        netif_err(net_device_ctx, rx_err, ndev,
1362                                  "Packet too big: buflen=%u recv_section_size=%u\n",
1363                                  buflen, net_device->recv_section_size);
1364
1365                        continue;
1366                }
1367
1368                data = recv_buf + offset;
1369
1370                nvchan->rsc.is_last = (i == count - 1);
1371
1372                trace_rndis_recv(ndev, q_idx, data);
1373
1374                /* Pass it to the upper layer */
1375                ret = rndis_filter_receive(ndev, net_device,
1376                                           nvchan, data, buflen);
1377
1378                if (unlikely(ret != NVSP_STAT_SUCCESS)) {
1379                        /* Drop incomplete packet */
1380                        nvchan->rsc.cnt = 0;
1381                        status = NVSP_STAT_FAIL;
1382                }
1383        }
1384
1385        enq_receive_complete(ndev, net_device, q_idx,
1386                             vmxferpage_packet->d.trans_id, status);
1387
1388        return count;
1389}
1390
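    /* Handle NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE: copy the host-provided
     * send indirection table into tx_table after validating its size and
     * offset.
     */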
1391static void netvsc_send_table(struct net_device *ndev,
1392                              struct netvsc_device *nvscdev,
1393                              const struct nvsp_message *nvmsg,
1394                              u32 msglen)
1395{
1396        struct net_device_context *net_device_ctx = netdev_priv(ndev);
1397        u32 count, offset, *tab;
1398        int i;
1399
1400        /* Ensure packet is big enough to read send_table fields */
1401        if (msglen < sizeof(struct nvsp_message_header) +
1402                     sizeof(struct nvsp_5_send_indirect_table)) {
1403                netdev_err(ndev, "nvsp_v5_msg length too small: %u\n", msglen);
1404                return;
1405        }
1406
1407        count = nvmsg->msg.v5_msg.send_table.count;
1408        offset = nvmsg->msg.v5_msg.send_table.offset;
1409
1410        if (count != VRSS_SEND_TAB_SIZE) {
1411                netdev_err(ndev, "Received wrong send-table size:%u\n", count);
1412                return;
1413        }
1414
1415        /* If negotiated version <= NVSP_PROTOCOL_VERSION_6, the offset may be
1416         * wrong due to a host bug. So fix the offset here.
1417         */
1418        if (nvscdev->nvsp_version <= NVSP_PROTOCOL_VERSION_6 &&
1419            msglen >= sizeof(struct nvsp_message_header) +
1420            sizeof(union nvsp_6_message_uber) + count * sizeof(u32))
1421                offset = sizeof(struct nvsp_message_header) +
1422                         sizeof(union nvsp_6_message_uber);
1423
1424        /* Boundary check for all versions */
1425        if (msglen < count * sizeof(u32) || offset > msglen - count * sizeof(u32)) {
1426                netdev_err(ndev, "Received send-table offset too big:%u\n",
1427                           offset);
1428                return;
1429        }
1430
1431        tab = (void *)nvmsg + offset;
1432
1433        for (i = 0; i < count; i++)
1434                net_device_ctx->tx_table[i] = tab[i];
1435}
1436
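    /* Handle NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION: record whether a VF slot
     * was allocated or removed, along with its serial number.
     */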
1437static void netvsc_send_vf(struct net_device *ndev,
1438                           const struct nvsp_message *nvmsg,
1439                           u32 msglen)
1440{
1441        struct net_device_context *net_device_ctx = netdev_priv(ndev);
1442
1443        /* Ensure packet is big enough to read its fields */
1444        if (msglen < sizeof(struct nvsp_message_header) +
1445                     sizeof(struct nvsp_4_send_vf_association)) {
1446                netdev_err(ndev, "nvsp_v4_msg length too small: %u\n", msglen);
1447                return;
1448        }
1449
1450        net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
1451        net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
1452        netdev_info(ndev, "VF slot %u %s\n",
1453                    net_device_ctx->vf_serial,
1454                    net_device_ctx->vf_alloc ? "added" : "removed");
1455}
1456
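    /* Dispatch inband NVSP control messages (indirection table and VF
     * association) after a minimal length check.
     */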
1457static void netvsc_receive_inband(struct net_device *ndev,
1458                                  struct netvsc_device *nvscdev,
1459                                  const struct vmpacket_descriptor *desc)
1460{
1461        const struct nvsp_message *nvmsg = hv_pkt_data(desc);
1462        u32 msglen = hv_pkt_datalen(desc);
1463
1464        /* Ensure packet is big enough to read header fields */
1465        if (msglen < sizeof(struct nvsp_message_header)) {
1466                netdev_err(ndev, "inband nvsp_message length too small: %u\n", msglen);
1467                return;
1468        }
1469
1470        switch (nvmsg->hdr.msg_type) {
1471        case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
1472                netvsc_send_table(ndev, nvscdev, nvmsg, msglen);
1473                break;
1474
1475        case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
1476                if (hv_is_isolation_supported())
1477                        netdev_err(ndev, "Ignore VF_ASSOCIATION msg from the host supporting isolation\n");
1478                else
1479                        netvsc_send_vf(ndev, nvmsg, msglen);
1480                break;
1481        }
1482}
1483
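    /* Dispatch one VMBus packet by descriptor type: send completions,
     * transfer-page data (RNDIS receive) and inband control messages.
     * Returns the number of receive ranges processed, 0 otherwise.
     */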
1484static int netvsc_process_raw_pkt(struct hv_device *device,
1485                                  struct netvsc_channel *nvchan,
1486                                  struct netvsc_device *net_device,
1487                                  struct net_device *ndev,
1488                                  const struct vmpacket_descriptor *desc,
1489                                  int budget)
1490{
1491        struct vmbus_channel *channel = nvchan->channel;
1492        const struct nvsp_message *nvmsg = hv_pkt_data(desc);
1493
1494        trace_nvsp_recv(ndev, channel, nvmsg);
1495
1496        switch (desc->type) {
1497        case VM_PKT_COMP:
1498                netvsc_send_completion(ndev, net_device, channel, desc, budget);
1499                break;
1500
1501        case VM_PKT_DATA_USING_XFER_PAGES:
1502                return netvsc_receive(ndev, net_device, nvchan, desc);
1504
1505        case VM_PKT_DATA_INBAND:
1506                netvsc_receive_inband(ndev, net_device, desc);
1507                break;
1508
1509        default:
1510                netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
1511                           desc->type, desc->trans_id);
1512                break;
1513        }
1514
1515        return 0;
1516}
1517
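    /* Sub-channels have no device object of their own; resolve to the
     * primary channel's hv_device in that case.
     */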
1518static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
1519{
1520        struct vmbus_channel *primary = channel->primary_channel;
1521
1522        return primary ? primary->device_obj : channel->device_obj;
1523}
1524
1525/* Network processing softirq (NAPI poll handler).
1526 * Processes data in the incoming ring buffer from the host.
1527 * Stops when the ring is empty or the budget is met or exceeded.
1528 */
1529int netvsc_poll(struct napi_struct *napi, int budget)
1530{
1531        struct netvsc_channel *nvchan
1532                = container_of(napi, struct netvsc_channel, napi);
1533        struct netvsc_device *net_device = nvchan->net_device;
1534        struct vmbus_channel *channel = nvchan->channel;
1535        struct hv_device *device = netvsc_channel_to_device(channel);
1536        struct net_device *ndev = hv_get_drvdata(device);
1537        int work_done = 0;
1538        int ret;
1539
1540        /* If starting a new interval */
1541        if (!nvchan->desc)
1542                nvchan->desc = hv_pkt_iter_first(channel);
1543
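            /* Process packets until the ring is drained or the budget is
             * reached; any leftover descriptor is kept in nvchan->desc for
             * the next poll.
             */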
1544        while (nvchan->desc && work_done < budget) {
1545                work_done += netvsc_process_raw_pkt(device, nvchan, net_device,
1546                                                    ndev, nvchan->desc, budget);
1547                nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
1548        }
1549
1550        /* Send any pending receive completions */
1551        ret = send_recv_completions(ndev, net_device, nvchan);
1552
1553        /* If the NAPI budget was not exhausted this time and we are not
1554         * busy polling, complete NAPI and re-enable host interrupts;
1555         * reschedule if the ring is not empty or sending receive
1556         * completions failed.
1557         */
1559        if (work_done < budget &&
1560            napi_complete_done(napi, work_done) &&
1561            (ret || hv_end_read(&channel->inbound)) &&
1562            napi_schedule_prep(napi)) {
1563                hv_begin_read(&channel->inbound);
1564                __napi_schedule(napi);
1565        }
1566
1567        /* Driver may overshoot since multiple packets per descriptor */
1568        return min(work_done, budget);
1569}
1570
1571/* Callback invoked when data is available in the host ring buffer.
1572 * Processing is deferred to the network softirq (NAPI).
1573 */
1574void netvsc_channel_cb(void *context)
1575{
1576        struct netvsc_channel *nvchan = context;
1577        struct vmbus_channel *channel = nvchan->channel;
1578        struct hv_ring_buffer_info *rbi = &channel->inbound;
1579
1580        /* preload first vmpacket descriptor */
1581        prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
1582
1583        if (napi_schedule_prep(&nvchan->napi)) {
1584                /* disable interrupts from host */
1585                hv_begin_read(rbi);
1586
1587                __napi_schedule_irqoff(&nvchan->napi);
1588        }
1589}
1590
1591/*
1592 * netvsc_device_add - Callback when the device belonging to this
1593 * driver is added
1594 */
1595struct netvsc_device *netvsc_device_add(struct hv_device *device,
1596                                const struct netvsc_device_info *device_info)
1597{
1598        int i, ret = 0;
1599        struct netvsc_device *net_device;
1600        struct net_device *ndev = hv_get_drvdata(device);
1601        struct net_device_context *net_device_ctx = netdev_priv(ndev);
1602
1603        net_device = alloc_net_device();
1604        if (!net_device)
1605                return ERR_PTR(-ENOMEM);
1606
1607        for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
1608                net_device_ctx->tx_table[i] = 0;
1609
1610        /* Because the device uses NAPI, all interrupt batching and control
1611         * is done via the network softirq, not the channel callback.
1612         */
1613        set_channel_read_mode(device->channel, HV_CALL_ISR);
1614
1615        /* If we're reopening the device we may have multiple queues; fill
1616         * the chn_table with the default channel so it can be used before
1617         * the subchannels are opened.
1618         * Initialize the channel state before we open; we can be
1619         * interrupted as soon as we open the channel.
1620         */
1621
1622        for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
1623                struct netvsc_channel *nvchan = &net_device->chan_table[i];
1624
1625                nvchan->channel = device->channel;
1626                nvchan->net_device = net_device;
1627                u64_stats_init(&nvchan->tx_stats.syncp);
1628                u64_stats_init(&nvchan->rx_stats.syncp);
1629
1630                ret = xdp_rxq_info_reg(&nvchan->xdp_rxq, ndev, i, 0);
1631
1632                if (ret) {
1633                        netdev_err(ndev, "xdp_rxq_info_reg fail: %d\n", ret);
1634                        goto cleanup2;
1635                }
1636
1637                ret = xdp_rxq_info_reg_mem_model(&nvchan->xdp_rxq,
1638                                                 MEM_TYPE_PAGE_SHARED, NULL);
1639
1640                if (ret) {
1641                        netdev_err(ndev, "xdp reg_mem_model fail: %d\n", ret);
1642                        goto cleanup2;
1643                }
1644        }
1645
1646        /* Enable NAPI handler before init callbacks */
1647        netif_napi_add(ndev, &net_device->chan_table[0].napi,
1648                       netvsc_poll, NAPI_POLL_WEIGHT);
1649
1650        /* Open the channel */
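            /* The VMBus requestor hands opaque request IDs to the host and
             * translates/validates the transaction IDs in completions back
             * to guest packet addresses.
             */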
1651        device->channel->next_request_id_callback = vmbus_next_request_id;
1652        device->channel->request_addr_callback = vmbus_request_addr;
1653        device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
1654        device->channel->max_pkt_size = NETVSC_MAX_PKT_SIZE;
1655
1656        ret = vmbus_open(device->channel, netvsc_ring_bytes,
1657                         netvsc_ring_bytes,  NULL, 0,
1658                         netvsc_channel_cb, net_device->chan_table);
1659
1660        if (ret != 0) {
1661                netdev_err(ndev, "unable to open channel: %d\n", ret);
1662                goto cleanup;
1663        }
1664
1665        /* Channel is opened */
1666        netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");
1667
1668        napi_enable(&net_device->chan_table[0].napi);
1669
1670        /* Connect with the NetVsp */
1671        ret = netvsc_connect_vsp(device, net_device, device_info);
1672        if (ret != 0) {
1673                netdev_err(ndev,
1674                           "unable to connect to NetVSP - %d\n", ret);
1675                goto close;
1676        }
1677
1678        /* Writing the nvdev pointer unblocks netvsc_send(), so make sure
1679         * chn_table is fully populated first.
1680         */
1681        rcu_assign_pointer(net_device_ctx->nvdev, net_device);
1682
1683        return net_device;
1684
1685close:
1686        RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
1687        napi_disable(&net_device->chan_table[0].napi);
1688
1689        /* Now, we can close the channel safely */
1690        vmbus_close(device->channel);
1691
1692cleanup:
1693        netif_napi_del(&net_device->chan_table[0].napi);
1694
1695cleanup2:
1696        free_netvsc_device(&net_device->rcu);
1697
1698        return ERR_PTR(ret);
1699}
1700