linux/drivers/net/hyperv/netvsc.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/prefetch.h>

#include <asm/sync_bitops.h>

#include "hyperv_net.h"
#include "netvsc_trace.h"

/*
 * Switch the data path from the synthetic interface to the VF
 * interface.
 */
void netvsc_switch_datapath(struct net_device *ndev, bool vf)
{
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        struct hv_device *dev = net_device_ctx->device_ctx;
        struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
        struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;

        memset(init_pkt, 0, sizeof(struct nvsp_message));
        init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
        if (vf)
                init_pkt->msg.v4_msg.active_dp.active_datapath =
                        NVSP_DATAPATH_VF;
        else
                init_pkt->msg.v4_msg.active_dp.active_datapath =
                        NVSP_DATAPATH_SYNTHETIC;

        trace_nvsp_send(ndev, init_pkt);

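        /* The switch message is fire-and-forget: no completion is
         * requested and the caller does not wait for the host to
         * acknowledge it.
         */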
        vmbus_sendpacket(dev->channel, init_pkt,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_pkt,
                               VM_PKT_DATA_INBAND, 0);
}

/* Worker to set up sub-channels on initial setup.
 * The initial hotplug event occurs in softirq context
 * and can't wait for the channels to come up.
 */
static void netvsc_subchan_work(struct work_struct *w)
{
        struct netvsc_device *nvdev =
                container_of(w, struct netvsc_device, subchan_work);
        struct rndis_device *rdev;
        int i, ret;

        /* Avoid deadlock with device removal already under RTNL */
        if (!rtnl_trylock()) {
                schedule_work(w);
                return;
        }

        rdev = nvdev->extension;
        if (rdev) {
                ret = rndis_set_subchannel(rdev->ndev, nvdev, NULL);
                if (ret == 0) {
                        netif_device_attach(rdev->ndev);
                } else {
                        /* fallback to only primary channel */
                        for (i = 1; i < nvdev->num_chn; i++)
                                netif_napi_del(&nvdev->chan_table[i].napi);

                        nvdev->max_chn = 1;
                        nvdev->num_chn = 1;
                }
        }

        rtnl_unlock();
}

static struct netvsc_device *alloc_net_device(void)
{
        struct netvsc_device *net_device;

        net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
        if (!net_device)
                return NULL;

        init_waitqueue_head(&net_device->wait_drain);
        net_device->destroy = false;
        net_device->tx_disable = false;

        net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
        net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;

        init_completion(&net_device->channel_init_wait);
        init_waitqueue_head(&net_device->subchan_open);
        INIT_WORK(&net_device->subchan_work, netvsc_subchan_work);

        return net_device;
}

static void free_netvsc_device(struct rcu_head *head)
{
        struct netvsc_device *nvdev
                = container_of(head, struct netvsc_device, rcu);
        int i;

        kfree(nvdev->extension);
        vfree(nvdev->recv_buf);
        vfree(nvdev->send_buf);
        kfree(nvdev->send_section_map);

        for (i = 0; i < VRSS_CHANNEL_MAX; i++)
                vfree(nvdev->chan_table[i].mrc.slots);

        kfree(nvdev);
}

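/* Defer freeing until after an RCU grace period: the transmit path
 * looks up nvdev under rcu_read_lock (see netvsc_send), so readers
 * may still hold a reference to it.
 */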
static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
{
        call_rcu(&nvdev->rcu, free_netvsc_device);
}

static void netvsc_revoke_recv_buf(struct hv_device *device,
                                   struct netvsc_device *net_device,
                                   struct net_device *ndev)
{
        struct nvsp_message *revoke_packet;
        int ret;

        /*
         * If we got a section count, it means we received a
         * SendReceiveBufferComplete msg (i.e. we sent a
         * NvspMessage1TypeSendReceiveBuffer msg), so we need
         * to send a revoke msg here.
         */
        if (net_device->recv_section_cnt) {
                /* Send the revoke receive buffer */
                revoke_packet = &net_device->revoke_packet;
                memset(revoke_packet, 0, sizeof(struct nvsp_message));

                revoke_packet->hdr.msg_type =
                        NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
                revoke_packet->msg.v1_msg.revoke_recv_buf.id =
                        NETVSC_RECEIVE_BUFFER_ID;

                trace_nvsp_send(ndev, revoke_packet);

                ret = vmbus_sendpacket(device->channel,
                                       revoke_packet,
                                       sizeof(struct nvsp_message),
                                       (unsigned long)revoke_packet,
                                       VM_PKT_DATA_INBAND, 0);
                /* If the failure is because the channel was rescinded,
                 * ignore it: we cannot send on a rescinded channel.
                 * Treating it as success lets us clean up properly
                 * even when the channel is rescinded.
                 */
                if (device->channel->rescind)
                        ret = 0;
                /*
                 * If we failed here, we might as well return and
                 * have a leak rather than continue and a bugchk
                 */
                if (ret != 0) {
                        netdev_err(ndev, "unable to send revoke receive buffer to netvsp\n");
                        return;
                }
                net_device->recv_section_cnt = 0;
        }
}

static void netvsc_revoke_send_buf(struct hv_device *device,
                                   struct netvsc_device *net_device,
                                   struct net_device *ndev)
{
        struct nvsp_message *revoke_packet;
        int ret;

        /* Deal with the send buffer we may have setup.
         * If we got a send section size, it means we received a
         * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (i.e. we sent a
         * NVSP_MSG1_TYPE_SEND_SEND_BUF msg), so we need
         * to send a revoke msg here.
         */
        if (net_device->send_section_cnt) {
                /* Send the revoke send buffer */
                revoke_packet = &net_device->revoke_packet;
                memset(revoke_packet, 0, sizeof(struct nvsp_message));

                revoke_packet->hdr.msg_type =
                        NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
                revoke_packet->msg.v1_msg.revoke_send_buf.id =
                        NETVSC_SEND_BUFFER_ID;

                trace_nvsp_send(ndev, revoke_packet);

                ret = vmbus_sendpacket(device->channel,
                                       revoke_packet,
                                       sizeof(struct nvsp_message),
                                       (unsigned long)revoke_packet,
                                       VM_PKT_DATA_INBAND, 0);

                /* If the failure is because the channel was rescinded,
                 * ignore it: we cannot send on a rescinded channel.
                 * Treating it as success lets us clean up properly
                 * even when the channel is rescinded.
                 */
                if (device->channel->rescind)
                        ret = 0;

                /* If we failed here, we might as well return and
                 * have a leak rather than continue and a bugchk
                 */
                if (ret != 0) {
                        netdev_err(ndev, "unable to send revoke send buffer to netvsp\n");
                        return;
                }
                net_device->send_section_cnt = 0;
        }
}

static void netvsc_teardown_recv_gpadl(struct hv_device *device,
                                       struct netvsc_device *net_device,
                                       struct net_device *ndev)
{
        int ret;

        if (net_device->recv_buf_gpadl_handle) {
                ret = vmbus_teardown_gpadl(device->channel,
                                           net_device->recv_buf_gpadl_handle);

                /* If we failed here, we might as well return and have a leak
                 * rather than continue and a bugchk
                 */
                if (ret != 0) {
                        netdev_err(ndev,
                                   "unable to teardown receive buffer's gpadl\n");
                        return;
                }
                net_device->recv_buf_gpadl_handle = 0;
        }
}

static void netvsc_teardown_send_gpadl(struct hv_device *device,
                                       struct netvsc_device *net_device,
                                       struct net_device *ndev)
{
        int ret;

        if (net_device->send_buf_gpadl_handle) {
                ret = vmbus_teardown_gpadl(device->channel,
                                           net_device->send_buf_gpadl_handle);

                /* If we failed here, we might as well return and have a leak
                 * rather than continue and a bugchk
                 */
                if (ret != 0) {
                        netdev_err(ndev,
                                   "unable to teardown send buffer's gpadl\n");
                        return;
                }
                net_device->send_buf_gpadl_handle = 0;
        }
}

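/* Allocate the receive completion ring for a channel, preferring
 * memory on the NUMA node of the channel's target CPU and falling
 * back to any node if that fails.
 */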
int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
{
        struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
        int node = cpu_to_node(nvchan->channel->target_cpu);
        size_t size;

        size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
        nvchan->mrc.slots = vzalloc_node(size, node);
        if (!nvchan->mrc.slots)
                nvchan->mrc.slots = vzalloc(size);

        return nvchan->mrc.slots ? 0 : -ENOMEM;
}

static int netvsc_init_buf(struct hv_device *device,
                           struct netvsc_device *net_device,
                           const struct netvsc_device_info *device_info)
{
        struct nvsp_1_message_send_receive_buffer_complete *resp;
        struct net_device *ndev = hv_get_drvdata(device);
        struct nvsp_message *init_packet;
        unsigned int buf_size;
        size_t map_words;
        int ret = 0;

        /* Get receive buffer area. */
        buf_size = device_info->recv_sections * device_info->recv_section_size;
        buf_size = roundup(buf_size, PAGE_SIZE);

        /* Legacy hosts only allow a smaller receive buffer */
        if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
                buf_size = min_t(unsigned int, buf_size,
                                 NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);

        net_device->recv_buf = vzalloc(buf_size);
        if (!net_device->recv_buf) {
                netdev_err(ndev,
                           "unable to allocate receive buffer of size %u\n",
                           buf_size);
                ret = -ENOMEM;
                goto cleanup;
        }

        net_device->recv_buf_size = buf_size;

        /*
         * Establish the gpadl handle for this buffer on this
         * channel.  Note: This call uses the vmbus connection rather
         * than the channel to establish the gpadl handle.
         */
        ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
                                    buf_size,
                                    &net_device->recv_buf_gpadl_handle);
        if (ret != 0) {
                netdev_err(ndev,
                        "unable to establish receive buffer's gpadl\n");
                goto cleanup;
        }

        /* Notify the NetVsp of the gpadl handle */
        init_packet = &net_device->channel_init_pkt;
        memset(init_packet, 0, sizeof(struct nvsp_message));
        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
        init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
                net_device->recv_buf_gpadl_handle;
        init_packet->msg.v1_msg.send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;

        trace_nvsp_send(ndev, init_packet);

        /* Send the gpadl notification request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_packet,
                               VM_PKT_DATA_INBAND,
                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
        if (ret != 0) {
                netdev_err(ndev,
                        "unable to send receive buffer's gpadl to netvsp\n");
                goto cleanup;
        }

        wait_for_completion(&net_device->channel_init_wait);

        /* Check the response */
        resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
        if (resp->status != NVSP_STAT_SUCCESS) {
                netdev_err(ndev,
                           "Unable to complete receive buffer initialization with NetVsp - status %d\n",
                           resp->status);
                ret = -EINVAL;
                goto cleanup;
        }

        /* Parse the response */
        netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
                   resp->num_sections, resp->sections[0].sub_alloc_size,
                   resp->sections[0].num_sub_allocs);

        /* There should only be one section for the entire receive buffer */
        if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
                ret = -EINVAL;
                goto cleanup;
        }

        net_device->recv_section_size = resp->sections[0].sub_alloc_size;
        net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;

        /* Setup receive completion ring */
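        /* One slot is kept unused so that a full ring can be
         * distinguished from an empty one (first == next means empty);
         * the count is rounded up for allocation granularity.
         */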
        net_device->recv_completion_cnt
                = round_up(net_device->recv_section_cnt + 1,
                           PAGE_SIZE / sizeof(u64));
        ret = netvsc_alloc_recv_comp_ring(net_device, 0);
        if (ret)
                goto cleanup;

        /* Now setup the send buffer. */
        buf_size = device_info->send_sections * device_info->send_section_size;
        buf_size = round_up(buf_size, PAGE_SIZE);

        net_device->send_buf = vzalloc(buf_size);
        if (!net_device->send_buf) {
                netdev_err(ndev, "unable to allocate send buffer of size %u\n",
                           buf_size);
                ret = -ENOMEM;
                goto cleanup;
        }

        /* Establish the gpadl handle for this buffer on this
         * channel.  Note: This call uses the vmbus connection rather
         * than the channel to establish the gpadl handle.
         */
        ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
                                    buf_size,
                                    &net_device->send_buf_gpadl_handle);
        if (ret != 0) {
                netdev_err(ndev,
                           "unable to establish send buffer's gpadl\n");
                goto cleanup;
        }

        /* Notify the NetVsp of the gpadl handle */
        init_packet = &net_device->channel_init_pkt;
        memset(init_packet, 0, sizeof(struct nvsp_message));
        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
        init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
                net_device->send_buf_gpadl_handle;
        init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;

        trace_nvsp_send(ndev, init_packet);

        /* Send the gpadl notification request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_packet,
                               VM_PKT_DATA_INBAND,
                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
        if (ret != 0) {
                netdev_err(ndev,
                           "unable to send send buffer's gpadl to netvsp\n");
                goto cleanup;
        }

        wait_for_completion(&net_device->channel_init_wait);

        /* Check the response */
        if (init_packet->msg.v1_msg.send_send_buf_complete.status !=
            NVSP_STAT_SUCCESS) {
                netdev_err(ndev,
                           "Unable to complete send buffer initialization with NetVsp - status %d\n",
                           init_packet->msg.v1_msg.send_send_buf_complete.status);
                ret = -EINVAL;
                goto cleanup;
        }

        /* Parse the response */
        net_device->send_section_size =
                init_packet->msg.v1_msg.send_send_buf_complete.section_size;

        /* Section count is simply the size divided by the section size. */
        net_device->send_section_cnt = buf_size / net_device->send_section_size;

        netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
                   net_device->send_section_size, net_device->send_section_cnt);

        /* Setup state for managing the send buffer. */
        map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);

        net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
        if (net_device->send_section_map == NULL) {
                ret = -ENOMEM;
                goto cleanup;
        }

        goto exit;

cleanup:
        netvsc_revoke_recv_buf(device, net_device, ndev);
        netvsc_revoke_send_buf(device, net_device, ndev);
        netvsc_teardown_recv_gpadl(device, net_device, ndev);
        netvsc_teardown_send_gpadl(device, net_device, ndev);

exit:
        return ret;
}

/* Negotiate NVSP protocol version */
static int negotiate_nvsp_ver(struct hv_device *device,
                              struct netvsc_device *net_device,
                              struct nvsp_message *init_packet,
                              u32 nvsp_ver)
{
        struct net_device *ndev = hv_get_drvdata(device);
        int ret;

        memset(init_packet, 0, sizeof(struct nvsp_message));
        init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
        init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
        init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
        trace_nvsp_send(ndev, init_packet);

        /* Send the init request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_packet,
                               VM_PKT_DATA_INBAND,
                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

        if (ret != 0)
                return ret;

        wait_for_completion(&net_device->channel_init_wait);

        if (init_packet->msg.init_msg.init_complete.status !=
            NVSP_STAT_SUCCESS)
                return -EINVAL;

        if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
                return 0;

        /* NVSPv2 or later: Send NDIS config */
        memset(init_packet, 0, sizeof(struct nvsp_message));
        init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
        init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
        init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;

        if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) {
                init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;

                /* Teaming bit is needed to receive link speed updates */
                init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
        }

        if (nvsp_ver >= NVSP_PROTOCOL_VERSION_61)
                init_packet->msg.v2_msg.send_ndis_config.capability.rsc = 1;

        trace_nvsp_send(ndev, init_packet);

        ret = vmbus_sendpacket(device->channel, init_packet,
                                sizeof(struct nvsp_message),
                                (unsigned long)init_packet,
                                VM_PKT_DATA_INBAND, 0);

        return ret;
}

static int netvsc_connect_vsp(struct hv_device *device,
                              struct netvsc_device *net_device,
                              const struct netvsc_device_info *device_info)
{
        struct net_device *ndev = hv_get_drvdata(device);
        static const u32 ver_list[] = {
                NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
                NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5,
                NVSP_PROTOCOL_VERSION_6, NVSP_PROTOCOL_VERSION_61
        };
        struct nvsp_message *init_packet;
        int ndis_version, i, ret;

        init_packet = &net_device->channel_init_pkt;

        /* Negotiate the latest NVSP protocol supported */
        for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--)
                if (negotiate_nvsp_ver(device, net_device, init_packet,
                                       ver_list[i]) == 0) {
                        net_device->nvsp_version = ver_list[i];
                        break;
                }

        if (i < 0) {
                ret = -EPROTO;
                goto cleanup;
        }

        pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);

        /* Send the ndis version */
        memset(init_packet, 0, sizeof(struct nvsp_message));

        if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
                ndis_version = 0x00060001;
        else
                ndis_version = 0x0006001e;

        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
        init_packet->msg.v1_msg.send_ndis_ver.ndis_major_ver =
                        (ndis_version & 0xFFFF0000) >> 16;
        init_packet->msg.v1_msg.send_ndis_ver.ndis_minor_ver =
                        ndis_version & 0xFFFF;

        trace_nvsp_send(ndev, init_packet);

        /* Send the init request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                                sizeof(struct nvsp_message),
                                (unsigned long)init_packet,
                                VM_PKT_DATA_INBAND, 0);
        if (ret != 0)
                goto cleanup;

        ret = netvsc_init_buf(device, net_device, device_info);

cleanup:
        return ret;
}

/*
 * netvsc_device_remove - Callback when the root bus device is removed
 */
void netvsc_device_remove(struct hv_device *device)
{
        struct net_device *ndev = hv_get_drvdata(device);
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        struct netvsc_device *net_device
                = rtnl_dereference(net_device_ctx->nvdev);
        int i;

        /*
         * Revoke receive buffer. If host is pre-Win2016 then tear down
         * receive buffer GPADL. Do the same for send buffer.
         */
        netvsc_revoke_recv_buf(device, net_device, ndev);
        if (vmbus_proto_version < VERSION_WIN10)
                netvsc_teardown_recv_gpadl(device, net_device, ndev);

        netvsc_revoke_send_buf(device, net_device, ndev);
        if (vmbus_proto_version < VERSION_WIN10)
                netvsc_teardown_send_gpadl(device, net_device, ndev);

        RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);

        /* And disassociate NAPI context from device */
        for (i = 0; i < net_device->num_chn; i++)
                netif_napi_del(&net_device->chan_table[i].napi);

        /*
         * At this point, no one should be accessing net_device
         * except in here
         */
        netdev_dbg(ndev, "net device safe to remove\n");

        /* Now, we can close the channel safely */
        vmbus_close(device->channel);

        /*
         * If host is Win2016 or higher then we do the GPADL tear down
         * here after VMBus is closed.
         */
        if (vmbus_proto_version >= VERSION_WIN10) {
                netvsc_teardown_recv_gpadl(device, net_device, ndev);
                netvsc_teardown_send_gpadl(device, net_device, ndev);
        }

        /* Release all resources */
        free_netvsc_device_rcu(net_device);
}

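/* Transmit flow control watermarks, in percent of ring space free:
 * a queue is stopped when free space drops below LOWATER after a send
 * and woken from the completion path once it rises above HIWATER
 * (or all in-flight sends have completed).
 */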
#define RING_AVAIL_PERCENT_HIWATER 20
#define RING_AVAIL_PERCENT_LOWATER 10

static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
                                         u32 index)
{
        sync_change_bit(index, net_device->send_section_map);
}

static void netvsc_send_tx_complete(struct net_device *ndev,
                                    struct netvsc_device *net_device,
                                    struct vmbus_channel *channel,
                                    const struct vmpacket_descriptor *desc,
                                    int budget)
{
        struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
        struct net_device_context *ndev_ctx = netdev_priv(ndev);
        u16 q_idx = 0;
        int queue_sends;

        /* Notify the layer above us */
        if (likely(skb)) {
                const struct hv_netvsc_packet *packet
                        = (struct hv_netvsc_packet *)skb->cb;
                u32 send_index = packet->send_buf_index;
                struct netvsc_stats *tx_stats;

                if (send_index != NETVSC_INVALID_INDEX)
                        netvsc_free_send_slot(net_device, send_index);
                q_idx = packet->q_idx;

                tx_stats = &net_device->chan_table[q_idx].tx_stats;

                u64_stats_update_begin(&tx_stats->syncp);
                tx_stats->packets += packet->total_packets;
                tx_stats->bytes += packet->total_bytes;
                u64_stats_update_end(&tx_stats->syncp);

                napi_consume_skb(skb, budget);
        }

        queue_sends =
                atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);

        if (unlikely(net_device->destroy)) {
                if (queue_sends == 0)
                        wake_up(&net_device->wait_drain);
        } else {
                struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);

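                /* Wake the queue once enough ring space is free again,
                 * or once all in-flight sends have completed so the
                 * queue cannot stall waiting for the watermark.
                 */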
                if (netif_tx_queue_stopped(txq) && !net_device->tx_disable &&
                    (hv_get_avail_to_write_percent(&channel->outbound) >
                     RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) {
                        netif_tx_wake_queue(txq);
                        ndev_ctx->eth_stats.wake_queue++;
                }
        }
}

static void netvsc_send_completion(struct net_device *ndev,
                                   struct netvsc_device *net_device,
                                   struct vmbus_channel *incoming_channel,
                                   const struct vmpacket_descriptor *desc,
                                   int budget)
{
        const struct nvsp_message *nvsp_packet = hv_pkt_data(desc);

        switch (nvsp_packet->hdr.msg_type) {
        case NVSP_MSG_TYPE_INIT_COMPLETE:
        case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
        case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
        case NVSP_MSG5_TYPE_SUBCHANNEL:
                /* Copy the response back */
                memcpy(&net_device->channel_init_pkt, nvsp_packet,
                       sizeof(struct nvsp_message));
                complete(&net_device->channel_init_wait);
                break;

        case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
                netvsc_send_tx_complete(ndev, net_device, incoming_channel,
                                        desc, budget);
                break;

        default:
                netdev_err(ndev,
                           "Unknown send completion type %d received!!\n",
                           nvsp_packet->hdr.msg_type);
        }
}

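/* Claim a free section in the send buffer. The bitmap is shared by
 * all transmit queues, so the bit is taken with an atomic
 * test-and-set; losing a race simply means scanning on to the next
 * clear bit. Returns NETVSC_INVALID_INDEX when no section is free.
 */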
static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
{
        unsigned long *map_addr = net_device->send_section_map;
        unsigned int i;

        for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
                if (sync_test_and_set_bit(i, map_addr) == 0)
                        return i;
        }

        return NETVSC_INVALID_INDEX;
}

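/* Copy the packet into its send buffer section at offset pend_size.
 * When the stack has signalled that more packets are coming
 * (xmit_more), the message is padded out to pkt_align so the next
 * packet appended to this section starts on an aligned boundary.
 */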
static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
                                    unsigned int section_index,
                                    u32 pend_size,
                                    struct hv_netvsc_packet *packet,
                                    struct rndis_message *rndis_msg,
                                    struct hv_page_buffer *pb,
                                    bool xmit_more)
{
        char *start = net_device->send_buf;
        char *dest = start + (section_index * net_device->send_section_size)
                     + pend_size;
        int i;
        u32 padding = 0;
        u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
                packet->page_buf_cnt;
        u32 remain;

        /* Add padding */
        remain = packet->total_data_buflen & (net_device->pkt_align - 1);
        if (xmit_more && remain) {
                padding = net_device->pkt_align - remain;
                rndis_msg->msg_len += padding;
                packet->total_data_buflen += padding;
        }

        for (i = 0; i < page_count; i++) {
                char *src = phys_to_virt(pb[i].pfn << PAGE_SHIFT);
                u32 offset = pb[i].offset;
                u32 len = pb[i].len;

                memcpy(dest, (src + offset), len);
                dest += len;
        }

        if (padding)
                memset(dest, 0, padding);
}

static inline int netvsc_send_pkt(
        struct hv_device *device,
        struct hv_netvsc_packet *packet,
        struct netvsc_device *net_device,
        struct hv_page_buffer *pb,
        struct sk_buff *skb)
{
        struct nvsp_message nvmsg;
        struct nvsp_1_message_send_rndis_packet *rpkt =
                &nvmsg.msg.v1_msg.send_rndis_pkt;
        struct netvsc_channel * const nvchan =
                &net_device->chan_table[packet->q_idx];
        struct vmbus_channel *out_channel = nvchan->channel;
        struct net_device *ndev = hv_get_drvdata(device);
        struct net_device_context *ndev_ctx = netdev_priv(ndev);
        struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
        u64 req_id;
        int ret;
        u32 ring_avail = hv_get_avail_to_write_percent(&out_channel->outbound);

        nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
        if (skb)
                rpkt->channel_type = 0;         /* 0 is RMC_DATA */
        else
                rpkt->channel_type = 1;         /* 1 is RMC_CONTROL */

        rpkt->send_buf_section_index = packet->send_buf_index;
        if (packet->send_buf_index == NETVSC_INVALID_INDEX)
                rpkt->send_buf_section_size = 0;
        else
                rpkt->send_buf_section_size = packet->total_data_buflen;

        req_id = (ulong)skb;

        if (out_channel->rescind)
                return -ENODEV;

        trace_nvsp_send_pkt(ndev, out_channel, rpkt);

        if (packet->page_buf_cnt) {
                if (packet->cp_partial)
                        pb += packet->rmsg_pgcnt;

                ret = vmbus_sendpacket_pagebuffer(out_channel,
                                                  pb, packet->page_buf_cnt,
                                                  &nvmsg, sizeof(nvmsg),
                                                  req_id);
        } else {
                ret = vmbus_sendpacket(out_channel,
                                       &nvmsg, sizeof(nvmsg),
                                       req_id, VM_PKT_DATA_INBAND,
                                       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
        }

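        /* ring_avail was sampled before this send, so the ring has
         * even less free space now; stopping at the low watermark is
         * therefore conservative.
         */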
        if (ret == 0) {
                atomic_inc_return(&nvchan->queue_sends);

                if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
                        netif_tx_stop_queue(txq);
                        ndev_ctx->eth_stats.stop_queue++;
                }
        } else if (ret == -EAGAIN) {
                netif_tx_stop_queue(txq);
                ndev_ctx->eth_stats.stop_queue++;
        } else {
                netdev_err(ndev,
                           "Unable to send packet pages %u len %u, ret %d\n",
                           packet->page_buf_cnt, packet->total_data_buflen,
                           ret);
        }

        if (netif_tx_queue_stopped(txq) &&
            atomic_read(&nvchan->queue_sends) < 1 &&
            !net_device->tx_disable) {
                netif_tx_wake_queue(txq);
                ndev_ctx->eth_stats.wake_queue++;
                if (ret == -EAGAIN)
                        ret = -ENOSPC;
        }

        return ret;
}

/* Move packet out of multi send data (msd), and clear msd */
static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
                                struct sk_buff **msd_skb,
                                struct multi_send_data *msdp)
{
        *msd_skb = msdp->skb;
        *msd_send = msdp->pkt;
        msdp->skb = NULL;
        msdp->pkt = NULL;
        msdp->count = 0;
}

/* RCU already held by caller */
int netvsc_send(struct net_device *ndev,
                struct hv_netvsc_packet *packet,
                struct rndis_message *rndis_msg,
                struct hv_page_buffer *pb,
                struct sk_buff *skb)
{
        struct net_device_context *ndev_ctx = netdev_priv(ndev);
        struct netvsc_device *net_device
                = rcu_dereference_bh(ndev_ctx->nvdev);
        struct hv_device *device = ndev_ctx->device_ctx;
        int ret = 0;
        struct netvsc_channel *nvchan;
        u32 pktlen = packet->total_data_buflen, msd_len = 0;
        unsigned int section_index = NETVSC_INVALID_INDEX;
        struct multi_send_data *msdp;
        struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
        struct sk_buff *msd_skb = NULL;
        bool try_batch, xmit_more;

        /* If device is rescinded, return error and packet will get dropped. */
        if (unlikely(!net_device || net_device->destroy))
                return -ENODEV;

        nvchan = &net_device->chan_table[packet->q_idx];
        packet->send_buf_index = NETVSC_INVALID_INDEX;
        packet->cp_partial = false;

        /* Send control message directly without accessing msd (Multi-Send
         * Data) field which may be changed during data packet processing.
         */
        if (!skb)
                return netvsc_send_pkt(device, packet, net_device, pb, skb);

        /* batch packets in send buffer if possible */
        msdp = &nvchan->msd;
        if (msdp->pkt)
                msd_len = msdp->pkt->total_data_buflen;

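        /* Pick a send buffer section: append the whole packet to the
         * pending section if it fits, else copy only the RNDIS header
         * there and send the data pages directly (cp_partial), else
         * claim a fresh section and flush any pending packet.
         */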
        try_batch = msd_len > 0 && msdp->count < net_device->max_pkt;
        if (try_batch && msd_len + pktlen + net_device->pkt_align <
            net_device->send_section_size) {
                section_index = msdp->pkt->send_buf_index;

        } else if (try_batch && msd_len + packet->rmsg_size <
                   net_device->send_section_size) {
                section_index = msdp->pkt->send_buf_index;
                packet->cp_partial = true;

        } else if (pktlen + net_device->pkt_align <
                   net_device->send_section_size) {
                section_index = netvsc_get_next_send_section(net_device);
                if (unlikely(section_index == NETVSC_INVALID_INDEX)) {
                        ++ndev_ctx->eth_stats.tx_send_full;
                } else {
                        move_pkt_msd(&msd_send, &msd_skb, msdp);
                        msd_len = 0;
                }
        }

        /* Keep aggregating only if the stack says more data is coming,
         * we are not doing a mixed-mode (partial copy) send,
         * and the transmit queue is not flow blocked.
         */
        xmit_more = netdev_xmit_more() &&
                !packet->cp_partial &&
                !netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));

        if (section_index != NETVSC_INVALID_INDEX) {
                netvsc_copy_to_send_buf(net_device,
                                        section_index, msd_len,
                                        packet, rndis_msg, pb, xmit_more);

                packet->send_buf_index = section_index;

                if (packet->cp_partial) {
                        packet->page_buf_cnt -= packet->rmsg_pgcnt;
                        packet->total_data_buflen = msd_len + packet->rmsg_size;
                } else {
                        packet->page_buf_cnt = 0;
                        packet->total_data_buflen += msd_len;
                }

                if (msdp->pkt) {
                        packet->total_packets += msdp->pkt->total_packets;
                        packet->total_bytes += msdp->pkt->total_bytes;
                }

                if (msdp->skb)
                        dev_consume_skb_any(msdp->skb);

                if (xmit_more) {
                        msdp->skb = skb;
                        msdp->pkt = packet;
                        msdp->count++;
                } else {
                        cur_send = packet;
                        msdp->skb = NULL;
                        msdp->pkt = NULL;
                        msdp->count = 0;
                }
        } else {
                move_pkt_msd(&msd_send, &msd_skb, msdp);
                cur_send = packet;
        }

        if (msd_send) {
                int m_ret = netvsc_send_pkt(device, msd_send, net_device,
                                            NULL, msd_skb);

                if (m_ret != 0) {
                        netvsc_free_send_slot(net_device,
                                              msd_send->send_buf_index);
                        dev_kfree_skb_any(msd_skb);
                }
        }

        if (cur_send)
                ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);

        if (ret != 0 && section_index != NETVSC_INVALID_INDEX)
                netvsc_free_send_slot(net_device, section_index);

        return ret;
}

/* Send pending recv completions */
static int send_recv_completions(struct net_device *ndev,
                                 struct netvsc_device *nvdev,
                                 struct netvsc_channel *nvchan)
{
        struct multi_recv_comp *mrc = &nvchan->mrc;
        struct recv_comp_msg {
                struct nvsp_message_header hdr;
                u32 status;
        } __packed;
        struct recv_comp_msg msg = {
                .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
        };
        int ret;

        while (mrc->first != mrc->next) {
                const struct recv_comp_data *rcd
                        = mrc->slots + mrc->first;

                msg.status = rcd->status;
                ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
                                       rcd->tid, VM_PKT_COMP, 0);
                if (unlikely(ret)) {
                        struct net_device_context *ndev_ctx = netdev_priv(ndev);

                        ++ndev_ctx->eth_stats.rx_comp_busy;
                        return ret;
                }

                if (++mrc->first == nvdev->recv_completion_cnt)
                        mrc->first = 0;
        }

        /* receive completion ring has been emptied */
        if (unlikely(nvdev->destroy))
                wake_up(&nvdev->wait_drain);

        return 0;
}

/* Count how many receive completions are outstanding */
static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
                                 const struct multi_recv_comp *mrc,
                                 u32 *filled, u32 *avail)
{
        u32 count = nvdev->recv_completion_cnt;

        if (mrc->next >= mrc->first)
                *filled = mrc->next - mrc->first;
        else
                *filled = (count - mrc->first) + mrc->next;

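        /* One slot is always left unused so that first == next
         * unambiguously means the ring is empty.
         */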
        *avail = count - *filled - 1;
}

/* Add receive complete to ring to send to host. */
static void enq_receive_complete(struct net_device *ndev,
                                 struct netvsc_device *nvdev, u16 q_idx,
                                 u64 tid, u32 status)
{
        struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
        struct multi_recv_comp *mrc = &nvchan->mrc;
        struct recv_comp_data *rcd;
        u32 filled, avail;

        recv_comp_slot_avail(nvdev, mrc, &filled, &avail);

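        /* If a NAPI budget's worth of completions has accumulated,
         * try to flush some to the host before checking for space.
         */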
        if (unlikely(filled > NAPI_POLL_WEIGHT)) {
                send_recv_completions(ndev, nvdev, nvchan);
                recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
        }

        if (unlikely(!avail)) {
                netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
                           q_idx, tid);
                return;
        }

        rcd = mrc->slots + mrc->next;
        rcd->tid = tid;
        rcd->status = status;

        if (++mrc->next == nvdev->recv_completion_cnt)
                mrc->next = 0;
}

static int netvsc_receive(struct net_device *ndev,
                          struct netvsc_device *net_device,
                          struct netvsc_channel *nvchan,
                          const struct vmpacket_descriptor *desc,
                          const struct nvsp_message *nvsp)
{
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        struct vmbus_channel *channel = nvchan->channel;
        const struct vmtransfer_page_packet_header *vmxferpage_packet
                = container_of(desc, const struct vmtransfer_page_packet_header, d);
        u16 q_idx = channel->offermsg.offer.sub_channel_index;
        char *recv_buf = net_device->recv_buf;
        u32 status = NVSP_STAT_SUCCESS;
        int i;
        int count = 0;

        /* Make sure this is a valid nvsp packet */
        if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
                netif_err(net_device_ctx, rx_err, ndev,
                          "Unknown nvsp packet type received %u\n",
                          nvsp->hdr.msg_type);
                return 0;
        }

        if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
                netif_err(net_device_ctx, rx_err, ndev,
                          "Invalid xfer page set id - expecting %x got %x\n",
                          NETVSC_RECEIVE_BUFFER_ID,
                          vmxferpage_packet->xfer_pageset_id);
                return 0;
        }

        count = vmxferpage_packet->range_cnt;

        /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
        for (i = 0; i < count; i++) {
                u32 offset = vmxferpage_packet->ranges[i].byte_offset;
                u32 buflen = vmxferpage_packet->ranges[i].byte_count;
                void *data;
                int ret;

                if (unlikely(offset + buflen > net_device->recv_buf_size)) {
                        nvchan->rsc.cnt = 0;
                        status = NVSP_STAT_FAIL;
                        netif_err(net_device_ctx, rx_err, ndev,
                                  "Packet offset:%u + len:%u too big\n",
                                  offset, buflen);

                        continue;
                }

                data = recv_buf + offset;

                nvchan->rsc.is_last = (i == count - 1);

                trace_rndis_recv(ndev, q_idx, data);

                /* Pass it to the upper layer */
                ret = rndis_filter_receive(ndev, net_device,
                                           nvchan, data, buflen);

                if (unlikely(ret != NVSP_STAT_SUCCESS))
                        status = NVSP_STAT_FAIL;
        }

        enq_receive_complete(ndev, net_device, q_idx,
                             vmxferpage_packet->d.trans_id, status);

        return count;
}

static void netvsc_send_table(struct net_device *ndev,
                              const struct nvsp_message *nvmsg)
{
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        u32 count, *tab;
        int i;

        count = nvmsg->msg.v5_msg.send_table.count;
        if (count != VRSS_SEND_TAB_SIZE) {
                netdev_err(ndev, "Received wrong send-table size:%u\n", count);
                return;
        }

        tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
                      nvmsg->msg.v5_msg.send_table.offset);

        for (i = 0; i < count; i++)
                net_device_ctx->tx_table[i] = tab[i];
}

static void netvsc_send_vf(struct net_device *ndev,
                           const struct nvsp_message *nvmsg)
{
        struct net_device_context *net_device_ctx = netdev_priv(ndev);

        net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
        net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
        netdev_info(ndev, "VF slot %u %s\n",
                    net_device_ctx->vf_serial,
                    net_device_ctx->vf_alloc ? "added" : "removed");
}

static void netvsc_receive_inband(struct net_device *ndev,
                                  const struct nvsp_message *nvmsg)
{
        switch (nvmsg->hdr.msg_type) {
        case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
                netvsc_send_table(ndev, nvmsg);
                break;

        case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
                netvsc_send_vf(ndev, nvmsg);
                break;
        }
}

static int netvsc_process_raw_pkt(struct hv_device *device,
                                  struct netvsc_channel *nvchan,
                                  struct netvsc_device *net_device,
                                  struct net_device *ndev,
                                  const struct vmpacket_descriptor *desc,
                                  int budget)
{
        struct vmbus_channel *channel = nvchan->channel;
        const struct nvsp_message *nvmsg = hv_pkt_data(desc);

        trace_nvsp_recv(ndev, channel, nvmsg);

        switch (desc->type) {
        case VM_PKT_COMP:
                netvsc_send_completion(ndev, net_device, channel,
                                       desc, budget);
                break;

        case VM_PKT_DATA_USING_XFER_PAGES:
                return netvsc_receive(ndev, net_device, nvchan,
                                      desc, nvmsg);

        case VM_PKT_DATA_INBAND:
                netvsc_receive_inband(ndev, nvmsg);
                break;

        default:
                netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
                           desc->type, desc->trans_id);
                break;
        }

        return 0;
}

static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
{
        struct vmbus_channel *primary = channel->primary_channel;

        return primary ? primary->device_obj : channel->device_obj;
}

/* Network processing softirq
 * Process data in incoming ring buffer from host
 * Stops when ring is empty or budget is met or exceeded.
 */
int netvsc_poll(struct napi_struct *napi, int budget)
{
        struct netvsc_channel *nvchan
                = container_of(napi, struct netvsc_channel, napi);
        struct netvsc_device *net_device = nvchan->net_device;
        struct vmbus_channel *channel = nvchan->channel;
        struct hv_device *device = netvsc_channel_to_device(channel);
        struct net_device *ndev = hv_get_drvdata(device);
        int work_done = 0;
        int ret;

        /* If starting a new interval */
        if (!nvchan->desc)
                nvchan->desc = hv_pkt_iter_first(channel);

        while (nvchan->desc && work_done < budget) {
                work_done += netvsc_process_raw_pkt(device, nvchan, net_device,
                                                    ndev, nvchan->desc, budget);
                nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
        }

        /* Send any pending receive completions */
        ret = send_recv_completions(ndev, net_device, nvchan);

        /* If it did not exhaust NAPI budget this time
         *  and not doing busy poll
         * then re-enable host interrupts
         *  and reschedule if ring is not empty
         *   or sending receive completion failed.
         */
        if (work_done < budget &&
            napi_complete_done(napi, work_done) &&
            (ret || hv_end_read(&channel->inbound)) &&
            napi_schedule_prep(napi)) {
                hv_begin_read(&channel->inbound);
                __napi_schedule(napi);
        }

        /* Driver may overshoot since multiple packets per descriptor */
        return min(work_done, budget);
}

/* Call back when data is available in host ring buffer.
 * Processing is deferred until network softirq (NAPI)
 */
void netvsc_channel_cb(void *context)
{
        struct netvsc_channel *nvchan = context;
        struct vmbus_channel *channel = nvchan->channel;
        struct hv_ring_buffer_info *rbi = &channel->inbound;

        /* preload first vmpacket descriptor */
        prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);

        if (napi_schedule_prep(&nvchan->napi)) {
                /* disable interrupts from host */
                hv_begin_read(rbi);

                __napi_schedule_irqoff(&nvchan->napi);
        }
}

/*
 * netvsc_device_add - Callback when the device belonging to this
 * driver is added
 */
struct netvsc_device *netvsc_device_add(struct hv_device *device,
                                const struct netvsc_device_info *device_info)
{
        int i, ret = 0;
        struct netvsc_device *net_device;
        struct net_device *ndev = hv_get_drvdata(device);
        struct net_device_context *net_device_ctx = netdev_priv(ndev);

        net_device = alloc_net_device();
        if (!net_device)
                return ERR_PTR(-ENOMEM);

        for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
                net_device_ctx->tx_table[i] = 0;

        /* Because the device uses NAPI, all the interrupt batching and
         * control is done via Net softirq, not the channel handling
         */
        set_channel_read_mode(device->channel, HV_CALL_ISR);

        /* If we're reopening the device we may have multiple queues, fill the
         * chn_table with the default channel to use it before subchannels are
         * opened.
         * Initialize the channel state before we open;
         * we can be interrupted as soon as we open the channel.
         */

        for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
                struct netvsc_channel *nvchan = &net_device->chan_table[i];

                nvchan->channel = device->channel;
                nvchan->net_device = net_device;
                u64_stats_init(&nvchan->tx_stats.syncp);
                u64_stats_init(&nvchan->rx_stats.syncp);
        }

        /* Enable NAPI handler before init callbacks */
        netif_napi_add(ndev, &net_device->chan_table[0].napi,
                       netvsc_poll, NAPI_POLL_WEIGHT);

        /* Open the channel */
        ret = vmbus_open(device->channel, netvsc_ring_bytes,
                         netvsc_ring_bytes, NULL, 0,
                         netvsc_channel_cb, net_device->chan_table);

        if (ret != 0) {
                netdev_err(ndev, "unable to open channel: %d\n", ret);
                goto cleanup;
        }

        /* Channel is opened */
        netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");

        napi_enable(&net_device->chan_table[0].napi);

        /* Connect with the NetVsp */
        ret = netvsc_connect_vsp(device, net_device, device_info);
        if (ret != 0) {
                netdev_err(ndev,
                        "unable to connect to NetVSP - %d\n", ret);
                goto close;
        }

        /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
         * populated.
         */
        rcu_assign_pointer(net_device_ctx->nvdev, net_device);

        return net_device;

close:
        RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
        napi_disable(&net_device->chan_table[0].napi);

        /* Now, we can close the channel safely */
        vmbus_close(device->channel);

cleanup:
        netif_napi_del(&net_device->chan_table[0].napi);
        free_netvsc_device(&net_device->rcu);

        return ERR_PTR(ret);
}