linux/drivers/net/hyperv/netvsc.c
   1/*
   2 * Copyright (c) 2009, Microsoft Corporation.
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms and conditions of the GNU General Public License,
   6 * version 2, as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, see <http://www.gnu.org/licenses/>.
  15 *
  16 * Authors:
  17 *   Haiyang Zhang <haiyangz@microsoft.com>
  18 *   Hank Janssen  <hjanssen@microsoft.com>
  19 */
  20#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  21
  22#include <linux/kernel.h>
  23#include <linux/sched.h>
  24#include <linux/wait.h>
  25#include <linux/mm.h>
  26#include <linux/delay.h>
  27#include <linux/io.h>
  28#include <linux/slab.h>
  29#include <linux/netdevice.h>
  30#include <linux/if_ether.h>
  31#include <linux/vmalloc.h>
  32#include <linux/rtnetlink.h>
  33#include <linux/prefetch.h>
  34
  35#include <asm/sync_bitops.h>
  36
  37#include "hyperv_net.h"
  38
  39/*
  40 * Switch the data path from the synthetic interface to the VF
  41 * interface.
  42 */
  43void netvsc_switch_datapath(struct net_device *ndev, bool vf)
  44{
  45        struct net_device_context *net_device_ctx = netdev_priv(ndev);
  46        struct hv_device *dev = net_device_ctx->device_ctx;
  47        struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
  48        struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
  49
  50        memset(init_pkt, 0, sizeof(struct nvsp_message));
  51        init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
  52        if (vf)
  53                init_pkt->msg.v4_msg.active_dp.active_datapath =
  54                        NVSP_DATAPATH_VF;
  55        else
  56                init_pkt->msg.v4_msg.active_dp.active_datapath =
  57                        NVSP_DATAPATH_SYNTHETIC;
  58
  59        vmbus_sendpacket(dev->channel, init_pkt,
  60                               sizeof(struct nvsp_message),
  61                               (unsigned long)init_pkt,
  62                               VM_PKT_DATA_INBAND, 0);
  63}
  64
  65static struct netvsc_device *alloc_net_device(void)
  66{
  67        struct netvsc_device *net_device;
  68
  69        net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
  70        if (!net_device)
  71                return NULL;
  72
  73        init_waitqueue_head(&net_device->wait_drain);
  74        net_device->destroy = false;
  75        atomic_set(&net_device->open_cnt, 0);
  76        net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
  77        net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
  78
  79        init_completion(&net_device->channel_init_wait);
  80        init_waitqueue_head(&net_device->subchan_open);
  81        INIT_WORK(&net_device->subchan_work, rndis_set_subchannel);
  82
  83        return net_device;
  84}
  85
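     /*
      * RCU callback: free the per-channel receive completion slots and
      * the device structure itself.
      */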
  86static void free_netvsc_device(struct rcu_head *head)
  87{
  88        struct netvsc_device *nvdev
  89                = container_of(head, struct netvsc_device, rcu);
  90        int i;
  91
  92        for (i = 0; i < VRSS_CHANNEL_MAX; i++)
  93                vfree(nvdev->chan_table[i].mrc.slots);
  94
  95        kfree(nvdev);
  96}
  97
  98static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
  99{
 100        call_rcu(&nvdev->rcu, free_netvsc_device);
 101}
 102
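     /*
      * Revoke the receive and send buffers from the host, tear down
      * their GPADLs, and free the buffers and the send section map.
      */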
 103static void netvsc_destroy_buf(struct hv_device *device)
 104{
 105        struct nvsp_message *revoke_packet;
 106        struct net_device *ndev = hv_get_drvdata(device);
 107        struct net_device_context *ndc = netdev_priv(ndev);
 108        struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev);
 109        int ret;
 110
 111        /*
 112         * If we got a section count, it means we received a
  113         * SendReceiveBufferComplete msg (i.e. we sent a
  114         * NvspMessage1TypeSendReceiveBuffer msg); therefore, we need
  115         * to send a revoke msg here
 116         */
 117        if (net_device->recv_section_cnt) {
 118                /* Send the revoke receive buffer */
 119                revoke_packet = &net_device->revoke_packet;
 120                memset(revoke_packet, 0, sizeof(struct nvsp_message));
 121
 122                revoke_packet->hdr.msg_type =
 123                        NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
 124                revoke_packet->msg.v1_msg.
 125                revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
 126
 127                ret = vmbus_sendpacket(device->channel,
 128                                       revoke_packet,
 129                                       sizeof(struct nvsp_message),
 130                                       (unsigned long)revoke_packet,
 131                                       VM_PKT_DATA_INBAND, 0);
  132                /* If the failure is because the channel is rescinded,
  133                 * ignore the failure since we cannot send on a rescinded
  134                 * channel. This allows us to clean up properly
  135                 * even when the channel is rescinded.
 136                 */
 137                if (device->channel->rescind)
 138                        ret = 0;
 139                /*
 140                 * If we failed here, we might as well return and
  141                 * have a leak rather than continue and risk a bugcheck
 142                 */
 143                if (ret != 0) {
 144                        netdev_err(ndev, "unable to send "
 145                                "revoke receive buffer to netvsp\n");
 146                        return;
 147                }
 148                net_device->recv_section_cnt = 0;
 149        }
 150
 151        /* Teardown the gpadl on the vsp end */
 152        if (net_device->recv_buf_gpadl_handle) {
 153                ret = vmbus_teardown_gpadl(device->channel,
 154                                           net_device->recv_buf_gpadl_handle);
 155
 156                /* If we failed here, we might as well return and have a leak
  157                 * rather than continue and risk a bugcheck
 158                 */
 159                if (ret != 0) {
 160                        netdev_err(ndev,
 161                                   "unable to teardown receive buffer's gpadl\n");
 162                        return;
 163                }
 164                net_device->recv_buf_gpadl_handle = 0;
 165        }
 166
 167        if (net_device->recv_buf) {
 168                /* Free up the receive buffer */
 169                vfree(net_device->recv_buf);
 170                net_device->recv_buf = NULL;
 171        }
 172
  173        /* Deal with the send buffer we may have set up.
  174         * If we got a send section count, it means we received a
  175         * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (i.e. we sent a
  176         * NVSP_MSG1_TYPE_SEND_SEND_BUF msg); therefore, we need
  177         * to send a revoke msg here
 178         */
 179        if (net_device->send_section_cnt) {
  180                /* Send the revoke send buffer */
 181                revoke_packet = &net_device->revoke_packet;
 182                memset(revoke_packet, 0, sizeof(struct nvsp_message));
 183
 184                revoke_packet->hdr.msg_type =
 185                        NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
 186                revoke_packet->msg.v1_msg.revoke_send_buf.id =
 187                        NETVSC_SEND_BUFFER_ID;
 188
 189                ret = vmbus_sendpacket(device->channel,
 190                                       revoke_packet,
 191                                       sizeof(struct nvsp_message),
 192                                       (unsigned long)revoke_packet,
 193                                       VM_PKT_DATA_INBAND, 0);
 194
  195                /* If the failure is because the channel is rescinded,
  196                 * ignore the failure since we cannot send on a rescinded
  197                 * channel. This allows us to clean up properly
  198                 * even when the channel is rescinded.
 199                 */
 200                if (device->channel->rescind)
 201                        ret = 0;
 202
 203                /* If we failed here, we might as well return and
  204                 * have a leak rather than continue and risk a bugcheck
 205                 */
 206                if (ret != 0) {
 207                        netdev_err(ndev, "unable to send "
 208                                   "revoke send buffer to netvsp\n");
 209                        return;
 210                }
 211                net_device->send_section_cnt = 0;
 212        }
 213        /* Teardown the gpadl on the vsp end */
 214        if (net_device->send_buf_gpadl_handle) {
 215                ret = vmbus_teardown_gpadl(device->channel,
 216                                           net_device->send_buf_gpadl_handle);
 217
 218                /* If we failed here, we might as well return and have a leak
  219                 * rather than continue and risk a bugcheck
 220                 */
 221                if (ret != 0) {
 222                        netdev_err(ndev,
 223                                   "unable to teardown send buffer's gpadl\n");
 224                        return;
 225                }
 226                net_device->send_buf_gpadl_handle = 0;
 227        }
 228        if (net_device->send_buf) {
 229                /* Free up the send buffer */
 230                vfree(net_device->send_buf);
 231                net_device->send_buf = NULL;
 232        }
 233        kfree(net_device->send_section_map);
 234}
 235
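     /*
      * Allocate the receive completion ring for a queue, preferring
      * memory local to the channel's target CPU.
      */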
 236int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
 237{
 238        struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
 239        int node = cpu_to_node(nvchan->channel->target_cpu);
 240        size_t size;
 241
 242        size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
 243        nvchan->mrc.slots = vzalloc_node(size, node);
 244        if (!nvchan->mrc.slots)
 245                nvchan->mrc.slots = vzalloc(size);
 246
 247        return nvchan->mrc.slots ? 0 : -ENOMEM;
 248}
 249
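     /*
      * Allocate the receive and send buffers, establish their GPADLs,
      * and exchange the NVSP messages that hand them to the host.
      */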
 250static int netvsc_init_buf(struct hv_device *device,
 251                           struct netvsc_device *net_device,
 252                           const struct netvsc_device_info *device_info)
 253{
 254        struct nvsp_1_message_send_receive_buffer_complete *resp;
 255        struct net_device *ndev = hv_get_drvdata(device);
 256        struct nvsp_message *init_packet;
 257        unsigned int buf_size;
 258        size_t map_words;
 259        int ret = 0;
 260
 261        /* Get receive buffer area. */
 262        buf_size = device_info->recv_sections * device_info->recv_section_size;
 263        buf_size = roundup(buf_size, PAGE_SIZE);
 264
 265        net_device->recv_buf = vzalloc(buf_size);
 266        if (!net_device->recv_buf) {
 267                netdev_err(ndev,
 268                           "unable to allocate receive buffer of size %u\n",
 269                           buf_size);
 270                ret = -ENOMEM;
 271                goto cleanup;
 272        }
 273
 274        /*
 275         * Establish the gpadl handle for this buffer on this
 276         * channel.  Note: This call uses the vmbus connection rather
 277         * than the channel to establish the gpadl handle.
 278         */
 279        ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
 280                                    buf_size,
 281                                    &net_device->recv_buf_gpadl_handle);
 282        if (ret != 0) {
 283                netdev_err(ndev,
 284                        "unable to establish receive buffer's gpadl\n");
 285                goto cleanup;
 286        }
 287
 288        /* Notify the NetVsp of the gpadl handle */
 289        init_packet = &net_device->channel_init_pkt;
 290        memset(init_packet, 0, sizeof(struct nvsp_message));
 291        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
 292        init_packet->msg.v1_msg.send_recv_buf.
 293                gpadl_handle = net_device->recv_buf_gpadl_handle;
 294        init_packet->msg.v1_msg.
 295                send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
 296
 297        /* Send the gpadl notification request */
 298        ret = vmbus_sendpacket(device->channel, init_packet,
 299                               sizeof(struct nvsp_message),
 300                               (unsigned long)init_packet,
 301                               VM_PKT_DATA_INBAND,
 302                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 303        if (ret != 0) {
 304                netdev_err(ndev,
 305                        "unable to send receive buffer's gpadl to netvsp\n");
 306                goto cleanup;
 307        }
 308
 309        wait_for_completion(&net_device->channel_init_wait);
 310
 311        /* Check the response */
 312        resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
 313        if (resp->status != NVSP_STAT_SUCCESS) {
 314                netdev_err(ndev,
 315                           "Unable to complete receive buffer initialization with NetVsp - status %d\n",
 316                           resp->status);
 317                ret = -EINVAL;
 318                goto cleanup;
 319        }
 320
 321        /* Parse the response */
 322        netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
 323                   resp->num_sections, resp->sections[0].sub_alloc_size,
 324                   resp->sections[0].num_sub_allocs);
 325
 326        /* There should only be one section for the entire receive buffer */
 327        if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
 328                ret = -EINVAL;
 329                goto cleanup;
 330        }
 331
 332        net_device->recv_section_size = resp->sections[0].sub_alloc_size;
 333        net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;
 334
 335        /* Setup receive completion ring */
 336        net_device->recv_completion_cnt
 337                = round_up(net_device->recv_section_cnt + 1,
 338                           PAGE_SIZE / sizeof(u64));
 339        ret = netvsc_alloc_recv_comp_ring(net_device, 0);
 340        if (ret)
 341                goto cleanup;
 342
 343        /* Now setup the send buffer. */
 344        buf_size = device_info->send_sections * device_info->send_section_size;
 345        buf_size = round_up(buf_size, PAGE_SIZE);
 346
 347        net_device->send_buf = vzalloc(buf_size);
 348        if (!net_device->send_buf) {
 349                netdev_err(ndev, "unable to allocate send buffer of size %u\n",
 350                           buf_size);
 351                ret = -ENOMEM;
 352                goto cleanup;
 353        }
 354
 355        /* Establish the gpadl handle for this buffer on this
 356         * channel.  Note: This call uses the vmbus connection rather
 357         * than the channel to establish the gpadl handle.
 358         */
 359        ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
 360                                    buf_size,
 361                                    &net_device->send_buf_gpadl_handle);
 362        if (ret != 0) {
 363                netdev_err(ndev,
 364                           "unable to establish send buffer's gpadl\n");
 365                goto cleanup;
 366        }
 367
 368        /* Notify the NetVsp of the gpadl handle */
 369        init_packet = &net_device->channel_init_pkt;
 370        memset(init_packet, 0, sizeof(struct nvsp_message));
 371        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
 372        init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
 373                net_device->send_buf_gpadl_handle;
 374        init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
 375
 376        /* Send the gpadl notification request */
 377        ret = vmbus_sendpacket(device->channel, init_packet,
 378                               sizeof(struct nvsp_message),
 379                               (unsigned long)init_packet,
 380                               VM_PKT_DATA_INBAND,
 381                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 382        if (ret != 0) {
 383                netdev_err(ndev,
 384                           "unable to send send buffer's gpadl to netvsp\n");
 385                goto cleanup;
 386        }
 387
 388        wait_for_completion(&net_device->channel_init_wait);
 389
 390        /* Check the response */
 391        if (init_packet->msg.v1_msg.
 392            send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
 393                netdev_err(ndev, "Unable to complete send buffer "
 394                           "initialization with NetVsp - status %d\n",
 395                           init_packet->msg.v1_msg.
 396                           send_send_buf_complete.status);
 397                ret = -EINVAL;
 398                goto cleanup;
 399        }
 400
 401        /* Parse the response */
 402        net_device->send_section_size = init_packet->msg.
 403                                v1_msg.send_send_buf_complete.section_size;
 404
 405        /* Section count is simply the size divided by the section size. */
 406        net_device->send_section_cnt = buf_size / net_device->send_section_size;
 407
 408        netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
 409                   net_device->send_section_size, net_device->send_section_cnt);
 410
 411        /* Setup state for managing the send buffer. */
 412        map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);
 413
 414        net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
 415        if (net_device->send_section_map == NULL) {
 416                ret = -ENOMEM;
 417                goto cleanup;
 418        }
 419
 420        goto exit;
 421
 422cleanup:
 423        netvsc_destroy_buf(device);
 424
 425exit:
 426        return ret;
 427}
 428
 429/* Negotiate NVSP protocol version */
 430static int negotiate_nvsp_ver(struct hv_device *device,
 431                              struct netvsc_device *net_device,
 432                              struct nvsp_message *init_packet,
 433                              u32 nvsp_ver)
 434{
 435        struct net_device *ndev = hv_get_drvdata(device);
 436        int ret;
 437
 438        memset(init_packet, 0, sizeof(struct nvsp_message));
 439        init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
 440        init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
 441        init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
 442
 443        /* Send the init request */
 444        ret = vmbus_sendpacket(device->channel, init_packet,
 445                               sizeof(struct nvsp_message),
 446                               (unsigned long)init_packet,
 447                               VM_PKT_DATA_INBAND,
 448                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 449
 450        if (ret != 0)
 451                return ret;
 452
 453        wait_for_completion(&net_device->channel_init_wait);
 454
 455        if (init_packet->msg.init_msg.init_complete.status !=
 456            NVSP_STAT_SUCCESS)
 457                return -EINVAL;
 458
 459        if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
 460                return 0;
 461
 462        /* NVSPv2 or later: Send NDIS config */
 463        memset(init_packet, 0, sizeof(struct nvsp_message));
 464        init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
 465        init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
 466        init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;
 467
 468        if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) {
 469                init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;
 470
 471                /* Teaming bit is needed to receive link speed updates */
 472                init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
 473        }
 474
 475        ret = vmbus_sendpacket(device->channel, init_packet,
 476                                sizeof(struct nvsp_message),
 477                                (unsigned long)init_packet,
 478                                VM_PKT_DATA_INBAND, 0);
 479
 480        return ret;
 481}
 482
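     /*
      * Negotiate the NVSP protocol version and send the NDIS version,
      * then set up the receive and send buffers.
      */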
 483static int netvsc_connect_vsp(struct hv_device *device,
 484                              struct netvsc_device *net_device,
 485                              const struct netvsc_device_info *device_info)
 486{
 487        const u32 ver_list[] = {
 488                NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
 489                NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
 490        };
 491        struct nvsp_message *init_packet;
 492        int ndis_version, i, ret;
 493
 494        init_packet = &net_device->channel_init_pkt;
 495
 496        /* Negotiate the latest NVSP protocol supported */
 497        for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--)
 498                if (negotiate_nvsp_ver(device, net_device, init_packet,
 499                                       ver_list[i])  == 0) {
 500                        net_device->nvsp_version = ver_list[i];
 501                        break;
 502                }
 503
 504        if (i < 0) {
 505                ret = -EPROTO;
 506                goto cleanup;
 507        }
 508
 509        pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);
 510
 511        /* Send the ndis version */
 512        memset(init_packet, 0, sizeof(struct nvsp_message));
 513
 514        if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
 515                ndis_version = 0x00060001;
 516        else
 517                ndis_version = 0x0006001e;
 518
 519        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
 520        init_packet->msg.v1_msg.
 521                send_ndis_ver.ndis_major_ver =
 522                                (ndis_version & 0xFFFF0000) >> 16;
 523        init_packet->msg.v1_msg.
 524                send_ndis_ver.ndis_minor_ver =
 525                                ndis_version & 0xFFFF;
 526
 527        /* Send the init request */
 528        ret = vmbus_sendpacket(device->channel, init_packet,
 529                                sizeof(struct nvsp_message),
 530                                (unsigned long)init_packet,
 531                                VM_PKT_DATA_INBAND, 0);
 532        if (ret != 0)
 533                goto cleanup;
 534
 536        ret = netvsc_init_buf(device, net_device, device_info);
 537
 538cleanup:
 539        return ret;
 540}
 541
 542static void netvsc_disconnect_vsp(struct hv_device *device)
 543{
 544        netvsc_destroy_buf(device);
 545}
 546
 547/*
 548 * netvsc_device_remove - Callback when the root bus device is removed
 549 */
 550void netvsc_device_remove(struct hv_device *device)
 551{
 552        struct net_device *ndev = hv_get_drvdata(device);
 553        struct net_device_context *net_device_ctx = netdev_priv(ndev);
 554        struct netvsc_device *net_device
 555                = rtnl_dereference(net_device_ctx->nvdev);
 556        int i;
 557
 558        cancel_work_sync(&net_device->subchan_work);
 559
 560        netvsc_disconnect_vsp(device);
 561
 562        RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
 563
 564        /*
 565         * At this point, no one should be accessing net_device
 566         * except in here
 567         */
 568        netdev_dbg(ndev, "net device safe to remove\n");
 569
 570        /* Now, we can close the channel safely */
 571        vmbus_close(device->channel);
 572
  573        /* And disassociate NAPI context from device */
 574        for (i = 0; i < net_device->num_chn; i++)
 575                netif_napi_del(&net_device->chan_table[i].napi);
 576
 577        /* Release all resources */
 578        free_netvsc_device_rcu(net_device);
 579}
 580
 581#define RING_AVAIL_PERCENT_HIWATER 20
 582#define RING_AVAIL_PERCENT_LOWATER 10
 583
 584/*
 585 * Get the percentage of available bytes to write in the ring.
 586 * The return value is in range from 0 to 100.
 587 */
 588static inline u32 hv_ringbuf_avail_percent(
 589                struct hv_ring_buffer_info *ring_info)
 590{
 591        u32 avail_read, avail_write;
 592
 593        hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write);
 594
 595        return avail_write * 100 / ring_info->ring_datasize;
 596}
 597
 598static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
 599                                         u32 index)
 600{
 601        sync_change_bit(index, net_device->send_section_map);
 602}
 603
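     /*
      * Transmit completion: release the send buffer slot, update
      * statistics, free the skb and wake the queue if needed.
      */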
 604static void netvsc_send_tx_complete(struct netvsc_device *net_device,
 605                                    struct vmbus_channel *incoming_channel,
 606                                    struct hv_device *device,
 607                                    const struct vmpacket_descriptor *desc,
 608                                    int budget)
 609{
 610        struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
 611        struct net_device *ndev = hv_get_drvdata(device);
 612        struct vmbus_channel *channel = device->channel;
 613        u16 q_idx = 0;
 614        int queue_sends;
 615
 616        /* Notify the layer above us */
 617        if (likely(skb)) {
 618                const struct hv_netvsc_packet *packet
 619                        = (struct hv_netvsc_packet *)skb->cb;
 620                u32 send_index = packet->send_buf_index;
 621                struct netvsc_stats *tx_stats;
 622
 623                if (send_index != NETVSC_INVALID_INDEX)
 624                        netvsc_free_send_slot(net_device, send_index);
 625                q_idx = packet->q_idx;
 626                channel = incoming_channel;
 627
 628                tx_stats = &net_device->chan_table[q_idx].tx_stats;
 629
 630                u64_stats_update_begin(&tx_stats->syncp);
 631                tx_stats->packets += packet->total_packets;
 632                tx_stats->bytes += packet->total_bytes;
 633                u64_stats_update_end(&tx_stats->syncp);
 634
 635                napi_consume_skb(skb, budget);
 636        }
 637
 638        queue_sends =
 639                atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
 640
 641        if (net_device->destroy && queue_sends == 0)
 642                wake_up(&net_device->wait_drain);
 643
 644        if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
 645            (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
 646             queue_sends < 1))
 647                netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
 648}
 649
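     /*
      * Handle a completion packet: either wake the channel init waiter
      * or process an RNDIS transmit completion.
      */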
 650static void netvsc_send_completion(struct netvsc_device *net_device,
 651                                   struct vmbus_channel *incoming_channel,
 652                                   struct hv_device *device,
 653                                   const struct vmpacket_descriptor *desc,
 654                                   int budget)
 655{
 656        struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
 657        struct net_device *ndev = hv_get_drvdata(device);
 658
 659        switch (nvsp_packet->hdr.msg_type) {
 660        case NVSP_MSG_TYPE_INIT_COMPLETE:
 661        case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
 662        case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
 663        case NVSP_MSG5_TYPE_SUBCHANNEL:
 664                /* Copy the response back */
 665                memcpy(&net_device->channel_init_pkt, nvsp_packet,
 666                       sizeof(struct nvsp_message));
 667                complete(&net_device->channel_init_wait);
 668                break;
 669
 670        case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
 671                netvsc_send_tx_complete(net_device, incoming_channel,
 672                                        device, desc, budget);
 673                break;
 674
 675        default:
 676                netdev_err(ndev,
 677                           "Unknown send completion type %d received!!\n",
 678                           nvsp_packet->hdr.msg_type);
 679        }
 680}
 681
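     /*
      * Find and claim a free send buffer section; returns
      * NETVSC_INVALID_INDEX if all sections are in use.
      */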
 682static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
 683{
 684        unsigned long *map_addr = net_device->send_section_map;
 685        unsigned int i;
 686
 687        for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
 688                if (sync_test_and_set_bit(i, map_addr) == 0)
 689                        return i;
 690        }
 691
 692        return NETVSC_INVALID_INDEX;
 693}
 694
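     /*
      * Copy the packet's page buffers into the chosen send buffer
      * section, padding to pkt_align when more packets will follow.
      */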
 695static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
 696                                   unsigned int section_index,
 697                                   u32 pend_size,
 698                                   struct hv_netvsc_packet *packet,
 699                                   struct rndis_message *rndis_msg,
 700                                   struct hv_page_buffer *pb,
 701                                   struct sk_buff *skb)
 702{
 703        char *start = net_device->send_buf;
 704        char *dest = start + (section_index * net_device->send_section_size)
 705                     + pend_size;
 706        int i;
 707        u32 msg_size = 0;
 708        u32 padding = 0;
 709        u32 remain = packet->total_data_buflen % net_device->pkt_align;
 710        u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
 711                packet->page_buf_cnt;
 712
 713        /* Add padding */
 714        if (skb->xmit_more && remain && !packet->cp_partial) {
 715                padding = net_device->pkt_align - remain;
 716                rndis_msg->msg_len += padding;
 717                packet->total_data_buflen += padding;
 718        }
 719
 720        for (i = 0; i < page_count; i++) {
 721                char *src = phys_to_virt(pb[i].pfn << PAGE_SHIFT);
 722                u32 offset = pb[i].offset;
 723                u32 len = pb[i].len;
 724
 725                memcpy(dest, (src + offset), len);
 726                msg_size += len;
 727                dest += len;
 728        }
 729
 730        if (padding) {
 731                memset(dest, 0, padding);
 732                msg_size += padding;
 733        }
 734
 735        return msg_size;
 736}
 737
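     /*
      * Post a single RNDIS packet message to the channel, with or
      * without external page buffers, and apply queue flow control.
      */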
 738static inline int netvsc_send_pkt(
 739        struct hv_device *device,
 740        struct hv_netvsc_packet *packet,
 741        struct netvsc_device *net_device,
 742        struct hv_page_buffer *pb,
 743        struct sk_buff *skb)
 744{
 745        struct nvsp_message nvmsg;
 746        struct nvsp_1_message_send_rndis_packet * const rpkt =
 747                &nvmsg.msg.v1_msg.send_rndis_pkt;
 748        struct netvsc_channel * const nvchan =
 749                &net_device->chan_table[packet->q_idx];
 750        struct vmbus_channel *out_channel = nvchan->channel;
 751        struct net_device *ndev = hv_get_drvdata(device);
 752        struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
 753        u64 req_id;
 754        int ret;
 755        u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);
 756
 757        nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
 758        if (skb)
 759                rpkt->channel_type = 0;         /* 0 is RMC_DATA */
 760        else
 761                rpkt->channel_type = 1;         /* 1 is RMC_CONTROL */
 762
 763        rpkt->send_buf_section_index = packet->send_buf_index;
 764        if (packet->send_buf_index == NETVSC_INVALID_INDEX)
 765                rpkt->send_buf_section_size = 0;
 766        else
 767                rpkt->send_buf_section_size = packet->total_data_buflen;
 768
 769        req_id = (ulong)skb;
 770
 771        if (out_channel->rescind)
 772                return -ENODEV;
 773
 774        if (packet->page_buf_cnt) {
 775                if (packet->cp_partial)
 776                        pb += packet->rmsg_pgcnt;
 777
 778                ret = vmbus_sendpacket_pagebuffer(out_channel,
 779                                                  pb, packet->page_buf_cnt,
 780                                                  &nvmsg, sizeof(nvmsg),
 781                                                  req_id);
 782        } else {
 783                ret = vmbus_sendpacket(out_channel,
 784                                       &nvmsg, sizeof(nvmsg),
 785                                       req_id, VM_PKT_DATA_INBAND,
 786                                       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 787        }
 788
 789        if (ret == 0) {
 790                atomic_inc_return(&nvchan->queue_sends);
 791
 792                if (ring_avail < RING_AVAIL_PERCENT_LOWATER)
 793                        netif_tx_stop_queue(txq);
 794        } else if (ret == -EAGAIN) {
 795                netif_tx_stop_queue(txq);
 796                if (atomic_read(&nvchan->queue_sends) < 1) {
 797                        netif_tx_wake_queue(txq);
 798                        ret = -ENOSPC;
 799                }
 800        } else {
 801                netdev_err(ndev,
 802                           "Unable to send packet pages %u len %u, ret %d\n",
 803                           packet->page_buf_cnt, packet->total_data_buflen,
 804                           ret);
 805        }
 806
 807        return ret;
 808}
 809
 810/* Move packet out of multi send data (msd), and clear msd */
 811static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
 812                                struct sk_buff **msd_skb,
 813                                struct multi_send_data *msdp)
 814{
 815        *msd_skb = msdp->skb;
 816        *msd_send = msdp->pkt;
 817        msdp->skb = NULL;
 818        msdp->pkt = NULL;
 819        msdp->count = 0;
 820}
 821
 822/* RCU already held by caller */
 823int netvsc_send(struct net_device_context *ndev_ctx,
 824                struct hv_netvsc_packet *packet,
 825                struct rndis_message *rndis_msg,
 826                struct hv_page_buffer *pb,
 827                struct sk_buff *skb)
 828{
 829        struct netvsc_device *net_device
 830                = rcu_dereference_bh(ndev_ctx->nvdev);
 831        struct hv_device *device = ndev_ctx->device_ctx;
 832        int ret = 0;
 833        struct netvsc_channel *nvchan;
 834        u32 pktlen = packet->total_data_buflen, msd_len = 0;
 835        unsigned int section_index = NETVSC_INVALID_INDEX;
 836        struct multi_send_data *msdp;
 837        struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
 838        struct sk_buff *msd_skb = NULL;
 839        bool try_batch;
 840        bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
 841
 842        /* If device is rescinded, return error and packet will get dropped. */
 843        if (unlikely(!net_device || net_device->destroy))
 844                return -ENODEV;
 845
 846        /* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
 847         * here before the negotiation with the host is finished and
 848         * send_section_map may not be allocated yet.
 849         */
 850        if (unlikely(!net_device->send_section_map))
 851                return -EAGAIN;
 852
 853        nvchan = &net_device->chan_table[packet->q_idx];
 854        packet->send_buf_index = NETVSC_INVALID_INDEX;
 855        packet->cp_partial = false;
 856
 857        /* Send control message directly without accessing msd (Multi-Send
 858         * Data) field which may be changed during data packet processing.
 859         */
 860        if (!skb) {
 861                cur_send = packet;
 862                goto send_now;
 863        }
 864
 865        /* batch packets in send buffer if possible */
 866        msdp = &nvchan->msd;
 867        if (msdp->pkt)
 868                msd_len = msdp->pkt->total_data_buflen;
 869
  870        try_batch = msd_len > 0 && msdp->count < net_device->max_pkt;
 871        if (try_batch && msd_len + pktlen + net_device->pkt_align <
 872            net_device->send_section_size) {
 873                section_index = msdp->pkt->send_buf_index;
 874
 875        } else if (try_batch && msd_len + packet->rmsg_size <
 876                   net_device->send_section_size) {
 877                section_index = msdp->pkt->send_buf_index;
 878                packet->cp_partial = true;
 879
 880        } else if (pktlen + net_device->pkt_align <
 881                   net_device->send_section_size) {
 882                section_index = netvsc_get_next_send_section(net_device);
 883                if (unlikely(section_index == NETVSC_INVALID_INDEX)) {
 884                        ++ndev_ctx->eth_stats.tx_send_full;
 885                } else {
 886                        move_pkt_msd(&msd_send, &msd_skb, msdp);
 887                        msd_len = 0;
 888                }
 889        }
 890
 891        if (section_index != NETVSC_INVALID_INDEX) {
 892                netvsc_copy_to_send_buf(net_device,
 893                                        section_index, msd_len,
 894                                        packet, rndis_msg, pb, skb);
 895
 896                packet->send_buf_index = section_index;
 897
 898                if (packet->cp_partial) {
 899                        packet->page_buf_cnt -= packet->rmsg_pgcnt;
 900                        packet->total_data_buflen = msd_len + packet->rmsg_size;
 901                } else {
 902                        packet->page_buf_cnt = 0;
 903                        packet->total_data_buflen += msd_len;
 904                }
 905
 906                if (msdp->pkt) {
 907                        packet->total_packets += msdp->pkt->total_packets;
 908                        packet->total_bytes += msdp->pkt->total_bytes;
 909                }
 910
 911                if (msdp->skb)
 912                        dev_consume_skb_any(msdp->skb);
 913
 914                if (xmit_more && !packet->cp_partial) {
 915                        msdp->skb = skb;
 916                        msdp->pkt = packet;
 917                        msdp->count++;
 918                } else {
 919                        cur_send = packet;
 920                        msdp->skb = NULL;
 921                        msdp->pkt = NULL;
 922                        msdp->count = 0;
 923                }
 924        } else {
 925                move_pkt_msd(&msd_send, &msd_skb, msdp);
 926                cur_send = packet;
 927        }
 928
 929        if (msd_send) {
 930                int m_ret = netvsc_send_pkt(device, msd_send, net_device,
 931                                            NULL, msd_skb);
 932
 933                if (m_ret != 0) {
 934                        netvsc_free_send_slot(net_device,
 935                                              msd_send->send_buf_index);
 936                        dev_kfree_skb_any(msd_skb);
 937                }
 938        }
 939
 940send_now:
 941        if (cur_send)
 942                ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);
 943
 944        if (ret != 0 && section_index != NETVSC_INVALID_INDEX)
 945                netvsc_free_send_slot(net_device, section_index);
 946
 947        return ret;
 948}
 949
 950/* Send pending recv completions */
 951static int send_recv_completions(struct net_device *ndev,
 952                                 struct netvsc_device *nvdev,
 953                                 struct netvsc_channel *nvchan)
 954{
 955        struct multi_recv_comp *mrc = &nvchan->mrc;
 956        struct recv_comp_msg {
 957                struct nvsp_message_header hdr;
 958                u32 status;
 959        }  __packed;
 960        struct recv_comp_msg msg = {
 961                .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
 962        };
 963        int ret;
 964
 965        while (mrc->first != mrc->next) {
 966                const struct recv_comp_data *rcd
 967                        = mrc->slots + mrc->first;
 968
 969                msg.status = rcd->status;
 970                ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
 971                                       rcd->tid, VM_PKT_COMP, 0);
 972                if (unlikely(ret)) {
 973                        struct net_device_context *ndev_ctx = netdev_priv(ndev);
 974
 975                        ++ndev_ctx->eth_stats.rx_comp_busy;
 976                        return ret;
 977                }
 978
 979                if (++mrc->first == nvdev->recv_completion_cnt)
 980                        mrc->first = 0;
 981        }
 982
 983        /* receive completion ring has been emptied */
 984        if (unlikely(nvdev->destroy))
 985                wake_up(&nvdev->wait_drain);
 986
 987        return 0;
 988}
 989
  990/* Count how many receive completion slots are filled and how many are free */
 991static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
 992                                 const struct multi_recv_comp *mrc,
 993                                 u32 *filled, u32 *avail)
 994{
 995        u32 count = nvdev->recv_completion_cnt;
 996
 997        if (mrc->next >= mrc->first)
 998                *filled = mrc->next - mrc->first;
 999        else
1000                *filled = (count - mrc->first) + mrc->next;
1001
1002        *avail = count - *filled - 1;
1003}
1004
 1005/* Add a receive completion to the ring, to be sent to the host. */
1006static void enq_receive_complete(struct net_device *ndev,
1007                                 struct netvsc_device *nvdev, u16 q_idx,
1008                                 u64 tid, u32 status)
1009{
1010        struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
1011        struct multi_recv_comp *mrc = &nvchan->mrc;
1012        struct recv_comp_data *rcd;
1013        u32 filled, avail;
1014
1015        recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
1016
1017        if (unlikely(filled > NAPI_POLL_WEIGHT)) {
1018                send_recv_completions(ndev, nvdev, nvchan);
1019                recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
1020        }
1021
1022        if (unlikely(!avail)) {
1023                netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
1024                           q_idx, tid);
1025                return;
1026        }
1027
1028        rcd = mrc->slots + mrc->next;
1029        rcd->tid = tid;
1030        rcd->status = status;
1031
1032        if (++mrc->next == nvdev->recv_completion_cnt)
1033                mrc->next = 0;
1034}
1035
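     /*
      * Process a transfer-page packet: pass each contained RNDIS packet
      * to the RNDIS filter and queue one receive completion for the host.
      */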
1036static int netvsc_receive(struct net_device *ndev,
1037                          struct netvsc_device *net_device,
1038                          struct net_device_context *net_device_ctx,
1039                          struct hv_device *device,
1040                          struct vmbus_channel *channel,
1041                          const struct vmpacket_descriptor *desc,
1042                          struct nvsp_message *nvsp)
1043{
1044        const struct vmtransfer_page_packet_header *vmxferpage_packet
1045                = container_of(desc, const struct vmtransfer_page_packet_header, d);
1046        u16 q_idx = channel->offermsg.offer.sub_channel_index;
1047        char *recv_buf = net_device->recv_buf;
1048        u32 status = NVSP_STAT_SUCCESS;
1049        int i;
1050        int count = 0;
1051
1052        /* Make sure this is a valid nvsp packet */
1053        if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
1054                netif_err(net_device_ctx, rx_err, ndev,
1055                          "Unknown nvsp packet type received %u\n",
1056                          nvsp->hdr.msg_type);
1057                return 0;
1058        }
1059
1060        if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
1061                netif_err(net_device_ctx, rx_err, ndev,
1062                          "Invalid xfer page set id - expecting %x got %x\n",
1063                          NETVSC_RECEIVE_BUFFER_ID,
1064                          vmxferpage_packet->xfer_pageset_id);
1065                return 0;
1066        }
1067
1068        count = vmxferpage_packet->range_cnt;
1069
 1070        /* Each range represents one RNDIS packet that contains one Ethernet frame */
1071        for (i = 0; i < count; i++) {
1072                void *data = recv_buf
1073                        + vmxferpage_packet->ranges[i].byte_offset;
1074                u32 buflen = vmxferpage_packet->ranges[i].byte_count;
1075
1076                /* Pass it to the upper layer */
1077                status = rndis_filter_receive(ndev, net_device, device,
1078                                              channel, data, buflen);
1079        }
1080
1081        enq_receive_complete(ndev, net_device, q_idx,
1082                             vmxferpage_packet->d.trans_id, status);
1083
1084        return count;
1085}
1086
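     /* Cache the transmit indirection table sent by the host. */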
1087static void netvsc_send_table(struct hv_device *hdev,
1088                              struct nvsp_message *nvmsg)
1089{
1090        struct net_device *ndev = hv_get_drvdata(hdev);
1091        struct net_device_context *net_device_ctx = netdev_priv(ndev);
1092        int i;
1093        u32 count, *tab;
1094
1095        count = nvmsg->msg.v5_msg.send_table.count;
1096        if (count != VRSS_SEND_TAB_SIZE) {
1097                netdev_err(ndev, "Received wrong send-table size:%u\n", count);
1098                return;
1099        }
1100
1101        tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
1102                      nvmsg->msg.v5_msg.send_table.offset);
1103
1104        for (i = 0; i < count; i++)
1105                net_device_ctx->tx_send_table[i] = tab[i];
1106}
1107
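     /* Record the VF association (allocated flag and serial) from the host. */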
1108static void netvsc_send_vf(struct net_device_context *net_device_ctx,
1109                           struct nvsp_message *nvmsg)
1110{
1111        net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
1112        net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
1113}
1114
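     /* Handle in-band messages: indirection table and VF association. */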
1115static inline void netvsc_receive_inband(struct hv_device *hdev,
1116                                 struct net_device_context *net_device_ctx,
1117                                 struct nvsp_message *nvmsg)
1118{
1119        switch (nvmsg->hdr.msg_type) {
1120        case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
1121                netvsc_send_table(hdev, nvmsg);
1122                break;
1123
1124        case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
1125                netvsc_send_vf(net_device_ctx, nvmsg);
1126                break;
1127        }
1128}
1129
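     /*
      * Demultiplex one ring buffer descriptor: completion, transfer-page
      * data, or in-band message. Returns the number of packets received.
      */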
1130static int netvsc_process_raw_pkt(struct hv_device *device,
1131                                  struct vmbus_channel *channel,
1132                                  struct netvsc_device *net_device,
1133                                  struct net_device *ndev,
1134                                  const struct vmpacket_descriptor *desc,
1135                                  int budget)
1136{
1137        struct net_device_context *net_device_ctx = netdev_priv(ndev);
1138        struct nvsp_message *nvmsg = hv_pkt_data(desc);
1139
1140        switch (desc->type) {
1141        case VM_PKT_COMP:
1142                netvsc_send_completion(net_device, channel, device,
1143                                       desc, budget);
1144                break;
1145
1146        case VM_PKT_DATA_USING_XFER_PAGES:
1147                return netvsc_receive(ndev, net_device, net_device_ctx,
1148                                      device, channel, desc, nvmsg);
1150
1151        case VM_PKT_DATA_INBAND:
1152                netvsc_receive_inband(device, net_device_ctx, nvmsg);
1153                break;
1154
1155        default:
1156                netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
1157                           desc->type, desc->trans_id);
1158                break;
1159        }
1160
1161        return 0;
1162}
1163
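     /* Map a (sub)channel back to its hv_device via the primary channel. */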
1164static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
1165{
1166        struct vmbus_channel *primary = channel->primary_channel;
1167
1168        return primary ? primary->device_obj : channel->device_obj;
1169}
1170
1171/* Network processing softirq
1172 * Process data in incoming ring buffer from host
1173 * Stops when ring is empty or budget is met or exceeded.
1174 */
1175int netvsc_poll(struct napi_struct *napi, int budget)
1176{
1177        struct netvsc_channel *nvchan
1178                = container_of(napi, struct netvsc_channel, napi);
1179        struct netvsc_device *net_device = nvchan->net_device;
1180        struct vmbus_channel *channel = nvchan->channel;
1181        struct hv_device *device = netvsc_channel_to_device(channel);
1182        struct net_device *ndev = hv_get_drvdata(device);
1183        int work_done = 0;
1184
1185        /* If starting a new interval */
1186        if (!nvchan->desc)
1187                nvchan->desc = hv_pkt_iter_first(channel);
1188
1189        while (nvchan->desc && work_done < budget) {
1190                work_done += netvsc_process_raw_pkt(device, channel, net_device,
1191                                                    ndev, nvchan->desc, budget);
1192                nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
1193        }
1194
 1195        /* If send of pending receive completions succeeded
1196         *   and did not exhaust NAPI budget this time
1197         *   and not doing busy poll
1198         * then re-enable host interrupts
1199         *     and reschedule if ring is not empty.
1200         */
1201        if (send_recv_completions(ndev, net_device, nvchan) == 0 &&
1202            work_done < budget &&
1203            napi_complete_done(napi, work_done) &&
1204            hv_end_read(&channel->inbound)) {
1205                hv_begin_read(&channel->inbound);
1206                napi_reschedule(napi);
1207        }
1208
 1209        /* Driver may overshoot since a descriptor can contain multiple packets */
1210        return min(work_done, budget);
1211}
1212
1213/* Call back when data is available in host ring buffer.
1214 * Processing is deferred until network softirq (NAPI)
1215 */
1216void netvsc_channel_cb(void *context)
1217{
1218        struct netvsc_channel *nvchan = context;
1219        struct vmbus_channel *channel = nvchan->channel;
1220        struct hv_ring_buffer_info *rbi = &channel->inbound;
1221
1222        /* preload first vmpacket descriptor */
1223        prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
1224
1225        if (napi_schedule_prep(&nvchan->napi)) {
 1226                /* disable interrupts from host */
1227                hv_begin_read(rbi);
1228
1229                __napi_schedule(&nvchan->napi);
1230        }
1231}
1232
1233/*
1234 * netvsc_device_add - Callback when the device belonging to this
1235 * driver is added
1236 */
1237struct netvsc_device *netvsc_device_add(struct hv_device *device,
1238                                const struct netvsc_device_info *device_info)
1239{
1240        int i, ret = 0;
1241        int ring_size = device_info->ring_size;
1242        struct netvsc_device *net_device;
1243        struct net_device *ndev = hv_get_drvdata(device);
1244        struct net_device_context *net_device_ctx = netdev_priv(ndev);
1245
1246        net_device = alloc_net_device();
1247        if (!net_device)
1248                return ERR_PTR(-ENOMEM);
1249
1250        net_device->ring_size = ring_size;
1251
 1252        /* Because the device uses NAPI, all the interrupt batching and
 1253         * control is done in the NAPI softirq, not in the channel callback
1254         */
1255        set_channel_read_mode(device->channel, HV_CALL_ISR);
1256
1257        /* If we're reopening the device we may have multiple queues, fill the
1258         * chn_table with the default channel to use it before subchannels are
1259         * opened.
1260         * Initialize the channel state before we open;
1261         * we can be interrupted as soon as we open the channel.
1262         */
1263
1264        for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
1265                struct netvsc_channel *nvchan = &net_device->chan_table[i];
1266
1267                nvchan->channel = device->channel;
1268                nvchan->net_device = net_device;
1269                u64_stats_init(&nvchan->tx_stats.syncp);
1270                u64_stats_init(&nvchan->rx_stats.syncp);
1271        }
1272
1273        /* Enable NAPI handler before init callbacks */
1274        netif_napi_add(ndev, &net_device->chan_table[0].napi,
1275                       netvsc_poll, NAPI_POLL_WEIGHT);
1276
1277        /* Open the channel */
1278        ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
1279                         ring_size * PAGE_SIZE, NULL, 0,
1280                         netvsc_channel_cb,
1281                         net_device->chan_table);
1282
1283        if (ret != 0) {
1284                netif_napi_del(&net_device->chan_table[0].napi);
1285                netdev_err(ndev, "unable to open channel: %d\n", ret);
1286                goto cleanup;
1287        }
1288
1289        /* Channel is opened */
1290        netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");
1291
1292        napi_enable(&net_device->chan_table[0].napi);
1293
 1294        /* Writing the nvdev pointer unblocks netvsc_send(), so make sure
 1295         * chn_table is populated first.
1296         */
1297        rcu_assign_pointer(net_device_ctx->nvdev, net_device);
1298
1299        /* Connect with the NetVsp */
1300        ret = netvsc_connect_vsp(device, net_device, device_info);
1301        if (ret != 0) {
1302                netdev_err(ndev,
1303                        "unable to connect to NetVSP - %d\n", ret);
1304                goto close;
1305        }
1306
1307        return net_device;
1308
1309close:
1310        RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
1311        napi_disable(&net_device->chan_table[0].napi);
1312
1313        /* Now, we can close the channel safely */
1314        vmbus_close(device->channel);
1315
1316cleanup:
1317        free_netvsc_device(&net_device->rcu);
1318
1319        return ERR_PTR(ret);
1320}
1321