linux/drivers/net/hyperv/netvsc.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2009, Microsoft Corporation.
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms and conditions of the GNU General Public License,
   6 * version 2, as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, see <http://www.gnu.org/licenses/>.
  15 *
  16 * Authors:
  17 *   Haiyang Zhang <haiyangz@microsoft.com>
  18 *   Hank Janssen  <hjanssen@microsoft.com>
  19 */
  20#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  21
  22#include <linux/kernel.h>
  23#include <linux/sched.h>
  24#include <linux/wait.h>
  25#include <linux/mm.h>
  26#include <linux/delay.h>
  27#include <linux/io.h>
  28#include <linux/slab.h>
  29#include <linux/netdevice.h>
  30#include <linux/if_ether.h>
  31#include <linux/vmalloc.h>
  32#include <asm/sync_bitops.h>
  33
  34#include "hyperv_net.h"
  35
  36
  37static struct netvsc_device *alloc_net_device(struct hv_device *device)
  38{
  39        struct netvsc_device *net_device;
  40        struct net_device *ndev = hv_get_drvdata(device);
  41        int i;
  42
  43        net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
  44        if (!net_device)
  45                return NULL;
  46
  47        net_device->cb_buffer = kzalloc(NETVSC_PACKET_SIZE, GFP_KERNEL);
  48        if (!net_device->cb_buffer) {
  49                kfree(net_device);
  50                return NULL;
  51        }
  52
  53        init_waitqueue_head(&net_device->wait_drain);
  54        net_device->start_remove = false;
  55        net_device->destroy = false;
  56        net_device->dev = device;
  57        net_device->ndev = ndev;
  58        net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
  59        net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
  60
  61        for (i = 0; i < num_online_cpus(); i++)
  62                spin_lock_init(&net_device->msd[i].lock);
  63
  64        hv_set_drvdata(device, net_device);
  65        return net_device;
  66}
  67
  68static void free_netvsc_device(struct netvsc_device *nvdev)
  69{
  70        kfree(nvdev->cb_buffer);
  71        kfree(nvdev);
  72}
  73
  74static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
  75{
  76        struct netvsc_device *net_device;
  77
  78        net_device = hv_get_drvdata(device);
  79        if (net_device && net_device->destroy)
  80                net_device = NULL;
  81
  82        return net_device;
  83}
  84
  85static struct netvsc_device *get_inbound_net_device(struct hv_device *device)
  86{
  87        struct netvsc_device *net_device;
  88
  89        net_device = hv_get_drvdata(device);
  90
  91        if (!net_device)
  92                goto get_in_err;
  93
  94        if (net_device->destroy &&
  95                atomic_read(&net_device->num_outstanding_sends) == 0)
  96                net_device = NULL;
  97
  98get_in_err:
  99        return net_device;
 100}
 101
 102
 103static int netvsc_destroy_buf(struct netvsc_device *net_device)
 104{
 105        struct nvsp_message *revoke_packet;
 106        int ret = 0;
 107        struct net_device *ndev = net_device->ndev;
 108
 109        /*
 110         * If we got a section count, it means we received a
 111         * SendReceiveBufferComplete msg (ie sent
 112         * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
 113         * to send a revoke msg here
 114         */
 115        if (net_device->recv_section_cnt) {
 116                /* Send the revoke receive buffer */
 117                revoke_packet = &net_device->revoke_packet;
 118                memset(revoke_packet, 0, sizeof(struct nvsp_message));
 119
 120                revoke_packet->hdr.msg_type =
 121                        NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
 122                revoke_packet->msg.v1_msg.
 123                revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
 124
 125                ret = vmbus_sendpacket(net_device->dev->channel,
 126                                       revoke_packet,
 127                                       sizeof(struct nvsp_message),
 128                                       (unsigned long)revoke_packet,
 129                                       VM_PKT_DATA_INBAND, 0);
 130                /*
 131                 * If we failed here, we might as well return and
 132                 * have a leak rather than continue and a bugchk
 133                 */
 134                if (ret != 0) {
 135                        netdev_err(ndev, "unable to send "
 136                                "revoke receive buffer to netvsp\n");
 137                        return ret;
 138                }
 139        }
 140
 141        /* Teardown the gpadl on the vsp end */
 142        if (net_device->recv_buf_gpadl_handle) {
 143                ret = vmbus_teardown_gpadl(net_device->dev->channel,
 144                           net_device->recv_buf_gpadl_handle);
 145
 146                /* If we failed here, we might as well return and have a leak
 147                 * rather than continue and a bugchk
 148                 */
 149                if (ret != 0) {
 150                        netdev_err(ndev,
 151                                   "unable to teardown receive buffer's gpadl\n");
 152                        return ret;
 153                }
 154                net_device->recv_buf_gpadl_handle = 0;
 155        }
 156
 157        if (net_device->recv_buf) {
 158                /* Free up the receive buffer */
 159                vfree(net_device->recv_buf);
 160                net_device->recv_buf = NULL;
 161        }
 162
 163        if (net_device->recv_section) {
 164                net_device->recv_section_cnt = 0;
 165                kfree(net_device->recv_section);
 166                net_device->recv_section = NULL;
 167        }
 168
 169        /* Deal with the send buffer we may have setup.
 170         * If we got a  send section size, it means we received a
 171         * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
 172         * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
 173         * to send a revoke msg here
 174         */
 175        if (net_device->send_section_size) {
 176                /* Send the revoke receive buffer */
 177                revoke_packet = &net_device->revoke_packet;
 178                memset(revoke_packet, 0, sizeof(struct nvsp_message));
 179
 180                revoke_packet->hdr.msg_type =
 181                        NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
 182                revoke_packet->msg.v1_msg.revoke_send_buf.id =
 183                        NETVSC_SEND_BUFFER_ID;
 184
 185                ret = vmbus_sendpacket(net_device->dev->channel,
 186                                       revoke_packet,
 187                                       sizeof(struct nvsp_message),
 188                                       (unsigned long)revoke_packet,
 189                                       VM_PKT_DATA_INBAND, 0);
 190                /* If we failed here, we might as well return and
 191                 * have a leak rather than continue and a bugchk
 192                 */
 193                if (ret != 0) {
 194                        netdev_err(ndev, "unable to send "
 195                                   "revoke send buffer to netvsp\n");
 196                        return ret;
 197                }
 198        }
 199        /* Teardown the gpadl on the vsp end */
 200        if (net_device->send_buf_gpadl_handle) {
 201                ret = vmbus_teardown_gpadl(net_device->dev->channel,
 202                                           net_device->send_buf_gpadl_handle);
 203
 204                /* If we failed here, we might as well return and have a leak
 205                 * rather than continue and a bugchk
 206                 */
 207                if (ret != 0) {
 208                        netdev_err(ndev,
 209                                   "unable to teardown send buffer's gpadl\n");
 210                        return ret;
 211                }
 212                net_device->send_buf_gpadl_handle = 0;
 213        }
 214        if (net_device->send_buf) {
 215                /* Free up the send buffer */
 216                vfree(net_device->send_buf);
 217                net_device->send_buf = NULL;
 218        }
 219        kfree(net_device->send_section_map);
 220
 221        return ret;
 222}
 223
 224static int netvsc_init_buf(struct hv_device *device)
 225{
 226        int ret = 0;
 227        unsigned long t;
 228        struct netvsc_device *net_device;
 229        struct nvsp_message *init_packet;
 230        struct net_device *ndev;
 231        int node;
 232
 233        net_device = get_outbound_net_device(device);
 234        if (!net_device)
 235                return -ENODEV;
 236        ndev = net_device->ndev;
 237
 238        node = cpu_to_node(device->channel->target_cpu);
 239        net_device->recv_buf = vzalloc_node(net_device->recv_buf_size, node);
 240        if (!net_device->recv_buf)
 241                net_device->recv_buf = vzalloc(net_device->recv_buf_size);
 242
 243        if (!net_device->recv_buf) {
 244                netdev_err(ndev, "unable to allocate receive "
 245                        "buffer of size %d\n", net_device->recv_buf_size);
 246                ret = -ENOMEM;
 247                goto cleanup;
 248        }
 249
 250        /*
 251         * Establish the gpadl handle for this buffer on this
 252         * channel.  Note: This call uses the vmbus connection rather
 253         * than the channel to establish the gpadl handle.
 254         */
 255        ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
 256                                    net_device->recv_buf_size,
 257                                    &net_device->recv_buf_gpadl_handle);
 258        if (ret != 0) {
 259                netdev_err(ndev,
 260                        "unable to establish receive buffer's gpadl\n");
 261                goto cleanup;
 262        }
 263
 264
 265        /* Notify the NetVsp of the gpadl handle */
 266        init_packet = &net_device->channel_init_pkt;
 267
 268        memset(init_packet, 0, sizeof(struct nvsp_message));
 269
 270        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
 271        init_packet->msg.v1_msg.send_recv_buf.
 272                gpadl_handle = net_device->recv_buf_gpadl_handle;
 273        init_packet->msg.v1_msg.
 274                send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
 275
 276        /* Send the gpadl notification request */
 277        ret = vmbus_sendpacket(device->channel, init_packet,
 278                               sizeof(struct nvsp_message),
 279                               (unsigned long)init_packet,
 280                               VM_PKT_DATA_INBAND,
 281                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 282        if (ret != 0) {
 283                netdev_err(ndev,
 284                        "unable to send receive buffer's gpadl to netvsp\n");
 285                goto cleanup;
 286        }
 287
 288        t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
 289        BUG_ON(t == 0);
 290
 291
 292        /* Check the response */
 293        if (init_packet->msg.v1_msg.
 294            send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
 295                netdev_err(ndev, "Unable to complete receive buffer "
 296                           "initialization with NetVsp - status %d\n",
 297                           init_packet->msg.v1_msg.
 298                           send_recv_buf_complete.status);
 299                ret = -EINVAL;
 300                goto cleanup;
 301        }
 302
 303        /* Parse the response */
 304
 305        net_device->recv_section_cnt = init_packet->msg.
 306                v1_msg.send_recv_buf_complete.num_sections;
 307
 308        net_device->recv_section = kmemdup(
 309                init_packet->msg.v1_msg.send_recv_buf_complete.sections,
 310                net_device->recv_section_cnt *
 311                sizeof(struct nvsp_1_receive_buffer_section),
 312                GFP_KERNEL);
 313        if (net_device->recv_section == NULL) {
 314                ret = -EINVAL;
 315                goto cleanup;
 316        }
 317
 318        /*
 319         * For 1st release, there should only be 1 section that represents the
 320         * entire receive buffer
 321         */
 322        if (net_device->recv_section_cnt != 1 ||
 323            net_device->recv_section->offset != 0) {
 324                ret = -EINVAL;
 325                goto cleanup;
 326        }
 327
 328        /* Now setup the send buffer.
 329         */
 330        net_device->send_buf = vzalloc_node(net_device->send_buf_size, node);
 331        if (!net_device->send_buf)
 332                net_device->send_buf = vzalloc(net_device->send_buf_size);
 333        if (!net_device->send_buf) {
 334                netdev_err(ndev, "unable to allocate send "
 335                           "buffer of size %d\n", net_device->send_buf_size);
 336                ret = -ENOMEM;
 337                goto cleanup;
 338        }
 339
 340        /* Establish the gpadl handle for this buffer on this
 341         * channel.  Note: This call uses the vmbus connection rather
 342         * than the channel to establish the gpadl handle.
 343         */
 344        ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
 345                                    net_device->send_buf_size,
 346                                    &net_device->send_buf_gpadl_handle);
 347        if (ret != 0) {
 348                netdev_err(ndev,
 349                           "unable to establish send buffer's gpadl\n");
 350                goto cleanup;
 351        }
 352
 353        /* Notify the NetVsp of the gpadl handle */
 354        init_packet = &net_device->channel_init_pkt;
 355        memset(init_packet, 0, sizeof(struct nvsp_message));
 356        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
 357        init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
 358                net_device->send_buf_gpadl_handle;
 359        init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
 360
 361        /* Send the gpadl notification request */
 362        ret = vmbus_sendpacket(device->channel, init_packet,
 363                               sizeof(struct nvsp_message),
 364                               (unsigned long)init_packet,
 365                               VM_PKT_DATA_INBAND,
 366                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 367        if (ret != 0) {
 368                netdev_err(ndev,
 369                           "unable to send send buffer's gpadl to netvsp\n");
 370                goto cleanup;
 371        }
 372
 373        t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
 374        BUG_ON(t == 0);
 375
 376        /* Check the response */
 377        if (init_packet->msg.v1_msg.
 378            send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
 379                netdev_err(ndev, "Unable to complete send buffer "
 380                           "initialization with NetVsp - status %d\n",
 381                           init_packet->msg.v1_msg.
 382                           send_send_buf_complete.status);
 383                ret = -EINVAL;
 384                goto cleanup;
 385        }
 386
 387        /* Parse the response */
 388        net_device->send_section_size = init_packet->msg.
 389                                v1_msg.send_send_buf_complete.section_size;
 390
 391        /* Section count is simply the size divided by the section size.
 392         */
 393        net_device->send_section_cnt =
 394                net_device->send_buf_size/net_device->send_section_size;
 395
 396        dev_info(&device->device, "Send section size: %d, Section count:%d\n",
 397                 net_device->send_section_size, net_device->send_section_cnt);
 398
 399        /* Setup state for managing the send buffer. */
 400        net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
 401                                             BITS_PER_LONG);
 402
 403        net_device->send_section_map =
 404                kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
 405        if (net_device->send_section_map == NULL) {
 406                ret = -ENOMEM;
 407                goto cleanup;
 408        }
 409
 410        goto exit;
 411
 412cleanup:
 413        netvsc_destroy_buf(net_device);
 414
 415exit:
 416        return ret;
 417}
 418
 419
 420/* Negotiate NVSP protocol version */
 421static int negotiate_nvsp_ver(struct hv_device *device,
 422                              struct netvsc_device *net_device,
 423                              struct nvsp_message *init_packet,
 424                              u32 nvsp_ver)
 425{
 426        int ret;
 427        unsigned long t;
 428
 429        memset(init_packet, 0, sizeof(struct nvsp_message));
 430        init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
 431        init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
 432        init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
 433
 434        /* Send the init request */
 435        ret = vmbus_sendpacket(device->channel, init_packet,
 436                               sizeof(struct nvsp_message),
 437                               (unsigned long)init_packet,
 438                               VM_PKT_DATA_INBAND,
 439                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 440
 441        if (ret != 0)
 442                return ret;
 443
 444        t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
 445
 446        if (t == 0)
 447                return -ETIMEDOUT;
 448
 449        if (init_packet->msg.init_msg.init_complete.status !=
 450            NVSP_STAT_SUCCESS)
 451                return -EINVAL;
 452
 453        if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
 454                return 0;
 455
 456        /* NVSPv2 or later: Send NDIS config */
 457        memset(init_packet, 0, sizeof(struct nvsp_message));
 458        init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
 459        init_packet->msg.v2_msg.send_ndis_config.mtu = net_device->ndev->mtu +
 460                                                       ETH_HLEN;
 461        init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;
 462
 463        if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5)
 464                init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;
 465
 466        ret = vmbus_sendpacket(device->channel, init_packet,
 467                                sizeof(struct nvsp_message),
 468                                (unsigned long)init_packet,
 469                                VM_PKT_DATA_INBAND, 0);
 470
 471        return ret;
 472}
 473
 474static int netvsc_connect_vsp(struct hv_device *device)
 475{
 476        int ret;
 477        struct netvsc_device *net_device;
 478        struct nvsp_message *init_packet;
 479        int ndis_version;
 480        struct net_device *ndev;
 481        u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
 482                NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 };
 483        int i, num_ver = 4; /* number of different NVSP versions */
 484
 485        net_device = get_outbound_net_device(device);
 486        if (!net_device)
 487                return -ENODEV;
 488        ndev = net_device->ndev;
 489
 490        init_packet = &net_device->channel_init_pkt;
 491
 492        /* Negotiate the latest NVSP protocol supported */
 493        for (i = num_ver - 1; i >= 0; i--)
 494                if (negotiate_nvsp_ver(device, net_device, init_packet,
 495                                       ver_list[i])  == 0) {
 496                        net_device->nvsp_version = ver_list[i];
 497                        break;
 498                }
 499
 500        if (i < 0) {
 501                ret = -EPROTO;
 502                goto cleanup;
 503        }
 504
 505        pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);
 506
 507        /* Send the ndis version */
 508        memset(init_packet, 0, sizeof(struct nvsp_message));
 509
 510        if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
 511                ndis_version = 0x00060001;
 512        else
 513                ndis_version = 0x0006001e;
 514
 515        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
 516        init_packet->msg.v1_msg.
 517                send_ndis_ver.ndis_major_ver =
 518                                (ndis_version & 0xFFFF0000) >> 16;
 519        init_packet->msg.v1_msg.
 520                send_ndis_ver.ndis_minor_ver =
 521                                ndis_version & 0xFFFF;
 522
 523        /* Send the init request */
 524        ret = vmbus_sendpacket(device->channel, init_packet,
 525                                sizeof(struct nvsp_message),
 526                                (unsigned long)init_packet,
 527                                VM_PKT_DATA_INBAND, 0);
 528        if (ret != 0)
 529                goto cleanup;
 530
 531        /* Post the big receive buffer to NetVSP */
 532        if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
 533                net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
 534        else
 535                net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
 536        net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
 537
 538        ret = netvsc_init_buf(device);
 539
 540cleanup:
 541        return ret;
 542}
 543
 544static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
 545{
 546        netvsc_destroy_buf(net_device);
 547}
 548
 549/*
 550 * netvsc_device_remove - Callback when the root bus device is removed
 551 */
 552int netvsc_device_remove(struct hv_device *device)
 553{
 554        struct netvsc_device *net_device;
 555        unsigned long flags;
 556
 557        net_device = hv_get_drvdata(device);
 558
 559        netvsc_disconnect_vsp(net_device);
 560
 561        /*
 562         * Since we have already drained, we don't need to busy wait
 563         * as was done in final_release_stor_device()
 564         * Note that we cannot set the ext pointer to NULL until
 565         * we have drained - to drain the outgoing packets, we need to
 566         * allow incoming packets.
 567         */
 568
 569        spin_lock_irqsave(&device->channel->inbound_lock, flags);
 570        hv_set_drvdata(device, NULL);
 571        spin_unlock_irqrestore(&device->channel->inbound_lock, flags);
 572
 573        /*
 574         * At this point, no one should be accessing net_device
 575         * except in here
 576         */
 577        dev_notice(&device->device, "net device safe to remove\n");
 578
 579        /* Now, we can close the channel safely */
 580        vmbus_close(device->channel);
 581
 582        /* Release all resources */
 583        vfree(net_device->sub_cb_buf);
 584        free_netvsc_device(net_device);
 585        return 0;
 586}
 587
 588
 589#define RING_AVAIL_PERCENT_HIWATER 20
 590#define RING_AVAIL_PERCENT_LOWATER 10
 591
 592/*
 593 * Get the percentage of available bytes to write in the ring.
 594 * The return value is in range from 0 to 100.
 595 */
 596static inline u32 hv_ringbuf_avail_percent(
 597                struct hv_ring_buffer_info *ring_info)
 598{
 599        u32 avail_read, avail_write;
 600
 601        hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write);
 602
 603        return avail_write * 100 / ring_info->ring_datasize;
 604}
 605
 606static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
 607                                         u32 index)
 608{
 609        sync_change_bit(index, net_device->send_section_map);
 610}
 611
 612static void netvsc_send_completion(struct netvsc_device *net_device,
 613                                   struct hv_device *device,
 614                                   struct vmpacket_descriptor *packet)
 615{
 616        struct nvsp_message *nvsp_packet;
 617        struct hv_netvsc_packet *nvsc_packet;
 618        struct net_device *ndev;
 619        u32 send_index;
 620
 621        ndev = net_device->ndev;
 622
 623        nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
 624                        (packet->offset8 << 3));
 625
 626        if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) ||
 627            (nvsp_packet->hdr.msg_type ==
 628             NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
 629            (nvsp_packet->hdr.msg_type ==
 630             NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
 631            (nvsp_packet->hdr.msg_type ==
 632             NVSP_MSG5_TYPE_SUBCHANNEL)) {
 633                /* Copy the response back */
 634                memcpy(&net_device->channel_init_pkt, nvsp_packet,
 635                       sizeof(struct nvsp_message));
 636                complete(&net_device->channel_init_wait);
 637        } else if (nvsp_packet->hdr.msg_type ==
 638                   NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
 639                int num_outstanding_sends;
 640                u16 q_idx = 0;
 641                struct vmbus_channel *channel = device->channel;
 642                int queue_sends;
 643
 644                /* Get the send context */
 645                nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
 646                        packet->trans_id;
 647
 648                /* Notify the layer above us */
 649                if (nvsc_packet) {
 650                        send_index = nvsc_packet->send_buf_index;
 651                        if (send_index != NETVSC_INVALID_INDEX)
 652                                netvsc_free_send_slot(net_device, send_index);
 653                        q_idx = nvsc_packet->q_idx;
 654                        channel = nvsc_packet->channel;
 655                        nvsc_packet->send_completion(nvsc_packet->
 656                                                     send_completion_ctx);
 657                }
 658
 659                num_outstanding_sends =
 660                        atomic_dec_return(&net_device->num_outstanding_sends);
 661                queue_sends = atomic_dec_return(&net_device->
 662                                                queue_sends[q_idx]);
 663
 664                if (net_device->destroy && num_outstanding_sends == 0)
 665                        wake_up(&net_device->wait_drain);
 666
 667                if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
 668                    !net_device->start_remove &&
 669                    (hv_ringbuf_avail_percent(&channel->outbound) >
 670                     RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
 671                                netif_tx_wake_queue(netdev_get_tx_queue(
 672                                                    ndev, q_idx));
 673        } else {
 674                netdev_err(ndev, "Unknown send completion packet type- "
 675                           "%d received!!\n", nvsp_packet->hdr.msg_type);
 676        }
 677
 678}
 679
 680static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
 681{
 682        unsigned long index;
 683        u32 max_words = net_device->map_words;
 684        unsigned long *map_addr = (unsigned long *)net_device->send_section_map;
 685        u32 section_cnt = net_device->send_section_cnt;
 686        int ret_val = NETVSC_INVALID_INDEX;
 687        int i;
 688        int prev_val;
 689
 690        for (i = 0; i < max_words; i++) {
 691                if (!~(map_addr[i]))
 692                        continue;
 693                index = ffz(map_addr[i]);
 694                prev_val = sync_test_and_set_bit(index, &map_addr[i]);
 695                if (prev_val)
 696                        continue;
 697                if ((index + (i * BITS_PER_LONG)) >= section_cnt)
 698                        break;
 699                ret_val = (index + (i * BITS_PER_LONG));
 700                break;
 701        }
 702        return ret_val;
 703}
 704
 705static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
 706                                   unsigned int section_index,
 707                                   u32 pend_size,
 708                                   struct hv_netvsc_packet *packet)
 709{
 710        char *start = net_device->send_buf;
 711        char *dest = start + (section_index * net_device->send_section_size)
 712                     + pend_size;
 713        int i;
 714        u32 msg_size = 0;
 715        u32 padding = 0;
 716        u32 remain = packet->total_data_buflen % net_device->pkt_align;
 717        u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
 718                packet->page_buf_cnt;
 719
 720        /* Add padding */
 721        if (packet->is_data_pkt && packet->xmit_more && remain &&
 722            !packet->cp_partial) {
 723                padding = net_device->pkt_align - remain;
 724                packet->rndis_msg->msg_len += padding;
 725                packet->total_data_buflen += padding;
 726        }
 727
 728        for (i = 0; i < page_count; i++) {
 729                char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT);
 730                u32 offset = packet->page_buf[i].offset;
 731                u32 len = packet->page_buf[i].len;
 732
 733                memcpy(dest, (src + offset), len);
 734                msg_size += len;
 735                dest += len;
 736        }
 737
 738        if (padding) {
 739                memset(dest, 0, padding);
 740                msg_size += padding;
 741        }
 742
 743        return msg_size;
 744}
 745
 746static inline int netvsc_send_pkt(
 747        struct hv_netvsc_packet *packet,
 748        struct netvsc_device *net_device)
 749{
 750        struct nvsp_message nvmsg;
 751        struct vmbus_channel *out_channel = packet->channel;
 752        u16 q_idx = packet->q_idx;
 753        struct net_device *ndev = net_device->ndev;
 754        u64 req_id;
 755        int ret;
 756        struct hv_page_buffer *pgbuf;
 757        u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);
 758
 759        nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
 760        if (packet->is_data_pkt) {
 761                /* 0 is RMC_DATA; */
 762                nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0;
 763        } else {
 764                /* 1 is RMC_CONTROL; */
 765                nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1;
 766        }
 767
 768        nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
 769                packet->send_buf_index;
 770        if (packet->send_buf_index == NETVSC_INVALID_INDEX)
 771                nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
 772        else
 773                nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size =
 774                        packet->total_data_buflen;
 775
 776        if (packet->send_completion)
 777                req_id = (ulong)packet;
 778        else
 779                req_id = 0;
 780
 781        if (out_channel->rescind)
 782                return -ENODEV;
 783
 784        /*
 785         * It is possible that once we successfully place this packet
 786         * on the ringbuffer, we may stop the queue. In that case, we want
 787         * to notify the host independent of the xmit_more flag. We don't
 788         * need to be precise here; in the worst case we may signal the host
 789         * unnecessarily.
 790         */
 791        if (ring_avail < (RING_AVAIL_PERCENT_LOWATER + 1))
 792                packet->xmit_more = false;
 793
 794        if (packet->page_buf_cnt) {
 795                pgbuf = packet->cp_partial ? packet->page_buf +
 796                        packet->rmsg_pgcnt : packet->page_buf;
 797                ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
 798                                                      pgbuf,
 799                                                      packet->page_buf_cnt,
 800                                                      &nvmsg,
 801                                                      sizeof(struct nvsp_message),
 802                                                      req_id,
 803                                                      VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED,
 804                                                      !packet->xmit_more);
 805        } else {
 806                ret = vmbus_sendpacket_ctl(out_channel, &nvmsg,
 807                                           sizeof(struct nvsp_message),
 808                                           req_id,
 809                                           VM_PKT_DATA_INBAND,
 810                                           VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED,
 811                                           !packet->xmit_more);
 812        }
 813
 814        if (ret == 0) {
 815                atomic_inc(&net_device->num_outstanding_sends);
 816                atomic_inc(&net_device->queue_sends[q_idx]);
 817
 818                if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
 819                        netif_tx_stop_queue(netdev_get_tx_queue(ndev, q_idx));
 820
 821                        if (atomic_read(&net_device->
 822                                queue_sends[q_idx]) < 1)
 823                                netif_tx_wake_queue(netdev_get_tx_queue(
 824                                                    ndev, q_idx));
 825                }
 826        } else if (ret == -EAGAIN) {
 827                netif_tx_stop_queue(netdev_get_tx_queue(
 828                                    ndev, q_idx));
 829                if (atomic_read(&net_device->queue_sends[q_idx]) < 1) {
 830                        netif_tx_wake_queue(netdev_get_tx_queue(
 831                                            ndev, q_idx));
 832                        ret = -ENOSPC;
 833                }
 834        } else {
 835                netdev_err(ndev, "Unable to send packet %p ret %d\n",
 836                           packet, ret);
 837        }
 838
 839        return ret;
 840}
 841
 842int netvsc_send(struct hv_device *device,
 843                struct hv_netvsc_packet *packet)
 844{
 845        struct netvsc_device *net_device;
 846        int ret = 0, m_ret = 0;
 847        struct vmbus_channel *out_channel;
 848        u16 q_idx = packet->q_idx;
 849        u32 pktlen = packet->total_data_buflen, msd_len = 0;
 850        unsigned int section_index = NETVSC_INVALID_INDEX;
 851        unsigned long flag;
 852        struct multi_send_data *msdp;
 853        struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
 854        bool try_batch;
 855
 856        net_device = get_outbound_net_device(device);
 857        if (!net_device)
 858                return -ENODEV;
 859
 860        out_channel = net_device->chn_table[q_idx];
 861        if (!out_channel) {
 862                out_channel = device->channel;
 863                q_idx = 0;
 864                packet->q_idx = 0;
 865        }
 866        packet->channel = out_channel;
 867        packet->send_buf_index = NETVSC_INVALID_INDEX;
 868        packet->cp_partial = false;
 869
 870        msdp = &net_device->msd[q_idx];
 871
 872        /* batch packets in send buffer if possible */
 873        spin_lock_irqsave(&msdp->lock, flag);
 874        if (msdp->pkt)
 875                msd_len = msdp->pkt->total_data_buflen;
 876
 877        try_batch = packet->is_data_pkt && msd_len > 0 && msdp->count <
 878                    net_device->max_pkt;
 879
 880        if (try_batch && msd_len + pktlen + net_device->pkt_align <
 881            net_device->send_section_size) {
 882                section_index = msdp->pkt->send_buf_index;
 883
 884        } else if (try_batch && msd_len + packet->rmsg_size <
 885                   net_device->send_section_size) {
 886                section_index = msdp->pkt->send_buf_index;
 887                packet->cp_partial = true;
 888
 889        } else if (packet->is_data_pkt && pktlen + net_device->pkt_align <
 890                   net_device->send_section_size) {
 891                section_index = netvsc_get_next_send_section(net_device);
 892                if (section_index != NETVSC_INVALID_INDEX) {
 893                                msd_send = msdp->pkt;
 894                                msdp->pkt = NULL;
 895                                msdp->count = 0;
 896                                msd_len = 0;
 897                }
 898        }
 899
 900        if (section_index != NETVSC_INVALID_INDEX) {
 901                netvsc_copy_to_send_buf(net_device,
 902                                        section_index, msd_len,
 903                                        packet);
 904
 905                packet->send_buf_index = section_index;
 906
 907                if (packet->cp_partial) {
 908                        packet->page_buf_cnt -= packet->rmsg_pgcnt;
 909                        packet->total_data_buflen = msd_len + packet->rmsg_size;
 910                } else {
 911                        packet->page_buf_cnt = 0;
 912                        packet->total_data_buflen += msd_len;
 913                }
 914
 915                if (msdp->pkt)
 916                        netvsc_xmit_completion(msdp->pkt);
 917
 918                if (packet->xmit_more && !packet->cp_partial) {
 919                        msdp->pkt = packet;
 920                        msdp->count++;
 921                } else {
 922                        cur_send = packet;
 923                        msdp->pkt = NULL;
 924                        msdp->count = 0;
 925                }
 926        } else {
 927                msd_send = msdp->pkt;
 928                msdp->pkt = NULL;
 929                msdp->count = 0;
 930                cur_send = packet;
 931        }
 932
 933        spin_unlock_irqrestore(&msdp->lock, flag);
 934
 935        if (msd_send) {
 936                m_ret = netvsc_send_pkt(msd_send, net_device);
 937
 938                if (m_ret != 0) {
 939                        netvsc_free_send_slot(net_device,
 940                                              msd_send->send_buf_index);
 941                        netvsc_xmit_completion(msd_send);
 942                }
 943        }
 944
 945        if (cur_send)
 946                ret = netvsc_send_pkt(cur_send, net_device);
 947
 948        if (ret != 0 && section_index != NETVSC_INVALID_INDEX)
 949                netvsc_free_send_slot(net_device, section_index);
 950
 951        return ret;
 952}
 953
 954static void netvsc_send_recv_completion(struct hv_device *device,
 955                                        struct vmbus_channel *channel,
 956                                        struct netvsc_device *net_device,
 957                                        u64 transaction_id, u32 status)
 958{
 959        struct nvsp_message recvcompMessage;
 960        int retries = 0;
 961        int ret;
 962        struct net_device *ndev;
 963
 964        ndev = net_device->ndev;
 965
 966        recvcompMessage.hdr.msg_type =
 967                                NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;
 968
 969        recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;
 970
 971retry_send_cmplt:
 972        /* Send the completion */
 973        ret = vmbus_sendpacket(channel, &recvcompMessage,
 974                               sizeof(struct nvsp_message), transaction_id,
 975                               VM_PKT_COMP, 0);
 976        if (ret == 0) {
 977                /* success */
 978                /* no-op */
 979        } else if (ret == -EAGAIN) {
 980                /* no more room...wait a bit and attempt to retry 3 times */
 981                retries++;
 982                netdev_err(ndev, "unable to send receive completion pkt"
 983                        " (tid %llx)...retrying %d\n", transaction_id, retries);
 984
 985                if (retries < 4) {
 986                        udelay(100);
 987                        goto retry_send_cmplt;
 988                } else {
 989                        netdev_err(ndev, "unable to send receive "
 990                                "completion pkt (tid %llx)...give up retrying\n",
 991                                transaction_id);
 992                }
 993        } else {
 994                netdev_err(ndev, "unable to send receive "
 995                        "completion pkt - %llx\n", transaction_id);
 996        }
 997}
 998
 999static void netvsc_receive(struct netvsc_device *net_device,
1000                        struct vmbus_channel *channel,
1001                        struct hv_device *device,
1002                        struct vmpacket_descriptor *packet)
1003{
1004        struct vmtransfer_page_packet_header *vmxferpage_packet;
1005        struct nvsp_message *nvsp_packet;
1006        struct hv_netvsc_packet nv_pkt;
1007        struct hv_netvsc_packet *netvsc_packet = &nv_pkt;
1008        u32 status = NVSP_STAT_SUCCESS;
1009        int i;
1010        int count = 0;
1011        struct net_device *ndev;
1012
1013        ndev = net_device->ndev;
1014
1015        /*
1016         * All inbound packets other than send completion should be xfer page
1017         * packet
1018         */
1019        if (packet->type != VM_PKT_DATA_USING_XFER_PAGES) {
1020                netdev_err(ndev, "Unknown packet type received - %d\n",
1021                           packet->type);
1022                return;
1023        }
1024
1025        nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
1026                        (packet->offset8 << 3));
1027
1028        /* Make sure this is a valid nvsp packet */
1029        if (nvsp_packet->hdr.msg_type !=
1030            NVSP_MSG1_TYPE_SEND_RNDIS_PKT) {
1031                netdev_err(ndev, "Unknown nvsp packet type received-"
1032                        " %d\n", nvsp_packet->hdr.msg_type);
1033                return;
1034        }
1035
1036        vmxferpage_packet = (struct vmtransfer_page_packet_header *)packet;
1037
1038        if (vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID) {
1039                netdev_err(ndev, "Invalid xfer page set id - "
1040                           "expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID,
1041                           vmxferpage_packet->xfer_pageset_id);
1042                return;
1043        }
1044
1045        count = vmxferpage_packet->range_cnt;
1046        netvsc_packet->channel = channel;
1047
1048        /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
1049        for (i = 0; i < count; i++) {
1050                /* Initialize the netvsc packet */
1051                netvsc_packet->status = NVSP_STAT_SUCCESS;
1052                netvsc_packet->data = (void *)((unsigned long)net_device->
1053                        recv_buf + vmxferpage_packet->ranges[i].byte_offset);
1054                netvsc_packet->total_data_buflen =
1055                                        vmxferpage_packet->ranges[i].byte_count;
1056
1057                /* Pass it to the upper layer */
1058                rndis_filter_receive(device, netvsc_packet);
1059
1060                if (netvsc_packet->status != NVSP_STAT_SUCCESS)
1061                        status = NVSP_STAT_FAIL;
1062        }
1063
1064        netvsc_send_recv_completion(device, channel, net_device,
1065                                    vmxferpage_packet->d.trans_id, status);
1066}
1067
1068
1069static void netvsc_send_table(struct hv_device *hdev,
1070                              struct nvsp_message *nvmsg)
1071{
1072        struct netvsc_device *nvscdev;
1073        struct net_device *ndev;
1074        int i;
1075        u32 count, *tab;
1076
1077        nvscdev = get_outbound_net_device(hdev);
1078        if (!nvscdev)
1079                return;
1080        ndev = nvscdev->ndev;
1081
1082        count = nvmsg->msg.v5_msg.send_table.count;
1083        if (count != VRSS_SEND_TAB_SIZE) {
1084                netdev_err(ndev, "Received wrong send-table size:%u\n", count);
1085                return;
1086        }
1087
1088        tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
1089                      nvmsg->msg.v5_msg.send_table.offset);
1090
1091        for (i = 0; i < count; i++)
1092                nvscdev->send_table[i] = tab[i];
1093}
1094
1095static void netvsc_send_vf(struct netvsc_device *nvdev,
1096                           struct nvsp_message *nvmsg)
1097{
1098        nvdev->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
1099        nvdev->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
1100}
1101
1102static inline void netvsc_receive_inband(struct hv_device *hdev,
1103                                         struct netvsc_device *nvdev,
1104                                         struct nvsp_message *nvmsg)
1105{
1106        switch (nvmsg->hdr.msg_type) {
1107        case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
1108                netvsc_send_table(hdev, nvmsg);
1109                break;
1110
1111        case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
1112                netvsc_send_vf(nvdev, nvmsg);
1113                break;
1114        }
1115}
1116
1117void netvsc_channel_cb(void *context)
1118{
1119        int ret;
1120        struct vmbus_channel *channel = (struct vmbus_channel *)context;
1121        struct hv_device *device;
1122        struct netvsc_device *net_device;
1123        u32 bytes_recvd;
1124        u64 request_id;
1125        struct vmpacket_descriptor *desc;
1126        unsigned char *buffer;
1127        int bufferlen = NETVSC_PACKET_SIZE;
1128        struct net_device *ndev;
1129        struct nvsp_message *nvmsg;
1130
1131        if (channel->primary_channel != NULL)
1132                device = channel->primary_channel->device_obj;
1133        else
1134                device = channel->device_obj;
1135
1136        net_device = get_inbound_net_device(device);
1137        if (!net_device)
1138                return;
1139        ndev = net_device->ndev;
1140        buffer = get_per_channel_state(channel);
1141
1142        do {
1143                ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
1144                                           &bytes_recvd, &request_id);
1145                if (ret == 0) {
1146                        if (bytes_recvd > 0) {
1147                                desc = (struct vmpacket_descriptor *)buffer;
1148                                nvmsg = (struct nvsp_message *)((unsigned long)
1149                                         desc + (desc->offset8 << 3));
1150                                switch (desc->type) {
1151                                case VM_PKT_COMP:
1152                                        netvsc_send_completion(net_device,
1153                                                                device, desc);
1154                                        break;
1155
1156                                case VM_PKT_DATA_USING_XFER_PAGES:
1157                                        netvsc_receive(net_device, channel,
1158                                                       device, desc);
1159                                        break;
1160
1161                                case VM_PKT_DATA_INBAND:
1162                                        netvsc_receive_inband(device,
1163                                                              net_device,
1164                                                              nvmsg);
1165                                        break;
1166
1167                                default:
1168                                        netdev_err(ndev,
1169                                                   "unhandled packet type %d, "
1170                                                   "tid %llx len %d\n",
1171                                                   desc->type, request_id,
1172                                                   bytes_recvd);
1173                                        break;
1174                                }
1175
1176                        } else {
1177                                /*
1178                                 * We are done for this pass.
1179                                 */
1180                                break;
1181                        }
1182
1183                } else if (ret == -ENOBUFS) {
1184                        if (bufferlen > NETVSC_PACKET_SIZE)
1185                                kfree(buffer);
1186                        /* Handle large packet */
1187                        buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
1188                        if (buffer == NULL) {
1189                                /* Try again next time around */
1190                                netdev_err(ndev,
1191                                           "unable to allocate buffer of size "
1192                                           "(%d)!!\n", bytes_recvd);
1193                                break;
1194                        }
1195
1196                        bufferlen = bytes_recvd;
1197                }
1198        } while (1);
1199
1200        if (bufferlen > NETVSC_PACKET_SIZE)
1201                kfree(buffer);
1202        return;
1203}
1204
1205/*
1206 * netvsc_device_add - Callback when the device belonging to this
1207 * driver is added
1208 */
1209int netvsc_device_add(struct hv_device *device, void *additional_info)
1210{
1211        int ret = 0;
1212        int ring_size =
1213        ((struct netvsc_device_info *)additional_info)->ring_size;
1214        struct netvsc_device *net_device;
1215        struct net_device *ndev;
1216
1217        net_device = alloc_net_device(device);
1218        if (!net_device)
1219                return -ENOMEM;
1220
1221        net_device->ring_size = ring_size;
1222
1223        /*
1224         * Coming into this function, struct net_device * is
1225         * registered as the driver private data.
1226         * In alloc_net_device(), we register struct netvsc_device *
1227         * as the driver private data and stash away struct net_device *
1228         * in struct netvsc_device *.
1229         */
1230        ndev = net_device->ndev;
1231
1232        /* Add netvsc_device context to netvsc_device */
1233        net_device->nd_ctx = netdev_priv(ndev);
1234
1235        /* Initialize the NetVSC channel extension */
1236        init_completion(&net_device->channel_init_wait);
1237
1238        set_per_channel_state(device->channel, net_device->cb_buffer);
1239
1240        /* Open the channel */
1241        ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
1242                         ring_size * PAGE_SIZE, NULL, 0,
1243                         netvsc_channel_cb, device->channel);
1244
1245        if (ret != 0) {
1246                netdev_err(ndev, "unable to open channel: %d\n", ret);
1247                goto cleanup;
1248        }
1249
1250        /* Channel is opened */
1251        pr_info("hv_netvsc channel opened successfully\n");
1252
1253        net_device->chn_table[0] = device->channel;
1254
1255        /* Connect with the NetVsp */
1256        ret = netvsc_connect_vsp(device);
1257        if (ret != 0) {
1258                netdev_err(ndev,
1259                        "unable to connect to NetVSP - %d\n", ret);
1260                goto close;
1261        }
1262
1263        return ret;
1264
1265close:
1266        /* Now, we can close the channel safely */
1267        vmbus_close(device->channel);
1268
1269cleanup:
1270        free_netvsc_device(net_device);
1271
1272        return ret;
1273}
1274