linux/drivers/net/virtio_net.c
   1/* A network driver using virtio.
   2 *
   3 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
   4 *
   5 * This program is free software; you can redistribute it and/or modify
   6 * it under the terms of the GNU General Public License as published by
   7 * the Free Software Foundation; either version 2 of the License, or
   8 * (at your option) any later version.
   9 *
  10 * This program is distributed in the hope that it will be useful,
  11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 * GNU General Public License for more details.
  14 *
  15 * You should have received a copy of the GNU General Public License
  16 * along with this program; if not, write to the Free Software
  17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18 */
  19//#define DEBUG
  20#include <linux/netdevice.h>
  21#include <linux/etherdevice.h>
  22#include <linux/ethtool.h>
  23#include <linux/module.h>
  24#include <linux/virtio.h>
  25#include <linux/virtio_net.h>
  26#include <linux/scatterlist.h>
  27#include <linux/if_vlan.h>
  28#include <linux/slab.h>
  29#include <linux/cpu.h>
  30
  31static int napi_weight = NAPI_POLL_WEIGHT;
  32module_param(napi_weight, int, 0444);
  33
  34static bool csum = true, gso = true;
  35module_param(csum, bool, 0444);
  36module_param(gso, bool, 0444);
  37
  38/* FIXME: MTU in config. */
  39#define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
  40#define GOOD_COPY_LEN   128
  41
  42#define VIRTNET_DRIVER_VERSION "1.0.0"
  43
  44struct virtnet_stats {
  45        struct u64_stats_sync tx_syncp;
  46        struct u64_stats_sync rx_syncp;
  47        u64 tx_bytes;
  48        u64 tx_packets;
  49
  50        u64 rx_bytes;
  51        u64 rx_packets;
  52};
  53
  54/* Internal representation of a send virtqueue */
  55struct send_queue {
   56        /* Virtqueue associated with this send queue */
  57        struct virtqueue *vq;
  58
  59        /* TX: fragments + linear part + virtio header */
  60        struct scatterlist sg[MAX_SKB_FRAGS + 2];
  61
  62        /* Name of the send queue: output.$index */
  63        char name[40];
  64};
  65
  66/* Internal representation of a receive virtqueue */
  67struct receive_queue {
  68        /* Virtqueue associated with this receive_queue */
  69        struct virtqueue *vq;
  70
  71        struct napi_struct napi;
  72
  73        /* Number of input buffers, and max we've ever had. */
  74        unsigned int num, max;
  75
  76        /* Chain pages by the private ptr. */
  77        struct page *pages;
  78
  79        /* RX: fragments + linear part + virtio header */
  80        struct scatterlist sg[MAX_SKB_FRAGS + 2];
  81
  82        /* Name of this receive queue: input.$index */
  83        char name[40];
  84};
  85
  86struct virtnet_info {
  87        struct virtio_device *vdev;
  88        struct virtqueue *cvq;
  89        struct net_device *dev;
  90        struct send_queue *sq;
  91        struct receive_queue *rq;
  92        unsigned int status;
  93
  94        /* Max # of queue pairs supported by the device */
  95        u16 max_queue_pairs;
  96
  97        /* # of queue pairs currently used by the driver */
  98        u16 curr_queue_pairs;
  99
 100        /* I like... big packets and I cannot lie! */
 101        bool big_packets;
 102
 103        /* Host will merge rx buffers for big packets (shake it! shake it!) */
 104        bool mergeable_rx_bufs;
 105
 106        /* Has control virtqueue */
 107        bool has_cvq;
 108
 109        /* Host can handle any s/g split between our header and packet data */
 110        bool any_header_sg;
 111
 112        /* Packet virtio header size */
 113        u8 hdr_len;
 114
 115        /* Active statistics */
 116        struct virtnet_stats __percpu *stats;
 117
 118        /* Work struct for refilling if we run low on memory. */
 119        struct delayed_work refill;
 120
 121        /* Work struct for config space updates */
 122        struct work_struct config_work;
 123
 124        /* Does the affinity hint is set for virtqueues? */
 125        bool affinity_hint_set;
 126
 127        /* Per-cpu variable to show the mapping from CPU to virtqueue */
 128        int __percpu *vq_index;
 129
 130        /* CPU hot plug notifier */
 131        struct notifier_block nb;
 132
 133        /* Maximum allowed MTU */
 134        u16 max_mtu;
 135};
 136
 137struct padded_vnet_hdr {
 138        struct virtio_net_hdr_mrg_rxbuf hdr;
 139        /*
 140         * hdr is in a separate sg buffer, and data sg buffer shares same page
 141         * with this header sg. This padding makes next sg 16 byte aligned
 142         * after the header.
 143         */
 144        char padding[4];
 145};
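
/*
 * Size sketch (editorial, not in the source): the legacy struct
 * virtio_net_hdr is 10 bytes and the mergeable variant adds a 2-byte
 * num_buffers field, so hdr above occupies 12 bytes; the 4 bytes of
 * padding round the header sg up to 16 bytes before the data sg that
 * shares its page.
 */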
 146
 147/* Converting between virtqueue no. and kernel tx/rx queue no.
 148 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 149 */
 150static int vq2txq(struct virtqueue *vq)
 151{
 152        return (vq->index - 1) / 2;
 153}
 154
 155static int txq2vq(int txq)
 156{
 157        return txq * 2 + 1;
 158}
 159
 160static int vq2rxq(struct virtqueue *vq)
 161{
 162        return vq->index / 2;
 163}
 164
 165static int rxq2vq(int rxq)
 166{
 167        return rxq * 2;
 168}
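
/*
 * Worked example (illustrative only): with 2 queue pairs and a control
 * queue the virtqueues are laid out as
 *   vq0:rx0  vq1:tx0  vq2:rx1  vq3:tx1  vq4:cvq
 * so rxq2vq(1) == 2, txq2vq(1) == 3, vq2rxq() of vq2 is 1, and
 * vq2txq() of vq3 is 1.
 */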
 169
 170static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
 171{
 172        return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;
 173}
 174
 175/*
  176 * private is used to chain pages for big packets; put the whole
  177 * most recently used list at the beginning for reuse.
 178 */
 179static void give_pages(struct receive_queue *rq, struct page *page)
 180{
 181        struct page *end;
 182
  183        /* Find end of list, sew whole thing into rq->pages. */
 184        for (end = page; end->private; end = (struct page *)end->private);
 185        end->private = (unsigned long)rq->pages;
 186        rq->pages = page;
 187}
 188
 189static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
 190{
 191        struct page *p = rq->pages;
 192
 193        if (p) {
 194                rq->pages = (struct page *)p->private;
  195                /* clear private here; it is used to chain pages */
 196                p->private = 0;
 197        } else
 198                p = alloc_page(gfp_mask);
 199        return p;
 200}
 201
 202static void skb_xmit_done(struct virtqueue *vq)
 203{
 204        struct virtnet_info *vi = vq->vdev->priv;
 205
 206        /* Suppress further interrupts. */
 207        virtqueue_disable_cb(vq);
 208
 209        /* We were probably waiting for more output buffers. */
 210        netif_wake_subqueue(vi->dev, vq2txq(vq));
 211}
 212
 213static void set_skb_frag(struct sk_buff *skb, struct page *page,
 214                         unsigned int offset, unsigned int *len)
 215{
 216        int size = min((unsigned)PAGE_SIZE - offset, *len);
 217        int i = skb_shinfo(skb)->nr_frags;
 218
 219        __skb_fill_page_desc(skb, i, page, offset, size);
 220
 221        skb->data_len += size;
 222        skb->len += size;
 223        skb->truesize += PAGE_SIZE;
 224        skb_shinfo(skb)->nr_frags++;
 225        skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 226        *len -= size;
 227}
 228
 229/* Called from bottom half context */
 230static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 231                                   struct receive_queue *rq,
 232                                   struct page *page, unsigned int len)
 233{
 234        struct sk_buff *skb;
 235        struct virtio_net_hdr_mrg_rxbuf *hdr;
 236        unsigned int copy, hdr_len, offset;
 237        char *p;
 238
 239        p = page_address(page);
 240
 241        /* copy small packet so we can reuse these pages for small data */
 242        skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
 243        if (unlikely(!skb))
 244                return NULL;
 245
 246        hdr = skb_vnet_hdr(skb);
 247
 248        hdr_len = vi->hdr_len;
 249        if (vi->mergeable_rx_bufs)
 250                offset = sizeof *hdr;
 251        else
 252                offset = sizeof(struct padded_vnet_hdr);
 253
 254        memcpy(hdr, p, hdr_len);
 255
 256        len -= hdr_len;
 257        p += offset;
 258
 259        copy = len;
 260        if (copy > skb_tailroom(skb))
 261                copy = skb_tailroom(skb);
 262        memcpy(skb_put(skb, copy), p, copy);
 263
 264        len -= copy;
 265        offset += copy;
 266
 267        /*
 268         * Verify that we can indeed put this data into a skb.
 269         * This is here to handle cases when the device erroneously
 270         * tries to receive more than is possible. This is usually
 271         * the case of a broken device.
 272         */
 273        if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
 274                net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
 275                dev_kfree_skb(skb);
 276                return NULL;
 277        }
 278
 279        while (len) {
 280                set_skb_frag(skb, page, offset, &len);
 281                page = (struct page *)page->private;
 282                offset = 0;
 283        }
 284
 285        if (page)
 286                give_pages(rq, page);
 287
 288        return skb;
 289}
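
/*
 * Copy-break note (editorial): page_to_skb() copies roughly
 * GOOD_COPY_LEN bytes (whatever tailroom the allocation provides) into
 * the skb head and attaches anything beyond that as page fragments, so
 * fully-copied small packets hand their pages straight back to
 * rq->pages while large ones avoid copying the bulk of the data.
 */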
 290
 291static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len)
 292{
 293        struct sk_buff * skb = buf;
 294
 295        len -= vi->hdr_len;
 296        skb_trim(skb, len);
 297
 298        return skb;
 299}
 300
 301static struct sk_buff *receive_big(struct net_device *dev,
 302                                   struct virtnet_info *vi,
 303                                   struct receive_queue *rq,
 304                                   void *buf, unsigned int len)
 305{
 306        struct page *page = buf;
 307        struct sk_buff *skb = page_to_skb(vi, rq, page, len);
 308
 309        if (unlikely(!skb))
 310                goto err;
 311
 312        return skb;
 313
 314err:
 315        dev->stats.rx_dropped++;
 316        give_pages(rq, page);
 317        return NULL;
 318}
 319
 320static struct sk_buff *receive_mergeable(struct net_device *dev,
 321                                         struct virtnet_info *vi,
 322                                         struct receive_queue *rq,
 323                                         void *buf,
 324                                         unsigned int len)
 325{
 326        struct virtio_net_hdr_mrg_rxbuf *hdr = page_address(buf);
 327        u16 num_buf = virtio16_to_cpu(rq->vq->vdev, hdr->num_buffers);
 328        struct page *page = buf;
 329        struct sk_buff *skb = page_to_skb(vi, rq, page, len);
 330        int i;
 331
 332        if (unlikely(!skb))
 333                goto err_skb;
 334
 335        while (--num_buf) {
 336                i = skb_shinfo(skb)->nr_frags;
 337                if (i >= MAX_SKB_FRAGS) {
 338                        pr_debug("%s: packet too long\n", skb->dev->name);
 339                        skb->dev->stats.rx_length_errors++;
 340                        return NULL;
 341                }
 342                page = virtqueue_get_buf(rq->vq, &len);
 343                if (!page) {
  344                        pr_debug("%s: rx error: %d buffers expected, %d missing\n",
 345                                 dev->name,
 346                                 virtio16_to_cpu(rq->vq->vdev,
 347                                                 hdr->num_buffers),
 348                                 num_buf);
 349                        dev->stats.rx_length_errors++;
 350                        goto err_buf;
 351                }
 352
 353                if (len > PAGE_SIZE)
 354                        len = PAGE_SIZE;
 355
 356                set_skb_frag(skb, page, 0, &len);
 357
 358                --rq->num;
 359        }
 360        return skb;
 361err_skb:
 362        give_pages(rq, page);
 363        while (--num_buf) {
 364                buf = virtqueue_get_buf(rq->vq, &len);
 365                if (unlikely(!buf)) {
 366                        pr_debug("%s: rx error: %d buffers missing\n",
 367                                 dev->name, num_buf);
 368                        dev->stats.rx_length_errors++;
 369                        break;
 370                }
 371                page = buf;
 372                give_pages(rq, page);
 373                --rq->num;
 374        }
 375err_buf:
 376        dev->stats.rx_dropped++;
 377        dev_kfree_skb(skb);
 378        return NULL;
 379}
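
/*
 * Reassembly sketch (editorial): for mergeable buffers the first
 * descriptor's header carries num_buffers, the number of page-sized
 * receive buffers the device spread this frame across; the loop above
 * pulls the remaining num_buffers - 1 pages off the ring and attaches
 * each one as an skb fragment.
 */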
 380
 381static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
 382                        void *buf, unsigned int len)
 383{
 384        struct net_device *dev = vi->dev;
 385        struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 386        struct sk_buff *skb;
 387        struct virtio_net_hdr_mrg_rxbuf *hdr;
 388
 389        if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
 390                pr_debug("%s: short packet %i\n", dev->name, len);
 391                dev->stats.rx_length_errors++;
 392                if (vi->mergeable_rx_bufs || vi->big_packets)
 393                        give_pages(rq, buf);
 394                else
 395                        dev_kfree_skb(buf);
 396                return;
 397        }
 398        if (vi->mergeable_rx_bufs)
 399                skb = receive_mergeable(dev, vi, rq, buf, len);
 400        else if (vi->big_packets)
 401                skb = receive_big(dev, vi, rq, buf, len);
 402        else
 403                skb = receive_small(vi, buf, len);
 404
 405        if (unlikely(!skb))
 406                return;
 407
 408        hdr = skb_vnet_hdr(skb);
 409
 410        u64_stats_update_begin(&stats->rx_syncp);
 411        stats->rx_bytes += skb->len;
 412        stats->rx_packets++;
 413        u64_stats_update_end(&stats->rx_syncp);
 414
 415        if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
 416                pr_debug("Needs csum!\n");
 417                if (!skb_partial_csum_set(skb,
 418                          virtio16_to_cpu(vi->vdev, hdr->hdr.csum_start),
 419                          virtio16_to_cpu(vi->vdev, hdr->hdr.csum_offset)))
 420                        goto frame_err;
 421        } else if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) {
 422                skb->ip_summed = CHECKSUM_UNNECESSARY;
 423        }
 424
 425        skb->protocol = eth_type_trans(skb, dev);
 426        pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
 427                 ntohs(skb->protocol), skb->len, skb->pkt_type);
 428
 429        if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 430                pr_debug("GSO!\n");
 431                switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 432                case VIRTIO_NET_HDR_GSO_TCPV4:
 433                        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 434                        break;
 435                case VIRTIO_NET_HDR_GSO_UDP:
 436                        skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 437                        break;
 438                case VIRTIO_NET_HDR_GSO_TCPV6:
 439                        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
 440                        break;
 441                default:
 442                        net_warn_ratelimited("%s: bad gso type %u.\n",
 443                                             dev->name, hdr->hdr.gso_type);
 444                        goto frame_err;
 445                }
 446
 447                if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
 448                        skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 449
 450                skb_shinfo(skb)->gso_size = virtio16_to_cpu(vi->vdev,
 451                                                            hdr->hdr.gso_size);
 452                if (skb_shinfo(skb)->gso_size == 0) {
 453                        net_warn_ratelimited("%s: zero gso size.\n", dev->name);
 454                        goto frame_err;
 455                }
 456
 457                /* Header must be checked, and gso_segs computed. */
 458                skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 459                skb_shinfo(skb)->gso_segs = 0;
 460        }
 461
 462        napi_gro_receive(&rq->napi, skb);
 463        return;
 464
 465frame_err:
 466        dev->stats.rx_frame_errors++;
 467        dev_kfree_skb(skb);
 468}
 469
 470static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
 471                             gfp_t gfp)
 472{
 473        struct sk_buff *skb;
 474        struct virtio_net_hdr_mrg_rxbuf *hdr;
 475        int err;
 476
 477        skb = __netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN, gfp);
 478        if (unlikely(!skb))
 479                return -ENOMEM;
 480
 481        skb_put(skb, MAX_PACKET_LEN);
 482
 483        hdr = skb_vnet_hdr(skb);
 484        sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
 485        sg_set_buf(rq->sg, hdr, vi->hdr_len);
 486        skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
 487
 488        err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp);
 489        if (err < 0)
 490                dev_kfree_skb(skb);
 491
 492        return err;
 493}
 494
 495static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 496                           gfp_t gfp)
 497{
 498        struct page *first, *list = NULL;
 499        char *p;
 500        int i, err, offset;
 501
 502        sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
 503
 504        /* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
 505        for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
 506                first = get_a_page(rq, gfp);
 507                if (!first) {
 508                        if (list)
 509                                give_pages(rq, list);
 510                        return -ENOMEM;
 511                }
 512                sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
 513
 514                /* chain new page in list head to match sg */
 515                first->private = (unsigned long)list;
 516                list = first;
 517        }
 518
 519        first = get_a_page(rq, gfp);
 520        if (!first) {
 521                give_pages(rq, list);
 522                return -ENOMEM;
 523        }
 524        p = page_address(first);
 525
 526        /* rq->sg[0], rq->sg[1] share the same page */
  527        /* a separate rq->sg[0] for the header - required in case !any_header_sg */
 528        sg_set_buf(&rq->sg[0], p, vi->hdr_len);
 529
 530        /* rq->sg[1] for data packet, from offset */
 531        offset = sizeof(struct padded_vnet_hdr);
 532        sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
 533
 534        /* chain first in list head */
 535        first->private = (unsigned long)list;
 536        err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2,
 537                                  first, gfp);
 538        if (err < 0)
 539                give_pages(rq, first);
 540
 541        return err;
 542}
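
/*
 * Resulting layout (editorial): sg[0] describes the virtio header,
 * sg[1] the rest of the same page, and sg[2..MAX_SKB_FRAGS+1] one full
 * page each; the pages are chained through page->private so the whole
 * set can be handed back with a single give_pages() call.
 */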
 543
 544static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 545{
 546        struct page *page;
 547        int err;
 548
 549        page = get_a_page(rq, gfp);
 550        if (!page)
 551                return -ENOMEM;
 552
 553        sg_init_one(rq->sg, page_address(page), PAGE_SIZE);
 554
 555        err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, page, gfp);
 556        if (err < 0)
 557                give_pages(rq, page);
 558
 559        return err;
 560}
 561
 562/*
 563 * Returns false if we couldn't fill entirely (OOM).
 564 *
 565 * Normally run in the receive path, but can also be run from ndo_open
 566 * before we're receiving packets, or from refill_work which is
 567 * careful to disable receiving (using napi_disable).
 568 */
 569static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
 570                          gfp_t gfp)
 571{
 572        int err;
 573        bool oom;
 574
 575        do {
 576                if (vi->mergeable_rx_bufs)
 577                        err = add_recvbuf_mergeable(rq, gfp);
 578                else if (vi->big_packets)
 579                        err = add_recvbuf_big(vi, rq, gfp);
 580                else
 581                        err = add_recvbuf_small(vi, rq, gfp);
 582
 583                oom = err == -ENOMEM;
 584                if (err)
 585                        break;
 586                ++rq->num;
 587        } while (rq->vq->num_free);
 588        if (unlikely(rq->num > rq->max))
 589                rq->max = rq->num;
 590        virtqueue_kick(rq->vq);
 591        return !oom;
 592}
 593
 594static void skb_recv_done(struct virtqueue *rvq)
 595{
 596        struct virtnet_info *vi = rvq->vdev->priv;
 597        struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
 598
  599        /* Schedule NAPI; suppress further interrupts if successful. */
 600        if (napi_schedule_prep(&rq->napi)) {
 601                virtqueue_disable_cb(rvq);
 602                __napi_schedule(&rq->napi);
 603        }
 604}
 605
 606static void virtnet_napi_enable(struct receive_queue *rq)
 607{
 608        napi_enable(&rq->napi);
 609
  610        /* If all buffers were filled by the other side before we enabled napi,
  611         * we won't get another interrupt, so process any outstanding packets
  612         * now.  virtnet_poll wants to re-enable the queue, so we disable here.
  613         * We synchronize against interrupts via NAPI_STATE_SCHED. */
 614        if (napi_schedule_prep(&rq->napi)) {
 615                virtqueue_disable_cb(rq->vq);
 616                local_bh_disable();
 617                __napi_schedule(&rq->napi);
 618                local_bh_enable();
 619        }
 620}
 621
 622static void refill_work(struct work_struct *work)
 623{
 624        struct virtnet_info *vi =
 625                container_of(work, struct virtnet_info, refill.work);
 626        bool still_empty;
 627        int i;
 628
 629        for (i = 0; i < vi->curr_queue_pairs; i++) {
 630                struct receive_queue *rq = &vi->rq[i];
 631
 632                napi_disable(&rq->napi);
 633                still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
 634                virtnet_napi_enable(rq);
 635
  636                /* In theory, this can happen: if we don't get any buffers in,
  637                 * we will *never* try to fill again.
 638                 */
 639                if (still_empty)
 640                        schedule_delayed_work(&vi->refill, HZ/2);
 641        }
 642}
 643
 644static int virtnet_poll(struct napi_struct *napi, int budget)
 645{
 646        struct receive_queue *rq =
 647                container_of(napi, struct receive_queue, napi);
 648        struct virtnet_info *vi = rq->vq->vdev->priv;
 649        void *buf;
 650        unsigned int r, len, received = 0;
 651
 652        while (received < budget &&
 653               (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
 654                receive_buf(vi, rq, buf, len);
 655                --rq->num;
 656                received++;
 657        }
 658
 659        if (rq->num < rq->max / 2) {
 660                if (!try_fill_recv(vi, rq, GFP_ATOMIC))
 661                        schedule_delayed_work(&vi->refill, 0);
 662        }
 663
 664        /* Out of packets? */
 665        if (received < budget) {
 666                r = virtqueue_enable_cb_prepare(rq->vq);
 667                napi_complete_done(napi, received);
 668                if (unlikely(virtqueue_poll(rq->vq, r)) &&
 669                    napi_schedule_prep(napi)) {
 670                        virtqueue_disable_cb(rq->vq);
 671                        __napi_schedule(napi);
 672                }
 673        }
 674
 675        return received;
 676}
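
/*
 * Note on the re-enable dance above (editorial):
 * virtqueue_enable_cb_prepare() plus virtqueue_poll() re-arms the
 * interrupt and then checks whether buffers slipped in before the
 * re-arm took effect; if so, callbacks are disabled again and NAPI is
 * rescheduled rather than risking a missed wakeup.
 */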
 677
 678static int virtnet_open(struct net_device *dev)
 679{
 680        struct virtnet_info *vi = netdev_priv(dev);
 681        int i;
 682
 683        for (i = 0; i < vi->max_queue_pairs; i++) {
 684                if (i < vi->curr_queue_pairs)
  685                        /* Make sure we have some buffers: if OOM, use the workqueue. */
 686                        if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
 687                                schedule_delayed_work(&vi->refill, 0);
 688                virtnet_napi_enable(&vi->rq[i]);
 689        }
 690
 691        return 0;
 692}
 693
 694static void free_old_xmit_skbs(struct send_queue *sq)
 695{
 696        struct sk_buff *skb;
 697        unsigned int len;
 698        struct virtnet_info *vi = sq->vq->vdev->priv;
 699        struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 700
 701        while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
 702                pr_debug("Sent skb %p\n", skb);
 703
 704                u64_stats_update_begin(&stats->tx_syncp);
 705                stats->tx_bytes += skb->len;
 706                stats->tx_packets++;
 707                u64_stats_update_end(&stats->tx_syncp);
 708
 709                dev_kfree_skb_any(skb);
 710        }
 711}
 712
 713static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 714{
 715        struct virtio_net_hdr_mrg_rxbuf *hdr;
 716        const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
 717        struct virtnet_info *vi = sq->vq->vdev->priv;
 718        unsigned num_sg;
 719        unsigned hdr_len = vi->hdr_len;
 720        bool can_push;
 721
 722        pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
 723
 724        can_push = vi->any_header_sg &&
 725                !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
 726                !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
 727        /* Even if we can, don't push here yet as this would skew
 728         * csum_start offset below. */
 729        if (can_push)
 730                hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
 731        else
 732                hdr = skb_vnet_hdr(skb);
 733
 734        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 735                hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
 736                hdr->hdr.csum_start = cpu_to_virtio16(vi->vdev,
 737                                                skb_checksum_start_offset(skb));
 738                hdr->hdr.csum_offset = cpu_to_virtio16(vi->vdev,
 739                                                         skb->csum_offset);
 740        } else {
 741                hdr->hdr.flags = 0;
 742                hdr->hdr.csum_offset = hdr->hdr.csum_start = 0;
 743        }
 744
 745        if (skb_is_gso(skb)) {
 746                hdr->hdr.hdr_len = cpu_to_virtio16(vi->vdev, skb_headlen(skb));
 747                hdr->hdr.gso_size = cpu_to_virtio16(vi->vdev,
 748                                                    skb_shinfo(skb)->gso_size);
 749                if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
 750                        hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
 751                else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
 752                        hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
 753                else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
 754                        hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
 755                else
 756                        BUG();
 757                if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
 758                        hdr->hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
 759        } else {
 760                hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
 761                hdr->hdr.gso_size = hdr->hdr.hdr_len = 0;
 762        }
 763
 764        if (vi->mergeable_rx_bufs)
 765                hdr->num_buffers = 0;
 766
 767        sg_init_table(sq->sg, MAX_SKB_FRAGS + 2);
 768        if (can_push) {
 769                __skb_push(skb, hdr_len);
 770                num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
 771                /* Pull header back to avoid skew in tx bytes calculations. */
 772                __skb_pull(skb, hdr_len);
 773        } else {
 774                sg_set_buf(sq->sg, hdr, hdr_len);
 775                num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
 776        }
 777        return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
 778}
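
/*
 * Header placement (editorial): when vi->any_header_sg is set and the
 * skb has suitably aligned, uncloned headroom, the virtio header is
 * pushed directly in front of the linear data so header and packet
 * share one sg run; otherwise the header lives in skb->cb (see
 * skb_vnet_hdr()) and gets its own leading sg entry.
 */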
 779
 780static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 781{
 782        struct virtnet_info *vi = netdev_priv(dev);
 783        int qnum = skb_get_queue_mapping(skb);
 784        struct send_queue *sq = &vi->sq[qnum];
 785        int err;
 786        struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
 787        bool kick = !skb->xmit_more;
 788
 789        /* Free up any pending old buffers before queueing new ones. */
 790        free_old_xmit_skbs(sq);
 791
 792        /* Try to transmit */
 793        err = xmit_skb(sq, skb);
 794
 795        /* This should not happen! */
 796        if (unlikely(err)) {
 797                dev->stats.tx_fifo_errors++;
 798                if (net_ratelimit())
 799                        dev_warn(&dev->dev,
 800                                 "Unexpected TXQ (%d) queue failure: %d\n", qnum, err);
 801                dev->stats.tx_dropped++;
 802                kfree_skb(skb);
 803                return NETDEV_TX_OK;
 804        }
 805
 806        /* Don't wait up for transmitted skbs to be freed. */
 807        skb_orphan(skb);
 808        nf_reset(skb);
 809
 810        /* Apparently nice girls don't return TX_BUSY; stop the queue
 811         * before it gets out of hand.  Naturally, this wastes entries. */
 812        if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
 813                netif_stop_subqueue(dev, qnum);
 814                if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
 815                        /* More just got used, free them then recheck. */
 816                        free_old_xmit_skbs(sq);
 817                        if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
 818                                netif_start_subqueue(dev, qnum);
 819                                virtqueue_disable_cb(sq->vq);
 820                        }
 821                }
 822        }
 823
 824        if (kick || netif_xmit_stopped(txq))
 825                virtqueue_kick(sq->vq);
 826
 827        return NETDEV_TX_OK;
 828}
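
/*
 * Kick batching (editorial): the virtqueue is only kicked when
 * skb->xmit_more says no further frames are queued behind this one (or
 * the tx queue just stopped), so bursts of packets can share a single
 * host notification.
 */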
 829
 830/*
 831 * Send command via the control virtqueue and check status.  Commands
 832 * supported by the hypervisor, as indicated by feature bits, should
  833 * never fail unless improperly formatted.
 834 */
 835static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 836                                 struct scatterlist *out,
 837                                 struct scatterlist *in)
 838{
 839        struct scatterlist *sgs[4], hdr, stat;
 840        struct virtio_net_ctrl_hdr ctrl;
 841        virtio_net_ctrl_ack status = ~0;
 842        unsigned out_num = 0, in_num = 0, tmp;
 843
 844        /* Caller should know better */
 845        BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
 846
 847        ctrl.class = class;
 848        ctrl.cmd = cmd;
 849        /* Add header */
 850        sg_init_one(&hdr, &ctrl, sizeof(ctrl));
 851        sgs[out_num++] = &hdr;
 852
 853        if (out)
 854                sgs[out_num++] = out;
 855        if (in)
 856                sgs[out_num + in_num++] = in;
 857
 858        /* Add return status. */
 859        sg_init_one(&stat, &status, sizeof(status));
 860        sgs[out_num + in_num++] = &stat;
 861
 862        BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
 863        virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC);
 864
 865        virtqueue_kick(vi->cvq);
 866
  867        /* Spin for a response; the kick causes an ioport write, trapping
 868         * into the hypervisor, so the request should be handled immediately.
 869         */
 870        while (!virtqueue_get_buf(vi->cvq, &tmp) &&
 871               !virtqueue_is_broken(vi->cvq))
 872                cpu_relax();
 873
 874        return status == VIRTIO_NET_OK;
 875}
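
/*
 * Command layout (as built above): sgs[] holds at most four
 * scatterlists - the class/cmd header, an optional command-specific
 * "out" buffer, an optional "in" buffer, and the one-byte ack status
 * the device writes back.  virtnet_set_queues() below, for example,
 * sends VIRTIO_NET_CTRL_MQ / VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET with a
 * single out sg wrapping struct virtio_net_ctrl_mq.
 */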
 876
 877static int virtnet_set_mac_address(struct net_device *dev, void *p)
 878{
 879        struct virtnet_info *vi = netdev_priv(dev);
 880        struct virtio_device *vdev = vi->vdev;
 881        int ret;
 882        struct sockaddr *addr = p;
 883        struct scatterlist sg;
 884
 885        ret = eth_prepare_mac_addr_change(dev, p);
 886        if (ret)
 887                return ret;
 888
 889        if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
 890                sg_init_one(&sg, addr->sa_data, dev->addr_len);
 891                if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
 892                                          VIRTIO_NET_CTRL_MAC_ADDR_SET,
 893                                          &sg, NULL)) {
 894                        dev_warn(&vdev->dev,
 895                                 "Failed to set mac address by vq command.\n");
 896                        return -EINVAL;
 897                }
 898        } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
 899                   !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
 900                unsigned int i;
 901
 902                /* Naturally, this has an atomicity problem. */
 903                for (i = 0; i < dev->addr_len; i++)
 904                        virtio_cwrite8(vdev,
 905                                       offsetof(struct virtio_net_config, mac) +
 906                                       i, addr->sa_data[i]);
 907        }
 908
 909        eth_commit_mac_addr_change(dev, p);
 910
 911        return 0;
 912}
 913
 914static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
 915                                               struct rtnl_link_stats64 *tot)
 916{
 917        struct virtnet_info *vi = netdev_priv(dev);
 918        int cpu;
 919        unsigned int start;
 920
 921        for_each_possible_cpu(cpu) {
 922                struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
 923                u64 tpackets, tbytes, rpackets, rbytes;
 924
 925                do {
 926                        start = u64_stats_fetch_begin_irq(&stats->tx_syncp);
 927                        tpackets = stats->tx_packets;
 928                        tbytes   = stats->tx_bytes;
 929                } while (u64_stats_fetch_retry_irq(&stats->tx_syncp, start));
 930
 931                do {
 932                        start = u64_stats_fetch_begin_irq(&stats->rx_syncp);
 933                        rpackets = stats->rx_packets;
 934                        rbytes   = stats->rx_bytes;
 935                } while (u64_stats_fetch_retry_irq(&stats->rx_syncp, start));
 936
 937                tot->rx_packets += rpackets;
 938                tot->tx_packets += tpackets;
 939                tot->rx_bytes   += rbytes;
 940                tot->tx_bytes   += tbytes;
 941        }
 942
 943        tot->tx_dropped = dev->stats.tx_dropped;
 944        tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
 945        tot->rx_dropped = dev->stats.rx_dropped;
 946        tot->rx_length_errors = dev->stats.rx_length_errors;
 947        tot->rx_frame_errors = dev->stats.rx_frame_errors;
 948
 949        return tot;
 950}
 951
 952#ifdef CONFIG_NET_POLL_CONTROLLER
 953static void virtnet_netpoll(struct net_device *dev)
 954{
 955        struct virtnet_info *vi = netdev_priv(dev);
 956        int i;
 957
 958        for (i = 0; i < vi->curr_queue_pairs; i++)
 959                napi_schedule(&vi->rq[i].napi);
 960}
 961#endif
 962
 963static void virtnet_ack_link_announce(struct virtnet_info *vi)
 964{
 965        rtnl_lock();
 966        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
 967                                  VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL, NULL))
 968                dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
 969        rtnl_unlock();
 970}
 971
 972static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 973{
 974        struct scatterlist sg;
 975        struct virtio_net_ctrl_mq s;
 976        struct net_device *dev = vi->dev;
 977
 978        if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
 979                return 0;
 980
 981        s.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
 982        sg_init_one(&sg, &s, sizeof(s));
 983
 984        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
 985                                  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg, NULL)) {
  986                dev_warn(&dev->dev, "Failed to set the number of queue pairs to %d\n",
 987                         queue_pairs);
 988                return -EINVAL;
 989        } else {
 990                vi->curr_queue_pairs = queue_pairs;
  991                /* virtnet_open() will refill when the device goes up. */
 992                if (dev->flags & IFF_UP)
 993                        schedule_delayed_work(&vi->refill, 0);
 994        }
 995
 996        return 0;
 997}
 998
 999static int virtnet_close(struct net_device *dev)
1000{
1001        struct virtnet_info *vi = netdev_priv(dev);
1002        int i;
1003
1004        /* Make sure refill_work doesn't re-enable napi! */
1005        cancel_delayed_work_sync(&vi->refill);
1006
1007        for (i = 0; i < vi->max_queue_pairs; i++)
1008                napi_disable(&vi->rq[i].napi);
1009
1010        return 0;
1011}
1012
1013static void virtnet_set_rx_mode(struct net_device *dev)
1014{
1015        struct virtnet_info *vi = netdev_priv(dev);
1016        struct scatterlist sg[2];
1017        u8 promisc, allmulti;
1018        struct virtio_net_ctrl_mac *mac_data;
1019        struct netdev_hw_addr *ha;
1020        int uc_count;
1021        int mc_count;
1022        void *buf;
1023        int i;
1024
 1025        /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
1026        if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
1027                return;
1028
1029        promisc = ((dev->flags & IFF_PROMISC) != 0);
1030        allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
1031
1032        sg_init_one(sg, &promisc, sizeof(promisc));
1033
1034        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
1035                                  VIRTIO_NET_CTRL_RX_PROMISC,
1036                                  sg, NULL))
1037                dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
1038                         promisc ? "en" : "dis");
1039
1040        sg_init_one(sg, &allmulti, sizeof(allmulti));
1041
1042        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
1043                                  VIRTIO_NET_CTRL_RX_ALLMULTI,
1044                                  sg, NULL))
1045                dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
1046                         allmulti ? "en" : "dis");
1047
1048        uc_count = netdev_uc_count(dev);
1049        mc_count = netdev_mc_count(dev);
1050        /* MAC filter - use one buffer for both lists */
1051        buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
1052                      (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
1053        mac_data = buf;
1054        if (!buf)
1055                return;
1056
1057        sg_init_table(sg, 2);
1058
1059        /* Store the unicast list and count in the front of the buffer */
1060        mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);
1061        i = 0;
1062        netdev_for_each_uc_addr(ha, dev)
1063                memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
1064
1065        sg_set_buf(&sg[0], mac_data,
1066                   sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
1067
1068        /* multicast list and count fill the end */
1069        mac_data = (void *)&mac_data->macs[uc_count][0];
1070
1071        mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);
1072        i = 0;
1073        netdev_for_each_mc_addr(ha, dev)
1074                memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
1075
1076        sg_set_buf(&sg[1], mac_data,
1077                   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
1078
1079        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
1080                                  VIRTIO_NET_CTRL_MAC_TABLE_SET,
1081                                  sg, NULL))
 1082                dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
1083
1084        kfree(buf);
1085}
1086
1087static int virtnet_vlan_rx_add_vid(struct net_device *dev,
1088                                   __be16 proto, u16 vid)
1089{
1090        struct virtnet_info *vi = netdev_priv(dev);
1091        struct scatterlist sg;
1092
1093        sg_init_one(&sg, &vid, sizeof(vid));
1094
1095        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1096                                  VIRTIO_NET_CTRL_VLAN_ADD, &sg, NULL))
1097                dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
1098        return 0;
1099}
1100
1101static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
1102                                    __be16 proto, u16 vid)
1103{
1104        struct virtnet_info *vi = netdev_priv(dev);
1105        struct scatterlist sg;
1106
1107        sg_init_one(&sg, &vid, sizeof(vid));
1108
1109        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1110                                  VIRTIO_NET_CTRL_VLAN_DEL, &sg, NULL))
1111                dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
1112        return 0;
1113}
1114
1115static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
1116{
1117        int i;
1118        int cpu;
1119
1120        if (vi->affinity_hint_set) {
1121                for (i = 0; i < vi->max_queue_pairs; i++) {
1122                        virtqueue_set_affinity(vi->rq[i].vq, -1);
1123                        virtqueue_set_affinity(vi->sq[i].vq, -1);
1124                }
1125
1126                vi->affinity_hint_set = false;
1127        }
1128
1129        i = 0;
1130        for_each_online_cpu(cpu) {
1131                if (cpu == hcpu) {
1132                        *per_cpu_ptr(vi->vq_index, cpu) = -1;
1133                } else {
1134                        *per_cpu_ptr(vi->vq_index, cpu) =
1135                                ++i % vi->curr_queue_pairs;
1136                }
1137        }
1138}
1139
1140static void virtnet_set_affinity(struct virtnet_info *vi)
1141{
1142        int i;
1143        int cpu;
1144
 1145        /* In multiqueue mode, when the number of cpus is equal to the number of
 1146         * queue pairs, we let each queue pair be private to one cpu by
 1147         * setting the affinity hint, eliminating the contention.
1148         */
1149        if (vi->curr_queue_pairs == 1 ||
1150            vi->max_queue_pairs != num_online_cpus()) {
1151                virtnet_clean_affinity(vi, -1);
1152                return;
1153        }
1154
1155        i = 0;
1156        for_each_online_cpu(cpu) {
1157                virtqueue_set_affinity(vi->rq[i].vq, cpu);
1158                virtqueue_set_affinity(vi->sq[i].vq, cpu);
1159                *per_cpu_ptr(vi->vq_index, cpu) = i;
1160                i++;
1161        }
1162
1163        vi->affinity_hint_set = true;
1164}
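
/*
 * Illustrative mapping (not from the source): on a 4-cpu guest with 4
 * queue pairs the loop above pins rq0/sq0 to cpu0, rq1/sq1 to cpu1 and
 * so on, and stores each cpu's queue index in vq_index so
 * virtnet_select_queue() picks the local tx queue.
 */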
1165
1166static int virtnet_cpu_callback(struct notifier_block *nfb,
1167                                unsigned long action, void *hcpu)
1168{
1169        struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb);
1170
 1171        switch (action & ~CPU_TASKS_FROZEN) {
1172        case CPU_ONLINE:
1173        case CPU_DOWN_FAILED:
1174        case CPU_DEAD:
1175                virtnet_set_affinity(vi);
1176                break;
1177        case CPU_DOWN_PREPARE:
1178                virtnet_clean_affinity(vi, (long)hcpu);
1179                break;
1180        default:
1181                break;
1182        }
1183
1184        return NOTIFY_OK;
1185}
1186
1187static void virtnet_get_ringparam(struct net_device *dev,
1188                                struct ethtool_ringparam *ring)
1189{
1190        struct virtnet_info *vi = netdev_priv(dev);
1191
1192        ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
1193        ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
1194        ring->rx_pending = ring->rx_max_pending;
1195        ring->tx_pending = ring->tx_max_pending;
1196}
1197
1198
1199static void virtnet_get_drvinfo(struct net_device *dev,
1200                                struct ethtool_drvinfo *info)
1201{
1202        struct virtnet_info *vi = netdev_priv(dev);
1203        struct virtio_device *vdev = vi->vdev;
1204
1205        strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
1206        strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
1207        strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
1208
1209}
1210
1211/* TODO: Eliminate OOO packets during switching */
1212static int virtnet_set_channels(struct net_device *dev,
1213                                struct ethtool_channels *channels)
1214{
1215        struct virtnet_info *vi = netdev_priv(dev);
1216        u16 queue_pairs = channels->combined_count;
1217        int err;
1218
1219        /* We don't support separate rx/tx channels.
1220         * We don't allow setting 'other' channels.
1221         */
1222        if (channels->rx_count || channels->tx_count || channels->other_count)
1223                return -EINVAL;
1224
1225        if (queue_pairs > vi->max_queue_pairs)
1226                return -EINVAL;
1227
1228        get_online_cpus();
1229        err = virtnet_set_queues(vi, queue_pairs);
1230        if (!err) {
1231                netif_set_real_num_tx_queues(dev, queue_pairs);
1232                netif_set_real_num_rx_queues(dev, queue_pairs);
1233
1234                virtnet_set_affinity(vi);
1235        }
1236        put_online_cpus();
1237
1238        return err;
1239}
1240
1241static void virtnet_get_channels(struct net_device *dev,
1242                                 struct ethtool_channels *channels)
1243{
1244        struct virtnet_info *vi = netdev_priv(dev);
1245
1246        channels->combined_count = vi->curr_queue_pairs;
1247        channels->max_combined = vi->max_queue_pairs;
1248        channels->max_other = 0;
1249        channels->rx_count = 0;
1250        channels->tx_count = 0;
1251        channels->other_count = 0;
1252}
1253
1254static const struct ethtool_ops virtnet_ethtool_ops = {
1255        .get_drvinfo = virtnet_get_drvinfo,
1256        .get_link = ethtool_op_get_link,
1257        .get_ringparam = virtnet_get_ringparam,
1258        .set_channels = virtnet_set_channels,
1259        .get_channels = virtnet_get_channels,
1260};
1261
1262#define MIN_MTU 68
1263#define MAX_MTU 65535
1264
1265static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
1266{
1267        struct virtnet_info *vi = netdev_priv(dev);
1268
1269        if (new_mtu < MIN_MTU || new_mtu > vi->max_mtu)
1270                return -EINVAL;
1271        dev->mtu = new_mtu;
1272        return 0;
1273}
1274
 1275/* To avoid contending a lock held by a vcpu that would exit to the host, select the
1276 * txq based on the processor id.
1277 */
1278static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb,
1279                        void *accel_priv, select_queue_fallback_t fallback)
1280{
1281        int txq;
1282        struct virtnet_info *vi = netdev_priv(dev);
1283
1284        if (skb_rx_queue_recorded(skb)) {
1285                txq = skb_get_rx_queue(skb);
1286        } else {
1287                txq = *__this_cpu_ptr(vi->vq_index);
1288                if (txq == -1)
1289                        txq = 0;
1290        }
1291
1292        while (unlikely(txq >= dev->real_num_tx_queues))
1293                txq -= dev->real_num_tx_queues;
1294
1295        return txq;
1296}
1297
1298static const struct net_device_ops virtnet_netdev = {
1299        .ndo_open            = virtnet_open,
1300        .ndo_stop            = virtnet_close,
1301        .ndo_start_xmit      = start_xmit,
1302        .ndo_validate_addr   = eth_validate_addr,
1303        .ndo_set_mac_address = virtnet_set_mac_address,
1304        .ndo_set_rx_mode     = virtnet_set_rx_mode,
1305        .ndo_change_mtu      = virtnet_change_mtu,
1306        .ndo_get_stats64     = virtnet_stats,
1307        .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
1308        .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
1309        .ndo_select_queue     = virtnet_select_queue,
1310#ifdef CONFIG_NET_POLL_CONTROLLER
1311        .ndo_poll_controller = virtnet_netpoll,
1312#endif
1313        .ndo_features_check     = passthru_features_check,
1314};
1315
1316static void virtnet_config_changed_work(struct work_struct *work)
1317{
1318        struct virtnet_info *vi =
1319                container_of(work, struct virtnet_info, config_work);
1320        u16 v;
1321
1322        if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
1323                                 struct virtio_net_config, status, &v) < 0)
1324                return;
1325
1326        if (v & VIRTIO_NET_S_ANNOUNCE) {
1327                netdev_notify_peers(vi->dev);
1328                virtnet_ack_link_announce(vi);
1329        }
1330
1331        /* Ignore unknown (future) status bits */
1332        v &= VIRTIO_NET_S_LINK_UP;
1333
1334        if (vi->status == v)
1335                return;
1336
1337        vi->status = v;
1338
1339        if (vi->status & VIRTIO_NET_S_LINK_UP) {
1340                netif_carrier_on(vi->dev);
1341                netif_tx_wake_all_queues(vi->dev);
1342        } else {
1343                netif_carrier_off(vi->dev);
1344                netif_tx_stop_all_queues(vi->dev);
1345        }
1346}
1347
1348static void virtnet_config_changed(struct virtio_device *vdev)
1349{
1350        struct virtnet_info *vi = vdev->priv;
1351
1352        schedule_work(&vi->config_work);
1353}
1354
1355static void virtnet_free_queues(struct virtnet_info *vi)
1356{
1357        int i;
1358
1359        for (i = 0; i < vi->max_queue_pairs; i++)
1360                netif_napi_del(&vi->rq[i].napi);
1361
1362        kfree(vi->rq);
1363        kfree(vi->sq);
1364}
1365
1366static void free_receive_bufs(struct virtnet_info *vi)
1367{
1368        int i;
1369
1370        for (i = 0; i < vi->max_queue_pairs; i++) {
1371                while (vi->rq[i].pages)
1372                        __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
1373        }
1374}
1375
1376static void free_unused_bufs(struct virtnet_info *vi)
1377{
1378        void *buf;
1379        int i;
1380
1381        for (i = 0; i < vi->max_queue_pairs; i++) {
1382                struct virtqueue *vq = vi->sq[i].vq;
1383                while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
1384                        dev_kfree_skb(buf);
1385        }
1386
1387        for (i = 0; i < vi->max_queue_pairs; i++) {
1388                struct virtqueue *vq = vi->rq[i].vq;
1389
1390                while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
1391                        if (vi->mergeable_rx_bufs || vi->big_packets)
1392                                give_pages(&vi->rq[i], buf);
1393                        else
1394                                dev_kfree_skb(buf);
1395                        --vi->rq[i].num;
1396                }
1397                BUG_ON(vi->rq[i].num != 0);
1398        }
1399}
1400
1401static void virtnet_del_vqs(struct virtnet_info *vi)
1402{
1403        struct virtio_device *vdev = vi->vdev;
1404
1405        virtnet_clean_affinity(vi, -1);
1406
1407        vdev->config->del_vqs(vdev);
1408
1409        virtnet_free_queues(vi);
1410}
1411
1412static int virtnet_find_vqs(struct virtnet_info *vi)
1413{
1414        vq_callback_t **callbacks;
1415        struct virtqueue **vqs;
1416        int ret = -ENOMEM;
1417        int i, total_vqs;
1418        const char **names;
1419
1420        /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
 1421         * up to N-1 further RX/TX queue pairs used in multiqueue mode, followed by
 1422         * an optional control vq.
1423         */
1424        total_vqs = vi->max_queue_pairs * 2 +
1425                    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
1426
1427        /* Allocate space for find_vqs parameters */
1428        vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
1429        if (!vqs)
1430                goto err_vq;
1431        callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
1432        if (!callbacks)
1433                goto err_callback;
1434        names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
1435        if (!names)
1436                goto err_names;
1437
1438        /* Parameters for control virtqueue, if any */
1439        if (vi->has_cvq) {
1440                callbacks[total_vqs - 1] = NULL;
1441                names[total_vqs - 1] = "control";
1442        }
1443
1444        /* Allocate/initialize parameters for send/receive virtqueues */
1445        for (i = 0; i < vi->max_queue_pairs; i++) {
1446                callbacks[rxq2vq(i)] = skb_recv_done;
1447                callbacks[txq2vq(i)] = skb_xmit_done;
1448                sprintf(vi->rq[i].name, "input.%d", i);
1449                sprintf(vi->sq[i].name, "output.%d", i);
1450                names[rxq2vq(i)] = vi->rq[i].name;
1451                names[txq2vq(i)] = vi->sq[i].name;
1452        }
1453
1454        ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
1455                                         names);
1456        if (ret)
1457                goto err_find;
1458
1459        if (vi->has_cvq) {
1460                vi->cvq = vqs[total_vqs - 1];
1461                if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
1462                        vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
1463        }
1464
1465        for (i = 0; i < vi->max_queue_pairs; i++) {
1466                vi->rq[i].vq = vqs[rxq2vq(i)];
1467                vi->sq[i].vq = vqs[txq2vq(i)];
1468        }
1469
1470        kfree(names);
1471        kfree(callbacks);
1472        kfree(vqs);
1473
1474        return 0;
1475
1476err_find:
1477        kfree(names);
1478err_names:
1479        kfree(callbacks);
1480err_callback:
1481        kfree(vqs);
1482err_vq:
1483        return ret;
1484}
1485
1486static int virtnet_alloc_queues(struct virtnet_info *vi)
1487{
1488        int i;
1489
1490        vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
1491        if (!vi->sq)
1492                goto err_sq;
1493        vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL);
1494        if (!vi->rq)
1495                goto err_rq;
1496
1497        INIT_DELAYED_WORK(&vi->refill, refill_work);
1498        for (i = 0; i < vi->max_queue_pairs; i++) {
1499                vi->rq[i].pages = NULL;
1500                netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
1501                               napi_weight);
1502
1503                sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
1504                sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
1505        }
1506
1507        return 0;
1508
1509err_rq:
1510        kfree(vi->sq);
1511err_sq:
1512        return -ENOMEM;
1513}
1514
1515static int init_vqs(struct virtnet_info *vi)
1516{
1517        int ret;
1518
1519        /* Allocate send & receive queues */
1520        ret = virtnet_alloc_queues(vi);
1521        if (ret)
1522                goto err;
1523
1524        ret = virtnet_find_vqs(vi);
1525        if (ret)
1526                goto err_free;
1527
1528        get_online_cpus();
1529        virtnet_set_affinity(vi);
1530        put_online_cpus();
1531
1532        return 0;
1533
1534err_free:
1535        virtnet_free_queues(vi);
1536err:
1537        return ret;
1538}
1539
1540static int virtnet_probe(struct virtio_device *vdev)
1541{
1542        int i, err;
1543        struct net_device *dev;
1544        struct virtnet_info *vi;
1545        u16 max_queue_pairs;
1546        int mtu;
1547
1548        if (!vdev->config->get) {
1549                dev_err(&vdev->dev, "%s failure: config access disabled\n",
1550                        __func__);
1551                return -EINVAL;
1552        }
1553
 1554        /* Find out whether the host supports a multiqueue virtio_net device */
1555        err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
1556                                   struct virtio_net_config,
1557                                   max_virtqueue_pairs, &max_queue_pairs);
1558
 1559        /* We need at least 2 queues */
1560        if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1561            max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1562            !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
1563                max_queue_pairs = 1;
1564
1565        /* Allocate ourselves a network device with room for our info */
1566        dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
1567        if (!dev)
1568                return -ENOMEM;
1569
1570        /* Set up network device as normal. */
1571        dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
1572        dev->netdev_ops = &virtnet_netdev;
1573        dev->features = NETIF_F_HIGHDMA;
1574
1575        SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
1576        SET_NETDEV_DEV(dev, &vdev->dev);
1577
1578        /* Do we support "hardware" checksums? */
1579        if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
1580                /* This opens up the world of extra features. */
1581                dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
1582                if (csum)
1583                        dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
1584
1585                if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
1586                        dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO
1587                                | NETIF_F_TSO_ECN | NETIF_F_TSO6;
1588                }
1589                /* Individual feature bits: what can host handle? */
1590                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
1591                        dev->hw_features |= NETIF_F_TSO;
1592                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
1593                        dev->hw_features |= NETIF_F_TSO6;
1594                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
1595                        dev->hw_features |= NETIF_F_TSO_ECN;
1596                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
1597                        dev->hw_features |= NETIF_F_UFO;
1598
1599                if (gso)
1600                        dev->features |= dev->hw_features & (NETIF_F_ALL_TSO | NETIF_F_UFO);
1601                /* (!csum && gso) case will be fixed by register_netdev() */
1602        }
1603        if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
1604                dev->features |= NETIF_F_RXCSUM;
1605
1606        dev->vlan_features = dev->features;
1607
1608        /* Configuration may specify what MAC to use.  Otherwise random. */
1609        if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
1610                virtio_cread_bytes(vdev,
1611                                   offsetof(struct virtio_net_config, mac),
1612                                   dev->dev_addr, dev->addr_len);
1613        else
1614                eth_hw_addr_random(dev);
1615
1616        /* Set up our device-specific information */
1617        vi = netdev_priv(dev);
1618        vi->dev = dev;
1619        vi->vdev = vdev;
1620        vdev->priv = vi;
1621        vi->stats = alloc_percpu(struct virtnet_stats);
1622        err = -ENOMEM;
1623        if (vi->stats == NULL)
1624                goto free;
1625
1626        vi->vq_index = alloc_percpu(int);
1627        if (vi->vq_index == NULL)
1628                goto free_stats;
1629
1630        INIT_WORK(&vi->config_work, virtnet_config_changed_work);
1631
1632        /* If we can receive ANY GSO packets, we must allocate large ones. */
1633        if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
1634            virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
1635            virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
1636                vi->big_packets = true;
1637
1638        if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
1639                vi->mergeable_rx_bufs = true;
1640
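            /* Mergeable rx buffers and VIRTIO_F_VERSION_1 both use the
             * longer header that carries num_buffers.
             */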
1641        if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
1642            virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
1643                vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1644        else
1645                vi->hdr_len = sizeof(struct virtio_net_hdr);
1646
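            /* With VIRTIO_F_ANY_LAYOUT the virtio header need not sit in
             * its own descriptor and may share a buffer with packet data.
             */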
1647        if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT))
1648                vi->any_header_sg = true;
1649
1650        if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
1651                vi->has_cvq = true;
1652
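            /* Honour a device-supplied MTU when it is usable; otherwise
             * behave as if the feature had not been offered.
             */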
1653        vi->max_mtu = MAX_MTU;
1654        if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
1655                mtu = virtio_cread16(vdev,
1656                                     offsetof(struct virtio_net_config,
1657                                              mtu));
1658                if (virtnet_change_mtu(dev, mtu))
1659                        __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
1660                else
1661                        vi->max_mtu = mtu;
1662        }
1663
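            /* Reserve headroom so the virtio header can be prepended
             * directly in front of the packet data.
             */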
1664        if (vi->any_header_sg)
1665                dev->needed_headroom = vi->hdr_len;
1666
1667        /* Default to one queue pair per online CPU, up to the device maximum */
1668        if (num_online_cpus() >= max_queue_pairs)
1669                vi->curr_queue_pairs = max_queue_pairs;
1670        else
1671                vi->curr_queue_pairs = num_online_cpus();
1672        vi->max_queue_pairs = max_queue_pairs;
1673
1674        /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
1675        err = init_vqs(vi);
1676        if (err)
1677                goto free_index;
1678
1679        netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
1680        netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
1681
1682        err = register_netdev(dev);
1683        if (err) {
1684                pr_debug("virtio_net: registering device failed\n");
1685                goto free_vqs;
1686        }
1687
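            /* Tell the device the driver is fully set up (DRIVER_OK)
             * before we start posting receive buffers.
             */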
1688        virtio_device_ready(vdev);
1689
1690        /* Last of all, set up some receive buffers. */
1691        for (i = 0; i < vi->curr_queue_pairs; i++) {
1692                try_fill_recv(vi, &vi->rq[i], GFP_KERNEL);
1693
1694                /* If we didn't even get one input buffer, we're useless. */
1695                if (vi->rq[i].num == 0) {
1696                        free_unused_bufs(vi);
1697                        err = -ENOMEM;
1698                        goto free_recv_bufs;
1699                }
1700        }
1701
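            /* Watch CPU hotplug so virtqueue affinity can be updated as
             * CPUs come and go.
             */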
1702        vi->nb.notifier_call = &virtnet_cpu_callback;
1703        err = register_hotcpu_notifier(&vi->nb);
1704        if (err) {
1705                pr_debug("virtio_net: registering cpu notifier failed\n");
1706                goto free_recv_bufs;
1707        }
1708
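            /* Tell the device how many queue pairs to use; the update is
             * done under RTNL.
             */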
1709        rtnl_lock();
1710        virtnet_set_queues(vi, vi->curr_queue_pairs);
1711        rtnl_unlock();
1712
1713        /* Assume the link is up if the device cannot report link status;
1714         * otherwise read the link status from config. */
1715        if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
1716                netif_carrier_off(dev);
1717                schedule_work(&vi->config_work);
1718        } else {
1719                vi->status = VIRTIO_NET_S_LINK_UP;
1720                netif_carrier_on(dev);
1721        }
1722
1723        pr_debug("virtnet: registered device %s with %d RX and TX vqs\n",
1724                 dev->name, max_queue_pairs);
1725
1726        return 0;
1727
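            /* Error unwind: undo the setup steps performed above. */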
1728free_recv_bufs:
1729        vi->vdev->config->reset(vdev);
1730
1731        free_receive_bufs(vi);
1732        unregister_netdev(dev);
1733free_vqs:
1734        cancel_delayed_work_sync(&vi->refill);
1735        virtnet_del_vqs(vi);
1736free_index:
1737        free_percpu(vi->vq_index);
1738free_stats:
1739        free_percpu(vi->stats);
1740free:
1741        free_netdev(dev);
1742        return err;
1743}
1744
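    /* Teardown shared by remove and freeze: quiesce the device, then
     * reclaim any buffers still held by the virtqueues and delete them.
     */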
1745static void remove_vq_common(struct virtnet_info *vi)
1746{
1747        vi->vdev->config->reset(vi->vdev);
1748
1749        /* Free unused buffers in both send and recv, if any. */
1750        free_unused_bufs(vi);
1751
1752        free_receive_bufs(vi);
1753
1754        virtnet_del_vqs(vi);
1755}
1756
1757static void virtnet_remove(struct virtio_device *vdev)
1758{
1759        struct virtnet_info *vi = vdev->priv;
1760
1761        unregister_hotcpu_notifier(&vi->nb);
1762
1763        /* Make sure no work handler is accessing the device. */
1764        flush_work(&vi->config_work);
1765
1766        unregister_netdev(vi->dev);
1767
1768        remove_vq_common(vi);
1769
1771        free_percpu(vi->vq_index);
1772        free_percpu(vi->stats);
1773        free_netdev(vi->dev);
1774}
1775
1776#ifdef CONFIG_PM_SLEEP
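    /* The virtqueues do not survive suspend, so freeze tears them down
     * and restore recreates them from scratch.
     */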
1777static int virtnet_freeze(struct virtio_device *vdev)
1778{
1779        struct virtnet_info *vi = vdev->priv;
1780        int i;
1781
1782        unregister_hotcpu_notifier(&vi->nb);
1783
1784        /* Make sure no work handler is accessing the device */
1785        flush_work(&vi->config_work);
1786
1787        netif_device_detach(vi->dev);
1788        cancel_delayed_work_sync(&vi->refill);
1789
1790        if (netif_running(vi->dev))
1791                for (i = 0; i < vi->max_queue_pairs; i++) {
1792                        napi_disable(&vi->rq[i].napi);
1793                        netif_napi_del(&vi->rq[i].napi);
1794                }
1795
1796        remove_vq_common(vi);
1797
1798        return 0;
1799}
1800
1801static int virtnet_restore(struct virtio_device *vdev)
1802{
1803        struct virtnet_info *vi = vdev->priv;
1804        int err, i;
1805
1806        err = init_vqs(vi);
1807        if (err)
1808                return err;
1809
1810        virtio_device_ready(vdev);
1811
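            /* Repopulate the receive rings, falling back to the deferred
             * refill worker if allocation cannot complete right away.
             */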
1812        if (netif_running(vi->dev)) {
1813                for (i = 0; i < vi->curr_queue_pairs; i++)
1814                        if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
1815                                schedule_delayed_work(&vi->refill, 0);
1816
1817                for (i = 0; i < vi->max_queue_pairs; i++)
1818                        virtnet_napi_enable(&vi->rq[i]);
1819        }
1820
1821        netif_device_attach(vi->dev);
1822
1823        rtnl_lock();
1824        virtnet_set_queues(vi, vi->curr_queue_pairs);
1825        rtnl_unlock();
1826
1827        err = register_hotcpu_notifier(&vi->nb);
1828        if (err)
1829                return err;
1830
1831        return 0;
1832}
1833#endif
1834
1835static struct virtio_device_id id_table[] = {
1836        { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
1837        { 0 },
1838};
1839
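    /* Feature bits this driver is willing to negotiate with the device. */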
1840static unsigned int features[] = {
1841        VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
1842        VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
1843        VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
1844        VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
1845        VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
1846        VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
1847        VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
1848        VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
1849        VIRTIO_NET_F_CTRL_MAC_ADDR,
1850        VIRTIO_F_ANY_LAYOUT,
1851        VIRTIO_NET_F_MTU,
1852};
1853
1854static struct virtio_driver virtio_net_driver = {
1855        .feature_table = features,
1856        .feature_table_size = ARRAY_SIZE(features),
1857        .driver.name =  KBUILD_MODNAME,
1858        .driver.owner = THIS_MODULE,
1859        .id_table =     id_table,
1860        .probe =        virtnet_probe,
1861        .remove =       virtnet_remove,
1862        .config_changed = virtnet_config_changed,
1863#ifdef CONFIG_PM_SLEEP
1864        .freeze =       virtnet_freeze,
1865        .restore =      virtnet_restore,
1866#endif
1867};
1868
1869module_virtio_driver(virtio_net_driver);
1870
1871MODULE_DEVICE_TABLE(virtio, id_table);
1872MODULE_DESCRIPTION("Virtio network driver");
1873MODULE_LICENSE("GPL");
1874