linux/drivers/net/virtio_net.c
   1/* A network driver using virtio.
   2 *
   3 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
   4 *
   5 * This program is free software; you can redistribute it and/or modify
   6 * it under the terms of the GNU General Public License as published by
   7 * the Free Software Foundation; either version 2 of the License, or
   8 * (at your option) any later version.
   9 *
  10 * This program is distributed in the hope that it will be useful,
  11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 * GNU General Public License for more details.
  14 *
  15 * You should have received a copy of the GNU General Public License
  16 * along with this program; if not, write to the Free Software
  17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18 */
  19//#define DEBUG
  20#include <linux/netdevice.h>
  21#include <linux/etherdevice.h>
  22#include <linux/ethtool.h>
  23#include <linux/module.h>
  24#include <linux/virtio.h>
  25#include <linux/virtio_net.h>
  26#include <linux/scatterlist.h>
  27#include <linux/if_vlan.h>
  28#include <linux/slab.h>
  29
  30static int napi_weight = 128;
  31module_param(napi_weight, int, 0444);
  32
  33static int csum = 1, gso = 1;
  34module_param(csum, bool, 0444);
  35module_param(gso, bool, 0444);
  36
  37/* FIXME: MTU in config. */
  38#define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
  39#define GOOD_COPY_LEN   128
  40
  41#define VIRTNET_SEND_COMMAND_SG_MAX    2
  42
  43struct virtnet_info {
  44        struct virtio_device *vdev;
  45        struct virtqueue *rvq, *svq, *cvq;
  46        struct net_device *dev;
  47        struct napi_struct napi;
  48        unsigned int status;
  49
  50        /* Number of input buffers, and max we've ever had. */
  51        unsigned int num, max;
  52
  53        /* I like... big packets and I cannot lie! */
  54        bool big_packets;
  55
  56        /* Host will merge rx buffers for big packets (shake it! shake it!) */
  57        bool mergeable_rx_bufs;
  58
  59        /* Work struct for refilling if we run low on memory. */
  60        struct delayed_work refill;
  61
  62        /* Chain pages by the private ptr. */
  63        struct page *pages;
  64
  65        /* fragments + linear part + virtio header */
  66        struct scatterlist rx_sg[MAX_SKB_FRAGS + 2];
  67        struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
  68};
  69
  70struct skb_vnet_hdr {
  71        union {
  72                struct virtio_net_hdr hdr;
  73                struct virtio_net_hdr_mrg_rxbuf mhdr;
  74        };
  75        unsigned int num_sg;
  76};
  77
  78struct padded_vnet_hdr {
  79        struct virtio_net_hdr hdr;
  80        /*
   81         * virtio_net_hdr should be in a separate sg buffer because of a
   82         * QEMU bug; the data sg buffer shares the same page as this header sg.
   83         * This padding makes the next sg 16-byte aligned after virtio_net_hdr.
  84         */
  85        char padding[6];
  86};
  87
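     /* The per-skb virtio header state is stashed in the skb control buffer (skb->cb). */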
  88static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
  89{
  90        return (struct skb_vnet_hdr *)skb->cb;
  91}
  92
  93/*
   94 * page->private is used to chain pages for big packets; put the whole
   95 * most recently used list at the front for reuse
  96 */
  97static void give_pages(struct virtnet_info *vi, struct page *page)
  98{
  99        struct page *end;
 100
 101        /* Find end of list, sew whole thing into vi->pages. */
 102        for (end = page; end->private; end = (struct page *)end->private);
 103        end->private = (unsigned long)vi->pages;
 104        vi->pages = page;
 105}
 106
 107static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
 108{
 109        struct page *p = vi->pages;
 110
 111        if (p) {
 112                vi->pages = (struct page *)p->private;
  113                /* clear private here; it is used to chain pages */
 114                p->private = 0;
 115        } else
 116                p = alloc_page(gfp_mask);
 117        return p;
 118}
 119
 120static void skb_xmit_done(struct virtqueue *svq)
 121{
 122        struct virtnet_info *vi = svq->vdev->priv;
 123
 124        /* Suppress further interrupts. */
 125        virtqueue_disable_cb(svq);
 126
 127        /* We were probably waiting for more output buffers. */
 128        netif_wake_queue(vi->dev);
 129}
 130
 131static void set_skb_frag(struct sk_buff *skb, struct page *page,
 132                         unsigned int offset, unsigned int *len)
 133{
 134        int i = skb_shinfo(skb)->nr_frags;
 135        skb_frag_t *f;
 136
 137        f = &skb_shinfo(skb)->frags[i];
 138        f->size = min((unsigned)PAGE_SIZE - offset, *len);
 139        f->page_offset = offset;
 140        f->page = page;
 141
 142        skb->data_len += f->size;
 143        skb->len += f->size;
 144        skb_shinfo(skb)->nr_frags++;
 145        *len -= f->size;
 146}
 147
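     /*
      * Build an skb from a chain of pages: copy the virtio header plus as much
      * data as fits in the skb's linear area, attach any remaining page
      * contents as fragments, and return unused pages to vi->pages.
      */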
 148static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 149                                   struct page *page, unsigned int len)
 150{
 151        struct sk_buff *skb;
 152        struct skb_vnet_hdr *hdr;
 153        unsigned int copy, hdr_len, offset;
 154        char *p;
 155
 156        p = page_address(page);
 157
 158        /* copy small packet so we can reuse these pages for small data */
 159        skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
 160        if (unlikely(!skb))
 161                return NULL;
 162
 163        hdr = skb_vnet_hdr(skb);
 164
 165        if (vi->mergeable_rx_bufs) {
 166                hdr_len = sizeof hdr->mhdr;
 167                offset = hdr_len;
 168        } else {
 169                hdr_len = sizeof hdr->hdr;
 170                offset = sizeof(struct padded_vnet_hdr);
 171        }
 172
 173        memcpy(hdr, p, hdr_len);
 174
 175        len -= hdr_len;
 176        p += offset;
 177
 178        copy = len;
 179        if (copy > skb_tailroom(skb))
 180                copy = skb_tailroom(skb);
 181        memcpy(skb_put(skb, copy), p, copy);
 182
 183        len -= copy;
 184        offset += copy;
 185
 186        while (len) {
 187                set_skb_frag(skb, page, offset, &len);
 188                page = (struct page *)page->private;
 189                offset = 0;
 190        }
 191
 192        if (page)
 193                give_pages(vi, page);
 194
 195        return skb;
 196}
 197
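     /*
      * With mergeable rx buffers the host may spread one packet over several
      * page-sized buffers; hdr->mhdr.num_buffers says how many.  Pull the
      * remaining buffers off the receive queue and attach them as fragments.
      */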
 198static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb)
 199{
 200        struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
 201        struct page *page;
 202        int num_buf, i, len;
 203
 204        num_buf = hdr->mhdr.num_buffers;
 205        while (--num_buf) {
 206                i = skb_shinfo(skb)->nr_frags;
 207                if (i >= MAX_SKB_FRAGS) {
 208                        pr_debug("%s: packet too long\n", skb->dev->name);
 209                        skb->dev->stats.rx_length_errors++;
 210                        return -EINVAL;
 211                }
 212
 213                page = virtqueue_get_buf(vi->rvq, &len);
 214                if (!page) {
 215                        pr_debug("%s: rx error: %d buffers missing\n",
 216                                 skb->dev->name, hdr->mhdr.num_buffers);
 217                        skb->dev->stats.rx_length_errors++;
 218                        return -EINVAL;
 219                }
 220                if (len > PAGE_SIZE)
 221                        len = PAGE_SIZE;
 222
 223                set_skb_frag(skb, page, 0, &len);
 224
 225                --vi->num;
 226        }
 227        return 0;
 228}
 229
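     /*
      * Handle one completed receive buffer: turn it into an skb, apply the
      * checksum and GSO metadata carried in the virtio header, and hand the
      * packet to the network stack.
      */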
 230static void receive_buf(struct net_device *dev, void *buf, unsigned int len)
 231{
 232        struct virtnet_info *vi = netdev_priv(dev);
 233        struct sk_buff *skb;
 234        struct page *page;
 235        struct skb_vnet_hdr *hdr;
 236
 237        if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
 238                pr_debug("%s: short packet %i\n", dev->name, len);
 239                dev->stats.rx_length_errors++;
 240                if (vi->mergeable_rx_bufs || vi->big_packets)
 241                        give_pages(vi, buf);
 242                else
 243                        dev_kfree_skb(buf);
 244                return;
 245        }
 246
 247        if (!vi->mergeable_rx_bufs && !vi->big_packets) {
 248                skb = buf;
 249                len -= sizeof(struct virtio_net_hdr);
 250                skb_trim(skb, len);
 251        } else {
 252                page = buf;
 253                skb = page_to_skb(vi, page, len);
 254                if (unlikely(!skb)) {
 255                        dev->stats.rx_dropped++;
 256                        give_pages(vi, page);
 257                        return;
 258                }
 259                if (vi->mergeable_rx_bufs)
 260                        if (receive_mergeable(vi, skb)) {
 261                                dev_kfree_skb(skb);
 262                                return;
 263                        }
 264        }
 265
 266        hdr = skb_vnet_hdr(skb);
 267        skb->truesize += skb->data_len;
 268        dev->stats.rx_bytes += skb->len;
 269        dev->stats.rx_packets++;
 270
 271        if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
 272                pr_debug("Needs csum!\n");
 273                if (!skb_partial_csum_set(skb,
 274                                          hdr->hdr.csum_start,
 275                                          hdr->hdr.csum_offset))
 276                        goto frame_err;
 277        }
 278
 279        skb->protocol = eth_type_trans(skb, dev);
 280        pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
 281                 ntohs(skb->protocol), skb->len, skb->pkt_type);
 282
 283        if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 284                pr_debug("GSO!\n");
 285                switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 286                case VIRTIO_NET_HDR_GSO_TCPV4:
 287                        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 288                        break;
 289                case VIRTIO_NET_HDR_GSO_UDP:
 290                        skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 291                        break;
 292                case VIRTIO_NET_HDR_GSO_TCPV6:
 293                        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
 294                        break;
 295                default:
 296                        if (net_ratelimit())
 297                                printk(KERN_WARNING "%s: bad gso type %u.\n",
 298                                       dev->name, hdr->hdr.gso_type);
 299                        goto frame_err;
 300                }
 301
 302                if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
 303                        skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 304
 305                skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
 306                if (skb_shinfo(skb)->gso_size == 0) {
 307                        if (net_ratelimit())
 308                                printk(KERN_WARNING "%s: zero gso size.\n",
 309                                       dev->name);
 310                        goto frame_err;
 311                }
 312
 313                /* Header must be checked, and gso_segs computed. */
 314                skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 315                skb_shinfo(skb)->gso_segs = 0;
 316        }
 317
 318        netif_receive_skb(skb);
 319        return;
 320
 321frame_err:
 322        dev->stats.rx_frame_errors++;
 323        dev_kfree_skb(skb);
 324}
 325
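     /*
      * Small-buffer receive: post one skb per descriptor chain, with the
      * virtio header in rx_sg[0] and the skb's linear data in rx_sg[1].
      */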
 326static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp)
 327{
 328        struct sk_buff *skb;
 329        struct skb_vnet_hdr *hdr;
 330        int err;
 331
 332        skb = netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN);
 333        if (unlikely(!skb))
 334                return -ENOMEM;
 335
 336        skb_put(skb, MAX_PACKET_LEN);
 337
 338        hdr = skb_vnet_hdr(skb);
 339        sg_set_buf(vi->rx_sg, &hdr->hdr, sizeof hdr->hdr);
 340
 341        skb_to_sgvec(skb, vi->rx_sg + 1, 0, skb->len);
 342
 343        err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 2, skb, gfp);
 344        if (err < 0)
 345                dev_kfree_skb(skb);
 346
 347        return err;
 348}
 349
 350static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp)
 351{
 352        struct page *first, *list = NULL;
 353        char *p;
 354        int i, err, offset;
 355
 356        /* page in vi->rx_sg[MAX_SKB_FRAGS + 1] is list tail */
 357        for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
 358                first = get_a_page(vi, gfp);
 359                if (!first) {
 360                        if (list)
 361                                give_pages(vi, list);
 362                        return -ENOMEM;
 363                }
 364                sg_set_buf(&vi->rx_sg[i], page_address(first), PAGE_SIZE);
 365
 366                /* chain new page in list head to match sg */
 367                first->private = (unsigned long)list;
 368                list = first;
 369        }
 370
 371        first = get_a_page(vi, gfp);
 372        if (!first) {
 373                give_pages(vi, list);
 374                return -ENOMEM;
 375        }
 376        p = page_address(first);
 377
 378        /* vi->rx_sg[0], vi->rx_sg[1] share the same page */
  379        /* a separate vi->rx_sg[0] for the virtio_net_hdr only, due to a QEMU bug */
 380        sg_set_buf(&vi->rx_sg[0], p, sizeof(struct virtio_net_hdr));
 381
 382        /* vi->rx_sg[1] for data packet, from offset */
 383        offset = sizeof(struct padded_vnet_hdr);
 384        sg_set_buf(&vi->rx_sg[1], p + offset, PAGE_SIZE - offset);
 385
 386        /* chain first in list head */
 387        first->private = (unsigned long)list;
 388        err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2,
 389                                    first, gfp);
 390        if (err < 0)
 391                give_pages(vi, first);
 392
 393        return err;
 394}
 395
 396static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp)
 397{
 398        struct page *page;
 399        int err;
 400
 401        page = get_a_page(vi, gfp);
 402        if (!page)
 403                return -ENOMEM;
 404
 405        sg_init_one(vi->rx_sg, page_address(page), PAGE_SIZE);
 406
 407        err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 1, page, gfp);
 408        if (err < 0)
 409                give_pages(vi, page);
 410
 411        return err;
 412}
 413
 414/* Returns false if we couldn't fill entirely (OOM). */
 415static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
 416{
 417        int err;
 418        bool oom;
 419
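             /*
              * virtqueue_add_buf_gfp() returns the ring's remaining capacity
              * on success, so keep posting buffers until it returns 0 (ring
              * full) or a negative error.
              */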
 420        do {
 421                if (vi->mergeable_rx_bufs)
 422                        err = add_recvbuf_mergeable(vi, gfp);
 423                else if (vi->big_packets)
 424                        err = add_recvbuf_big(vi, gfp);
 425                else
 426                        err = add_recvbuf_small(vi, gfp);
 427
 428                oom = err == -ENOMEM;
 429                if (err < 0)
 430                        break;
 431                ++vi->num;
 432        } while (err > 0);
 433        if (unlikely(vi->num > vi->max))
 434                vi->max = vi->num;
 435        virtqueue_kick(vi->rvq);
 436        return !oom;
 437}
 438
 439static void skb_recv_done(struct virtqueue *rvq)
 440{
 441        struct virtnet_info *vi = rvq->vdev->priv;
  442        /* Schedule NAPI; suppress further interrupts if successful. */
 443        if (napi_schedule_prep(&vi->napi)) {
 444                virtqueue_disable_cb(rvq);
 445                __napi_schedule(&vi->napi);
 446        }
 447}
 448
 449static void virtnet_napi_enable(struct virtnet_info *vi)
 450{
 451        napi_enable(&vi->napi);
 452
  453        /* If all buffers were filled by the other side before we napi_enabled, we
  454         * won't get another interrupt, so process any outstanding packets
  455         * now.  virtnet_poll wants to re-enable the queue, so we disable here.
  456         * We synchronize against interrupts via NAPI_STATE_SCHED. */
 457        if (napi_schedule_prep(&vi->napi)) {
 458                virtqueue_disable_cb(vi->rvq);
 459                __napi_schedule(&vi->napi);
 460        }
 461}
 462
 463static void refill_work(struct work_struct *work)
 464{
 465        struct virtnet_info *vi;
 466        bool still_empty;
 467
 468        vi = container_of(work, struct virtnet_info, refill.work);
 469        napi_disable(&vi->napi);
 470        still_empty = !try_fill_recv(vi, GFP_KERNEL);
 471        virtnet_napi_enable(vi);
 472
  473        /* In theory, this can happen: if we don't get any buffers in,
  474         * we will *never* try to fill again. */
 475        if (still_empty)
 476                schedule_delayed_work(&vi->refill, HZ/2);
 477}
 478
 479static int virtnet_poll(struct napi_struct *napi, int budget)
 480{
 481        struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
 482        void *buf;
 483        unsigned int len, received = 0;
 484
 485again:
 486        while (received < budget &&
 487               (buf = virtqueue_get_buf(vi->rvq, &len)) != NULL) {
 488                receive_buf(vi->dev, buf, len);
 489                --vi->num;
 490                received++;
 491        }
 492
 493        if (vi->num < vi->max / 2) {
 494                if (!try_fill_recv(vi, GFP_ATOMIC))
 495                        schedule_delayed_work(&vi->refill, 0);
 496        }
 497
 498        /* Out of packets? */
 499        if (received < budget) {
 500                napi_complete(napi);
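                     /* virtqueue_enable_cb() returns false if more buffers
                      * arrived while callbacks were disabled; re-disable and
                      * keep polling so none are left unprocessed. */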
 501                if (unlikely(!virtqueue_enable_cb(vi->rvq)) &&
 502                    napi_schedule_prep(napi)) {
 503                        virtqueue_disable_cb(vi->rvq);
 504                        __napi_schedule(napi);
 505                        goto again;
 506                }
 507        }
 508
 509        return received;
 510}
 511
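     /*
      * Reclaim skbs the host has finished transmitting; returns the total
      * number of descriptor slots freed so the caller can gauge capacity.
      */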
 512static unsigned int free_old_xmit_skbs(struct virtnet_info *vi)
 513{
 514        struct sk_buff *skb;
 515        unsigned int len, tot_sgs = 0;
 516
 517        while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) {
 518                pr_debug("Sent skb %p\n", skb);
 519                vi->dev->stats.tx_bytes += skb->len;
 520                vi->dev->stats.tx_packets++;
 521                tot_sgs += skb_vnet_hdr(skb)->num_sg;
 522                dev_kfree_skb_any(skb);
 523        }
 524        return tot_sgs;
 525}
 526
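     /*
      * Translate the skb's checksum and GSO state into a virtio_net_hdr, then
      * post the header followed by the packet fragments to the send virtqueue.
      * Passes back virtqueue_add_buf()'s result: the descriptors still free on
      * success, or a negative error.
      */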
 527static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 528{
 529        struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
 530        const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
 531
 532        pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
 533
 534        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 535                hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
 536                hdr->hdr.csum_start = skb_checksum_start_offset(skb);
 537                hdr->hdr.csum_offset = skb->csum_offset;
 538        } else {
 539                hdr->hdr.flags = 0;
 540                hdr->hdr.csum_offset = hdr->hdr.csum_start = 0;
 541        }
 542
 543        if (skb_is_gso(skb)) {
 544                hdr->hdr.hdr_len = skb_headlen(skb);
 545                hdr->hdr.gso_size = skb_shinfo(skb)->gso_size;
 546                if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
 547                        hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
 548                else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
 549                        hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
 550                else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
 551                        hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
 552                else
 553                        BUG();
 554                if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
 555                        hdr->hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
 556        } else {
 557                hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
 558                hdr->hdr.gso_size = hdr->hdr.hdr_len = 0;
 559        }
 560
 561        hdr->mhdr.num_buffers = 0;
 562
 563        /* Encode metadata header at front. */
 564        if (vi->mergeable_rx_bufs)
 565                sg_set_buf(vi->tx_sg, &hdr->mhdr, sizeof hdr->mhdr);
 566        else
 567                sg_set_buf(vi->tx_sg, &hdr->hdr, sizeof hdr->hdr);
 568
 569        hdr->num_sg = skb_to_sgvec(skb, vi->tx_sg + 1, 0, skb->len) + 1;
 570        return virtqueue_add_buf(vi->svq, vi->tx_sg, hdr->num_sg,
 571                                        0, skb);
 572}
 573
 574static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 575{
 576        struct virtnet_info *vi = netdev_priv(dev);
 577        int capacity;
 578
 579        /* Free up any pending old buffers before queueing new ones. */
 580        free_old_xmit_skbs(vi);
 581
 582        /* Try to transmit */
 583        capacity = xmit_skb(vi, skb);
 584
 585        /* This can happen with OOM and indirect buffers. */
 586        if (unlikely(capacity < 0)) {
 587                if (net_ratelimit()) {
 588                        if (likely(capacity == -ENOMEM)) {
 589                                dev_warn(&dev->dev,
 590                                         "TX queue failure: out of memory\n");
 591                        } else {
 592                                dev->stats.tx_fifo_errors++;
 593                                dev_warn(&dev->dev,
 594                                         "Unexpected TX queue failure: %d\n",
 595                                         capacity);
 596                        }
 597                }
 598                dev->stats.tx_dropped++;
 599                kfree_skb(skb);
 600                return NETDEV_TX_OK;
 601        }
 602        virtqueue_kick(vi->svq);
 603
 604        /* Don't wait up for transmitted skbs to be freed. */
 605        skb_orphan(skb);
 606        nf_reset(skb);
 607
 608        /* Apparently nice girls don't return TX_BUSY; stop the queue
 609         * before it gets out of hand.  Naturally, this wastes entries. */
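             /* Worst case: one descriptor per fragment plus one for the
              * linear data and one for the virtio header. */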
 610        if (capacity < 2+MAX_SKB_FRAGS) {
 611                netif_stop_queue(dev);
 612                if (unlikely(!virtqueue_enable_cb(vi->svq))) {
 613                        /* More just got used, free them then recheck. */
 614                        capacity += free_old_xmit_skbs(vi);
 615                        if (capacity >= 2+MAX_SKB_FRAGS) {
 616                                netif_start_queue(dev);
 617                                virtqueue_disable_cb(vi->svq);
 618                        }
 619                }
 620        }
 621
 622        return NETDEV_TX_OK;
 623}
 624
 625static int virtnet_set_mac_address(struct net_device *dev, void *p)
 626{
 627        struct virtnet_info *vi = netdev_priv(dev);
 628        struct virtio_device *vdev = vi->vdev;
 629        int ret;
 630
 631        ret = eth_mac_addr(dev, p);
 632        if (ret)
 633                return ret;
 634
 635        if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
 636                vdev->config->set(vdev, offsetof(struct virtio_net_config, mac),
 637                                  dev->dev_addr, dev->addr_len);
 638
 639        return 0;
 640}
 641
 642#ifdef CONFIG_NET_POLL_CONTROLLER
 643static void virtnet_netpoll(struct net_device *dev)
 644{
 645        struct virtnet_info *vi = netdev_priv(dev);
 646
 647        napi_schedule(&vi->napi);
 648}
 649#endif
 650
 651static int virtnet_open(struct net_device *dev)
 652{
 653        struct virtnet_info *vi = netdev_priv(dev);
 654
 655        virtnet_napi_enable(vi);
 656        return 0;
 657}
 658
 659/*
 660 * Send command via the control virtqueue and check status.  Commands
 661 * supported by the hypervisor, as indicated by feature bits, should
  662 * never fail unless improperly formatted.
 663 */
 664static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 665                                 struct scatterlist *data, int out, int in)
 666{
 667        struct scatterlist *s, sg[VIRTNET_SEND_COMMAND_SG_MAX + 2];
 668        struct virtio_net_ctrl_hdr ctrl;
 669        virtio_net_ctrl_ack status = ~0;
 670        unsigned int tmp;
 671        int i;
 672
 673        /* Caller should know better */
 674        BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ||
 675                (out + in > VIRTNET_SEND_COMMAND_SG_MAX));
 676
 677        out++; /* Add header */
 678        in++; /* Add return status */
 679
 680        ctrl.class = class;
 681        ctrl.cmd = cmd;
 682
 683        sg_init_table(sg, out + in);
 684
 685        sg_set_buf(&sg[0], &ctrl, sizeof(ctrl));
 686        for_each_sg(data, s, out + in - 2, i)
 687                sg_set_buf(&sg[i + 1], sg_virt(s), s->length);
 688        sg_set_buf(&sg[out + in - 1], &status, sizeof(status));
 689
 690        BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi) < 0);
 691
 692        virtqueue_kick(vi->cvq);
 693
 694        /*
  695         * Spin for a response; the kick causes an ioport write, trapping
 696         * into the hypervisor, so the request should be handled immediately.
 697         */
 698        while (!virtqueue_get_buf(vi->cvq, &tmp))
 699                cpu_relax();
 700
 701        return status == VIRTIO_NET_OK;
 702}
 703
 704static int virtnet_close(struct net_device *dev)
 705{
 706        struct virtnet_info *vi = netdev_priv(dev);
 707
 708        napi_disable(&vi->napi);
 709
 710        return 0;
 711}
 712
 713static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
 714{
 715        struct virtnet_info *vi = netdev_priv(dev);
 716        struct virtio_device *vdev = vi->vdev;
 717
 718        if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM))
 719                return -ENOSYS;
 720
 721        return ethtool_op_set_tx_hw_csum(dev, data);
 722}
 723
 724static void virtnet_set_rx_mode(struct net_device *dev)
 725{
 726        struct virtnet_info *vi = netdev_priv(dev);
 727        struct scatterlist sg[2];
 728        u8 promisc, allmulti;
 729        struct virtio_net_ctrl_mac *mac_data;
 730        struct netdev_hw_addr *ha;
 731        int uc_count;
 732        int mc_count;
 733        void *buf;
 734        int i;
 735
  736        /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
 737        if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
 738                return;
 739
 740        promisc = ((dev->flags & IFF_PROMISC) != 0);
 741        allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
 742
 743        sg_init_one(sg, &promisc, sizeof(promisc));
 744
 745        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 746                                  VIRTIO_NET_CTRL_RX_PROMISC,
 747                                  sg, 1, 0))
 748                dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
 749                         promisc ? "en" : "dis");
 750
 751        sg_init_one(sg, &allmulti, sizeof(allmulti));
 752
 753        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 754                                  VIRTIO_NET_CTRL_RX_ALLMULTI,
 755                                  sg, 1, 0))
 756                dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
 757                         allmulti ? "en" : "dis");
 758
 759        uc_count = netdev_uc_count(dev);
 760        mc_count = netdev_mc_count(dev);
 761        /* MAC filter - use one buffer for both lists */
 762        buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
 763                      (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
 764        mac_data = buf;
 765        if (!buf) {
 766                dev_warn(&dev->dev, "No memory for MAC address buffer\n");
 767                return;
 768        }
 769
 770        sg_init_table(sg, 2);
 771
 772        /* Store the unicast list and count in the front of the buffer */
 773        mac_data->entries = uc_count;
 774        i = 0;
 775        netdev_for_each_uc_addr(ha, dev)
 776                memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
 777
 778        sg_set_buf(&sg[0], mac_data,
 779                   sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
 780
 781        /* multicast list and count fill the end */
 782        mac_data = (void *)&mac_data->macs[uc_count][0];
 783
 784        mac_data->entries = mc_count;
 785        i = 0;
 786        netdev_for_each_mc_addr(ha, dev)
 787                memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
 788
 789        sg_set_buf(&sg[1], mac_data,
 790                   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
 791
 792        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
 793                                  VIRTIO_NET_CTRL_MAC_TABLE_SET,
 794                                  sg, 2, 0))
  795                dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
 796
 797        kfree(buf);
 798}
 799
 800static void virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid)
 801{
 802        struct virtnet_info *vi = netdev_priv(dev);
 803        struct scatterlist sg;
 804
 805        sg_init_one(&sg, &vid, sizeof(vid));
 806
 807        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
 808                                  VIRTIO_NET_CTRL_VLAN_ADD, &sg, 1, 0))
 809                dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
 810}
 811
 812static void virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
 813{
 814        struct virtnet_info *vi = netdev_priv(dev);
 815        struct scatterlist sg;
 816
 817        sg_init_one(&sg, &vid, sizeof(vid));
 818
 819        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
 820                                  VIRTIO_NET_CTRL_VLAN_DEL, &sg, 1, 0))
 821                dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
 822}
 823
 824static const struct ethtool_ops virtnet_ethtool_ops = {
 825        .set_tx_csum = virtnet_set_tx_csum,
 826        .set_sg = ethtool_op_set_sg,
 827        .set_tso = ethtool_op_set_tso,
 828        .set_ufo = ethtool_op_set_ufo,
 829        .get_link = ethtool_op_get_link,
 830};
 831
 832#define MIN_MTU 68
 833#define MAX_MTU 65535
 834
 835static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
 836{
 837        if (new_mtu < MIN_MTU || new_mtu > MAX_MTU)
 838                return -EINVAL;
 839        dev->mtu = new_mtu;
 840        return 0;
 841}
 842
 843static const struct net_device_ops virtnet_netdev = {
 844        .ndo_open            = virtnet_open,
 845        .ndo_stop            = virtnet_close,
 846        .ndo_start_xmit      = start_xmit,
 847        .ndo_validate_addr   = eth_validate_addr,
 848        .ndo_set_mac_address = virtnet_set_mac_address,
 849        .ndo_set_rx_mode     = virtnet_set_rx_mode,
 850        .ndo_change_mtu      = virtnet_change_mtu,
 851        .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
 852        .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
 853#ifdef CONFIG_NET_POLL_CONTROLLER
 854        .ndo_poll_controller = virtnet_netpoll,
 855#endif
 856};
 857
 858static void virtnet_update_status(struct virtnet_info *vi)
 859{
 860        u16 v;
 861
 862        if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS))
 863                return;
 864
 865        vi->vdev->config->get(vi->vdev,
 866                              offsetof(struct virtio_net_config, status),
 867                              &v, sizeof(v));
 868
 869        /* Ignore unknown (future) status bits */
 870        v &= VIRTIO_NET_S_LINK_UP;
 871
 872        if (vi->status == v)
 873                return;
 874
 875        vi->status = v;
 876
 877        if (vi->status & VIRTIO_NET_S_LINK_UP) {
 878                netif_carrier_on(vi->dev);
 879                netif_wake_queue(vi->dev);
 880        } else {
 881                netif_carrier_off(vi->dev);
 882                netif_stop_queue(vi->dev);
 883        }
 884}
 885
 886static void virtnet_config_changed(struct virtio_device *vdev)
 887{
 888        struct virtnet_info *vi = vdev->priv;
 889
 890        virtnet_update_status(vi);
 891}
 892
 893static int virtnet_probe(struct virtio_device *vdev)
 894{
 895        int err;
 896        struct net_device *dev;
 897        struct virtnet_info *vi;
 898        struct virtqueue *vqs[3];
 899        vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
 900        const char *names[] = { "input", "output", "control" };
 901        int nvqs;
 902
 903        /* Allocate ourselves a network device with room for our info */
 904        dev = alloc_etherdev(sizeof(struct virtnet_info));
 905        if (!dev)
 906                return -ENOMEM;
 907
 908        /* Set up network device as normal. */
 909        dev->netdev_ops = &virtnet_netdev;
 910        dev->features = NETIF_F_HIGHDMA;
 911        SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
 912        SET_NETDEV_DEV(dev, &vdev->dev);
 913
 914        /* Do we support "hardware" checksums? */
 915        if (csum && virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
 916                /* This opens up the world of extra features. */
 917                dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
 918                if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
 919                        dev->features |= NETIF_F_TSO | NETIF_F_UFO
 920                                | NETIF_F_TSO_ECN | NETIF_F_TSO6;
 921                }
 922                /* Individual feature bits: what can host handle? */
 923                if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
 924                        dev->features |= NETIF_F_TSO;
 925                if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
 926                        dev->features |= NETIF_F_TSO6;
 927                if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
 928                        dev->features |= NETIF_F_TSO_ECN;
 929                if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
 930                        dev->features |= NETIF_F_UFO;
 931        }
 932
 933        /* Configuration may specify what MAC to use.  Otherwise random. */
 934        if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
 935                vdev->config->get(vdev,
 936                                  offsetof(struct virtio_net_config, mac),
 937                                  dev->dev_addr, dev->addr_len);
 938        } else
 939                random_ether_addr(dev->dev_addr);
 940
 941        /* Set up our device-specific information */
 942        vi = netdev_priv(dev);
 943        netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
 944        vi->dev = dev;
 945        vi->vdev = vdev;
 946        vdev->priv = vi;
 947        vi->pages = NULL;
 948        INIT_DELAYED_WORK(&vi->refill, refill_work);
 949        sg_init_table(vi->rx_sg, ARRAY_SIZE(vi->rx_sg));
 950        sg_init_table(vi->tx_sg, ARRAY_SIZE(vi->tx_sg));
 951
 952        /* If we can receive ANY GSO packets, we must allocate large ones. */
 953        if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
 954            virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
 955            virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
 956                vi->big_packets = true;
 957
 958        if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 959                vi->mergeable_rx_bufs = true;
 960
 961        /* We expect two virtqueues, receive then send,
 962         * and optionally control. */
 963        nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
 964
 965        err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
 966        if (err)
 967                goto free;
 968
 969        vi->rvq = vqs[0];
 970        vi->svq = vqs[1];
 971
 972        if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
 973                vi->cvq = vqs[2];
 974
 975                if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
 976                        dev->features |= NETIF_F_HW_VLAN_FILTER;
 977        }
 978
 979        err = register_netdev(dev);
 980        if (err) {
 981                pr_debug("virtio_net: registering device failed\n");
 982                goto free_vqs;
 983        }
 984
 985        /* Last of all, set up some receive buffers. */
 986        try_fill_recv(vi, GFP_KERNEL);
 987
 988        /* If we didn't even get one input buffer, we're useless. */
 989        if (vi->num == 0) {
 990                err = -ENOMEM;
 991                goto unregister;
 992        }
 993
 994        /* Assume link up if device can't report link status,
 995           otherwise get link status from config. */
 996        if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
 997                netif_carrier_off(dev);
 998                virtnet_update_status(vi);
 999        } else {
1000                vi->status = VIRTIO_NET_S_LINK_UP;
1001                netif_carrier_on(dev);
1002        }
1003
1004        pr_debug("virtnet: registered device %s\n", dev->name);
1005        return 0;
1006
1007unregister:
1008        unregister_netdev(dev);
1009        cancel_delayed_work_sync(&vi->refill);
1010free_vqs:
1011        vdev->config->del_vqs(vdev);
1012free:
1013        free_netdev(dev);
1014        return err;
1015}
1016
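     /*
      * Drain any buffers still sitting in the send and receive rings.  Called
      * on remove, after the device has been reset, so nothing is in flight.
      */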
1017static void free_unused_bufs(struct virtnet_info *vi)
1018{
1019        void *buf;
1020        while (1) {
1021                buf = virtqueue_detach_unused_buf(vi->svq);
1022                if (!buf)
1023                        break;
1024                dev_kfree_skb(buf);
1025        }
1026        while (1) {
1027                buf = virtqueue_detach_unused_buf(vi->rvq);
1028                if (!buf)
1029                        break;
1030                if (vi->mergeable_rx_bufs || vi->big_packets)
1031                        give_pages(vi, buf);
1032                else
1033                        dev_kfree_skb(buf);
1034                --vi->num;
1035        }
1036        BUG_ON(vi->num != 0);
1037}
1038
1039static void __devexit virtnet_remove(struct virtio_device *vdev)
1040{
1041        struct virtnet_info *vi = vdev->priv;
1042
1043        /* Stop all the virtqueues. */
1044        vdev->config->reset(vdev);
1045
1046
1047        unregister_netdev(vi->dev);
1048        cancel_delayed_work_sync(&vi->refill);
1049
1050        /* Free unused buffers in both send and recv, if any. */
1051        free_unused_bufs(vi);
1052
1053        vdev->config->del_vqs(vi->vdev);
1054
1055        while (vi->pages)
1056                __free_pages(get_a_page(vi, GFP_KERNEL), 0);
1057
1058        free_netdev(vi->dev);
1059}
1060
1061static struct virtio_device_id id_table[] = {
1062        { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
1063        { 0 },
1064};
1065
1066static unsigned int features[] = {
1067        VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
1068        VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
1069        VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
1070        VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
1071        VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
1072        VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
1073        VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
1074};
1075
1076static struct virtio_driver virtio_net_driver = {
1077        .feature_table = features,
1078        .feature_table_size = ARRAY_SIZE(features),
1079        .driver.name =  KBUILD_MODNAME,
1080        .driver.owner = THIS_MODULE,
1081        .id_table =     id_table,
1082        .probe =        virtnet_probe,
1083        .remove =       __devexit_p(virtnet_remove),
1084        .config_changed = virtnet_config_changed,
1085};
1086
1087static int __init init(void)
1088{
1089        return register_virtio_driver(&virtio_net_driver);
1090}
1091
1092static void __exit fini(void)
1093{
1094        unregister_virtio_driver(&virtio_net_driver);
1095}
1096module_init(init);
1097module_exit(fini);
1098
1099MODULE_DEVICE_TABLE(virtio, id_table);
1100MODULE_DESCRIPTION("Virtio network driver");
1101MODULE_LICENSE("GPL");
1102