linux/drivers/net/xen-netfront.c
   1/*
   2 * Virtual network driver for conversing with remote driver backends.
   3 *
   4 * Copyright (c) 2002-2005, K A Fraser
   5 * Copyright (c) 2005, XenSource Ltd
   6 *
   7 * This program is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU General Public License version 2
   9 * as published by the Free Software Foundation; or, when distributed
  10 * separately from the Linux kernel or incorporated into other
  11 * software packages, subject to the following license:
  12 *
  13 * Permission is hereby granted, free of charge, to any person obtaining a copy
  14 * of this source file (the "Software"), to deal in the Software without
  15 * restriction, including without limitation the rights to use, copy, modify,
  16 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  17 * and to permit persons to whom the Software is furnished to do so, subject to
  18 * the following conditions:
  19 *
  20 * The above copyright notice and this permission notice shall be included in
  21 * all copies or substantial portions of the Software.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  29 * IN THE SOFTWARE.
  30 */
  31
  32#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  33
  34#include <linux/module.h>
  35#include <linux/kernel.h>
  36#include <linux/netdevice.h>
  37#include <linux/etherdevice.h>
  38#include <linux/skbuff.h>
  39#include <linux/ethtool.h>
  40#include <linux/if_ether.h>
  41#include <net/tcp.h>
  42#include <linux/udp.h>
  43#include <linux/moduleparam.h>
  44#include <linux/mm.h>
  45#include <linux/slab.h>
  46#include <net/ip.h>
  47
  48#include <xen/xen.h>
  49#include <xen/xenbus.h>
  50#include <xen/events.h>
  51#include <xen/page.h>
  52#include <xen/platform_pci.h>
  53#include <xen/grant_table.h>
  54
  55#include <xen/interface/io/netif.h>
  56#include <xen/interface/memory.h>
  57#include <xen/interface/grant_table.h>
  58
  59/* Module parameters */
  60static unsigned int xennet_max_queues;
  61module_param_named(max_queues, xennet_max_queues, uint, 0644);
  62MODULE_PARM_DESC(max_queues,
  63                 "Maximum number of queues per virtual interface");
  64
  65static const struct ethtool_ops xennet_ethtool_ops;
  66
  67struct netfront_cb {
  68        int pull_to;
  69};
  70
  71#define NETFRONT_SKB_CB(skb)    ((struct netfront_cb *)((skb)->cb))
  72
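    /*
     * At most this many bytes of each received packet are pulled into the
     * skb linear area; the rest of the data stays in page fragments (see
     * the pull_to handling in xennet_poll() and handle_incoming_queue()).
     */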
  73#define RX_COPY_THRESHOLD 256
  74
  75#define GRANT_INVALID_REF       0
  76
  77#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
  78#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
  79
  80/* Minimum number of Rx slots (includes slot for GSO metadata). */
  81#define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1)
  82
  83/* Queue name is interface name with "-qNNN" appended */
  84#define QUEUE_NAME_SIZE (IFNAMSIZ + 6)
  85
  86/* IRQ name is queue name with "-tx" or "-rx" appended */
  87#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
  88
  89struct netfront_stats {
  90        u64                     packets;
  91        u64                     bytes;
  92        struct u64_stats_sync   syncp;
  93};
  94
  95struct netfront_info;
  96
  97struct netfront_queue {
  98        unsigned int id; /* Queue ID, 0-based */
  99        char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
 100        struct netfront_info *info;
 101
 102        struct napi_struct napi;
 103
 104        /* Split event channel support: tx_* == rx_* when using a
 105         * single event channel.
 106         */
 107        unsigned int tx_evtchn, rx_evtchn;
 108        unsigned int tx_irq, rx_irq;
 109        /* Only used when split event channels support is enabled */
 110        char tx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-tx */
 111        char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */
 112
 113        spinlock_t   tx_lock;
 114        struct xen_netif_tx_front_ring tx;
 115        int tx_ring_ref;
 116
 117        /*
 118         * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
 119         * are linked from tx_skb_freelist through skb_entry.link.
 120         *
 121         *  NB. Freelist index entries are always going to be less than
 122         *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
 123         *  greater than PAGE_OFFSET: we use this property to distinguish
 124         *  them.
 125         */
 126        union skb_entry {
 127                struct sk_buff *skb;
 128                unsigned long link;
 129        } tx_skbs[NET_TX_RING_SIZE];
 130        grant_ref_t gref_tx_head;
 131        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
 132        struct page *grant_tx_page[NET_TX_RING_SIZE];
 133        unsigned tx_skb_freelist;
 134
 135        spinlock_t   rx_lock ____cacheline_aligned_in_smp;
 136        struct xen_netif_rx_front_ring rx;
 137        int rx_ring_ref;
 138
 139        struct timer_list rx_refill_timer;
 140
 141        struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
 142        grant_ref_t gref_rx_head;
 143        grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
 144};
 145
 146struct netfront_info {
 147        struct list_head list;
 148        struct net_device *netdev;
 149
 150        struct xenbus_device *xbdev;
 151
 152        /* Multi-queue support */
 153        struct netfront_queue *queues;
 154
 155        /* Statistics */
 156        struct netfront_stats __percpu *rx_stats;
 157        struct netfront_stats __percpu *tx_stats;
 158
 159        atomic_t rx_gso_checksum_fixup;
 160};
 161
 162struct netfront_rx_info {
 163        struct xen_netif_rx_response rx;
 164        struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
 165};
 166
 167static void skb_entry_set_link(union skb_entry *list, unsigned short id)
 168{
 169        list->link = id;
 170}
 171
 172static int skb_entry_is_link(const union skb_entry *list)
 173{
 174        BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
 175        return (unsigned long)list->skb < PAGE_OFFSET;
 176}
 177
 178/*
 179 * Helpers for acquiring and freeing slots in tx_skbs[].
 180 */
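    /*
     * Purely illustrative sketch (with a hypothetical 4-entry ring): after
     * xennet_init_queue() the free chain is
     *
     *   tx_skb_freelist = 0, tx_skbs[0].link = 1, ..., tx_skbs[3].link = 4
     *
     * The transmit path claims a slot with
     *
     *   id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
     *
     * and xennet_tx_buf_gc() later returns it with
     *
     *   add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id);
     */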
 181
 182static void add_id_to_freelist(unsigned *head, union skb_entry *list,
 183                               unsigned short id)
 184{
 185        skb_entry_set_link(&list[id], *head);
 186        *head = id;
 187}
 188
 189static unsigned short get_id_from_freelist(unsigned *head,
 190                                           union skb_entry *list)
 191{
 192        unsigned int id = *head;
 193        *head = list[id].link;
 194        return id;
 195}
 196
 197static int xennet_rxidx(RING_IDX idx)
 198{
 199        return idx & (NET_RX_RING_SIZE - 1);
 200}
 201
 202static struct sk_buff *xennet_get_rx_skb(struct netfront_queue *queue,
 203                                         RING_IDX ri)
 204{
 205        int i = xennet_rxidx(ri);
 206        struct sk_buff *skb = queue->rx_skbs[i];
 207        queue->rx_skbs[i] = NULL;
 208        return skb;
 209}
 210
 211static grant_ref_t xennet_get_rx_ref(struct netfront_queue *queue,
 212                                            RING_IDX ri)
 213{
 214        int i = xennet_rxidx(ri);
 215        grant_ref_t ref = queue->grant_rx_ref[i];
 216        queue->grant_rx_ref[i] = GRANT_INVALID_REF;
 217        return ref;
 218}
 219
 220#ifdef CONFIG_SYSFS
 221static const struct attribute_group xennet_dev_group;
 222#endif
 223
 224static bool xennet_can_sg(struct net_device *dev)
 225{
 226        return dev->features & NETIF_F_SG;
 227}
 228
 229
 230static void rx_refill_timeout(unsigned long data)
 231{
 232        struct netfront_queue *queue = (struct netfront_queue *)data;
 233        napi_schedule(&queue->napi);
 234}
 235
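    /*
     * Leave headroom for a worst-case packet: up to MAX_SKB_FRAGS + 1 data
     * slots (linear area plus fragments) and one optional GSO extra-info
     * request, hence the MAX_SKB_FRAGS + 2 margin below.
     */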
 236static int netfront_tx_slot_available(struct netfront_queue *queue)
 237{
 238        return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) <
 239                (NET_TX_RING_SIZE - MAX_SKB_FRAGS - 2);
 240}
 241
 242static void xennet_maybe_wake_tx(struct netfront_queue *queue)
 243{
 244        struct net_device *dev = queue->info->netdev;
 245        struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, queue->id);
 246
 247        if (unlikely(netif_tx_queue_stopped(dev_queue)) &&
 248            netfront_tx_slot_available(queue) &&
 249            likely(netif_running(dev)))
 250                netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
 251}
 252
 253
 254static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
 255{
 256        struct sk_buff *skb;
 257        struct page *page;
 258
 259        skb = __netdev_alloc_skb(queue->info->netdev,
 260                                 RX_COPY_THRESHOLD + NET_IP_ALIGN,
 261                                 GFP_ATOMIC | __GFP_NOWARN);
 262        if (unlikely(!skb))
 263                return NULL;
 264
 265        page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 266        if (!page) {
 267                kfree_skb(skb);
 268                return NULL;
 269        }
 270        skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
 271
 272        /* Align the IP header to a 16-byte boundary */
 273        skb_reserve(skb, NET_IP_ALIGN);
 274        skb->dev = queue->info->netdev;
 275
 276        return skb;
 277}
 278
 279
 280static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
 281{
 282        RING_IDX req_prod = queue->rx.req_prod_pvt;
 283        int notify;
 284
 285        if (unlikely(!netif_carrier_ok(queue->info->netdev)))
 286                return;
 287
 288        for (req_prod = queue->rx.req_prod_pvt;
 289             req_prod - queue->rx.rsp_cons < NET_RX_RING_SIZE;
 290             req_prod++) {
 291                struct sk_buff *skb;
 292                unsigned short id;
 293                grant_ref_t ref;
 294                unsigned long pfn;
 295                struct xen_netif_rx_request *req;
 296
 297                skb = xennet_alloc_one_rx_buffer(queue);
 298                if (!skb)
 299                        break;
 300
 301                id = xennet_rxidx(req_prod);
 302
 303                BUG_ON(queue->rx_skbs[id]);
 304                queue->rx_skbs[id] = skb;
 305
 306                ref = gnttab_claim_grant_reference(&queue->gref_rx_head);
 307                BUG_ON((signed short)ref < 0);
 308                queue->grant_rx_ref[id] = ref;
 309
 310                pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));
 311
 312                req = RING_GET_REQUEST(&queue->rx, req_prod);
 313                gnttab_grant_foreign_access_ref(ref,
 314                                                queue->info->xbdev->otherend_id,
 315                                                pfn_to_mfn(pfn),
 316                                                0);
 317
 318                req->id = id;
 319                req->gref = ref;
 320        }
 321
 322        queue->rx.req_prod_pvt = req_prod;
 323
 324        /* Not enough requests? Try again later. */
 325        if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN) {
 326                mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10));
 327                return;
 328        }
 329
 330        wmb();          /* barrier so backend sees requests */
 331
 332        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->rx, notify);
 333        if (notify)
 334                notify_remote_via_irq(queue->rx_irq);
 335}
 336
 337static int xennet_open(struct net_device *dev)
 338{
 339        struct netfront_info *np = netdev_priv(dev);
 340        unsigned int num_queues = dev->real_num_tx_queues;
 341        unsigned int i = 0;
 342        struct netfront_queue *queue = NULL;
 343
 344        for (i = 0; i < num_queues; ++i) {
 345                queue = &np->queues[i];
 346                napi_enable(&queue->napi);
 347
 348                spin_lock_bh(&queue->rx_lock);
 349                if (netif_carrier_ok(dev)) {
 350                        xennet_alloc_rx_buffers(queue);
 351                        queue->rx.sring->rsp_event = queue->rx.rsp_cons + 1;
 352                        if (RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))
 353                                napi_schedule(&queue->napi);
 354                }
 355                spin_unlock_bh(&queue->rx_lock);
 356        }
 357
 358        netif_tx_start_all_queues(dev);
 359
 360        return 0;
 361}
 362
 363static void xennet_tx_buf_gc(struct netfront_queue *queue)
 364{
 365        RING_IDX cons, prod;
 366        unsigned short id;
 367        struct sk_buff *skb;
 368
 369        BUG_ON(!netif_carrier_ok(queue->info->netdev));
 370
 371        do {
 372                prod = queue->tx.sring->rsp_prod;
 373                rmb(); /* Ensure we see responses up to 'rp'. */
 374
 375                for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
 376                        struct xen_netif_tx_response *txrsp;
 377
 378                        txrsp = RING_GET_RESPONSE(&queue->tx, cons);
 379                        if (txrsp->status == XEN_NETIF_RSP_NULL)
 380                                continue;
 381
 382                        id  = txrsp->id;
 383                        skb = queue->tx_skbs[id].skb;
 384                        if (unlikely(gnttab_query_foreign_access(
 385                                queue->grant_tx_ref[id]) != 0)) {
 386                                pr_alert("%s: warning -- grant still in use by backend domain\n",
 387                                         __func__);
 388                                BUG();
 389                        }
 390                        gnttab_end_foreign_access_ref(
 391                                queue->grant_tx_ref[id], GNTMAP_readonly);
 392                        gnttab_release_grant_reference(
 393                                &queue->gref_tx_head, queue->grant_tx_ref[id]);
 394                        queue->grant_tx_ref[id] = GRANT_INVALID_REF;
 395                        queue->grant_tx_page[id] = NULL;
 396                        add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id);
 397                        dev_kfree_skb_irq(skb);
 398                }
 399
 400                queue->tx.rsp_cons = prod;
 401
 402                /*
 403                 * Set a new event, then check for race with update of tx_cons.
 404                 * Note that it is essential to schedule a callback, no matter
 405                 * how few buffers are pending. Even if there is space in the
 406                 * transmit ring, higher layers may be blocked because too much
 407                 * data is outstanding: in such cases notification from Xen is
 408                 * likely to be the only kick that we'll get.
 409                 */
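                    /* i.e. ask to be notified again once roughly half of the
                     * currently outstanding requests have completed.
                     */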
 410                queue->tx.sring->rsp_event =
 411                        prod + ((queue->tx.sring->req_prod - prod) >> 1) + 1;
 412                mb();           /* update shared area */
 413        } while ((cons == prod) && (prod != queue->tx.sring->rsp_prod));
 414
 415        xennet_maybe_wake_tx(queue);
 416}
 417
 418static struct xen_netif_tx_request *xennet_make_one_txreq(
 419        struct netfront_queue *queue, struct sk_buff *skb,
 420        struct page *page, unsigned int offset, unsigned int len)
 421{
 422        unsigned int id;
 423        struct xen_netif_tx_request *tx;
 424        grant_ref_t ref;
 425
 426        len = min_t(unsigned int, PAGE_SIZE - offset, len);
 427
 428        id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
 429        tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
 430        ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
 431        BUG_ON((signed short)ref < 0);
 432
 433        gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
 434                                        page_to_mfn(page), GNTMAP_readonly);
 435
 436        queue->tx_skbs[id].skb = skb;
 437        queue->grant_tx_page[id] = page;
 438        queue->grant_tx_ref[id] = ref;
 439
 440        tx->id = id;
 441        tx->gref = ref;
 442        tx->offset = offset;
 443        tx->size = len;
 444        tx->flags = 0;
 445
 446        return tx;
 447}
 448
 449static struct xen_netif_tx_request *xennet_make_txreqs(
 450        struct netfront_queue *queue, struct xen_netif_tx_request *tx,
 451        struct sk_buff *skb, struct page *page,
 452        unsigned int offset, unsigned int len)
 453{
 454        /* Skip unused frames from start of page */
 455        page += offset >> PAGE_SHIFT;
 456        offset &= ~PAGE_MASK;
 457
 458        while (len) {
 459                tx->flags |= XEN_NETTXF_more_data;
 460                tx = xennet_make_one_txreq(queue, skb_get(skb),
 461                                           page, offset, len);
 462                page++;
 463                offset = 0;
 464                len -= tx->size;
 465        }
 466
 467        return tx;
 468}
 469
 470/*
 471 * Count how many ring slots are required to send this skb. Each frag
 472 * might be a compound page.
 473 */
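    /*
     * For example (assuming 4 KiB pages), a 7000-byte linear area starting
     * at in-page offset 100 contributes PFN_UP(100 + 7000) = 2 slots; each
     * fragment is counted the same way.
     */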
 474static int xennet_count_skb_slots(struct sk_buff *skb)
 475{
 476        int i, frags = skb_shinfo(skb)->nr_frags;
 477        int pages;
 478
 479        pages = PFN_UP(offset_in_page(skb->data) + skb_headlen(skb));
 480
 481        for (i = 0; i < frags; i++) {
 482                skb_frag_t *frag = skb_shinfo(skb)->frags + i;
 483                unsigned long size = skb_frag_size(frag);
 484                unsigned long offset = frag->page_offset;
 485
 486                /* Skip unused frames from start of page */
 487                offset &= ~PAGE_MASK;
 488
 489                pages += PFN_UP(offset + size);
 490        }
 491
 492        return pages;
 493}
 494
 495static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
 496                               void *accel_priv, select_queue_fallback_t fallback)
 497{
 498        unsigned int num_queues = dev->real_num_tx_queues;
 499        u32 hash;
 500        u16 queue_idx;
 501
 502        /* First, check if there is only one queue */
 503        if (num_queues == 1) {
 504                queue_idx = 0;
 505        } else {
 506                hash = skb_get_hash(skb);
 507                queue_idx = hash % num_queues;
 508        }
 509
 510        return queue_idx;
 511}
 512
 513static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 514{
 515        struct netfront_info *np = netdev_priv(dev);
 516        struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
 517        struct xen_netif_tx_request *tx, *first_tx;
 518        unsigned int i;
 519        int notify;
 520        int slots;
 521        struct page *page;
 522        unsigned int offset;
 523        unsigned int len;
 524        unsigned long flags;
 525        struct netfront_queue *queue = NULL;
 526        unsigned int num_queues = dev->real_num_tx_queues;
 527        u16 queue_index;
 528
 529        /* Drop the packet if no queues are set up */
 530        if (num_queues < 1)
 531                goto drop;
 532        /* Determine which queue to transmit this SKB on */
 533        queue_index = skb_get_queue_mapping(skb);
 534        queue = &np->queues[queue_index];
 535
 536        /* If skb->len is too big for wire format, drop skb and alert
 537         * user about misconfiguration.
 538         */
 539        if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) {
 540                net_alert_ratelimited(
 541                        "xennet: skb->len = %u, too big for wire format\n",
 542                        skb->len);
 543                goto drop;
 544        }
 545
 546        slots = xennet_count_skb_slots(skb);
 547        if (unlikely(slots > MAX_SKB_FRAGS + 1)) {
 548                net_dbg_ratelimited("xennet: skb rides the rocket: %d slots, %d bytes\n",
 549                                    slots, skb->len);
 550                if (skb_linearize(skb))
 551                        goto drop;
 552        }
 553
 554        page = virt_to_page(skb->data);
 555        offset = offset_in_page(skb->data);
 556        len = skb_headlen(skb);
 557
 558        spin_lock_irqsave(&queue->tx_lock, flags);
 559
 560        if (unlikely(!netif_carrier_ok(dev) ||
 561                     (slots > 1 && !xennet_can_sg(dev)) ||
 562                     netif_needs_gso(skb, netif_skb_features(skb)))) {
 563                spin_unlock_irqrestore(&queue->tx_lock, flags);
 564                goto drop;
 565        }
 566
 567        /* First request for the linear area. */
 568        first_tx = tx = xennet_make_one_txreq(queue, skb,
 569                                              page, offset, len);
 570        page++;
 571        offset = 0;
 572        len -= tx->size;
 573
 574        if (skb->ip_summed == CHECKSUM_PARTIAL)
 575                /* local packet? */
 576                tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
 577        else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
 578                /* remote but checksummed. */
 579                tx->flags |= XEN_NETTXF_data_validated;
 580
 581        /* Optional extra info after the first request. */
 582        if (skb_shinfo(skb)->gso_size) {
 583                struct xen_netif_extra_info *gso;
 584
 585                gso = (struct xen_netif_extra_info *)
 586                        RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
 587
 588                tx->flags |= XEN_NETTXF_extra_info;
 589
 590                gso->u.gso.size = skb_shinfo(skb)->gso_size;
 591                gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
 592                        XEN_NETIF_GSO_TYPE_TCPV6 :
 593                        XEN_NETIF_GSO_TYPE_TCPV4;
 594                gso->u.gso.pad = 0;
 595                gso->u.gso.features = 0;
 596
 597                gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
 598                gso->flags = 0;
 599        }
 600
 601        /* Requests for the rest of the linear area. */
 602        tx = xennet_make_txreqs(queue, tx, skb, page, offset, len);
 603
 604        /* Requests for all the frags. */
 605        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 606                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 607                tx = xennet_make_txreqs(queue, tx, skb,
 608                                        skb_frag_page(frag), frag->page_offset,
 609                                        skb_frag_size(frag));
 610        }
 611
 612        /* First request has the packet length. */
 613        first_tx->size = skb->len;
 614
 615        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
 616        if (notify)
 617                notify_remote_via_irq(queue->tx_irq);
 618
 619        u64_stats_update_begin(&tx_stats->syncp);
 620        tx_stats->bytes += skb->len;
 621        tx_stats->packets++;
 622        u64_stats_update_end(&tx_stats->syncp);
 623
 624        /* Note: It is not safe to access skb after xennet_tx_buf_gc()! */
 625        xennet_tx_buf_gc(queue);
 626
 627        if (!netfront_tx_slot_available(queue))
 628                netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
 629
 630        spin_unlock_irqrestore(&queue->tx_lock, flags);
 631
 632        return NETDEV_TX_OK;
 633
 634 drop:
 635        dev->stats.tx_dropped++;
 636        dev_kfree_skb_any(skb);
 637        return NETDEV_TX_OK;
 638}
 639
 640static int xennet_close(struct net_device *dev)
 641{
 642        struct netfront_info *np = netdev_priv(dev);
 643        unsigned int num_queues = dev->real_num_tx_queues;
 644        unsigned int i;
 645        struct netfront_queue *queue;
 646        netif_tx_stop_all_queues(np->netdev);
 647        for (i = 0; i < num_queues; ++i) {
 648                queue = &np->queues[i];
 649                napi_disable(&queue->napi);
 650        }
 651        return 0;
 652}
 653
 654static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
 655                                grant_ref_t ref)
 656{
 657        int new = xennet_rxidx(queue->rx.req_prod_pvt);
 658
 659        BUG_ON(queue->rx_skbs[new]);
 660        queue->rx_skbs[new] = skb;
 661        queue->grant_rx_ref[new] = ref;
 662        RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->id = new;
 663        RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->gref = ref;
 664        queue->rx.req_prod_pvt++;
 665}
 666
 667static int xennet_get_extras(struct netfront_queue *queue,
 668                             struct xen_netif_extra_info *extras,
 669                             RING_IDX rp)
 670
 671{
 672        struct xen_netif_extra_info *extra;
 673        struct device *dev = &queue->info->netdev->dev;
 674        RING_IDX cons = queue->rx.rsp_cons;
 675        int err = 0;
 676
 677        do {
 678                struct sk_buff *skb;
 679                grant_ref_t ref;
 680
 681                if (unlikely(cons + 1 == rp)) {
 682                        if (net_ratelimit())
 683                                dev_warn(dev, "Missing extra info\n");
 684                        err = -EBADR;
 685                        break;
 686                }
 687
 688                extra = (struct xen_netif_extra_info *)
 689                        RING_GET_RESPONSE(&queue->rx, ++cons);
 690
 691                if (unlikely(!extra->type ||
 692                             extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
 693                        if (net_ratelimit())
 694                                dev_warn(dev, "Invalid extra type: %d\n",
 695                                        extra->type);
 696                        err = -EINVAL;
 697                } else {
 698                        memcpy(&extras[extra->type - 1], extra,
 699                               sizeof(*extra));
 700                }
 701
 702                skb = xennet_get_rx_skb(queue, cons);
 703                ref = xennet_get_rx_ref(queue, cons);
 704                xennet_move_rx_slot(queue, skb, ref);
 705        } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
 706
 707        queue->rx.rsp_cons = cons;
 708        return err;
 709}
 710
 711static int xennet_get_responses(struct netfront_queue *queue,
 712                                struct netfront_rx_info *rinfo, RING_IDX rp,
 713                                struct sk_buff_head *list)
 714{
 715        struct xen_netif_rx_response *rx = &rinfo->rx;
 716        struct xen_netif_extra_info *extras = rinfo->extras;
 717        struct device *dev = &queue->info->netdev->dev;
 718        RING_IDX cons = queue->rx.rsp_cons;
 719        struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
 720        grant_ref_t ref = xennet_get_rx_ref(queue, cons);
 721        int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
 722        int slots = 1;
 723        int err = 0;
 724        unsigned long ret;
 725
 726        if (rx->flags & XEN_NETRXF_extra_info) {
 727                err = xennet_get_extras(queue, extras, rp);
 728                cons = queue->rx.rsp_cons;
 729        }
 730
 731        for (;;) {
 732                if (unlikely(rx->status < 0 ||
 733                             rx->offset + rx->status > PAGE_SIZE)) {
 734                        if (net_ratelimit())
 735                                dev_warn(dev, "rx->offset: %u, size: %d\n",
 736                                         rx->offset, rx->status);
 737                        xennet_move_rx_slot(queue, skb, ref);
 738                        err = -EINVAL;
 739                        goto next;
 740                }
 741
 742                /*
 743                 * This definitely indicates a bug, either in this driver or in
 744                 * the backend driver. In future this should flag the bad
 745                 * situation to the system controller to reboot the backend.
 746                 */
 747                if (ref == GRANT_INVALID_REF) {
 748                        if (net_ratelimit())
 749                                dev_warn(dev, "Bad rx response id %d.\n",
 750                                         rx->id);
 751                        err = -EINVAL;
 752                        goto next;
 753                }
 754
 755                ret = gnttab_end_foreign_access_ref(ref, 0);
 756                BUG_ON(!ret);
 757
 758                gnttab_release_grant_reference(&queue->gref_rx_head, ref);
 759
 760                __skb_queue_tail(list, skb);
 761
 762next:
 763                if (!(rx->flags & XEN_NETRXF_more_data))
 764                        break;
 765
 766                if (cons + slots == rp) {
 767                        if (net_ratelimit())
 768                                dev_warn(dev, "Need more slots\n");
 769                        err = -ENOENT;
 770                        break;
 771                }
 772
 773                rx = RING_GET_RESPONSE(&queue->rx, cons + slots);
 774                skb = xennet_get_rx_skb(queue, cons + slots);
 775                ref = xennet_get_rx_ref(queue, cons + slots);
 776                slots++;
 777        }
 778
 779        if (unlikely(slots > max)) {
 780                if (net_ratelimit())
 781                        dev_warn(dev, "Too many slots\n");
 782                err = -E2BIG;
 783        }
 784
 785        if (unlikely(err))
 786                queue->rx.rsp_cons = cons + slots;
 787
 788        return err;
 789}
 790
 791static int xennet_set_skb_gso(struct sk_buff *skb,
 792                              struct xen_netif_extra_info *gso)
 793{
 794        if (!gso->u.gso.size) {
 795                if (net_ratelimit())
 796                        pr_warn("GSO size must not be zero\n");
 797                return -EINVAL;
 798        }
 799
 800        if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
 801            gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
 802                if (net_ratelimit())
 803                        pr_warn("Bad GSO type %d\n", gso->u.gso.type);
 804                return -EINVAL;
 805        }
 806
 807        skb_shinfo(skb)->gso_size = gso->u.gso.size;
 808        skb_shinfo(skb)->gso_type =
 809                (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
 810                SKB_GSO_TCPV4 :
 811                SKB_GSO_TCPV6;
 812
 813        /* Header must be checked, and gso_segs computed. */
 814        skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 815        skb_shinfo(skb)->gso_segs = 0;
 816
 817        return 0;
 818}
 819
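    /*
     * Attach the page backing each follow-on response as a fragment of the
     * skb.  If the fragment array is already full, pull data into the
     * linear area first, which is expected to free a fragment slot.
     */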
 820static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
 821                                  struct sk_buff *skb,
 822                                  struct sk_buff_head *list)
 823{
 824        struct skb_shared_info *shinfo = skb_shinfo(skb);
 825        RING_IDX cons = queue->rx.rsp_cons;
 826        struct sk_buff *nskb;
 827
 828        while ((nskb = __skb_dequeue(list))) {
 829                struct xen_netif_rx_response *rx =
 830                        RING_GET_RESPONSE(&queue->rx, ++cons);
 831                skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
 832
 833                if (shinfo->nr_frags == MAX_SKB_FRAGS) {
 834                        unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 835
 836                        BUG_ON(pull_to <= skb_headlen(skb));
 837                        __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 838                }
 839                BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS);
 840
 841                skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag),
 842                                rx->offset, rx->status, PAGE_SIZE);
 843
 844                skb_shinfo(nskb)->nr_frags = 0;
 845                kfree_skb(nskb);
 846        }
 847
 848        return cons;
 849}
 850
 851static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
 852{
 853        bool recalculate_partial_csum = false;
 854
 855        /*
 856         * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
 857         * peers can fail to set NETRXF_csum_blank when sending a GSO
 858         * frame. In this case force the SKB to CHECKSUM_PARTIAL and
 859         * recalculate the partial checksum.
 860         */
 861        if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
 862                struct netfront_info *np = netdev_priv(dev);
 863                atomic_inc(&np->rx_gso_checksum_fixup);
 864                skb->ip_summed = CHECKSUM_PARTIAL;
 865                recalculate_partial_csum = true;
 866        }
 867
 868        /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
 869        if (skb->ip_summed != CHECKSUM_PARTIAL)
 870                return 0;
 871
 872        return skb_checksum_setup(skb, recalculate_partial_csum);
 873}
 874
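    /*
     * Final per-packet work for skbs gathered during a poll: pull the
     * headers into the linear area, set the protocol, fix up checksums and
     * hand the packets to GRO.  Returns the number of packets dropped.
     */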
 875static int handle_incoming_queue(struct netfront_queue *queue,
 876                                 struct sk_buff_head *rxq)
 877{
 878        struct netfront_stats *rx_stats = this_cpu_ptr(queue->info->rx_stats);
 879        int packets_dropped = 0;
 880        struct sk_buff *skb;
 881
 882        while ((skb = __skb_dequeue(rxq)) != NULL) {
 883                int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 884
 885                if (pull_to > skb_headlen(skb))
 886                        __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 887
 888                /* Ethernet work: Delayed to here as it peeks the header. */
 889                skb->protocol = eth_type_trans(skb, queue->info->netdev);
 890                skb_reset_network_header(skb);
 891
 892                if (checksum_setup(queue->info->netdev, skb)) {
 893                        kfree_skb(skb);
 894                        packets_dropped++;
 895                        queue->info->netdev->stats.rx_errors++;
 896                        continue;
 897                }
 898
 899                u64_stats_update_begin(&rx_stats->syncp);
 900                rx_stats->packets++;
 901                rx_stats->bytes += skb->len;
 902                u64_stats_update_end(&rx_stats->syncp);
 903
 904                /* Pass it up. */
 905                napi_gro_receive(&queue->napi, skb);
 906        }
 907
 908        return packets_dropped;
 909}
 910
 911static int xennet_poll(struct napi_struct *napi, int budget)
 912{
 913        struct netfront_queue *queue = container_of(napi, struct netfront_queue, napi);
 914        struct net_device *dev = queue->info->netdev;
 915        struct sk_buff *skb;
 916        struct netfront_rx_info rinfo;
 917        struct xen_netif_rx_response *rx = &rinfo.rx;
 918        struct xen_netif_extra_info *extras = rinfo.extras;
 919        RING_IDX i, rp;
 920        int work_done;
 921        struct sk_buff_head rxq;
 922        struct sk_buff_head errq;
 923        struct sk_buff_head tmpq;
 924        int err;
 925
 926        spin_lock(&queue->rx_lock);
 927
 928        skb_queue_head_init(&rxq);
 929        skb_queue_head_init(&errq);
 930        skb_queue_head_init(&tmpq);
 931
 932        rp = queue->rx.sring->rsp_prod;
 933        rmb(); /* Ensure we see queued responses up to 'rp'. */
 934
 935        i = queue->rx.rsp_cons;
 936        work_done = 0;
 937        while ((i != rp) && (work_done < budget)) {
 938                memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx));
 939                memset(extras, 0, sizeof(rinfo.extras));
 940
 941                err = xennet_get_responses(queue, &rinfo, rp, &tmpq);
 942
 943                if (unlikely(err)) {
 944err:
 945                        while ((skb = __skb_dequeue(&tmpq)))
 946                                __skb_queue_tail(&errq, skb);
 947                        dev->stats.rx_errors++;
 948                        i = queue->rx.rsp_cons;
 949                        continue;
 950                }
 951
 952                skb = __skb_dequeue(&tmpq);
 953
 954                if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
 955                        struct xen_netif_extra_info *gso;
 956                        gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
 957
 958                        if (unlikely(xennet_set_skb_gso(skb, gso))) {
 959                                __skb_queue_head(&tmpq, skb);
 960                                queue->rx.rsp_cons += skb_queue_len(&tmpq);
 961                                goto err;
 962                        }
 963                }
 964
 965                NETFRONT_SKB_CB(skb)->pull_to = rx->status;
 966                if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
 967                        NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;
 968
 969                skb_shinfo(skb)->frags[0].page_offset = rx->offset;
 970                skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
 971                skb->data_len = rx->status;
 972                skb->len += rx->status;
 973
 974                i = xennet_fill_frags(queue, skb, &tmpq);
 975
 976                if (rx->flags & XEN_NETRXF_csum_blank)
 977                        skb->ip_summed = CHECKSUM_PARTIAL;
 978                else if (rx->flags & XEN_NETRXF_data_validated)
 979                        skb->ip_summed = CHECKSUM_UNNECESSARY;
 980
 981                __skb_queue_tail(&rxq, skb);
 982
 983                queue->rx.rsp_cons = ++i;
 984                work_done++;
 985        }
 986
 987        __skb_queue_purge(&errq);
 988
 989        work_done -= handle_incoming_queue(queue, &rxq);
 990
 991        xennet_alloc_rx_buffers(queue);
 992
 993        if (work_done < budget) {
 994                int more_to_do = 0;
 995
 996                napi_complete(napi);
 997
 998                RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do);
 999                if (more_to_do)
1000                        napi_schedule(napi);
1001        }
1002
1003        spin_unlock(&queue->rx_lock);
1004
1005        return work_done;
1006}
1007
1008static int xennet_change_mtu(struct net_device *dev, int mtu)
1009{
1010        int max = xennet_can_sg(dev) ? XEN_NETIF_MAX_TX_SIZE : ETH_DATA_LEN;
1011
1012        if (mtu > max)
1013                return -EINVAL;
1014        dev->mtu = mtu;
1015        return 0;
1016}
1017
1018static struct rtnl_link_stats64 *xennet_get_stats64(struct net_device *dev,
1019                                                    struct rtnl_link_stats64 *tot)
1020{
1021        struct netfront_info *np = netdev_priv(dev);
1022        int cpu;
1023
1024        for_each_possible_cpu(cpu) {
1025                struct netfront_stats *rx_stats = per_cpu_ptr(np->rx_stats, cpu);
1026                struct netfront_stats *tx_stats = per_cpu_ptr(np->tx_stats, cpu);
1027                u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
1028                unsigned int start;
1029
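                    /* Retry the snapshot if the per-cpu writer updated the
                     * counters while we were reading them.
                     */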
1030                do {
1031                        start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
1032                        tx_packets = tx_stats->packets;
1033                        tx_bytes = tx_stats->bytes;
1034                } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
1035
1036                do {
1037                        start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
1038                        rx_packets = rx_stats->packets;
1039                        rx_bytes = rx_stats->bytes;
1040                } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
1041
1042                tot->rx_packets += rx_packets;
1043                tot->tx_packets += tx_packets;
1044                tot->rx_bytes   += rx_bytes;
1045                tot->tx_bytes   += tx_bytes;
1046        }
1047
1048        tot->rx_errors  = dev->stats.rx_errors;
1049        tot->tx_dropped = dev->stats.tx_dropped;
1050
1051        return tot;
1052}
1053
1054static void xennet_release_tx_bufs(struct netfront_queue *queue)
1055{
1056        struct sk_buff *skb;
1057        int i;
1058
1059        for (i = 0; i < NET_TX_RING_SIZE; i++) {
1060                /* Skip over entries which are actually freelist references */
1061                if (skb_entry_is_link(&queue->tx_skbs[i]))
1062                        continue;
1063
1064                skb = queue->tx_skbs[i].skb;
1065                get_page(queue->grant_tx_page[i]);
1066                gnttab_end_foreign_access(queue->grant_tx_ref[i],
1067                                          GNTMAP_readonly,
1068                                          (unsigned long)page_address(queue->grant_tx_page[i]));
1069                queue->grant_tx_page[i] = NULL;
1070                queue->grant_tx_ref[i] = GRANT_INVALID_REF;
1071                add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i);
1072                dev_kfree_skb_irq(skb);
1073        }
1074}
1075
1076static void xennet_release_rx_bufs(struct netfront_queue *queue)
1077{
1078        int id, ref;
1079
1080        spin_lock_bh(&queue->rx_lock);
1081
1082        for (id = 0; id < NET_RX_RING_SIZE; id++) {
1083                struct sk_buff *skb;
1084                struct page *page;
1085
1086                skb = queue->rx_skbs[id];
1087                if (!skb)
1088                        continue;
1089
1090                ref = queue->grant_rx_ref[id];
1091                if (ref == GRANT_INVALID_REF)
1092                        continue;
1093
1094                page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
1095
1096                /* gnttab_end_foreign_access() needs a page ref until
1097                 * foreign access is ended (which may be deferred).
1098                 */
1099                get_page(page);
1100                gnttab_end_foreign_access(ref, 0,
1101                                          (unsigned long)page_address(page));
1102                queue->grant_rx_ref[id] = GRANT_INVALID_REF;
1103
1104                kfree_skb(skb);
1105        }
1106
1107        spin_unlock_bh(&queue->rx_lock);
1108}
1109
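    /*
     * Drop any offload feature the backend does not advertise: each
     * NETIF_F_* flag is kept only if the matching "feature-*" key in the
     * backend's XenStore directory reads as non-zero.
     */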
1110static netdev_features_t xennet_fix_features(struct net_device *dev,
1111        netdev_features_t features)
1112{
1113        struct netfront_info *np = netdev_priv(dev);
1114        int val;
1115
1116        if (features & NETIF_F_SG) {
1117                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
1118                                 "%d", &val) < 0)
1119                        val = 0;
1120
1121                if (!val)
1122                        features &= ~NETIF_F_SG;
1123        }
1124
1125        if (features & NETIF_F_IPV6_CSUM) {
1126                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1127                                 "feature-ipv6-csum-offload", "%d", &val) < 0)
1128                        val = 0;
1129
1130                if (!val)
1131                        features &= ~NETIF_F_IPV6_CSUM;
1132        }
1133
1134        if (features & NETIF_F_TSO) {
1135                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1136                                 "feature-gso-tcpv4", "%d", &val) < 0)
1137                        val = 0;
1138
1139                if (!val)
1140                        features &= ~NETIF_F_TSO;
1141        }
1142
1143        if (features & NETIF_F_TSO6) {
1144                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1145                                 "feature-gso-tcpv6", "%d", &val) < 0)
1146                        val = 0;
1147
1148                if (!val)
1149                        features &= ~NETIF_F_TSO6;
1150        }
1151
1152        return features;
1153}
1154
1155static int xennet_set_features(struct net_device *dev,
1156        netdev_features_t features)
1157{
1158        if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
1159                netdev_info(dev, "Reducing MTU because no SG offload\n");
1160                dev->mtu = ETH_DATA_LEN;
1161        }
1162
1163        return 0;
1164}
1165
1166static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
1167{
1168        struct netfront_queue *queue = dev_id;
1169        unsigned long flags;
1170
1171        spin_lock_irqsave(&queue->tx_lock, flags);
1172        xennet_tx_buf_gc(queue);
1173        spin_unlock_irqrestore(&queue->tx_lock, flags);
1174
1175        return IRQ_HANDLED;
1176}
1177
1178static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
1179{
1180        struct netfront_queue *queue = dev_id;
1181        struct net_device *dev = queue->info->netdev;
1182
1183        if (likely(netif_carrier_ok(dev) &&
1184                   RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
1185                napi_schedule(&queue->napi);
1186
1187        return IRQ_HANDLED;
1188}
1189
1190static irqreturn_t xennet_interrupt(int irq, void *dev_id)
1191{
1192        xennet_tx_interrupt(irq, dev_id);
1193        xennet_rx_interrupt(irq, dev_id);
1194        return IRQ_HANDLED;
1195}
1196
1197#ifdef CONFIG_NET_POLL_CONTROLLER
1198static void xennet_poll_controller(struct net_device *dev)
1199{
1200        /* Poll each queue */
1201        struct netfront_info *info = netdev_priv(dev);
1202        unsigned int num_queues = dev->real_num_tx_queues;
1203        unsigned int i;
1204        for (i = 0; i < num_queues; ++i)
1205                xennet_interrupt(0, &info->queues[i]);
1206}
1207#endif
1208
1209static const struct net_device_ops xennet_netdev_ops = {
1210        .ndo_open            = xennet_open,
1211        .ndo_stop            = xennet_close,
1212        .ndo_start_xmit      = xennet_start_xmit,
1213        .ndo_change_mtu      = xennet_change_mtu,
1214        .ndo_get_stats64     = xennet_get_stats64,
1215        .ndo_set_mac_address = eth_mac_addr,
1216        .ndo_validate_addr   = eth_validate_addr,
1217        .ndo_fix_features    = xennet_fix_features,
1218        .ndo_set_features    = xennet_set_features,
1219        .ndo_select_queue    = xennet_select_queue,
1220#ifdef CONFIG_NET_POLL_CONTROLLER
1221        .ndo_poll_controller = xennet_poll_controller,
1222#endif
1223};
1224
1225static void xennet_free_netdev(struct net_device *netdev)
1226{
1227        struct netfront_info *np = netdev_priv(netdev);
1228
1229        free_percpu(np->rx_stats);
1230        free_percpu(np->tx_stats);
1231        free_netdev(netdev);
1232}
1233
1234static struct net_device *xennet_create_dev(struct xenbus_device *dev)
1235{
1236        int err;
1237        struct net_device *netdev;
1238        struct netfront_info *np;
1239
1240        netdev = alloc_etherdev_mq(sizeof(struct netfront_info), xennet_max_queues);
1241        if (!netdev)
1242                return ERR_PTR(-ENOMEM);
1243
1244        np                   = netdev_priv(netdev);
1245        np->xbdev            = dev;
1246
1247        np->queues = NULL;
1248
1249        err = -ENOMEM;
1250        np->rx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1251        if (np->rx_stats == NULL)
1252                goto exit;
1253        np->tx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1254        if (np->tx_stats == NULL)
1255                goto exit;
1256
1257        netdev->netdev_ops      = &xennet_netdev_ops;
1258
1259        netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
1260                                  NETIF_F_GSO_ROBUST;
1261        netdev->hw_features     = NETIF_F_SG |
1262                                  NETIF_F_IPV6_CSUM |
1263                                  NETIF_F_TSO | NETIF_F_TSO6;
1264
1265        /*
1266         * Assume that all hw features are available for now. This set
1267         * will be adjusted by the call to netdev_update_features() in
1268         * xennet_connect() which is the earliest point where we can
1269         * negotiate with the backend regarding supported features.
1270         */
1271        netdev->features |= netdev->hw_features;
1272
1273        netdev->ethtool_ops = &xennet_ethtool_ops;
1274        SET_NETDEV_DEV(netdev, &dev->dev);
1275
1276        np->netdev = netdev;
1277
1278        netif_carrier_off(netdev);
1279
1280        return netdev;
1281
1282 exit:
1283        xennet_free_netdev(netdev);
1284        return ERR_PTR(err);
1285}
1286
1287/**
1288 * Entry point to this code when a new device is created.  Allocate the basic
1289 * structures and the ring buffers for communication with the backend, and
1290 * inform the backend of the appropriate details for those.
1291 */
1292static int netfront_probe(struct xenbus_device *dev,
1293                          const struct xenbus_device_id *id)
1294{
1295        int err;
1296        struct net_device *netdev;
1297        struct netfront_info *info;
1298
1299        netdev = xennet_create_dev(dev);
1300        if (IS_ERR(netdev)) {
1301                err = PTR_ERR(netdev);
1302                xenbus_dev_fatal(dev, err, "creating netdev");
1303                return err;
1304        }
1305
1306        info = netdev_priv(netdev);
1307        dev_set_drvdata(&dev->dev, info);
1308#ifdef CONFIG_SYSFS
1309        info->netdev->sysfs_groups[0] = &xennet_dev_group;
1310#endif
1311        err = register_netdev(info->netdev);
1312        if (err) {
1313                pr_warn("%s: register_netdev err=%d\n", __func__, err);
1314                goto fail;
1315        }
1316
1317        return 0;
1318
1319 fail:
1320        xennet_free_netdev(netdev);
1321        dev_set_drvdata(&dev->dev, NULL);
1322        return err;
1323}
1324
1325static void xennet_end_access(int ref, void *page)
1326{
1327        /* This frees the page as a side-effect */
1328        if (ref != GRANT_INVALID_REF)
1329                gnttab_end_foreign_access(ref, 0, (unsigned long)page);
1330}
1331
1332static void xennet_disconnect_backend(struct netfront_info *info)
1333{
1334        unsigned int i = 0;
1335        unsigned int num_queues = info->netdev->real_num_tx_queues;
1336
1337        netif_carrier_off(info->netdev);
1338
1339        for (i = 0; i < num_queues; ++i) {
1340                struct netfront_queue *queue = &info->queues[i];
1341
1342                if (queue->tx_irq && (queue->tx_irq == queue->rx_irq))
1343                        unbind_from_irqhandler(queue->tx_irq, queue);
1344                if (queue->tx_irq && (queue->tx_irq != queue->rx_irq)) {
1345                        unbind_from_irqhandler(queue->tx_irq, queue);
1346                        unbind_from_irqhandler(queue->rx_irq, queue);
1347                }
1348                queue->tx_evtchn = queue->rx_evtchn = 0;
1349                queue->tx_irq = queue->rx_irq = 0;
1350
1351                napi_synchronize(&queue->napi);
1352
1353                xennet_release_tx_bufs(queue);
1354                xennet_release_rx_bufs(queue);
1355                gnttab_free_grant_references(queue->gref_tx_head);
1356                gnttab_free_grant_references(queue->gref_rx_head);
1357
1358                /* End access and free the pages */
1359                xennet_end_access(queue->tx_ring_ref, queue->tx.sring);
1360                xennet_end_access(queue->rx_ring_ref, queue->rx.sring);
1361
1362                queue->tx_ring_ref = GRANT_INVALID_REF;
1363                queue->rx_ring_ref = GRANT_INVALID_REF;
1364                queue->tx.sring = NULL;
1365                queue->rx.sring = NULL;
1366        }
1367}
1368
1369/**
1370 * We are reconnecting to the backend, due to a suspend/resume, or a backend
1371 * driver restart.  We tear down our netif structure and recreate it, but
1372 * leave the device-layer structures intact so that this is transparent to the
1373 * rest of the kernel.
1374 */
1375static int netfront_resume(struct xenbus_device *dev)
1376{
1377        struct netfront_info *info = dev_get_drvdata(&dev->dev);
1378
1379        dev_dbg(&dev->dev, "%s\n", dev->nodename);
1380
1381        xennet_disconnect_backend(info);
1382        return 0;
1383}
1384
1385static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
1386{
1387        char *s, *e, *macstr;
1388        int i;
1389
1390        macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
1391        if (IS_ERR(macstr))
1392                return PTR_ERR(macstr);
1393
1394        for (i = 0; i < ETH_ALEN; i++) {
1395                mac[i] = simple_strtoul(s, &e, 16);
1396                if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
1397                        kfree(macstr);
1398                        return -ENOENT;
1399                }
1400                s = e+1;
1401        }
1402
1403        kfree(macstr);
1404        return 0;
1405}
1406
1407static int setup_netfront_single(struct netfront_queue *queue)
1408{
1409        int err;
1410
1411        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1412        if (err < 0)
1413                goto fail;
1414
1415        err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
1416                                        xennet_interrupt,
1417                                        0, queue->info->netdev->name, queue);
1418        if (err < 0)
1419                goto bind_fail;
1420        queue->rx_evtchn = queue->tx_evtchn;
1421        queue->rx_irq = queue->tx_irq = err;
1422
1423        return 0;
1424
1425bind_fail:
1426        xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1427        queue->tx_evtchn = 0;
1428fail:
1429        return err;
1430}
1431
1432static int setup_netfront_split(struct netfront_queue *queue)
1433{
1434        int err;
1435
1436        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1437        if (err < 0)
1438                goto fail;
1439        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->rx_evtchn);
1440        if (err < 0)
1441                goto alloc_rx_evtchn_fail;
1442
1443        snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
1444                 "%s-tx", queue->name);
1445        err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
1446                                        xennet_tx_interrupt,
1447                                        0, queue->tx_irq_name, queue);
1448        if (err < 0)
1449                goto bind_tx_fail;
1450        queue->tx_irq = err;
1451
1452        snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
1453                 "%s-rx", queue->name);
1454        err = bind_evtchn_to_irqhandler(queue->rx_evtchn,
1455                                        xennet_rx_interrupt,
1456                                        0, queue->rx_irq_name, queue);
1457        if (err < 0)
1458                goto bind_rx_fail;
1459        queue->rx_irq = err;
1460
1461        return 0;
1462
1463bind_rx_fail:
1464        unbind_from_irqhandler(queue->tx_irq, queue);
1465        queue->tx_irq = 0;
1466bind_tx_fail:
1467        xenbus_free_evtchn(queue->info->xbdev, queue->rx_evtchn);
1468        queue->rx_evtchn = 0;
1469alloc_rx_evtchn_fail:
1470        xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1471        queue->tx_evtchn = 0;
1472fail:
1473        return err;
1474}
1475
1476static int setup_netfront(struct xenbus_device *dev,
1477                        struct netfront_queue *queue, unsigned int feature_split_evtchn)
1478{
1479        struct xen_netif_tx_sring *txs;
1480        struct xen_netif_rx_sring *rxs;
1481        grant_ref_t gref;
1482        int err;
1483
1484        queue->tx_ring_ref = GRANT_INVALID_REF;
1485        queue->rx_ring_ref = GRANT_INVALID_REF;
1486        queue->rx.sring = NULL;
1487        queue->tx.sring = NULL;
1488
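            /* Each ring is a single shared page: allocate and initialise it
             * locally, then grant the backend access to the same page.
             */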
1489        txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1490        if (!txs) {
1491                err = -ENOMEM;
1492                xenbus_dev_fatal(dev, err, "allocating tx ring page");
1493                goto fail;
1494        }
1495        SHARED_RING_INIT(txs);
1496        FRONT_RING_INIT(&queue->tx, txs, PAGE_SIZE);
1497
1498        err = xenbus_grant_ring(dev, txs, 1, &gref);
1499        if (err < 0)
1500                goto grant_tx_ring_fail;
1501        queue->tx_ring_ref = gref;
1502
1503        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1504        if (!rxs) {
1505                err = -ENOMEM;
1506                xenbus_dev_fatal(dev, err, "allocating rx ring page");
1507                goto alloc_rx_ring_fail;
1508        }
1509        SHARED_RING_INIT(rxs);
1510        FRONT_RING_INIT(&queue->rx, rxs, PAGE_SIZE);
1511
1512        err = xenbus_grant_ring(dev, rxs, 1, &gref);
1513        if (err < 0)
1514                goto grant_rx_ring_fail;
1515        queue->rx_ring_ref = gref;
1516
1517        if (feature_split_evtchn)
1518                err = setup_netfront_split(queue);
        /* Set up a single event channel if:
         *  a) feature-split-event-channels == 0, or
         *  b) feature-split-event-channels == 1 but the split setup failed.
         */
        if (!feature_split_evtchn || err)
1524                err = setup_netfront_single(queue);
1525
1526        if (err)
1527                goto alloc_evtchn_fail;
1528
1529        return 0;
1530
        /* If we fail to set up netfront, it is safe to simply revoke access
         * to the granted pages because the backend is not accessing them at
         * this point.
         */
1534alloc_evtchn_fail:
1535        gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0);
1536grant_rx_ring_fail:
1537        free_page((unsigned long)rxs);
1538alloc_rx_ring_fail:
1539        gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0);
1540grant_tx_ring_fail:
1541        free_page((unsigned long)txs);
1542fail:
1543        return err;
1544}
1545
1546/* Queue-specific initialisation
1547 * This used to be done in xennet_create_dev() but must now
1548 * be run per-queue.
1549 */
1550static int xennet_init_queue(struct netfront_queue *queue)
1551{
1552        unsigned short i;
1553        int err = 0;
1554
1555        spin_lock_init(&queue->tx_lock);
1556        spin_lock_init(&queue->rx_lock);
1557
1558        setup_timer(&queue->rx_refill_timer, rx_refill_timeout,
1559                    (unsigned long)queue);
1560
1561        snprintf(queue->name, sizeof(queue->name), "%s-q%u",
1562                 queue->info->netdev->name, queue->id);
1563
1564        /* Initialise tx_skbs as a free chain containing every entry. */
1565        queue->tx_skb_freelist = 0;
1566        for (i = 0; i < NET_TX_RING_SIZE; i++) {
1567                skb_entry_set_link(&queue->tx_skbs[i], i+1);
1568                queue->grant_tx_ref[i] = GRANT_INVALID_REF;
1569                queue->grant_tx_page[i] = NULL;
1570        }
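        /* The loop above stores, in each free entry, the index of the next
         * free entry (reusing the slot that otherwise holds the skb
         * pointer), forming an index-based free list headed by
         * tx_skb_freelist.
         */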
1571
1572        /* Clear out rx_skbs */
1573        for (i = 0; i < NET_RX_RING_SIZE; i++) {
1574                queue->rx_skbs[i] = NULL;
1575                queue->grant_rx_ref[i] = GRANT_INVALID_REF;
1576        }
1577
1578        /* A grant for every tx ring slot */
1579        if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
1580                                          &queue->gref_tx_head) < 0) {
1581                pr_alert("can't alloc tx grant refs\n");
1582                err = -ENOMEM;
1583                goto exit;
1584        }
1585
1586        /* A grant for every rx ring slot */
1587        if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
1588                                          &queue->gref_rx_head) < 0) {
1589                pr_alert("can't alloc rx grant refs\n");
1590                err = -ENOMEM;
1591                goto exit_free_tx;
1592        }
1593
1594        return 0;
1595
1596 exit_free_tx:
1597        gnttab_free_grant_references(queue->gref_tx_head);
1598 exit:
1599        return err;
1600}
1601
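/* Illustrative XenStore layouts produced below (paths relative to the
 * frontend's nodename):
 *   single queue, flat:  tx-ring-ref, rx-ring-ref, event-channel
 *                        (or event-channel-tx/-rx when split)
 *   multiple queues:     queue-0/tx-ring-ref, queue-0/rx-ring-ref,
 *                        queue-0/event-channel-tx, queue-0/event-channel-rx, ...
 */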
1602static int write_queue_xenstore_keys(struct netfront_queue *queue,
1603                           struct xenbus_transaction *xbt, int write_hierarchical)
1604{
        /* Write the queue-specific keys into XenStore in the traditional
         * flat way for a single queue, or under per-queue subkeys when
         * there are multiple queues.
         */
1609        struct xenbus_device *dev = queue->info->xbdev;
1610        int err;
1611        const char *message;
1612        char *path;
1613        size_t pathsize;
1614
1615        /* Choose the correct place to write the keys */
1616        if (write_hierarchical) {
1617                pathsize = strlen(dev->nodename) + 10;
1618                path = kzalloc(pathsize, GFP_KERNEL);
1619                if (!path) {
1620                        err = -ENOMEM;
1621                        message = "out of memory while writing ring references";
1622                        goto error;
1623                }
1624                snprintf(path, pathsize, "%s/queue-%u",
1625                                dev->nodename, queue->id);
1626        } else {
1627                path = (char *)dev->nodename;
1628        }
1629
1630        /* Write ring references */
1631        err = xenbus_printf(*xbt, path, "tx-ring-ref", "%u",
1632                        queue->tx_ring_ref);
1633        if (err) {
1634                message = "writing tx-ring-ref";
1635                goto error;
1636        }
1637
1638        err = xenbus_printf(*xbt, path, "rx-ring-ref", "%u",
1639                        queue->rx_ring_ref);
1640        if (err) {
1641                message = "writing rx-ring-ref";
1642                goto error;
1643        }
1644
        /* Write the event channels, taking into account both the shared
         * and the split event channel scenarios.
         */
1648        if (queue->tx_evtchn == queue->rx_evtchn) {
1649                /* Shared event channel */
1650                err = xenbus_printf(*xbt, path,
1651                                "event-channel", "%u", queue->tx_evtchn);
1652                if (err) {
1653                        message = "writing event-channel";
1654                        goto error;
1655                }
1656        } else {
1657                /* Split event channels */
1658                err = xenbus_printf(*xbt, path,
1659                                "event-channel-tx", "%u", queue->tx_evtchn);
1660                if (err) {
1661                        message = "writing event-channel-tx";
1662                        goto error;
1663                }
1664
1665                err = xenbus_printf(*xbt, path,
1666                                "event-channel-rx", "%u", queue->rx_evtchn);
1667                if (err) {
1668                        message = "writing event-channel-rx";
1669                        goto error;
1670                }
1671        }
1672
1673        if (write_hierarchical)
1674                kfree(path);
1675        return 0;
1676
1677error:
1678        if (write_hierarchical)
1679                kfree(path);
1680        xenbus_dev_fatal(dev, err, "%s", message);
1681        return err;
1682}
1683
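/* Tear down the per-queue NAPI instances and rx refill timers before the
 * queue array itself is freed.
 */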
1684static void xennet_destroy_queues(struct netfront_info *info)
1685{
1686        unsigned int i;
1687
1688        rtnl_lock();
1689
1690        for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
1691                struct netfront_queue *queue = &info->queues[i];
1692
1693                if (netif_running(info->netdev))
1694                        napi_disable(&queue->napi);
1695                del_timer_sync(&queue->rx_refill_timer);
1696                netif_napi_del(&queue->napi);
1697        }
1698
1699        rtnl_unlock();
1700
1701        kfree(info->queues);
1702        info->queues = NULL;
1703}
1704
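/* If a queue fails to initialise part-way through, the device is trimmed to
 * the queues created so far; only ending up with zero queues is treated as
 * a hard error.
 */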
1705static int xennet_create_queues(struct netfront_info *info,
1706                                unsigned int num_queues)
1707{
1708        unsigned int i;
1709        int ret;
1710
1711        info->queues = kcalloc(num_queues, sizeof(struct netfront_queue),
1712                               GFP_KERNEL);
1713        if (!info->queues)
1714                return -ENOMEM;
1715
1716        rtnl_lock();
1717
1718        for (i = 0; i < num_queues; i++) {
1719                struct netfront_queue *queue = &info->queues[i];
1720
1721                queue->id = i;
1722                queue->info = info;
1723
1724                ret = xennet_init_queue(queue);
1725                if (ret < 0) {
                        dev_warn(&info->netdev->dev,
                                 "only created %u queues\n", i);
1728                        num_queues = i;
1729                        break;
1730                }
1731
1732                netif_napi_add(queue->info->netdev, &queue->napi,
1733                               xennet_poll, 64);
1734                if (netif_running(info->netdev))
1735                        napi_enable(&queue->napi);
1736        }
1737
1738        netif_set_real_num_tx_queues(info->netdev, num_queues);
1739
1740        rtnl_unlock();
1741
1742        if (num_queues == 0) {
1743                dev_err(&info->netdev->dev, "no queues\n");
1744                return -EINVAL;
1745        }
1746        return 0;
1747}
1748
1749/* Common code used when first setting up, and when resuming. */
1750static int talk_to_netback(struct xenbus_device *dev,
1751                           struct netfront_info *info)
1752{
1753        const char *message;
1754        struct xenbus_transaction xbt;
1755        int err;
1756        unsigned int feature_split_evtchn;
1757        unsigned int i = 0;
1758        unsigned int max_queues = 0;
1759        struct netfront_queue *queue = NULL;
1760        unsigned int num_queues = 1;
1761
1762        info->netdev->irq = 0;
1763
1764        /* Check if backend supports multiple queues */
1765        err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1766                           "multi-queue-max-queues", "%u", &max_queues);
1767        if (err < 0)
1768                max_queues = 1;
1769        num_queues = min(max_queues, xennet_max_queues);
1770
1771        /* Check feature-split-event-channels */
1772        err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1773                           "feature-split-event-channels", "%u",
1774                           &feature_split_evtchn);
1775        if (err < 0)
1776                feature_split_evtchn = 0;
1777
1778        /* Read mac addr. */
1779        err = xen_net_read_mac(dev, info->netdev->dev_addr);
1780        if (err) {
1781                xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
1782                goto out;
1783        }
1784
1785        if (info->queues)
1786                xennet_destroy_queues(info);
1787
1788        err = xennet_create_queues(info, num_queues);
1789        if (err < 0)
1790                goto destroy_ring;
1791
1792        /* Create shared ring, alloc event channel -- for each queue */
1793        for (i = 0; i < num_queues; ++i) {
1794                queue = &info->queues[i];
1795                err = setup_netfront(dev, queue, feature_split_evtchn);
1796                if (err) {
1797                        /* setup_netfront() will tidy up the current
1798                         * queue on error, but we need to clean up
1799                         * those already allocated.
1800                         */
1801                        if (i > 0) {
1802                                rtnl_lock();
1803                                netif_set_real_num_tx_queues(info->netdev, i);
1804                                rtnl_unlock();
1805                                goto destroy_ring;
1806                        } else {
1807                                goto out;
1808                        }
1809                }
1810        }
1811
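        /* All of the ring references, event channels and feature flags below
         * are written inside a single XenStore transaction; if the
         * transaction ends with -EAGAIN (another writer raced with us), it
         * is retried from here.
         */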
1812again:
1813        err = xenbus_transaction_start(&xbt);
1814        if (err) {
1815                xenbus_dev_fatal(dev, err, "starting transaction");
1816                goto destroy_ring;
1817        }
1818
1819        if (num_queues == 1) {
1820                err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0); /* flat */
1821                if (err)
1822                        goto abort_transaction_no_dev_fatal;
1823        } else {
1824                /* Write the number of queues */
1825                err = xenbus_printf(xbt, dev->nodename, "multi-queue-num-queues",
1826                                    "%u", num_queues);
1827                if (err) {
1828                        message = "writing multi-queue-num-queues";
1829                        goto abort_transaction_no_dev_fatal;
1830                }
1831
1832                /* Write the keys for each queue */
1833                for (i = 0; i < num_queues; ++i) {
1834                        queue = &info->queues[i];
1835                        err = write_queue_xenstore_keys(queue, &xbt, 1); /* hierarchical */
1836                        if (err)
1837                                goto abort_transaction_no_dev_fatal;
1838                }
1839        }
1840
1841        /* The remaining keys are not queue-specific */
        err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u", 1);
1844        if (err) {
1845                message = "writing request-rx-copy";
1846                goto abort_transaction;
1847        }
1848
1849        err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
1850        if (err) {
1851                message = "writing feature-rx-notify";
1852                goto abort_transaction;
1853        }
1854
1855        err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
1856        if (err) {
1857                message = "writing feature-sg";
1858                goto abort_transaction;
1859        }
1860
1861        err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
1862        if (err) {
1863                message = "writing feature-gso-tcpv4";
1864                goto abort_transaction;
1865        }
1866
1867        err = xenbus_write(xbt, dev->nodename, "feature-gso-tcpv6", "1");
1868        if (err) {
1869                message = "writing feature-gso-tcpv6";
1870                goto abort_transaction;
1871        }
1872
1873        err = xenbus_write(xbt, dev->nodename, "feature-ipv6-csum-offload",
1874                           "1");
1875        if (err) {
1876                message = "writing feature-ipv6-csum-offload";
1877                goto abort_transaction;
1878        }
1879
1880        err = xenbus_transaction_end(xbt, 0);
1881        if (err) {
1882                if (err == -EAGAIN)
1883                        goto again;
1884                xenbus_dev_fatal(dev, err, "completing transaction");
1885                goto destroy_ring;
1886        }
1887
1888        return 0;
1889
1890 abort_transaction:
1891        xenbus_dev_fatal(dev, err, "%s", message);
1892abort_transaction_no_dev_fatal:
1893        xenbus_transaction_end(xbt, 1);
1894 destroy_ring:
1895        xennet_disconnect_backend(info);
1896        kfree(info->queues);
1897        info->queues = NULL;
1898 out:
1899        return err;
1900}
1901
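/* Connect to the backend: the copying receive path is mandatory for this
 * frontend, so bail out if the backend does not offer feature-rx-copy.
 * Otherwise (re)negotiate rings and event channels via talk_to_netback()
 * and kick every queue so any pending work gets processed.
 */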
1902static int xennet_connect(struct net_device *dev)
1903{
1904        struct netfront_info *np = netdev_priv(dev);
1905        unsigned int num_queues = 0;
1906        int err;
1907        unsigned int feature_rx_copy;
1908        unsigned int j = 0;
1909        struct netfront_queue *queue = NULL;
1910
1911        err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1912                           "feature-rx-copy", "%u", &feature_rx_copy);
1913        if (err != 1)
1914                feature_rx_copy = 0;
1915
1916        if (!feature_rx_copy) {
1917                dev_info(&dev->dev,
1918                         "backend does not support copying receive path\n");
1919                return -ENODEV;
1920        }
1921
1922        err = talk_to_netback(np->xbdev, np);
1923        if (err)
1924                return err;
1925
1926        /* talk_to_netback() sets the correct number of queues */
1927        num_queues = dev->real_num_tx_queues;
1928
1929        rtnl_lock();
1930        netdev_update_features(dev);
1931        rtnl_unlock();
1932
1933        /*
1934         * All public and private state should now be sane.  Get
1935         * ready to start sending and receiving packets and give the driver
1936         * domain a kick because we've probably just requeued some
1937         * packets.
1938         */
1939        netif_carrier_on(np->netdev);
1940        for (j = 0; j < num_queues; ++j) {
1941                queue = &np->queues[j];
1942
1943                notify_remote_via_irq(queue->tx_irq);
1944                if (queue->tx_irq != queue->rx_irq)
1945                        notify_remote_via_irq(queue->rx_irq);
1946
1947                spin_lock_irq(&queue->tx_lock);
1948                xennet_tx_buf_gc(queue);
1949                spin_unlock_irq(&queue->tx_lock);
1950
1951                spin_lock_bh(&queue->rx_lock);
1952                xennet_alloc_rx_buffers(queue);
1953                spin_unlock_bh(&queue->rx_lock);
1954        }
1955
1956        return 0;
1957}
1958
1959/**
1960 * Callback received when the backend's state changes.
1961 */
1962static void netback_changed(struct xenbus_device *dev,
1963                            enum xenbus_state backend_state)
1964{
1965        struct netfront_info *np = dev_get_drvdata(&dev->dev);
1966        struct net_device *netdev = np->netdev;
1967
1968        dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
1969
1970        switch (backend_state) {
1971        case XenbusStateInitialising:
1972        case XenbusStateInitialised:
1973        case XenbusStateReconfiguring:
1974        case XenbusStateReconfigured:
1975        case XenbusStateUnknown:
1976                break;
1977
1978        case XenbusStateInitWait:
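                /* The backend is ready for us: bring the frontend up via
                 * xennet_connect() and, if that succeeds, advertise
                 * ourselves as Connected.
                 */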
1979                if (dev->state != XenbusStateInitialising)
1980                        break;
1981                if (xennet_connect(netdev) != 0)
1982                        break;
1983                xenbus_switch_state(dev, XenbusStateConnected);
1984                break;
1985
1986        case XenbusStateConnected:
1987                netdev_notify_peers(netdev);
1988                break;
1989
1990        case XenbusStateClosed:
1991                if (dev->state == XenbusStateClosed)
1992                        break;
1993                /* Missed the backend's CLOSING state -- fallthrough */
1994        case XenbusStateClosing:
1995                xenbus_frontend_closed(dev);
1996                break;
1997        }
1998}
1999
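/* ethtool statistics: each entry names a counter and records its byte
 * offset inside struct netfront_info; the counters appear in the output of
 * "ethtool -S IFNAME".
 */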
2000static const struct xennet_stat {
2001        char name[ETH_GSTRING_LEN];
2002        u16 offset;
2003} xennet_stats[] = {
2004        {
2005                "rx_gso_checksum_fixup",
2006                offsetof(struct netfront_info, rx_gso_checksum_fixup)
2007        },
2008};
2009
2010static int xennet_get_sset_count(struct net_device *dev, int string_set)
2011{
2012        switch (string_set) {
2013        case ETH_SS_STATS:
2014                return ARRAY_SIZE(xennet_stats);
2015        default:
2016                return -EINVAL;
2017        }
2018}
2019
2020static void xennet_get_ethtool_stats(struct net_device *dev,
                                     struct ethtool_stats *stats, u64 *data)
2022{
2023        void *np = netdev_priv(dev);
2024        int i;
2025
2026        for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2027                data[i] = atomic_read((atomic_t *)(np + xennet_stats[i].offset));
2028}
2029
static void xennet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
2031{
2032        int i;
2033
2034        switch (stringset) {
2035        case ETH_SS_STATS:
2036                for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2037                        memcpy(data + i * ETH_GSTRING_LEN,
2038                               xennet_stats[i].name, ETH_GSTRING_LEN);
2039                break;
2040        }
2041}
2042
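/* Only link state and the driver-specific statistics above are exposed
 * through ethtool; ring, coalescing and similar controls are not
 * implemented by this frontend.
 */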
2043static const struct ethtool_ops xennet_ethtool_ops =
2044{
2045        .get_link = ethtool_op_get_link,
2046
2047        .get_sset_count = xennet_get_sset_count,
2048        .get_ethtool_stats = xennet_get_ethtool_stats,
2049        .get_strings = xennet_get_strings,
2050};
2051
2052#ifdef CONFIG_SYSFS
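/* Legacy sysfs attributes: rxbuf_min and rxbuf_max once controlled the rx
 * buffer pool and are presumably kept only for compatibility; writes are
 * validated and otherwise ignored, and all three attributes report the
 * fixed ring size.
 */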
2053static ssize_t show_rxbuf(struct device *dev,
2054                          struct device_attribute *attr, char *buf)
2055{
2056        return sprintf(buf, "%lu\n", NET_RX_RING_SIZE);
2057}
2058
2059static ssize_t store_rxbuf(struct device *dev,
2060                           struct device_attribute *attr,
2061                           const char *buf, size_t len)
2062{
2063        char *endp;
2064        unsigned long target;
2065
2066        if (!capable(CAP_NET_ADMIN))
2067                return -EPERM;
2068
2069        target = simple_strtoul(buf, &endp, 0);
2070        if (endp == buf)
2071                return -EBADMSG;
2072
2073        /* rxbuf_min and rxbuf_max are no longer configurable. */
2074
2075        return len;
2076}
2077
2078static DEVICE_ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf);
2079static DEVICE_ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf);
2080static DEVICE_ATTR(rxbuf_cur, S_IRUGO, show_rxbuf, NULL);
2081
2082static struct attribute *xennet_dev_attrs[] = {
2083        &dev_attr_rxbuf_min.attr,
2084        &dev_attr_rxbuf_max.attr,
2085        &dev_attr_rxbuf_cur.attr,
2086        NULL
2087};
2088
2089static const struct attribute_group xennet_dev_group = {
2090        .attrs = xennet_dev_attrs
2091};
2092#endif /* CONFIG_SYSFS */
2093
2094static int xennet_remove(struct xenbus_device *dev)
2095{
2096        struct netfront_info *info = dev_get_drvdata(&dev->dev);
2097
2098        dev_dbg(&dev->dev, "%s\n", dev->nodename);
2099
2100        xennet_disconnect_backend(info);
2101
2102        unregister_netdev(info->netdev);
2103
2104        xennet_destroy_queues(info);
2105        xennet_free_netdev(info->netdev);
2106
2107        return 0;
2108}
2109
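/* Bind to xenbus devices of type "vif", i.e. the virtual interfaces the
 * toolstack creates for each guest NIC.
 */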
2110static const struct xenbus_device_id netfront_ids[] = {
2111        { "vif" },
2112        { "" }
2113};
2114
2115static struct xenbus_driver netfront_driver = {
2116        .ids = netfront_ids,
2117        .probe = netfront_probe,
2118        .remove = xennet_remove,
2119        .resume = netfront_resume,
2120        .otherend_changed = netback_changed,
2121};
2122
2123static int __init netif_init(void)
2124{
2125        if (!xen_domain())
2126                return -ENODEV;
2127
2128        if (!xen_has_pv_nic_devices())
2129                return -ENODEV;
2130
2131        pr_info("Initialising Xen virtual ethernet driver\n");
2132
        /* Allow as many queues as there are CPUs, unless xennet_max_queues
         * has already been set via its module parameter.
         */
        if (xennet_max_queues == 0)
                xennet_max_queues = num_online_cpus();
2135
2136        return xenbus_register_frontend(&netfront_driver);
2137}
2138module_init(netif_init);
2139
2140
2141static void __exit netif_exit(void)
2142{
2143        xenbus_unregister_driver(&netfront_driver);
2144}
2145module_exit(netif_exit);
2146
2147MODULE_DESCRIPTION("Xen virtual network device frontend");
2148MODULE_LICENSE("GPL");
2149MODULE_ALIAS("xen:vif");
2150MODULE_ALIAS("xennet");
2151