linux/drivers/net/xen-netfront.c
   1/*
   2 * Virtual network driver for conversing with remote driver backends.
   3 *
   4 * Copyright (c) 2002-2005, K A Fraser
   5 * Copyright (c) 2005, XenSource Ltd
   6 *
   7 * This program is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU General Public License version 2
   9 * as published by the Free Software Foundation; or, when distributed
  10 * separately from the Linux kernel or incorporated into other
  11 * software packages, subject to the following license:
  12 *
  13 * Permission is hereby granted, free of charge, to any person obtaining a copy
  14 * of this source file (the "Software"), to deal in the Software without
  15 * restriction, including without limitation the rights to use, copy, modify,
  16 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  17 * and to permit persons to whom the Software is furnished to do so, subject to
  18 * the following conditions:
  19 *
  20 * The above copyright notice and this permission notice shall be included in
  21 * all copies or substantial portions of the Software.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  29 * IN THE SOFTWARE.
  30 */
  31
  32#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  33
  34#include <linux/module.h>
  35#include <linux/kernel.h>
  36#include <linux/netdevice.h>
  37#include <linux/etherdevice.h>
  38#include <linux/skbuff.h>
  39#include <linux/ethtool.h>
  40#include <linux/if_ether.h>
  41#include <net/tcp.h>
  42#include <linux/udp.h>
  43#include <linux/moduleparam.h>
  44#include <linux/mm.h>
  45#include <linux/slab.h>
  46#include <net/ip.h>
  47
  48#include <xen/xen.h>
  49#include <xen/xenbus.h>
  50#include <xen/events.h>
  51#include <xen/page.h>
  52#include <xen/platform_pci.h>
  53#include <xen/grant_table.h>
  54
  55#include <xen/interface/io/netif.h>
  56#include <xen/interface/memory.h>
  57#include <xen/interface/grant_table.h>
  58
  59/* Module parameters */
  60#define MAX_QUEUES_DEFAULT 8
  61static unsigned int xennet_max_queues;
  62module_param_named(max_queues, xennet_max_queues, uint, 0644);
  63MODULE_PARM_DESC(max_queues,
  64                 "Maximum number of queues per virtual interface");
  65
  66static const struct ethtool_ops xennet_ethtool_ops;
  67
  68struct netfront_cb {
  69        int pull_to;
  70};
  71
  72#define NETFRONT_SKB_CB(skb)    ((struct netfront_cb *)((skb)->cb))
  73
  74#define RX_COPY_THRESHOLD 256
  75
  76#define GRANT_INVALID_REF       0
  77
  78#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE)
  79#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, XEN_PAGE_SIZE)
  80
  81/* Minimum number of Rx slots (includes slot for GSO metadata). */
  82#define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1)
  83
  84/* Queue name is interface name with "-qNNN" appended */
  85#define QUEUE_NAME_SIZE (IFNAMSIZ + 6)
  86
  87/* IRQ name is queue name with "-tx" or "-rx" appended */
  88#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
  89
  90static DECLARE_WAIT_QUEUE_HEAD(module_wq);
  91
  92struct netfront_stats {
  93        u64                     packets;
  94        u64                     bytes;
  95        struct u64_stats_sync   syncp;
  96};
  97
  98struct netfront_info;
  99
 100struct netfront_queue {
 101        unsigned int id; /* Queue ID, 0-based */
 102        char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
 103        struct netfront_info *info;
 104
 105        struct napi_struct napi;
 106
 107        /* Split event channels support, tx_* == rx_* when using
 108         * single event channel.
 109         */
 110        unsigned int tx_evtchn, rx_evtchn;
 111        unsigned int tx_irq, rx_irq;
 112        /* Only used when split event channels support is enabled */
 113        char tx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-tx */
 114        char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */
 115
 116        spinlock_t   tx_lock;
 117        struct xen_netif_tx_front_ring tx;
 118        int tx_ring_ref;
 119
 120        /*
 121         * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
 122         * are linked from tx_skb_freelist through skb_entry.link.
 123         *
 124         *  NB. Freelist index entries are always going to be less than
  125         *  PAGE_OFFSET, whereas pointers to skbs will always be equal to or
 126         *  greater than PAGE_OFFSET: we use this property to distinguish
 127         *  them.
 128         */
 129        union skb_entry {
 130                struct sk_buff *skb;
 131                unsigned long link;
 132        } tx_skbs[NET_TX_RING_SIZE];
 133        grant_ref_t gref_tx_head;
 134        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
 135        struct page *grant_tx_page[NET_TX_RING_SIZE];
 136        unsigned tx_skb_freelist;
 137
 138        spinlock_t   rx_lock ____cacheline_aligned_in_smp;
 139        struct xen_netif_rx_front_ring rx;
 140        int rx_ring_ref;
 141
 142        struct timer_list rx_refill_timer;
 143
 144        struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
 145        grant_ref_t gref_rx_head;
 146        grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
 147};
 148
 149struct netfront_info {
 150        struct list_head list;
 151        struct net_device *netdev;
 152
 153        struct xenbus_device *xbdev;
 154
 155        /* Multi-queue support */
 156        struct netfront_queue *queues;
 157
 158        /* Statistics */
 159        struct netfront_stats __percpu *rx_stats;
 160        struct netfront_stats __percpu *tx_stats;
 161
 162        atomic_t rx_gso_checksum_fixup;
 163};
 164
 165struct netfront_rx_info {
 166        struct xen_netif_rx_response rx;
 167        struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
 168};
 169
 170static void skb_entry_set_link(union skb_entry *list, unsigned short id)
 171{
 172        list->link = id;
 173}
 174
 175static int skb_entry_is_link(const union skb_entry *list)
 176{
 177        BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
 178        return (unsigned long)list->skb < PAGE_OFFSET;
 179}
 180
 181/*
  182 * Access helpers for acquiring and freeing slots in tx_skbs[].
 183 */
 184
 185static void add_id_to_freelist(unsigned *head, union skb_entry *list,
 186                               unsigned short id)
 187{
 188        skb_entry_set_link(&list[id], *head);
 189        *head = id;
 190}
 191
 192static unsigned short get_id_from_freelist(unsigned *head,
 193                                           union skb_entry *list)
 194{
 195        unsigned int id = *head;
 196        *head = list[id].link;
 197        return id;
 198}
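
/*
 * Illustrative sketch (not part of the original driver): how the freelist
 * encoding above is used.  A free entry stores a small index (< PAGE_OFFSET)
 * in .link, an in-use entry stores an skb pointer (>= PAGE_OFFSET) in .skb,
 * which is what skb_entry_is_link() relies on.
 *
 *	unsigned int head = queue->tx_skb_freelist;
 *	unsigned short id;
 *
 *	id = get_id_from_freelist(&head, queue->tx_skbs); // pop a free slot
 *	queue->tx_skbs[id].skb = skb;                     // slot is now in use
 *	...
 *	add_id_to_freelist(&head, queue->tx_skbs, id);    // slot becomes a
 *	                                                  // link entry again
 */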
 199
 200static int xennet_rxidx(RING_IDX idx)
 201{
 202        return idx & (NET_RX_RING_SIZE - 1);
 203}
 204
 205static struct sk_buff *xennet_get_rx_skb(struct netfront_queue *queue,
 206                                         RING_IDX ri)
 207{
 208        int i = xennet_rxidx(ri);
 209        struct sk_buff *skb = queue->rx_skbs[i];
 210        queue->rx_skbs[i] = NULL;
 211        return skb;
 212}
 213
 214static grant_ref_t xennet_get_rx_ref(struct netfront_queue *queue,
 215                                            RING_IDX ri)
 216{
 217        int i = xennet_rxidx(ri);
 218        grant_ref_t ref = queue->grant_rx_ref[i];
 219        queue->grant_rx_ref[i] = GRANT_INVALID_REF;
 220        return ref;
 221}
 222
 223#ifdef CONFIG_SYSFS
 224static const struct attribute_group xennet_dev_group;
 225#endif
 226
 227static bool xennet_can_sg(struct net_device *dev)
 228{
 229        return dev->features & NETIF_F_SG;
 230}
 231
 232
 233static void rx_refill_timeout(struct timer_list *t)
 234{
 235        struct netfront_queue *queue = from_timer(queue, t, rx_refill_timer);
 236        napi_schedule(&queue->napi);
 237}
 238
 239static int netfront_tx_slot_available(struct netfront_queue *queue)
 240{
 241        return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) <
 242                (NET_TX_RING_SIZE - XEN_NETIF_NR_SLOTS_MIN - 1);
 243}
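
/*
 * Worked example for the test above, assuming the usual 4 KiB shared ring
 * (NET_TX_RING_SIZE == 256) and XEN_NETIF_NR_SLOTS_MIN == 18:
 *
 *	in_flight = queue->tx.req_prod_pvt - queue->tx.rsp_cons;
 *	slots available  <=>  in_flight < 256 - 18 - 1 = 237
 *
 * i.e. the queue is stopped early enough that a maximally fragmented skb
 * can still fit into the remaining slots.
 */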
 244
 245static void xennet_maybe_wake_tx(struct netfront_queue *queue)
 246{
 247        struct net_device *dev = queue->info->netdev;
 248        struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, queue->id);
 249
 250        if (unlikely(netif_tx_queue_stopped(dev_queue)) &&
 251            netfront_tx_slot_available(queue) &&
 252            likely(netif_running(dev)))
 253                netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
 254}
 255
 256
 257static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
 258{
 259        struct sk_buff *skb;
 260        struct page *page;
 261
 262        skb = __netdev_alloc_skb(queue->info->netdev,
 263                                 RX_COPY_THRESHOLD + NET_IP_ALIGN,
 264                                 GFP_ATOMIC | __GFP_NOWARN);
 265        if (unlikely(!skb))
 266                return NULL;
 267
 268        page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 269        if (!page) {
 270                kfree_skb(skb);
 271                return NULL;
 272        }
 273        skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
 274
  275        /* Align the IP header to a 16-byte boundary */
 276        skb_reserve(skb, NET_IP_ALIGN);
 277        skb->dev = queue->info->netdev;
 278
 279        return skb;
 280}
 281
 282
 283static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
 284{
 285        RING_IDX req_prod = queue->rx.req_prod_pvt;
 286        int notify;
 287        int err = 0;
 288
 289        if (unlikely(!netif_carrier_ok(queue->info->netdev)))
 290                return;
 291
 292        for (req_prod = queue->rx.req_prod_pvt;
 293             req_prod - queue->rx.rsp_cons < NET_RX_RING_SIZE;
 294             req_prod++) {
 295                struct sk_buff *skb;
 296                unsigned short id;
 297                grant_ref_t ref;
 298                struct page *page;
 299                struct xen_netif_rx_request *req;
 300
 301                skb = xennet_alloc_one_rx_buffer(queue);
 302                if (!skb) {
 303                        err = -ENOMEM;
 304                        break;
 305                }
 306
 307                id = xennet_rxidx(req_prod);
 308
 309                BUG_ON(queue->rx_skbs[id]);
 310                queue->rx_skbs[id] = skb;
 311
 312                ref = gnttab_claim_grant_reference(&queue->gref_rx_head);
 313                WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
 314                queue->grant_rx_ref[id] = ref;
 315
 316                page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
 317
 318                req = RING_GET_REQUEST(&queue->rx, req_prod);
 319                gnttab_page_grant_foreign_access_ref_one(ref,
 320                                                         queue->info->xbdev->otherend_id,
 321                                                         page,
 322                                                         0);
 323                req->id = id;
 324                req->gref = ref;
 325        }
 326
 327        queue->rx.req_prod_pvt = req_prod;
 328
  329        /* Try again later if there are not enough requests or skb allocation
  330         * failed.
  331         * "Enough requests" means that the sum of newly created slots and the
  332         * slots still unconsumed by the backend is at least NET_RX_SLOTS_MIN.
  333         */
 334        if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN ||
 335            unlikely(err)) {
 336                mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10));
 337                return;
 338        }
 339
 340        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->rx, notify);
 341        if (notify)
 342                notify_remote_via_irq(queue->rx_irq);
 343}
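
/*
 * Sketch of the refill policy implemented above (NET_RX_SLOTS_MIN is
 * XEN_NETIF_NR_SLOTS_MIN + 1, defined earlier in this file):
 *
 *	posted = rx.req_prod_pvt - rx.rsp_cons;  // slots the backend may fill
 *	if (posted < NET_RX_SLOTS_MIN || an skb allocation failed)
 *		re-arm rx_refill_timer and retry in HZ/10 (~100 ms);
 *	else
 *		push the new requests and notify the backend if needed.
 */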
 344
 345static int xennet_open(struct net_device *dev)
 346{
 347        struct netfront_info *np = netdev_priv(dev);
 348        unsigned int num_queues = dev->real_num_tx_queues;
 349        unsigned int i = 0;
 350        struct netfront_queue *queue = NULL;
 351
 352        if (!np->queues)
 353                return -ENODEV;
 354
 355        for (i = 0; i < num_queues; ++i) {
 356                queue = &np->queues[i];
 357                napi_enable(&queue->napi);
 358
 359                spin_lock_bh(&queue->rx_lock);
 360                if (netif_carrier_ok(dev)) {
 361                        xennet_alloc_rx_buffers(queue);
 362                        queue->rx.sring->rsp_event = queue->rx.rsp_cons + 1;
 363                        if (RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))
 364                                napi_schedule(&queue->napi);
 365                }
 366                spin_unlock_bh(&queue->rx_lock);
 367        }
 368
 369        netif_tx_start_all_queues(dev);
 370
 371        return 0;
 372}
 373
 374static void xennet_tx_buf_gc(struct netfront_queue *queue)
 375{
 376        RING_IDX cons, prod;
 377        unsigned short id;
 378        struct sk_buff *skb;
 379        bool more_to_do;
 380
 381        BUG_ON(!netif_carrier_ok(queue->info->netdev));
 382
 383        do {
 384                prod = queue->tx.sring->rsp_prod;
 385                rmb(); /* Ensure we see responses up to 'rp'. */
 386
 387                for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
 388                        struct xen_netif_tx_response *txrsp;
 389
 390                        txrsp = RING_GET_RESPONSE(&queue->tx, cons);
 391                        if (txrsp->status == XEN_NETIF_RSP_NULL)
 392                                continue;
 393
 394                        id  = txrsp->id;
 395                        skb = queue->tx_skbs[id].skb;
 396                        if (unlikely(gnttab_query_foreign_access(
 397                                queue->grant_tx_ref[id]) != 0)) {
 398                                pr_alert("%s: warning -- grant still in use by backend domain\n",
 399                                         __func__);
 400                                BUG();
 401                        }
 402                        gnttab_end_foreign_access_ref(
 403                                queue->grant_tx_ref[id], GNTMAP_readonly);
 404                        gnttab_release_grant_reference(
 405                                &queue->gref_tx_head, queue->grant_tx_ref[id]);
 406                        queue->grant_tx_ref[id] = GRANT_INVALID_REF;
 407                        queue->grant_tx_page[id] = NULL;
 408                        add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id);
 409                        dev_kfree_skb_irq(skb);
 410                }
 411
 412                queue->tx.rsp_cons = prod;
 413
 414                RING_FINAL_CHECK_FOR_RESPONSES(&queue->tx, more_to_do);
 415        } while (more_to_do);
 416
 417        xennet_maybe_wake_tx(queue);
 418}
 419
 420struct xennet_gnttab_make_txreq {
 421        struct netfront_queue *queue;
 422        struct sk_buff *skb;
 423        struct page *page;
 424        struct xen_netif_tx_request *tx; /* Last request */
 425        unsigned int size;
 426};
 427
 428static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset,
 429                                  unsigned int len, void *data)
 430{
 431        struct xennet_gnttab_make_txreq *info = data;
 432        unsigned int id;
 433        struct xen_netif_tx_request *tx;
 434        grant_ref_t ref;
 435        /* convenient aliases */
 436        struct page *page = info->page;
 437        struct netfront_queue *queue = info->queue;
 438        struct sk_buff *skb = info->skb;
 439
 440        id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
 441        tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
 442        ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
 443        WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
 444
 445        gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
 446                                        gfn, GNTMAP_readonly);
 447
 448        queue->tx_skbs[id].skb = skb;
 449        queue->grant_tx_page[id] = page;
 450        queue->grant_tx_ref[id] = ref;
 451
 452        tx->id = id;
 453        tx->gref = ref;
 454        tx->offset = offset;
 455        tx->size = len;
 456        tx->flags = 0;
 457
 458        info->tx = tx;
 459        info->size += tx->size;
 460}
 461
 462static struct xen_netif_tx_request *xennet_make_first_txreq(
 463        struct netfront_queue *queue, struct sk_buff *skb,
 464        struct page *page, unsigned int offset, unsigned int len)
 465{
 466        struct xennet_gnttab_make_txreq info = {
 467                .queue = queue,
 468                .skb = skb,
 469                .page = page,
 470                .size = 0,
 471        };
 472
 473        gnttab_for_one_grant(page, offset, len, xennet_tx_setup_grant, &info);
 474
 475        return info.tx;
 476}
 477
 478static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset,
 479                                  unsigned int len, void *data)
 480{
 481        struct xennet_gnttab_make_txreq *info = data;
 482
 483        info->tx->flags |= XEN_NETTXF_more_data;
 484        skb_get(info->skb);
 485        xennet_tx_setup_grant(gfn, offset, len, data);
 486}
 487
 488static struct xen_netif_tx_request *xennet_make_txreqs(
 489        struct netfront_queue *queue, struct xen_netif_tx_request *tx,
 490        struct sk_buff *skb, struct page *page,
 491        unsigned int offset, unsigned int len)
 492{
 493        struct xennet_gnttab_make_txreq info = {
 494                .queue = queue,
 495                .skb = skb,
 496                .tx = tx,
 497        };
 498
 499        /* Skip unused frames from start of page */
 500        page += offset >> PAGE_SHIFT;
 501        offset &= ~PAGE_MASK;
 502
 503        while (len) {
 504                info.page = page;
 505                info.size = 0;
 506
 507                gnttab_foreach_grant_in_range(page, offset, len,
 508                                              xennet_make_one_txreq,
 509                                              &info);
 510
 511                page++;
 512                offset = 0;
 513                len -= info.size;
 514        }
 515
 516        return info.tx;
 517}
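
/*
 * Sketch of the request chain built by xennet_make_first_txreq() and
 * xennet_make_txreqs() for a linear area crossing page boundaries.  The
 * offsets and lengths below are made-up example values (4 KiB pages):
 *
 *	data: offset 3000 in page 0, total length 6000
 *
 *	req[0]: gref(page 0), offset 3000, size 1096
 *	req[1]: gref(page 1), offset    0, size 4096  <- more_data set on req[0]
 *	req[2]: gref(page 2), offset    0, size  808  <- more_data set on req[1]
 *
 * Every request after the first also takes an skb reference (skb_get()),
 * so the skb stays around until each slot is completed in
 * xennet_tx_buf_gc().
 */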
 518
 519/*
 520 * Count how many ring slots are required to send this skb. Each frag
 521 * might be a compound page.
 522 */
 523static int xennet_count_skb_slots(struct sk_buff *skb)
 524{
 525        int i, frags = skb_shinfo(skb)->nr_frags;
 526        int slots;
 527
 528        slots = gnttab_count_grant(offset_in_page(skb->data),
 529                                   skb_headlen(skb));
 530
 531        for (i = 0; i < frags; i++) {
 532                skb_frag_t *frag = skb_shinfo(skb)->frags + i;
 533                unsigned long size = skb_frag_size(frag);
 534                unsigned long offset = frag->page_offset;
 535
 536                /* Skip unused frames from start of page */
 537                offset &= ~PAGE_MASK;
 538
 539                slots += gnttab_count_grant(offset, size);
 540        }
 541
 542        return slots;
 543}
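
/*
 * Worked example for the slot count above, assuming
 * XEN_PAGE_SIZE == PAGE_SIZE == 4096:
 *
 *	linear head: 100 bytes starting at in-page offset 4060
 *	             -> crosses one page boundary             -> 2 grants
 *	one frag:    8192 bytes at offset 0 of a compound page -> 2 grants
 *
 *	slots = 2 + 2 = 4
 */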
 544
 545static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
 546                               struct net_device *sb_dev)
 547{
 548        unsigned int num_queues = dev->real_num_tx_queues;
 549        u32 hash;
 550        u16 queue_idx;
 551
 552        /* First, check if there is only one queue */
 553        if (num_queues == 1) {
 554                queue_idx = 0;
 555        } else {
 556                hash = skb_get_hash(skb);
 557                queue_idx = hash % num_queues;
 558        }
 559
 560        return queue_idx;
 561}
 562
 563#define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
 564
 565static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 566{
 567        struct netfront_info *np = netdev_priv(dev);
 568        struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
 569        struct xen_netif_tx_request *tx, *first_tx;
 570        unsigned int i;
 571        int notify;
 572        int slots;
 573        struct page *page;
 574        unsigned int offset;
 575        unsigned int len;
 576        unsigned long flags;
 577        struct netfront_queue *queue = NULL;
 578        unsigned int num_queues = dev->real_num_tx_queues;
 579        u16 queue_index;
 580        struct sk_buff *nskb;
 581
 582        /* Drop the packet if no queues are set up */
 583        if (num_queues < 1)
 584                goto drop;
 585        /* Determine which queue to transmit this SKB on */
 586        queue_index = skb_get_queue_mapping(skb);
 587        queue = &np->queues[queue_index];
 588
 589        /* If skb->len is too big for wire format, drop skb and alert
 590         * user about misconfiguration.
 591         */
 592        if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) {
 593                net_alert_ratelimited(
 594                        "xennet: skb->len = %u, too big for wire format\n",
 595                        skb->len);
 596                goto drop;
 597        }
 598
 599        slots = xennet_count_skb_slots(skb);
 600        if (unlikely(slots > MAX_XEN_SKB_FRAGS + 1)) {
 601                net_dbg_ratelimited("xennet: skb rides the rocket: %d slots, %d bytes\n",
 602                                    slots, skb->len);
 603                if (skb_linearize(skb))
 604                        goto drop;
 605        }
 606
 607        page = virt_to_page(skb->data);
 608        offset = offset_in_page(skb->data);
 609
  610        /* The first req should be at least ETH_HLEN in size or the packet will be
 611         * dropped by netback.
 612         */
 613        if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
 614                nskb = skb_copy(skb, GFP_ATOMIC);
 615                if (!nskb)
 616                        goto drop;
 617                dev_consume_skb_any(skb);
 618                skb = nskb;
 619                page = virt_to_page(skb->data);
 620                offset = offset_in_page(skb->data);
 621        }
 622
 623        len = skb_headlen(skb);
 624
 625        spin_lock_irqsave(&queue->tx_lock, flags);
 626
 627        if (unlikely(!netif_carrier_ok(dev) ||
 628                     (slots > 1 && !xennet_can_sg(dev)) ||
 629                     netif_needs_gso(skb, netif_skb_features(skb)))) {
 630                spin_unlock_irqrestore(&queue->tx_lock, flags);
 631                goto drop;
 632        }
 633
 634        /* First request for the linear area. */
 635        first_tx = tx = xennet_make_first_txreq(queue, skb,
 636                                                page, offset, len);
 637        offset += tx->size;
 638        if (offset == PAGE_SIZE) {
 639                page++;
 640                offset = 0;
 641        }
 642        len -= tx->size;
 643
 644        if (skb->ip_summed == CHECKSUM_PARTIAL)
 645                /* local packet? */
 646                tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
 647        else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
 648                /* remote but checksummed. */
 649                tx->flags |= XEN_NETTXF_data_validated;
 650
 651        /* Optional extra info after the first request. */
 652        if (skb_shinfo(skb)->gso_size) {
 653                struct xen_netif_extra_info *gso;
 654
 655                gso = (struct xen_netif_extra_info *)
 656                        RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
 657
 658                tx->flags |= XEN_NETTXF_extra_info;
 659
 660                gso->u.gso.size = skb_shinfo(skb)->gso_size;
 661                gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
 662                        XEN_NETIF_GSO_TYPE_TCPV6 :
 663                        XEN_NETIF_GSO_TYPE_TCPV4;
 664                gso->u.gso.pad = 0;
 665                gso->u.gso.features = 0;
 666
 667                gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
 668                gso->flags = 0;
 669        }
 670
 671        /* Requests for the rest of the linear area. */
 672        tx = xennet_make_txreqs(queue, tx, skb, page, offset, len);
 673
 674        /* Requests for all the frags. */
 675        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 676                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 677                tx = xennet_make_txreqs(queue, tx, skb,
 678                                        skb_frag_page(frag), frag->page_offset,
 679                                        skb_frag_size(frag));
 680        }
 681
 682        /* First request has the packet length. */
 683        first_tx->size = skb->len;
 684
 685        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
 686        if (notify)
 687                notify_remote_via_irq(queue->tx_irq);
 688
 689        u64_stats_update_begin(&tx_stats->syncp);
 690        tx_stats->bytes += skb->len;
 691        tx_stats->packets++;
 692        u64_stats_update_end(&tx_stats->syncp);
 693
 694        /* Note: It is not safe to access skb after xennet_tx_buf_gc()! */
 695        xennet_tx_buf_gc(queue);
 696
 697        if (!netfront_tx_slot_available(queue))
 698                netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
 699
 700        spin_unlock_irqrestore(&queue->tx_lock, flags);
 701
 702        return NETDEV_TX_OK;
 703
 704 drop:
 705        dev->stats.tx_dropped++;
 706        dev_kfree_skb_any(skb);
 707        return NETDEV_TX_OK;
 708}
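
/*
 * Ring layout produced by xennet_start_xmit() for a TSO skb, as a sketch
 * (slot positions are illustrative only):
 *
 *	slot n    : first tx request - linear head; csum and extra_info flags;
 *	                               size rewritten to skb->len at the end
 *	slot n+1  : xen_netif_extra_info - GSO type and gso_size
 *	slot n+2..: further tx requests  - rest of the linear area, then one or
 *	                                   more requests per frag, chained via
 *	                                   XEN_NETTXF_more_data
 */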
 709
 710static int xennet_close(struct net_device *dev)
 711{
 712        struct netfront_info *np = netdev_priv(dev);
 713        unsigned int num_queues = dev->real_num_tx_queues;
 714        unsigned int i;
 715        struct netfront_queue *queue;
 716        netif_tx_stop_all_queues(np->netdev);
 717        for (i = 0; i < num_queues; ++i) {
 718                queue = &np->queues[i];
 719                napi_disable(&queue->napi);
 720        }
 721        return 0;
 722}
 723
 724static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
 725                                grant_ref_t ref)
 726{
 727        int new = xennet_rxidx(queue->rx.req_prod_pvt);
 728
 729        BUG_ON(queue->rx_skbs[new]);
 730        queue->rx_skbs[new] = skb;
 731        queue->grant_rx_ref[new] = ref;
 732        RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->id = new;
 733        RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->gref = ref;
 734        queue->rx.req_prod_pvt++;
 735}
 736
 737static int xennet_get_extras(struct netfront_queue *queue,
 738                             struct xen_netif_extra_info *extras,
 739                             RING_IDX rp)
 740
 741{
 742        struct xen_netif_extra_info *extra;
 743        struct device *dev = &queue->info->netdev->dev;
 744        RING_IDX cons = queue->rx.rsp_cons;
 745        int err = 0;
 746
 747        do {
 748                struct sk_buff *skb;
 749                grant_ref_t ref;
 750
 751                if (unlikely(cons + 1 == rp)) {
 752                        if (net_ratelimit())
 753                                dev_warn(dev, "Missing extra info\n");
 754                        err = -EBADR;
 755                        break;
 756                }
 757
 758                extra = (struct xen_netif_extra_info *)
 759                        RING_GET_RESPONSE(&queue->rx, ++cons);
 760
 761                if (unlikely(!extra->type ||
 762                             extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
 763                        if (net_ratelimit())
 764                                dev_warn(dev, "Invalid extra type: %d\n",
 765                                        extra->type);
 766                        err = -EINVAL;
 767                } else {
 768                        memcpy(&extras[extra->type - 1], extra,
 769                               sizeof(*extra));
 770                }
 771
 772                skb = xennet_get_rx_skb(queue, cons);
 773                ref = xennet_get_rx_ref(queue, cons);
 774                xennet_move_rx_slot(queue, skb, ref);
 775        } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
 776
 777        queue->rx.rsp_cons = cons;
 778        return err;
 779}
 780
 781static int xennet_get_responses(struct netfront_queue *queue,
 782                                struct netfront_rx_info *rinfo, RING_IDX rp,
 783                                struct sk_buff_head *list)
 784{
 785        struct xen_netif_rx_response *rx = &rinfo->rx;
 786        struct xen_netif_extra_info *extras = rinfo->extras;
 787        struct device *dev = &queue->info->netdev->dev;
 788        RING_IDX cons = queue->rx.rsp_cons;
 789        struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
 790        grant_ref_t ref = xennet_get_rx_ref(queue, cons);
 791        int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD);
 792        int slots = 1;
 793        int err = 0;
 794        unsigned long ret;
 795
 796        if (rx->flags & XEN_NETRXF_extra_info) {
 797                err = xennet_get_extras(queue, extras, rp);
 798                cons = queue->rx.rsp_cons;
 799        }
 800
 801        for (;;) {
 802                if (unlikely(rx->status < 0 ||
 803                             rx->offset + rx->status > XEN_PAGE_SIZE)) {
 804                        if (net_ratelimit())
 805                                dev_warn(dev, "rx->offset: %u, size: %d\n",
 806                                         rx->offset, rx->status);
 807                        xennet_move_rx_slot(queue, skb, ref);
 808                        err = -EINVAL;
 809                        goto next;
 810                }
 811
 812                /*
 813                 * This definitely indicates a bug, either in this driver or in
 814                 * the backend driver. In future this should flag the bad
 815                 * situation to the system controller to reboot the backend.
 816                 */
 817                if (ref == GRANT_INVALID_REF) {
 818                        if (net_ratelimit())
 819                                dev_warn(dev, "Bad rx response id %d.\n",
 820                                         rx->id);
 821                        err = -EINVAL;
 822                        goto next;
 823                }
 824
 825                ret = gnttab_end_foreign_access_ref(ref, 0);
 826                BUG_ON(!ret);
 827
 828                gnttab_release_grant_reference(&queue->gref_rx_head, ref);
 829
 830                __skb_queue_tail(list, skb);
 831
 832next:
 833                if (!(rx->flags & XEN_NETRXF_more_data))
 834                        break;
 835
 836                if (cons + slots == rp) {
 837                        if (net_ratelimit())
 838                                dev_warn(dev, "Need more slots\n");
 839                        err = -ENOENT;
 840                        break;
 841                }
 842
 843                rx = RING_GET_RESPONSE(&queue->rx, cons + slots);
 844                skb = xennet_get_rx_skb(queue, cons + slots);
 845                ref = xennet_get_rx_ref(queue, cons + slots);
 846                slots++;
 847        }
 848
 849        if (unlikely(slots > max)) {
 850                if (net_ratelimit())
 851                        dev_warn(dev, "Too many slots\n");
 852                err = -E2BIG;
 853        }
 854
 855        if (unlikely(err))
 856                queue->rx.rsp_cons = cons + slots;
 857
 858        return err;
 859}
 860
 861static int xennet_set_skb_gso(struct sk_buff *skb,
 862                              struct xen_netif_extra_info *gso)
 863{
 864        if (!gso->u.gso.size) {
 865                if (net_ratelimit())
 866                        pr_warn("GSO size must not be zero\n");
 867                return -EINVAL;
 868        }
 869
 870        if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
 871            gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
 872                if (net_ratelimit())
 873                        pr_warn("Bad GSO type %d\n", gso->u.gso.type);
 874                return -EINVAL;
 875        }
 876
 877        skb_shinfo(skb)->gso_size = gso->u.gso.size;
 878        skb_shinfo(skb)->gso_type =
 879                (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
 880                SKB_GSO_TCPV4 :
 881                SKB_GSO_TCPV6;
 882
 883        /* Header must be checked, and gso_segs computed. */
 884        skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 885        skb_shinfo(skb)->gso_segs = 0;
 886
 887        return 0;
 888}
 889
 890static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
 891                                  struct sk_buff *skb,
 892                                  struct sk_buff_head *list)
 893{
 894        RING_IDX cons = queue->rx.rsp_cons;
 895        struct sk_buff *nskb;
 896
 897        while ((nskb = __skb_dequeue(list))) {
 898                struct xen_netif_rx_response *rx =
 899                        RING_GET_RESPONSE(&queue->rx, ++cons);
 900                skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
 901
 902                if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
 903                        unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 904
 905                        BUG_ON(pull_to < skb_headlen(skb));
 906                        __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 907                }
 908                if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
 909                        queue->rx.rsp_cons = ++cons;
 910                        kfree_skb(nskb);
 911                        return ~0U;
 912                }
 913
 914                skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
 915                                skb_frag_page(nfrag),
 916                                rx->offset, rx->status, PAGE_SIZE);
 917
 918                skb_shinfo(nskb)->nr_frags = 0;
 919                kfree_skb(nskb);
 920        }
 921
 922        return cons;
 923}
 924
 925static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
 926{
 927        bool recalculate_partial_csum = false;
 928
 929        /*
 930         * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
 931         * peers can fail to set NETRXF_csum_blank when sending a GSO
 932         * frame. In this case force the SKB to CHECKSUM_PARTIAL and
 933         * recalculate the partial checksum.
 934         */
 935        if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
 936                struct netfront_info *np = netdev_priv(dev);
 937                atomic_inc(&np->rx_gso_checksum_fixup);
 938                skb->ip_summed = CHECKSUM_PARTIAL;
 939                recalculate_partial_csum = true;
 940        }
 941
 942        /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
 943        if (skb->ip_summed != CHECKSUM_PARTIAL)
 944                return 0;
 945
 946        return skb_checksum_setup(skb, recalculate_partial_csum);
 947}
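
/*
 * Summary of the cases handled above, as a sketch:
 *
 *	GSO skb that is not CHECKSUM_PARTIAL -> buggy peer: count the fixup,
 *	                                        force CHECKSUM_PARTIAL and
 *	                                        recalculate the partial csum
 *	non-partial, non-GSO skb             -> nothing to do, return 0
 *	CHECKSUM_PARTIAL skb                 -> skb_checksum_setup() fills in
 *	                                        the checksum offsets
 */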
 948
 949static int handle_incoming_queue(struct netfront_queue *queue,
 950                                 struct sk_buff_head *rxq)
 951{
 952        struct netfront_stats *rx_stats = this_cpu_ptr(queue->info->rx_stats);
 953        int packets_dropped = 0;
 954        struct sk_buff *skb;
 955
 956        while ((skb = __skb_dequeue(rxq)) != NULL) {
 957                int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 958
 959                if (pull_to > skb_headlen(skb))
 960                        __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 961
 962                /* Ethernet work: Delayed to here as it peeks the header. */
 963                skb->protocol = eth_type_trans(skb, queue->info->netdev);
 964                skb_reset_network_header(skb);
 965
 966                if (checksum_setup(queue->info->netdev, skb)) {
 967                        kfree_skb(skb);
 968                        packets_dropped++;
 969                        queue->info->netdev->stats.rx_errors++;
 970                        continue;
 971                }
 972
 973                u64_stats_update_begin(&rx_stats->syncp);
 974                rx_stats->packets++;
 975                rx_stats->bytes += skb->len;
 976                u64_stats_update_end(&rx_stats->syncp);
 977
 978                /* Pass it up. */
 979                napi_gro_receive(&queue->napi, skb);
 980        }
 981
 982        return packets_dropped;
 983}
 984
 985static int xennet_poll(struct napi_struct *napi, int budget)
 986{
 987        struct netfront_queue *queue = container_of(napi, struct netfront_queue, napi);
 988        struct net_device *dev = queue->info->netdev;
 989        struct sk_buff *skb;
 990        struct netfront_rx_info rinfo;
 991        struct xen_netif_rx_response *rx = &rinfo.rx;
 992        struct xen_netif_extra_info *extras = rinfo.extras;
 993        RING_IDX i, rp;
 994        int work_done;
 995        struct sk_buff_head rxq;
 996        struct sk_buff_head errq;
 997        struct sk_buff_head tmpq;
 998        int err;
 999
1000        spin_lock(&queue->rx_lock);
1001
1002        skb_queue_head_init(&rxq);
1003        skb_queue_head_init(&errq);
1004        skb_queue_head_init(&tmpq);
1005
1006        rp = queue->rx.sring->rsp_prod;
1007        rmb(); /* Ensure we see queued responses up to 'rp'. */
1008
1009        i = queue->rx.rsp_cons;
1010        work_done = 0;
1011        while ((i != rp) && (work_done < budget)) {
1012                memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx));
1013                memset(extras, 0, sizeof(rinfo.extras));
1014
1015                err = xennet_get_responses(queue, &rinfo, rp, &tmpq);
1016
1017                if (unlikely(err)) {
1018err:
1019                        while ((skb = __skb_dequeue(&tmpq)))
1020                                __skb_queue_tail(&errq, skb);
1021                        dev->stats.rx_errors++;
1022                        i = queue->rx.rsp_cons;
1023                        continue;
1024                }
1025
1026                skb = __skb_dequeue(&tmpq);
1027
1028                if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1029                        struct xen_netif_extra_info *gso;
1030                        gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1031
1032                        if (unlikely(xennet_set_skb_gso(skb, gso))) {
1033                                __skb_queue_head(&tmpq, skb);
1034                                queue->rx.rsp_cons += skb_queue_len(&tmpq);
1035                                goto err;
1036                        }
1037                }
1038
1039                NETFRONT_SKB_CB(skb)->pull_to = rx->status;
1040                if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
1041                        NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;
1042
1043                skb_shinfo(skb)->frags[0].page_offset = rx->offset;
1044                skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
1045                skb->data_len = rx->status;
1046                skb->len += rx->status;
1047
1048                i = xennet_fill_frags(queue, skb, &tmpq);
1049                if (unlikely(i == ~0U))
1050                        goto err;
1051
1052                if (rx->flags & XEN_NETRXF_csum_blank)
1053                        skb->ip_summed = CHECKSUM_PARTIAL;
1054                else if (rx->flags & XEN_NETRXF_data_validated)
1055                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1056
1057                __skb_queue_tail(&rxq, skb);
1058
1059                queue->rx.rsp_cons = ++i;
1060                work_done++;
1061        }
1062
1063        __skb_queue_purge(&errq);
1064
1065        work_done -= handle_incoming_queue(queue, &rxq);
1066
1067        xennet_alloc_rx_buffers(queue);
1068
1069        if (work_done < budget) {
1070                int more_to_do = 0;
1071
1072                napi_complete_done(napi, work_done);
1073
1074                RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do);
1075                if (more_to_do)
1076                        napi_schedule(napi);
1077        }
1078
1079        spin_unlock(&queue->rx_lock);
1080
1081        return work_done;
1082}
1083
1084static int xennet_change_mtu(struct net_device *dev, int mtu)
1085{
1086        int max = xennet_can_sg(dev) ? XEN_NETIF_MAX_TX_SIZE : ETH_DATA_LEN;
1087
1088        if (mtu > max)
1089                return -EINVAL;
1090        dev->mtu = mtu;
1091        return 0;
1092}
1093
1094static void xennet_get_stats64(struct net_device *dev,
1095                               struct rtnl_link_stats64 *tot)
1096{
1097        struct netfront_info *np = netdev_priv(dev);
1098        int cpu;
1099
1100        for_each_possible_cpu(cpu) {
1101                struct netfront_stats *rx_stats = per_cpu_ptr(np->rx_stats, cpu);
1102                struct netfront_stats *tx_stats = per_cpu_ptr(np->tx_stats, cpu);
1103                u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
1104                unsigned int start;
1105
1106                do {
1107                        start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
1108                        tx_packets = tx_stats->packets;
1109                        tx_bytes = tx_stats->bytes;
1110                } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
1111
1112                do {
1113                        start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
1114                        rx_packets = rx_stats->packets;
1115                        rx_bytes = rx_stats->bytes;
1116                } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
1117
1118                tot->rx_packets += rx_packets;
1119                tot->tx_packets += tx_packets;
1120                tot->rx_bytes   += rx_bytes;
1121                tot->tx_bytes   += tx_bytes;
1122        }
1123
1124        tot->rx_errors  = dev->stats.rx_errors;
1125        tot->tx_dropped = dev->stats.tx_dropped;
1126}
1127
1128static void xennet_release_tx_bufs(struct netfront_queue *queue)
1129{
1130        struct sk_buff *skb;
1131        int i;
1132
1133        for (i = 0; i < NET_TX_RING_SIZE; i++) {
1134                /* Skip over entries which are actually freelist references */
1135                if (skb_entry_is_link(&queue->tx_skbs[i]))
1136                        continue;
1137
1138                skb = queue->tx_skbs[i].skb;
1139                get_page(queue->grant_tx_page[i]);
1140                gnttab_end_foreign_access(queue->grant_tx_ref[i],
1141                                          GNTMAP_readonly,
1142                                          (unsigned long)page_address(queue->grant_tx_page[i]));
1143                queue->grant_tx_page[i] = NULL;
1144                queue->grant_tx_ref[i] = GRANT_INVALID_REF;
1145                add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i);
1146                dev_kfree_skb_irq(skb);
1147        }
1148}
1149
1150static void xennet_release_rx_bufs(struct netfront_queue *queue)
1151{
1152        int id, ref;
1153
1154        spin_lock_bh(&queue->rx_lock);
1155
1156        for (id = 0; id < NET_RX_RING_SIZE; id++) {
1157                struct sk_buff *skb;
1158                struct page *page;
1159
1160                skb = queue->rx_skbs[id];
1161                if (!skb)
1162                        continue;
1163
1164                ref = queue->grant_rx_ref[id];
1165                if (ref == GRANT_INVALID_REF)
1166                        continue;
1167
1168                page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
1169
1170                /* gnttab_end_foreign_access() needs a page ref until
1171                 * foreign access is ended (which may be deferred).
1172                 */
1173                get_page(page);
1174                gnttab_end_foreign_access(ref, 0,
1175                                          (unsigned long)page_address(page));
1176                queue->grant_rx_ref[id] = GRANT_INVALID_REF;
1177
1178                kfree_skb(skb);
1179        }
1180
1181        spin_unlock_bh(&queue->rx_lock);
1182}
1183
1184static netdev_features_t xennet_fix_features(struct net_device *dev,
1185        netdev_features_t features)
1186{
1187        struct netfront_info *np = netdev_priv(dev);
1188
1189        if (features & NETIF_F_SG &&
1190            !xenbus_read_unsigned(np->xbdev->otherend, "feature-sg", 0))
1191                features &= ~NETIF_F_SG;
1192
1193        if (features & NETIF_F_IPV6_CSUM &&
1194            !xenbus_read_unsigned(np->xbdev->otherend,
1195                                  "feature-ipv6-csum-offload", 0))
1196                features &= ~NETIF_F_IPV6_CSUM;
1197
1198        if (features & NETIF_F_TSO &&
1199            !xenbus_read_unsigned(np->xbdev->otherend, "feature-gso-tcpv4", 0))
1200                features &= ~NETIF_F_TSO;
1201
1202        if (features & NETIF_F_TSO6 &&
1203            !xenbus_read_unsigned(np->xbdev->otherend, "feature-gso-tcpv6", 0))
1204                features &= ~NETIF_F_TSO6;
1205
1206        return features;
1207}
1208
1209static int xennet_set_features(struct net_device *dev,
1210        netdev_features_t features)
1211{
1212        if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
1213                netdev_info(dev, "Reducing MTU because no SG offload");
1214                dev->mtu = ETH_DATA_LEN;
1215        }
1216
1217        return 0;
1218}
1219
1220static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
1221{
1222        struct netfront_queue *queue = dev_id;
1223        unsigned long flags;
1224
1225        spin_lock_irqsave(&queue->tx_lock, flags);
1226        xennet_tx_buf_gc(queue);
1227        spin_unlock_irqrestore(&queue->tx_lock, flags);
1228
1229        return IRQ_HANDLED;
1230}
1231
1232static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
1233{
1234        struct netfront_queue *queue = dev_id;
1235        struct net_device *dev = queue->info->netdev;
1236
1237        if (likely(netif_carrier_ok(dev) &&
1238                   RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
1239                napi_schedule(&queue->napi);
1240
1241        return IRQ_HANDLED;
1242}
1243
1244static irqreturn_t xennet_interrupt(int irq, void *dev_id)
1245{
1246        xennet_tx_interrupt(irq, dev_id);
1247        xennet_rx_interrupt(irq, dev_id);
1248        return IRQ_HANDLED;
1249}
1250
1251#ifdef CONFIG_NET_POLL_CONTROLLER
1252static void xennet_poll_controller(struct net_device *dev)
1253{
1254        /* Poll each queue */
1255        struct netfront_info *info = netdev_priv(dev);
1256        unsigned int num_queues = dev->real_num_tx_queues;
1257        unsigned int i;
1258        for (i = 0; i < num_queues; ++i)
1259                xennet_interrupt(0, &info->queues[i]);
1260}
1261#endif
1262
1263static const struct net_device_ops xennet_netdev_ops = {
1264        .ndo_open            = xennet_open,
1265        .ndo_stop            = xennet_close,
1266        .ndo_start_xmit      = xennet_start_xmit,
1267        .ndo_change_mtu      = xennet_change_mtu,
1268        .ndo_get_stats64     = xennet_get_stats64,
1269        .ndo_set_mac_address = eth_mac_addr,
1270        .ndo_validate_addr   = eth_validate_addr,
1271        .ndo_fix_features    = xennet_fix_features,
1272        .ndo_set_features    = xennet_set_features,
1273        .ndo_select_queue    = xennet_select_queue,
1274#ifdef CONFIG_NET_POLL_CONTROLLER
1275        .ndo_poll_controller = xennet_poll_controller,
1276#endif
1277};
1278
1279static void xennet_free_netdev(struct net_device *netdev)
1280{
1281        struct netfront_info *np = netdev_priv(netdev);
1282
1283        free_percpu(np->rx_stats);
1284        free_percpu(np->tx_stats);
1285        free_netdev(netdev);
1286}
1287
1288static struct net_device *xennet_create_dev(struct xenbus_device *dev)
1289{
1290        int err;
1291        struct net_device *netdev;
1292        struct netfront_info *np;
1293
1294        netdev = alloc_etherdev_mq(sizeof(struct netfront_info), xennet_max_queues);
1295        if (!netdev)
1296                return ERR_PTR(-ENOMEM);
1297
1298        np                   = netdev_priv(netdev);
1299        np->xbdev            = dev;
1300
1301        np->queues = NULL;
1302
1303        err = -ENOMEM;
1304        np->rx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1305        if (np->rx_stats == NULL)
1306                goto exit;
1307        np->tx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1308        if (np->tx_stats == NULL)
1309                goto exit;
1310
1311        netdev->netdev_ops      = &xennet_netdev_ops;
1312
1313        netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
1314                                  NETIF_F_GSO_ROBUST;
1315        netdev->hw_features     = NETIF_F_SG |
1316                                  NETIF_F_IPV6_CSUM |
1317                                  NETIF_F_TSO | NETIF_F_TSO6;
1318
1319        /*
1320         * Assume that all hw features are available for now. This set
1321         * will be adjusted by the call to netdev_update_features() in
1322         * xennet_connect() which is the earliest point where we can
1323         * negotiate with the backend regarding supported features.
1324         */
1325        netdev->features |= netdev->hw_features;
1326
1327        netdev->ethtool_ops = &xennet_ethtool_ops;
1328        netdev->min_mtu = ETH_MIN_MTU;
1329        netdev->max_mtu = XEN_NETIF_MAX_TX_SIZE;
1330        SET_NETDEV_DEV(netdev, &dev->dev);
1331
1332        np->netdev = netdev;
1333
1334        netif_carrier_off(netdev);
1335
1336        xenbus_switch_state(dev, XenbusStateInitialising);
1337        wait_event(module_wq,
1338                   xenbus_read_driver_state(dev->otherend) !=
1339                   XenbusStateClosed &&
1340                   xenbus_read_driver_state(dev->otherend) !=
1341                   XenbusStateUnknown);
1342        return netdev;
1343
1344 exit:
1345        xennet_free_netdev(netdev);
1346        return ERR_PTR(err);
1347}
1348
1349/**
1350 * Entry point to this code when a new device is created.  Allocate the basic
1351 * structures and the ring buffers for communication with the backend, and
1352 * inform the backend of the appropriate details for those.
1353 */
1354static int netfront_probe(struct xenbus_device *dev,
1355                          const struct xenbus_device_id *id)
1356{
1357        int err;
1358        struct net_device *netdev;
1359        struct netfront_info *info;
1360
1361        netdev = xennet_create_dev(dev);
1362        if (IS_ERR(netdev)) {
1363                err = PTR_ERR(netdev);
1364                xenbus_dev_fatal(dev, err, "creating netdev");
1365                return err;
1366        }
1367
1368        info = netdev_priv(netdev);
1369        dev_set_drvdata(&dev->dev, info);
1370#ifdef CONFIG_SYSFS
1371        info->netdev->sysfs_groups[0] = &xennet_dev_group;
1372#endif
1373
1374        return 0;
1375}
1376
1377static void xennet_end_access(int ref, void *page)
1378{
1379        /* This frees the page as a side-effect */
1380        if (ref != GRANT_INVALID_REF)
1381                gnttab_end_foreign_access(ref, 0, (unsigned long)page);
1382}
1383
1384static void xennet_disconnect_backend(struct netfront_info *info)
1385{
1386        unsigned int i = 0;
1387        unsigned int num_queues = info->netdev->real_num_tx_queues;
1388
1389        netif_carrier_off(info->netdev);
1390
1391        for (i = 0; i < num_queues && info->queues; ++i) {
1392                struct netfront_queue *queue = &info->queues[i];
1393
1394                del_timer_sync(&queue->rx_refill_timer);
1395
1396                if (queue->tx_irq && (queue->tx_irq == queue->rx_irq))
1397                        unbind_from_irqhandler(queue->tx_irq, queue);
1398                if (queue->tx_irq && (queue->tx_irq != queue->rx_irq)) {
1399                        unbind_from_irqhandler(queue->tx_irq, queue);
1400                        unbind_from_irqhandler(queue->rx_irq, queue);
1401                }
1402                queue->tx_evtchn = queue->rx_evtchn = 0;
1403                queue->tx_irq = queue->rx_irq = 0;
1404
1405                if (netif_running(info->netdev))
1406                        napi_synchronize(&queue->napi);
1407
1408                xennet_release_tx_bufs(queue);
1409                xennet_release_rx_bufs(queue);
1410                gnttab_free_grant_references(queue->gref_tx_head);
1411                gnttab_free_grant_references(queue->gref_rx_head);
1412
1413                /* End access and free the pages */
1414                xennet_end_access(queue->tx_ring_ref, queue->tx.sring);
1415                xennet_end_access(queue->rx_ring_ref, queue->rx.sring);
1416
1417                queue->tx_ring_ref = GRANT_INVALID_REF;
1418                queue->rx_ring_ref = GRANT_INVALID_REF;
1419                queue->tx.sring = NULL;
1420                queue->rx.sring = NULL;
1421        }
1422}
1423
1424/**
1425 * We are reconnecting to the backend, due to a suspend/resume, or a backend
1426 * driver restart.  We tear down our netif structure and recreate it, but
1427 * leave the device-layer structures intact so that this is transparent to the
1428 * rest of the kernel.
1429 */
1430static int netfront_resume(struct xenbus_device *dev)
1431{
1432        struct netfront_info *info = dev_get_drvdata(&dev->dev);
1433
1434        dev_dbg(&dev->dev, "%s\n", dev->nodename);
1435
1436        xennet_disconnect_backend(info);
1437        return 0;
1438}
1439
1440static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
1441{
1442        char *s, *e, *macstr;
1443        int i;
1444
1445        macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
1446        if (IS_ERR(macstr))
1447                return PTR_ERR(macstr);
1448
1449        for (i = 0; i < ETH_ALEN; i++) {
1450                mac[i] = simple_strtoul(s, &e, 16);
1451                if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
1452                        kfree(macstr);
1453                        return -ENOENT;
1454                }
1455                s = e+1;
1456        }
1457
1458        kfree(macstr);
1459        return 0;
1460}
1461
1462static int setup_netfront_single(struct netfront_queue *queue)
1463{
1464        int err;
1465
1466        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1467        if (err < 0)
1468                goto fail;
1469
1470        err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
1471                                        xennet_interrupt,
1472                                        0, queue->info->netdev->name, queue);
1473        if (err < 0)
1474                goto bind_fail;
1475        queue->rx_evtchn = queue->tx_evtchn;
1476        queue->rx_irq = queue->tx_irq = err;
1477
1478        return 0;
1479
1480bind_fail:
1481        xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1482        queue->tx_evtchn = 0;
1483fail:
1484        return err;
1485}
1486
1487static int setup_netfront_split(struct netfront_queue *queue)
1488{
1489        int err;
1490
1491        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1492        if (err < 0)
1493                goto fail;
1494        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->rx_evtchn);
1495        if (err < 0)
1496                goto alloc_rx_evtchn_fail;
1497
1498        snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
1499                 "%s-tx", queue->name);
1500        err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
1501                                        xennet_tx_interrupt,
1502                                        0, queue->tx_irq_name, queue);
1503        if (err < 0)
1504                goto bind_tx_fail;
1505        queue->tx_irq = err;
1506
1507        snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
1508                 "%s-rx", queue->name);
1509        err = bind_evtchn_to_irqhandler(queue->rx_evtchn,
1510                                        xennet_rx_interrupt,
1511                                        0, queue->rx_irq_name, queue);
1512        if (err < 0)
1513                goto bind_rx_fail;
1514        queue->rx_irq = err;
1515
1516        return 0;
1517
1518bind_rx_fail:
1519        unbind_from_irqhandler(queue->tx_irq, queue);
1520        queue->tx_irq = 0;
1521bind_tx_fail:
1522        xenbus_free_evtchn(queue->info->xbdev, queue->rx_evtchn);
1523        queue->rx_evtchn = 0;
1524alloc_rx_evtchn_fail:
1525        xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1526        queue->tx_evtchn = 0;
1527fail:
1528        return err;
1529}
1530
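    /*
     * Set up the transport for one queue: allocate the shared Tx and Rx ring
     * pages, grant the backend access to them, and create the event
     * channel(s).  The resulting grant references and event channel ports
     * are advertised to the backend later via write_queue_xenstore_keys().
     */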
1531static int setup_netfront(struct xenbus_device *dev,
1532                        struct netfront_queue *queue, unsigned int feature_split_evtchn)
1533{
1534        struct xen_netif_tx_sring *txs;
1535        struct xen_netif_rx_sring *rxs;
1536        grant_ref_t gref;
1537        int err;
1538
1539        queue->tx_ring_ref = GRANT_INVALID_REF;
1540        queue->rx_ring_ref = GRANT_INVALID_REF;
1541        queue->rx.sring = NULL;
1542        queue->tx.sring = NULL;
1543
1544        txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1545        if (!txs) {
1546                err = -ENOMEM;
1547                xenbus_dev_fatal(dev, err, "allocating tx ring page");
1548                goto fail;
1549        }
1550        SHARED_RING_INIT(txs);
1551        FRONT_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
1552
1553        err = xenbus_grant_ring(dev, txs, 1, &gref);
1554        if (err < 0)
1555                goto grant_tx_ring_fail;
1556        queue->tx_ring_ref = gref;
1557
1558        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1559        if (!rxs) {
1560                err = -ENOMEM;
1561                xenbus_dev_fatal(dev, err, "allocating rx ring page");
1562                goto alloc_rx_ring_fail;
1563        }
1564        SHARED_RING_INIT(rxs);
1565        FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
1566
1567        err = xenbus_grant_ring(dev, rxs, 1, &gref);
1568        if (err < 0)
1569                goto grant_rx_ring_fail;
1570        queue->rx_ring_ref = gref;
1571
1572        if (feature_split_evtchn)
1573                err = setup_netfront_split(queue);
1574        /* Set up a single event channel if
1575         *  a) feature-split-event-channels == 0, or
1576         *  b) the split event channel setup failed.
1577         */
1578        if (!feature_split_evtchn || err)
1579                err = setup_netfront_single(queue);
1580
1581        if (err)
1582                goto alloc_evtchn_fail;
1583
1584        return 0;
1585
1586        /* If we fail to set up netfront, it is safe to just revoke access
1587         * to the granted pages because the backend is not accessing them yet.
1588         */
1589alloc_evtchn_fail:
1590        gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0);
1591grant_rx_ring_fail:
1592        free_page((unsigned long)rxs);
1593alloc_rx_ring_fail:
1594        gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0);
1595grant_tx_ring_fail:
1596        free_page((unsigned long)txs);
1597fail:
1598        return err;
1599}
1600
1601/* Queue-specific initialisation
1602 * This used to be done in xennet_create_dev() but must now
1603 * be run per-queue.
1604 */
1605static int xennet_init_queue(struct netfront_queue *queue)
1606{
1607        unsigned short i;
1608        int err = 0;
1609        char *devid;
1610
1611        spin_lock_init(&queue->tx_lock);
1612        spin_lock_init(&queue->rx_lock);
1613
1614        timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0);
1615
1616        devid = strrchr(queue->info->xbdev->nodename, '/') + 1;
1617        snprintf(queue->name, sizeof(queue->name), "vif%s-q%u",
1618                 devid, queue->id);
1619
1620        /* Initialise tx_skbs as a free chain containing every entry. */
1621        queue->tx_skb_freelist = 0;
1622        for (i = 0; i < NET_TX_RING_SIZE; i++) {
1623                skb_entry_set_link(&queue->tx_skbs[i], i+1);
1624                queue->grant_tx_ref[i] = GRANT_INVALID_REF;
1625                queue->grant_tx_page[i] = NULL;
1626        }
1627
1628        /* Clear out rx_skbs */
1629        for (i = 0; i < NET_RX_RING_SIZE; i++) {
1630                queue->rx_skbs[i] = NULL;
1631                queue->grant_rx_ref[i] = GRANT_INVALID_REF;
1632        }
1633
1634        /* A grant for every tx ring slot */
1635        if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
1636                                          &queue->gref_tx_head) < 0) {
1637                pr_alert("can't alloc tx grant refs\n");
1638                err = -ENOMEM;
1639                goto exit;
1640        }
1641
1642        /* A grant for every rx ring slot */
1643        if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
1644                                          &queue->gref_rx_head) < 0) {
1645                pr_alert("can't alloc rx grant refs\n");
1646                err = -ENOMEM;
1647                goto exit_free_tx;
1648        }
1649
1650        return 0;
1651
1652 exit_free_tx:
1653        gnttab_free_grant_references(queue->gref_tx_head);
1654 exit:
1655        return err;
1656}
1657
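    /*
     * The per-queue keys end up in one of two XenStore layouts (paths shown
     * relative to the frontend directory; values illustrative):
     *
     *   single queue (flat):   tx-ring-ref, rx-ring-ref,
     *                          event-channel (or event-channel-tx/-rx)
     *   multiple queues:       queue-0/tx-ring-ref, queue-0/rx-ring-ref, ...
     *                          queue-1/tx-ring-ref, queue-1/rx-ring-ref, ...
     */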
1658static int write_queue_xenstore_keys(struct netfront_queue *queue,
1659                           struct xenbus_transaction *xbt, int write_hierarchical)
1660{
1661        /* Write the queue-specific keys into XenStore: in the traditional
1662         * flat layout for a single queue, or under per-queue subkeys when
1663         * there are multiple queues.
1664         */
1665        struct xenbus_device *dev = queue->info->xbdev;
1666        int err;
1667        const char *message;
1668        char *path;
1669        size_t pathsize;
1670
1671        /* Choose the correct place to write the keys */
1672        if (write_hierarchical) {
1673                pathsize = strlen(dev->nodename) + 10;
1674                path = kzalloc(pathsize, GFP_KERNEL);
1675                if (!path) {
1676                        err = -ENOMEM;
1677                        message = "out of memory while writing ring references";
1678                        goto error;
1679                }
1680                snprintf(path, pathsize, "%s/queue-%u",
1681                                dev->nodename, queue->id);
1682        } else {
1683                path = (char *)dev->nodename;
1684        }
1685
1686        /* Write ring references */
1687        err = xenbus_printf(*xbt, path, "tx-ring-ref", "%u",
1688                        queue->tx_ring_ref);
1689        if (err) {
1690                message = "writing tx-ring-ref";
1691                goto error;
1692        }
1693
1694        err = xenbus_printf(*xbt, path, "rx-ring-ref", "%u",
1695                        queue->rx_ring_ref);
1696        if (err) {
1697                message = "writing rx-ring-ref";
1698                goto error;
1699        }
1700
1701        /* Write the event channels, covering both the shared and the
1702         * split event channel scenarios.
1703         */
1704        if (queue->tx_evtchn == queue->rx_evtchn) {
1705                /* Shared event channel */
1706                err = xenbus_printf(*xbt, path,
1707                                "event-channel", "%u", queue->tx_evtchn);
1708                if (err) {
1709                        message = "writing event-channel";
1710                        goto error;
1711                }
1712        } else {
1713                /* Split event channels */
1714                err = xenbus_printf(*xbt, path,
1715                                "event-channel-tx", "%u", queue->tx_evtchn);
1716                if (err) {
1717                        message = "writing event-channel-tx";
1718                        goto error;
1719                }
1720
1721                err = xenbus_printf(*xbt, path,
1722                                "event-channel-rx", "%u", queue->rx_evtchn);
1723                if (err) {
1724                        message = "writing event-channel-rx";
1725                        goto error;
1726                }
1727        }
1728
1729        if (write_hierarchical)
1730                kfree(path);
1731        return 0;
1732
1733error:
1734        if (write_hierarchical)
1735                kfree(path);
1736        xenbus_dev_fatal(dev, err, "%s", message);
1737        return err;
1738}
1739
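    /*
     * Drop the per-queue NAPI contexts and free the queue array.  Callers in
     * this driver take rtnl_lock() around this, as it runs alongside changes
     * to the netdev's queue configuration.
     */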
1740static void xennet_destroy_queues(struct netfront_info *info)
1741{
1742        unsigned int i;
1743
1744        for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
1745                struct netfront_queue *queue = &info->queues[i];
1746
1747                if (netif_running(info->netdev))
1748                        napi_disable(&queue->napi);
1749                netif_napi_del(&queue->napi);
1750        }
1751
1752        kfree(info->queues);
1753        info->queues = NULL;
1754}
1755
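    /*
     * Allocate and initialise *num_queues queues.  If a queue fails to
     * initialise part-way through, *num_queues is reduced to the number
     * actually created; it is an error only if none could be created.
     */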
1756static int xennet_create_queues(struct netfront_info *info,
1757                                unsigned int *num_queues)
1758{
1759        unsigned int i;
1760        int ret;
1761
1762        info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue),
1763                               GFP_KERNEL);
1764        if (!info->queues)
1765                return -ENOMEM;
1766
1767        for (i = 0; i < *num_queues; i++) {
1768                struct netfront_queue *queue = &info->queues[i];
1769
1770                queue->id = i;
1771                queue->info = info;
1772
1773                ret = xennet_init_queue(queue);
1774                if (ret < 0) {
1775                        dev_warn(&info->xbdev->dev,
1776                                 "only created %u queues\n", i);
1777                        *num_queues = i;
1778                        break;
1779                }
1780
1781                netif_napi_add(queue->info->netdev, &queue->napi,
1782                               xennet_poll, 64);
1783                if (netif_running(info->netdev))
1784                        napi_enable(&queue->napi);
1785        }
1786
1787        netif_set_real_num_tx_queues(info->netdev, *num_queues);
1788
1789        if (*num_queues == 0) {
1790                dev_err(&info->xbdev->dev, "no queues\n");
1791                return -EINVAL;
1792        }
1793        return 0;
1794}
1795
1796/* Common code used when first setting up, and when resuming. */
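    /*
     * Outline of the negotiation performed here:
     *   1. read the backend's limits and features from XenStore
     *      (multi-queue-max-queues, feature-split-event-channels) plus the
     *      MAC address;
     *   2. create the per-queue structures, shared rings and event channels;
     *   3. publish the ring references, event channels and frontend features
     *      in a single XenStore transaction, retrying on -EAGAIN.
     */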
1797static int talk_to_netback(struct xenbus_device *dev,
1798                           struct netfront_info *info)
1799{
1800        const char *message;
1801        struct xenbus_transaction xbt;
1802        int err;
1803        unsigned int feature_split_evtchn;
1804        unsigned int i = 0;
1805        unsigned int max_queues = 0;
1806        struct netfront_queue *queue = NULL;
1807        unsigned int num_queues = 1;
1808
1809        info->netdev->irq = 0;
1810
1811        /* Check if backend supports multiple queues */
1812        max_queues = xenbus_read_unsigned(info->xbdev->otherend,
1813                                          "multi-queue-max-queues", 1);
1814        num_queues = min(max_queues, xennet_max_queues);
1815
1816        /* Check feature-split-event-channels */
1817        feature_split_evtchn = xenbus_read_unsigned(info->xbdev->otherend,
1818                                        "feature-split-event-channels", 0);
1819
1820        /* Read mac addr. */
1821        err = xen_net_read_mac(dev, info->netdev->dev_addr);
1822        if (err) {
1823                xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
1824                goto out_unlocked;
1825        }
1826
1827        rtnl_lock();
1828        if (info->queues)
1829                xennet_destroy_queues(info);
1830
1831        err = xennet_create_queues(info, &num_queues);
1832        if (err < 0) {
1833                xenbus_dev_fatal(dev, err, "creating queues");
1834                kfree(info->queues);
1835                info->queues = NULL;
1836                goto out;
1837        }
1838        rtnl_unlock();
1839
1840        /* Create shared ring, alloc event channel -- for each queue */
1841        for (i = 0; i < num_queues; ++i) {
1842                queue = &info->queues[i];
1843                err = setup_netfront(dev, queue, feature_split_evtchn);
1844                if (err)
1845                        goto destroy_ring;
1846        }
1847
1848again:
1849        err = xenbus_transaction_start(&xbt);
1850        if (err) {
1851                xenbus_dev_fatal(dev, err, "starting transaction");
1852                goto destroy_ring;
1853        }
1854
1855        if (xenbus_exists(XBT_NIL,
1856                          info->xbdev->otherend, "multi-queue-max-queues")) {
1857                /* Write the number of queues */
1858                err = xenbus_printf(xbt, dev->nodename,
1859                                    "multi-queue-num-queues", "%u", num_queues);
1860                if (err) {
1861                        message = "writing multi-queue-num-queues";
1862                        goto abort_transaction_no_dev_fatal;
1863                }
1864        }
1865
1866        if (num_queues == 1) {
1867                err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0); /* flat */
1868                if (err)
1869                        goto abort_transaction_no_dev_fatal;
1870        } else {
1871                /* Write the keys for each queue */
1872                for (i = 0; i < num_queues; ++i) {
1873                        queue = &info->queues[i];
1874                        err = write_queue_xenstore_keys(queue, &xbt, 1); /* hierarchical */
1875                        if (err)
1876                                goto abort_transaction_no_dev_fatal;
1877                }
1878        }
1879
1880        /* The remaining keys are not queue-specific */
1881        err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
1882                            1);
1883        if (err) {
1884                message = "writing request-rx-copy";
1885                goto abort_transaction;
1886        }
1887
1888        err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
1889        if (err) {
1890                message = "writing feature-rx-notify";
1891                goto abort_transaction;
1892        }
1893
1894        err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
1895        if (err) {
1896                message = "writing feature-sg";
1897                goto abort_transaction;
1898        }
1899
1900        err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
1901        if (err) {
1902                message = "writing feature-gso-tcpv4";
1903                goto abort_transaction;
1904        }
1905
1906        err = xenbus_write(xbt, dev->nodename, "feature-gso-tcpv6", "1");
1907        if (err) {
1908                message = "writing feature-gso-tcpv6";
1909                goto abort_transaction;
1910        }
1911
1912        err = xenbus_write(xbt, dev->nodename, "feature-ipv6-csum-offload",
1913                           "1");
1914        if (err) {
1915                message = "writing feature-ipv6-csum-offload";
1916                goto abort_transaction;
1917        }
1918
1919        err = xenbus_transaction_end(xbt, 0);
1920        if (err) {
1921                if (err == -EAGAIN)
1922                        goto again;
1923                xenbus_dev_fatal(dev, err, "completing transaction");
1924                goto destroy_ring;
1925        }
1926
1927        return 0;
1928
1929 abort_transaction:
1930        xenbus_dev_fatal(dev, err, "%s", message);
1931abort_transaction_no_dev_fatal:
1932        xenbus_transaction_end(xbt, 1);
1933 destroy_ring:
1934        xennet_disconnect_backend(info);
1935        rtnl_lock();
1936        xennet_destroy_queues(info);
1937 out:
1938        rtnl_unlock();
1939out_unlocked:
1940        device_unregister(&dev->dev);
1941        return err;
1942}
1943
1944static int xennet_connect(struct net_device *dev)
1945{
1946        struct netfront_info *np = netdev_priv(dev);
1947        unsigned int num_queues = 0;
1948        int err;
1949        unsigned int j = 0;
1950        struct netfront_queue *queue = NULL;
1951
1952        if (!xenbus_read_unsigned(np->xbdev->otherend, "feature-rx-copy", 0)) {
1953                dev_info(&dev->dev,
1954                         "backend does not support copying receive path\n");
1955                return -ENODEV;
1956        }
1957
1958        err = talk_to_netback(np->xbdev, np);
1959        if (err)
1960                return err;
1961
1962        /* talk_to_netback() sets the correct number of queues */
1963        num_queues = dev->real_num_tx_queues;
1964
1965        if (dev->reg_state == NETREG_UNINITIALIZED) {
1966                err = register_netdev(dev);
1967                if (err) {
1968                        pr_warn("%s: register_netdev err=%d\n", __func__, err);
1969                        device_unregister(&np->xbdev->dev);
1970                        return err;
1971                }
1972        }
1973
1974        rtnl_lock();
1975        netdev_update_features(dev);
1976        rtnl_unlock();
1977
1978        /*
1979         * All public and private state should now be sane.  Get
1980         * ready to start sending and receiving packets and give the driver
1981         * domain a kick because we've probably just requeued some
1982         * packets.
1983         */
1984        netif_carrier_on(np->netdev);
1985        for (j = 0; j < num_queues; ++j) {
1986                queue = &np->queues[j];
1987
1988                notify_remote_via_irq(queue->tx_irq);
1989                if (queue->tx_irq != queue->rx_irq)
1990                        notify_remote_via_irq(queue->rx_irq);
1991
1992                spin_lock_irq(&queue->tx_lock);
1993                xennet_tx_buf_gc(queue);
1994                spin_unlock_irq(&queue->tx_lock);
1995
1996                spin_lock_bh(&queue->rx_lock);
1997                xennet_alloc_rx_buffers(queue);
1998                spin_unlock_bh(&queue->rx_lock);
1999        }
2000
2001        return 0;
2002}
2003
2004/**
2005 * netback_changed - callback received when the backend's state changes
2006 */
2007static void netback_changed(struct xenbus_device *dev,
2008                            enum xenbus_state backend_state)
2009{
2010        struct netfront_info *np = dev_get_drvdata(&dev->dev);
2011        struct net_device *netdev = np->netdev;
2012
2013        dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
2014
2015        wake_up_all(&module_wq);
2016
2017        switch (backend_state) {
2018        case XenbusStateInitialising:
2019        case XenbusStateInitialised:
2020        case XenbusStateReconfiguring:
2021        case XenbusStateReconfigured:
2022        case XenbusStateUnknown:
2023                break;
2024
2025        case XenbusStateInitWait:
2026                if (dev->state != XenbusStateInitialising)
2027                        break;
2028                if (xennet_connect(netdev) != 0)
2029                        break;
2030                xenbus_switch_state(dev, XenbusStateConnected);
2031                break;
2032
2033        case XenbusStateConnected:
2034                netdev_notify_peers(netdev);
2035                break;
2036
2037        case XenbusStateClosed:
2038                if (dev->state == XenbusStateClosed)
2039                        break;
2040                /* Fall through - Missed the backend's CLOSING state. */
2041        case XenbusStateClosing:
2042                xenbus_frontend_closed(dev);
2043                break;
2044        }
2045}
2046
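    /*
     * ethtool statistics: each entry records the byte offset of an atomic_t
     * counter within struct netfront_info; xennet_get_ethtool_stats() reads
     * the counters through those offsets.
     */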
2047static const struct xennet_stat {
2048        char name[ETH_GSTRING_LEN];
2049        u16 offset;
2050} xennet_stats[] = {
2051        {
2052                "rx_gso_checksum_fixup",
2053                offsetof(struct netfront_info, rx_gso_checksum_fixup)
2054        },
2055};
2056
2057static int xennet_get_sset_count(struct net_device *dev, int string_set)
2058{
2059        switch (string_set) {
2060        case ETH_SS_STATS:
2061                return ARRAY_SIZE(xennet_stats);
2062        default:
2063                return -EINVAL;
2064        }
2065}
2066
2067static void xennet_get_ethtool_stats(struct net_device *dev,
2068                                     struct ethtool_stats *stats, u64 *data)
2069{
2070        void *np = netdev_priv(dev);
2071        int i;
2072
2073        for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2074                data[i] = atomic_read((atomic_t *)(np + xennet_stats[i].offset));
2075}
2076
2077static void xennet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
2078{
2079        int i;
2080
2081        switch (stringset) {
2082        case ETH_SS_STATS:
2083                for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2084                        memcpy(data + i * ETH_GSTRING_LEN,
2085                               xennet_stats[i].name, ETH_GSTRING_LEN);
2086                break;
2087        }
2088}
2089
2090static const struct ethtool_ops xennet_ethtool_ops =
2091{
2092        .get_link = ethtool_op_get_link,
2093
2094        .get_sset_count = xennet_get_sset_count,
2095        .get_ethtool_stats = xennet_get_ethtool_stats,
2096        .get_strings = xennet_get_strings,
2097};
2098
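    /*
     * Legacy sysfs attributes: rxbuf_min, rxbuf_max and rxbuf_cur are
     * retained so existing tooling keeps working.  The receive ring size is
     * now fixed at NET_RX_RING_SIZE, so all three report that value and
     * writes are accepted but ignored.
     */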
2099#ifdef CONFIG_SYSFS
2100static ssize_t show_rxbuf(struct device *dev,
2101                          struct device_attribute *attr, char *buf)
2102{
2103        return sprintf(buf, "%lu\n", NET_RX_RING_SIZE);
2104}
2105
2106static ssize_t store_rxbuf(struct device *dev,
2107                           struct device_attribute *attr,
2108                           const char *buf, size_t len)
2109{
2110        char *endp;
2111        unsigned long target;
2112
2113        if (!capable(CAP_NET_ADMIN))
2114                return -EPERM;
2115
2116        target = simple_strtoul(buf, &endp, 0);
2117        if (endp == buf)
2118                return -EBADMSG;
2119
2120        /* rxbuf_min and rxbuf_max are no longer configurable. */
2121
2122        return len;
2123}
2124
2125static DEVICE_ATTR(rxbuf_min, 0644, show_rxbuf, store_rxbuf);
2126static DEVICE_ATTR(rxbuf_max, 0644, show_rxbuf, store_rxbuf);
2127static DEVICE_ATTR(rxbuf_cur, 0444, show_rxbuf, NULL);
2128
2129static struct attribute *xennet_dev_attrs[] = {
2130        &dev_attr_rxbuf_min.attr,
2131        &dev_attr_rxbuf_max.attr,
2132        &dev_attr_rxbuf_cur.attr,
2133        NULL
2134};
2135
2136static const struct attribute_group xennet_dev_group = {
2137        .attrs = xennet_dev_attrs
2138};
2139#endif /* CONFIG_SYSFS */
2140
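    /*
     * Device teardown: walk the backend through Closing and Closed (waiting
     * on module_wq for each transition, or for the backend to disappear
     * entirely), then disconnect the rings, unregister the netdev and free
     * the queues.
     */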
2141static int xennet_remove(struct xenbus_device *dev)
2142{
2143        struct netfront_info *info = dev_get_drvdata(&dev->dev);
2144
2145        dev_dbg(&dev->dev, "%s\n", dev->nodename);
2146
2147        if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) {
2148                xenbus_switch_state(dev, XenbusStateClosing);
2149                wait_event(module_wq,
2150                           xenbus_read_driver_state(dev->otherend) ==
2151                           XenbusStateClosing ||
2152                           xenbus_read_driver_state(dev->otherend) ==
2153                           XenbusStateUnknown);
2154
2155                xenbus_switch_state(dev, XenbusStateClosed);
2156                wait_event(module_wq,
2157                           xenbus_read_driver_state(dev->otherend) ==
2158                           XenbusStateClosed ||
2159                           xenbus_read_driver_state(dev->otherend) ==
2160                           XenbusStateUnknown);
2161        }
2162
2163        xennet_disconnect_backend(info);
2164
2165        if (info->netdev->reg_state == NETREG_REGISTERED)
2166                unregister_netdev(info->netdev);
2167
2168        if (info->queues) {
2169                rtnl_lock();
2170                xennet_destroy_queues(info);
2171                rtnl_unlock();
2172        }
2173        xennet_free_netdev(info->netdev);
2174
2175        return 0;
2176}
2177
2178static const struct xenbus_device_id netfront_ids[] = {
2179        { "vif" },
2180        { "" }
2181};
2182
2183static struct xenbus_driver netfront_driver = {
2184        .ids = netfront_ids,
2185        .probe = netfront_probe,
2186        .remove = xennet_remove,
2187        .resume = netfront_resume,
2188        .otherend_changed = netback_changed,
2189};
2190
2191static int __init netif_init(void)
2192{
2193        if (!xen_domain())
2194                return -ENODEV;
2195
2196        if (!xen_has_pv_nic_devices())
2197                return -ENODEV;
2198
2199        pr_info("Initialising Xen virtual ethernet driver\n");
2200
2201        /* If the user has not specified a value, allow as many queues as
2202         * there are CPUs, capped at MAX_QUEUES_DEFAULT (8).
2203         */
2204        if (xennet_max_queues == 0)
2205                xennet_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
2206                                          num_online_cpus());
2207
2208        return xenbus_register_frontend(&netfront_driver);
2209}
2210module_init(netif_init);
2211
2212
2213static void __exit netif_exit(void)
2214{
2215        xenbus_unregister_driver(&netfront_driver);
2216}
2217module_exit(netif_exit);
2218
2219MODULE_DESCRIPTION("Xen virtual network device frontend");
2220MODULE_LICENSE("GPL");
2221MODULE_ALIAS("xen:vif");
2222MODULE_ALIAS("xennet");
2223