linux/drivers/net/xen-netfront.c
   1/*
   2 * Virtual network driver for conversing with remote driver backends.
   3 *
   4 * Copyright (c) 2002-2005, K A Fraser
   5 * Copyright (c) 2005, XenSource Ltd
   6 *
   7 * This program is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU General Public License version 2
   9 * as published by the Free Software Foundation; or, when distributed
  10 * separately from the Linux kernel or incorporated into other
  11 * software packages, subject to the following license:
  12 *
  13 * Permission is hereby granted, free of charge, to any person obtaining a copy
  14 * of this source file (the "Software"), to deal in the Software without
  15 * restriction, including without limitation the rights to use, copy, modify,
  16 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  17 * and to permit persons to whom the Software is furnished to do so, subject to
  18 * the following conditions:
  19 *
  20 * The above copyright notice and this permission notice shall be included in
  21 * all copies or substantial portions of the Software.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  29 * IN THE SOFTWARE.
  30 */
  31
  32#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  33
  34#include <linux/module.h>
  35#include <linux/kernel.h>
  36#include <linux/netdevice.h>
  37#include <linux/etherdevice.h>
  38#include <linux/skbuff.h>
  39#include <linux/ethtool.h>
  40#include <linux/if_ether.h>
  41#include <net/tcp.h>
  42#include <linux/udp.h>
  43#include <linux/moduleparam.h>
  44#include <linux/mm.h>
  45#include <linux/slab.h>
  46#include <net/ip.h>
  47
  48#include <asm/xen/page.h>
  49#include <xen/xen.h>
  50#include <xen/xenbus.h>
  51#include <xen/events.h>
  52#include <xen/page.h>
  53#include <xen/platform_pci.h>
  54#include <xen/grant_table.h>
  55
  56#include <xen/interface/io/netif.h>
  57#include <xen/interface/memory.h>
  58#include <xen/interface/grant_table.h>
  59
  60/* Module parameters */
  61#define MAX_QUEUES_DEFAULT 8
  62static unsigned int xennet_max_queues;
  63module_param_named(max_queues, xennet_max_queues, uint, 0644);
  64MODULE_PARM_DESC(max_queues,
  65                 "Maximum number of queues per virtual interface");
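/*
 * Example (sketch; assumes the driver is built and loaded as a module):
 *   modprobe xen-netfront max_queues=4
 * The 0644 permission above also exposes the value under
 * /sys/module/xen_netfront/parameters/max_queues.
 */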
  66
  67static const struct ethtool_ops xennet_ethtool_ops;
  68
  69struct netfront_cb {
  70        int pull_to;
  71};
  72
  73#define NETFRONT_SKB_CB(skb)    ((struct netfront_cb *)((skb)->cb))
  74
  75#define RX_COPY_THRESHOLD 256
  76
  77#define GRANT_INVALID_REF       0
  78
  79#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
  80#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
  81
  82/* Minimum number of Rx slots (includes slot for GSO metadata). */
  83#define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1)
  84
  85/* Queue name is interface name with "-qNNN" appended */
  86#define QUEUE_NAME_SIZE (IFNAMSIZ + 6)
  87
  88/* IRQ name is queue name with "-tx" or "-rx" appended */
  89#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
  90
  91static DECLARE_WAIT_QUEUE_HEAD(module_wq);
  92
  93struct netfront_stats {
  94        u64                     packets;
  95        u64                     bytes;
  96        struct u64_stats_sync   syncp;
  97};
  98
  99struct netfront_info;
 100
 101struct netfront_queue {
 102        unsigned int id; /* Queue ID, 0-based */
 103        char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
 104        struct netfront_info *info;
 105
 106        struct napi_struct napi;
 107
 108        /* Split event channel support; tx_* == rx_* when a single
 109         * event channel is used.
 110         */
 111        unsigned int tx_evtchn, rx_evtchn;
 112        unsigned int tx_irq, rx_irq;
 113        /* Only used when split event channels support is enabled */
 114        char tx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-tx */
 115        char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */
 116
 117        spinlock_t   tx_lock;
 118        struct xen_netif_tx_front_ring tx;
 119        int tx_ring_ref;
 120
 121        /*
 122         * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
 123         * are linked from tx_skb_freelist through skb_entry.link.
 124         *
 125         *  NB. Freelist index entries are always going to be less than
 126         *  PAGE_OFFSET, whereas pointers to skbs will always be equal to
 127         *  or greater than PAGE_OFFSET: we use this property to distinguish
 128         *  them.
 129         */
 130        union skb_entry {
 131                struct sk_buff *skb;
 132                unsigned long link;
 133        } tx_skbs[NET_TX_RING_SIZE];
 134        grant_ref_t gref_tx_head;
 135        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
 136        struct page *grant_tx_page[NET_TX_RING_SIZE];
 137        unsigned tx_skb_freelist;
 138
 139        spinlock_t   rx_lock ____cacheline_aligned_in_smp;
 140        struct xen_netif_rx_front_ring rx;
 141        int rx_ring_ref;
 142
 143        struct timer_list rx_refill_timer;
 144
 145        struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
 146        grant_ref_t gref_rx_head;
 147        grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
 148};
 149
 150struct netfront_info {
 151        struct list_head list;
 152        struct net_device *netdev;
 153
 154        struct xenbus_device *xbdev;
 155
 156        /* Multi-queue support */
 157        struct netfront_queue *queues;
 158
 159        /* Statistics */
 160        struct netfront_stats __percpu *rx_stats;
 161        struct netfront_stats __percpu *tx_stats;
 162
 163        atomic_t rx_gso_checksum_fixup;
 164};
 165
 166struct netfront_rx_info {
 167        struct xen_netif_rx_response rx;
 168        struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
 169};
 170
 171static void skb_entry_set_link(union skb_entry *list, unsigned short id)
 172{
 173        list->link = id;
 174}
 175
 176static int skb_entry_is_link(const union skb_entry *list)
 177{
 178        BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
 179        return (unsigned long)list->skb < PAGE_OFFSET;
 180}
 181
 182/*
 183 * Helpers for acquiring and freeing slots in tx_skbs[].
 184 */
 185
 186static void add_id_to_freelist(unsigned *head, union skb_entry *list,
 187                               unsigned short id)
 188{
 189        skb_entry_set_link(&list[id], *head);
 190        *head = id;
 191}
 192
 193static unsigned short get_id_from_freelist(unsigned *head,
 194                                           union skb_entry *list)
 195{
 196        unsigned int id = *head;
 197        *head = list[id].link;
 198        return id;
 199}
 200
 201static int xennet_rxidx(RING_IDX idx)
 202{
 203        return idx & (NET_RX_RING_SIZE - 1);
 204}
 205
 206static struct sk_buff *xennet_get_rx_skb(struct netfront_queue *queue,
 207                                         RING_IDX ri)
 208{
 209        int i = xennet_rxidx(ri);
 210        struct sk_buff *skb = queue->rx_skbs[i];
 211        queue->rx_skbs[i] = NULL;
 212        return skb;
 213}
 214
 215static grant_ref_t xennet_get_rx_ref(struct netfront_queue *queue,
 216                                            RING_IDX ri)
 217{
 218        int i = xennet_rxidx(ri);
 219        grant_ref_t ref = queue->grant_rx_ref[i];
 220        queue->grant_rx_ref[i] = GRANT_INVALID_REF;
 221        return ref;
 222}
 223
 224#ifdef CONFIG_SYSFS
 225static const struct attribute_group xennet_dev_group;
 226#endif
 227
 228static bool xennet_can_sg(struct net_device *dev)
 229{
 230        return dev->features & NETIF_F_SG;
 231}
 232
 233
 234static void rx_refill_timeout(unsigned long data)
 235{
 236        struct netfront_queue *queue = (struct netfront_queue *)data;
 237        napi_schedule(&queue->napi);
 238}
 239
 240static int netfront_tx_slot_available(struct netfront_queue *queue)
 241{
 242        return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) <
 243                (NET_TX_RING_SIZE - MAX_SKB_FRAGS - 2);
 244}
 245
 246static void xennet_maybe_wake_tx(struct netfront_queue *queue)
 247{
 248        struct net_device *dev = queue->info->netdev;
 249        struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, queue->id);
 250
 251        if (unlikely(netif_tx_queue_stopped(dev_queue)) &&
 252            netfront_tx_slot_available(queue) &&
 253            likely(netif_running(dev)))
 254                netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
 255}
 256
 257
 258static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
 259{
 260        struct sk_buff *skb;
 261        struct page *page;
 262
 263        skb = __netdev_alloc_skb(queue->info->netdev,
 264                                 RX_COPY_THRESHOLD + NET_IP_ALIGN,
 265                                 GFP_ATOMIC | __GFP_NOWARN);
 266        if (unlikely(!skb))
 267                return NULL;
 268
 269        page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 270        if (!page) {
 271                kfree_skb(skb);
 272                return NULL;
 273        }
 274        skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
 275
 276        /* Align the IP header to a 16-byte boundary */
 277        skb_reserve(skb, NET_IP_ALIGN);
 278        skb->dev = queue->info->netdev;
 279
 280        return skb;
 281}
 282
 283
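/*
 * Refill the RX ring: allocate skb+page buffers, grant the backend access to
 * each page, and notify it once enough request slots have been posted. If
 * allocation fails or too few slots are available, retry via rx_refill_timer.
 */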
 284static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
 285{
 286        RING_IDX req_prod = queue->rx.req_prod_pvt;
 287        int notify;
 288        int err = 0;
 289
 290        if (unlikely(!netif_carrier_ok(queue->info->netdev)))
 291                return;
 292
 293        for (req_prod = queue->rx.req_prod_pvt;
 294             req_prod - queue->rx.rsp_cons < NET_RX_RING_SIZE;
 295             req_prod++) {
 296                struct sk_buff *skb;
 297                unsigned short id;
 298                grant_ref_t ref;
 299                unsigned long pfn;
 300                struct xen_netif_rx_request *req;
 301
 302                skb = xennet_alloc_one_rx_buffer(queue);
 303                if (!skb) {
 304                        err = -ENOMEM;
 305                        break;
 306                }
 307
 308                id = xennet_rxidx(req_prod);
 309
 310                BUG_ON(queue->rx_skbs[id]);
 311                queue->rx_skbs[id] = skb;
 312
 313                ref = gnttab_claim_grant_reference(&queue->gref_rx_head);
 314                WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
 315                queue->grant_rx_ref[id] = ref;
 316
 317                pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));
 318
 319                req = RING_GET_REQUEST(&queue->rx, req_prod);
 320                gnttab_grant_foreign_access_ref(ref,
 321                                                queue->info->xbdev->otherend_id,
 322                                                pfn_to_mfn(pfn),
 323                                                0);
 324
 325                req->id = id;
 326                req->gref = ref;
 327        }
 328
 329        queue->rx.req_prod_pvt = req_prod;
 330
 331        /* Try again later if there are not enough requests or skb allocation
 332         * failed.
 333         * "Enough requests" means the sum of the newly created slots and
 334         * the slots still unconsumed by the backend.
 335         */
 336        if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN ||
 337            unlikely(err)) {
 338                mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10));
 339                return;
 340        }
 341
 342        wmb();          /* barrier so the backend sees the requests */
 343
 344        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->rx, notify);
 345        if (notify)
 346                notify_remote_via_irq(queue->rx_irq);
 347}
 348
 349static int xennet_open(struct net_device *dev)
 350{
 351        struct netfront_info *np = netdev_priv(dev);
 352        unsigned int num_queues = dev->real_num_tx_queues;
 353        unsigned int i = 0;
 354        struct netfront_queue *queue = NULL;
 355
 356        if (!np->queues)
 357                return -ENODEV;
 358
 359        for (i = 0; i < num_queues; ++i) {
 360                queue = &np->queues[i];
 361                napi_enable(&queue->napi);
 362
 363                spin_lock_bh(&queue->rx_lock);
 364                if (netif_carrier_ok(dev)) {
 365                        xennet_alloc_rx_buffers(queue);
 366                        queue->rx.sring->rsp_event = queue->rx.rsp_cons + 1;
 367                        if (RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))
 368                                napi_schedule(&queue->napi);
 369                }
 370                spin_unlock_bh(&queue->rx_lock);
 371        }
 372
 373        netif_tx_start_all_queues(dev);
 374
 375        return 0;
 376}
 377
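/*
 * Reclaim TX slots the backend has responded to: end and release each grant,
 * return the slot to the freelist, free the skb, and possibly wake a stopped
 * TX queue. Called with tx_lock held.
 */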
 378static void xennet_tx_buf_gc(struct netfront_queue *queue)
 379{
 380        RING_IDX cons, prod;
 381        unsigned short id;
 382        struct sk_buff *skb;
 383        bool more_to_do;
 384
 385        BUG_ON(!netif_carrier_ok(queue->info->netdev));
 386
 387        do {
 388                prod = queue->tx.sring->rsp_prod;
 389                rmb(); /* Ensure we see responses up to 'prod'. */
 390
 391                for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
 392                        struct xen_netif_tx_response *txrsp;
 393
 394                        txrsp = RING_GET_RESPONSE(&queue->tx, cons);
 395                        if (txrsp->status == XEN_NETIF_RSP_NULL)
 396                                continue;
 397
 398                        id  = txrsp->id;
 399                        skb = queue->tx_skbs[id].skb;
 400                        if (unlikely(gnttab_query_foreign_access(
 401                                queue->grant_tx_ref[id]) != 0)) {
 402                                pr_alert("%s: warning -- grant still in use by backend domain\n",
 403                                         __func__);
 404                                BUG();
 405                        }
 406                        gnttab_end_foreign_access_ref(
 407                                queue->grant_tx_ref[id], GNTMAP_readonly);
 408                        gnttab_release_grant_reference(
 409                                &queue->gref_tx_head, queue->grant_tx_ref[id]);
 410                        queue->grant_tx_ref[id] = GRANT_INVALID_REF;
 411                        queue->grant_tx_page[id] = NULL;
 412                        add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id);
 413                        dev_kfree_skb_irq(skb);
 414                }
 415
 416                queue->tx.rsp_cons = prod;
 417
 418                RING_FINAL_CHECK_FOR_RESPONSES(&queue->tx, more_to_do);
 419        } while (more_to_do);
 420
 421        xennet_maybe_wake_tx(queue);
 422}
 423
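/*
 * Build a single TX request covering at most the part of the buffer that fits
 * in this page, granting the backend read-only access to the page.
 */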
 424static struct xen_netif_tx_request *xennet_make_one_txreq(
 425        struct netfront_queue *queue, struct sk_buff *skb,
 426        struct page *page, unsigned int offset, unsigned int len)
 427{
 428        unsigned int id;
 429        struct xen_netif_tx_request *tx;
 430        grant_ref_t ref;
 431
 432        len = min_t(unsigned int, PAGE_SIZE - offset, len);
 433
 434        id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
 435        tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
 436        ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
 437        WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
 438
 439        gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
 440                                        page_to_mfn(page), GNTMAP_readonly);
 441
 442        queue->tx_skbs[id].skb = skb;
 443        queue->grant_tx_page[id] = page;
 444        queue->grant_tx_ref[id] = ref;
 445
 446        tx->id = id;
 447        tx->gref = ref;
 448        tx->offset = offset;
 449        tx->size = len;
 450        tx->flags = 0;
 451
 452        return tx;
 453}
 454
 455static struct xen_netif_tx_request *xennet_make_txreqs(
 456        struct netfront_queue *queue, struct xen_netif_tx_request *tx,
 457        struct sk_buff *skb, struct page *page,
 458        unsigned int offset, unsigned int len)
 459{
 460        /* Skip over whole pages covered by the initial offset */
 461        page += offset >> PAGE_SHIFT;
 462        offset &= ~PAGE_MASK;
 463
 464        while (len) {
 465                tx->flags |= XEN_NETTXF_more_data;
 466                tx = xennet_make_one_txreq(queue, skb_get(skb),
 467                                           page, offset, len);
 468                page++;
 469                offset = 0;
 470                len -= tx->size;
 471        }
 472
 473        return tx;
 474}
 475
 476/*
 477 * Count how many ring slots are required to send this skb. Each frag
 478 * might be a compound page.
 479 */
 480static int xennet_count_skb_slots(struct sk_buff *skb)
 481{
 482        int i, frags = skb_shinfo(skb)->nr_frags;
 483        int pages;
 484
 485        pages = PFN_UP(offset_in_page(skb->data) + skb_headlen(skb));
 486
 487        for (i = 0; i < frags; i++) {
 488                skb_frag_t *frag = skb_shinfo(skb)->frags + i;
 489                unsigned long size = skb_frag_size(frag);
 490                unsigned long offset = frag->page_offset;
 491
 492                /* Only the in-page part of the offset matters for slot counting */
 493                offset &= ~PAGE_MASK;
 494
 495                pages += PFN_UP(offset + size);
 496        }
 497
 498        return pages;
 499}
 500
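/* Spread flows across queues using the skb hash; a single queue maps to 0. */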
 501static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
 502                               void *accel_priv, select_queue_fallback_t fallback)
 503{
 504        unsigned int num_queues = dev->real_num_tx_queues;
 505        u32 hash;
 506        u16 queue_idx;
 507
 508        /* First, check if there is only one queue */
 509        if (num_queues == 1) {
 510                queue_idx = 0;
 511        } else {
 512                hash = skb_get_hash(skb);
 513                queue_idx = hash % num_queues;
 514        }
 515
 516        return queue_idx;
 517}
 518
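/*
 * Transmit path: translate the skb's linear area and frags into TX ring
 * requests (plus an extra slot for GSO metadata when needed) and kick the
 * backend.
 */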
 519static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 520{
 521        struct netfront_info *np = netdev_priv(dev);
 522        struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
 523        struct xen_netif_tx_request *tx, *first_tx;
 524        unsigned int i;
 525        int notify;
 526        int slots;
 527        struct page *page;
 528        unsigned int offset;
 529        unsigned int len;
 530        unsigned long flags;
 531        struct netfront_queue *queue = NULL;
 532        unsigned int num_queues = dev->real_num_tx_queues;
 533        u16 queue_index;
 534        struct sk_buff *nskb;
 535
 536        /* Drop the packet if no queues are set up */
 537        if (num_queues < 1)
 538                goto drop;
 539        /* Determine which queue to transmit this SKB on */
 540        queue_index = skb_get_queue_mapping(skb);
 541        queue = &np->queues[queue_index];
 542
 543        /* If skb->len is too big for wire format, drop skb and alert
 544         * user about misconfiguration.
 545         */
 546        if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) {
 547                net_alert_ratelimited(
 548                        "xennet: skb->len = %u, too big for wire format\n",
 549                        skb->len);
 550                goto drop;
 551        }
 552
 553        slots = xennet_count_skb_slots(skb);
 554        if (unlikely(slots > MAX_SKB_FRAGS + 1)) {
 555                net_dbg_ratelimited("xennet: skb rides the rocket: %d slots, %d bytes\n",
 556                                    slots, skb->len);
 557                if (skb_linearize(skb))
 558                        goto drop;
 559        }
 560
 561        page = virt_to_page(skb->data);
 562        offset = offset_in_page(skb->data);
 563
 564        /* The first req should be at least ETH_HLEN size or the packet will be
 565         * dropped by netback.
 566         */
 567        if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
 568                nskb = skb_copy(skb, GFP_ATOMIC);
 569                if (!nskb)
 570                        goto drop;
 571                dev_kfree_skb_any(skb);
 572                skb = nskb;
 573                page = virt_to_page(skb->data);
 574                offset = offset_in_page(skb->data);
 575        }
 576
 577        len = skb_headlen(skb);
 578
 579        spin_lock_irqsave(&queue->tx_lock, flags);
 580
 581        if (unlikely(!netif_carrier_ok(dev) ||
 582                     (slots > 1 && !xennet_can_sg(dev)) ||
 583                     netif_needs_gso(skb, netif_skb_features(skb)))) {
 584                spin_unlock_irqrestore(&queue->tx_lock, flags);
 585                goto drop;
 586        }
 587
 588        /* First request for the linear area. */
 589        first_tx = tx = xennet_make_one_txreq(queue, skb,
 590                                              page, offset, len);
 591        page++;
 592        offset = 0;
 593        len -= tx->size;
 594
 595        if (skb->ip_summed == CHECKSUM_PARTIAL)
 596                /* local packet? */
 597                tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
 598        else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
 599                /* remote but checksummed. */
 600                tx->flags |= XEN_NETTXF_data_validated;
 601
 602        /* Optional extra info after the first request. */
 603        if (skb_shinfo(skb)->gso_size) {
 604                struct xen_netif_extra_info *gso;
 605
 606                gso = (struct xen_netif_extra_info *)
 607                        RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
 608
 609                tx->flags |= XEN_NETTXF_extra_info;
 610
 611                gso->u.gso.size = skb_shinfo(skb)->gso_size;
 612                gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
 613                        XEN_NETIF_GSO_TYPE_TCPV6 :
 614                        XEN_NETIF_GSO_TYPE_TCPV4;
 615                gso->u.gso.pad = 0;
 616                gso->u.gso.features = 0;
 617
 618                gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
 619                gso->flags = 0;
 620        }
 621
 622        /* Requests for the rest of the linear area. */
 623        tx = xennet_make_txreqs(queue, tx, skb, page, offset, len);
 624
 625        /* Requests for all the frags. */
 626        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 627                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 628                tx = xennet_make_txreqs(queue, tx, skb,
 629                                        skb_frag_page(frag), frag->page_offset,
 630                                        skb_frag_size(frag));
 631        }
 632
 633        /* First request has the packet length. */
 634        first_tx->size = skb->len;
 635
 636        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
 637        if (notify)
 638                notify_remote_via_irq(queue->tx_irq);
 639
 640        u64_stats_update_begin(&tx_stats->syncp);
 641        tx_stats->bytes += skb->len;
 642        tx_stats->packets++;
 643        u64_stats_update_end(&tx_stats->syncp);
 644
 645        /* Note: It is not safe to access skb after xennet_tx_buf_gc()! */
 646        xennet_tx_buf_gc(queue);
 647
 648        if (!netfront_tx_slot_available(queue))
 649                netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
 650
 651        spin_unlock_irqrestore(&queue->tx_lock, flags);
 652
 653        return NETDEV_TX_OK;
 654
 655 drop:
 656        dev->stats.tx_dropped++;
 657        dev_kfree_skb_any(skb);
 658        return NETDEV_TX_OK;
 659}
 660
 661static int xennet_close(struct net_device *dev)
 662{
 663        struct netfront_info *np = netdev_priv(dev);
 664        unsigned int num_queues = dev->real_num_tx_queues;
 665        unsigned int i;
 666        struct netfront_queue *queue;
 667        netif_tx_stop_all_queues(np->netdev);
 668        for (i = 0; i < num_queues; ++i) {
 669                queue = &np->queues[i];
 670                napi_disable(&queue->napi);
 671        }
 672        return 0;
 673}
 674
 675static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
 676                                grant_ref_t ref)
 677{
 678        int new = xennet_rxidx(queue->rx.req_prod_pvt);
 679
 680        BUG_ON(queue->rx_skbs[new]);
 681        queue->rx_skbs[new] = skb;
 682        queue->grant_rx_ref[new] = ref;
 683        RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->id = new;
 684        RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->gref = ref;
 685        queue->rx.req_prod_pvt++;
 686}
 687
 688static int xennet_get_extras(struct netfront_queue *queue,
 689                             struct xen_netif_extra_info *extras,
 690                             RING_IDX rp)
 691
 692{
 693        struct xen_netif_extra_info *extra;
 694        struct device *dev = &queue->info->netdev->dev;
 695        RING_IDX cons = queue->rx.rsp_cons;
 696        int err = 0;
 697
 698        do {
 699                struct sk_buff *skb;
 700                grant_ref_t ref;
 701
 702                if (unlikely(cons + 1 == rp)) {
 703                        if (net_ratelimit())
 704                                dev_warn(dev, "Missing extra info\n");
 705                        err = -EBADR;
 706                        break;
 707                }
 708
 709                extra = (struct xen_netif_extra_info *)
 710                        RING_GET_RESPONSE(&queue->rx, ++cons);
 711
 712                if (unlikely(!extra->type ||
 713                             extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
 714                        if (net_ratelimit())
 715                                dev_warn(dev, "Invalid extra type: %d\n",
 716                                        extra->type);
 717                        err = -EINVAL;
 718                } else {
 719                        memcpy(&extras[extra->type - 1], extra,
 720                               sizeof(*extra));
 721                }
 722
 723                skb = xennet_get_rx_skb(queue, cons);
 724                ref = xennet_get_rx_ref(queue, cons);
 725                xennet_move_rx_slot(queue, skb, ref);
 726        } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
 727
 728        queue->rx.rsp_cons = cons;
 729        return err;
 730}
 731
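/*
 * Consume the chain of RX responses that makes up one packet: validate each
 * slot, reclaim its grant, and collect the backing skbs on @list.
 */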
 732static int xennet_get_responses(struct netfront_queue *queue,
 733                                struct netfront_rx_info *rinfo, RING_IDX rp,
 734                                struct sk_buff_head *list)
 735{
 736        struct xen_netif_rx_response *rx = &rinfo->rx;
 737        struct xen_netif_extra_info *extras = rinfo->extras;
 738        struct device *dev = &queue->info->netdev->dev;
 739        RING_IDX cons = queue->rx.rsp_cons;
 740        struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
 741        grant_ref_t ref = xennet_get_rx_ref(queue, cons);
 742        int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
 743        int slots = 1;
 744        int err = 0;
 745        unsigned long ret;
 746
 747        if (rx->flags & XEN_NETRXF_extra_info) {
 748                err = xennet_get_extras(queue, extras, rp);
 749                cons = queue->rx.rsp_cons;
 750        }
 751
 752        for (;;) {
 753                if (unlikely(rx->status < 0 ||
 754                             rx->offset + rx->status > PAGE_SIZE)) {
 755                        if (net_ratelimit())
 756                                dev_warn(dev, "rx->offset: %x, size: %u\n",
 757                                         rx->offset, rx->status);
 758                        xennet_move_rx_slot(queue, skb, ref);
 759                        err = -EINVAL;
 760                        goto next;
 761                }
 762
 763                /*
 764                 * This definitely indicates a bug, either in this driver or in
 765                 * the backend driver. In future this should flag the bad
 766                 * situation to the system controller to reboot the backend.
 767                 */
 768                if (ref == GRANT_INVALID_REF) {
 769                        if (net_ratelimit())
 770                                dev_warn(dev, "Bad rx response id %d.\n",
 771                                         rx->id);
 772                        err = -EINVAL;
 773                        goto next;
 774                }
 775
 776                ret = gnttab_end_foreign_access_ref(ref, 0);
 777                BUG_ON(!ret);
 778
 779                gnttab_release_grant_reference(&queue->gref_rx_head, ref);
 780
 781                __skb_queue_tail(list, skb);
 782
 783next:
 784                if (!(rx->flags & XEN_NETRXF_more_data))
 785                        break;
 786
 787                if (cons + slots == rp) {
 788                        if (net_ratelimit())
 789                                dev_warn(dev, "Need more slots\n");
 790                        err = -ENOENT;
 791                        break;
 792                }
 793
 794                rx = RING_GET_RESPONSE(&queue->rx, cons + slots);
 795                skb = xennet_get_rx_skb(queue, cons + slots);
 796                ref = xennet_get_rx_ref(queue, cons + slots);
 797                slots++;
 798        }
 799
 800        if (unlikely(slots > max)) {
 801                if (net_ratelimit())
 802                        dev_warn(dev, "Too many slots\n");
 803                err = -E2BIG;
 804        }
 805
 806        if (unlikely(err))
 807                queue->rx.rsp_cons = cons + slots;
 808
 809        return err;
 810}
 811
 812static int xennet_set_skb_gso(struct sk_buff *skb,
 813                              struct xen_netif_extra_info *gso)
 814{
 815        if (!gso->u.gso.size) {
 816                if (net_ratelimit())
 817                        pr_warn("GSO size must not be zero\n");
 818                return -EINVAL;
 819        }
 820
 821        if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
 822            gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
 823                if (net_ratelimit())
 824                        pr_warn("Bad GSO type %d\n", gso->u.gso.type);
 825                return -EINVAL;
 826        }
 827
 828        skb_shinfo(skb)->gso_size = gso->u.gso.size;
 829        skb_shinfo(skb)->gso_type =
 830                (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
 831                SKB_GSO_TCPV4 :
 832                SKB_GSO_TCPV6;
 833
 834        /* Header must be checked, and gso_segs computed. */
 835        skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 836        skb_shinfo(skb)->gso_segs = 0;
 837
 838        return 0;
 839}
 840
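/*
 * Attach the pages of the remaining per-slot skbs on @list as frags of @skb,
 * pulling data into the linear area first if the frag slots run out. Returns
 * the updated response-consumer index.
 */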
 841static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
 842                                  struct sk_buff *skb,
 843                                  struct sk_buff_head *list)
 844{
 845        struct skb_shared_info *shinfo = skb_shinfo(skb);
 846        RING_IDX cons = queue->rx.rsp_cons;
 847        struct sk_buff *nskb;
 848
 849        while ((nskb = __skb_dequeue(list))) {
 850                struct xen_netif_rx_response *rx =
 851                        RING_GET_RESPONSE(&queue->rx, ++cons);
 852                skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
 853
 854                if (shinfo->nr_frags == MAX_SKB_FRAGS) {
 855                        unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 856
 857                        BUG_ON(pull_to <= skb_headlen(skb));
 858                        __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 859                }
 860                BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS);
 861
 862                skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag),
 863                                rx->offset, rx->status, PAGE_SIZE);
 864
 865                skb_shinfo(nskb)->nr_frags = 0;
 866                kfree_skb(nskb);
 867        }
 868
 869        return cons;
 870}
 871
 872static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
 873{
 874        bool recalculate_partial_csum = false;
 875
 876        /*
 877         * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
 878         * peers can fail to set NETRXF_csum_blank when sending a GSO
 879         * frame. In this case force the SKB to CHECKSUM_PARTIAL and
 880         * recalculate the partial checksum.
 881         */
 882        if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
 883                struct netfront_info *np = netdev_priv(dev);
 884                atomic_inc(&np->rx_gso_checksum_fixup);
 885                skb->ip_summed = CHECKSUM_PARTIAL;
 886                recalculate_partial_csum = true;
 887        }
 888
 889        /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
 890        if (skb->ip_summed != CHECKSUM_PARTIAL)
 891                return 0;
 892
 893        return skb_checksum_setup(skb, recalculate_partial_csum);
 894}
 895
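/*
 * Deliver fully assembled skbs to the stack: fix up checksum state, update RX
 * stats and hand them to GRO. Returns the number of packets dropped.
 */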
 896static int handle_incoming_queue(struct netfront_queue *queue,
 897                                 struct sk_buff_head *rxq)
 898{
 899        struct netfront_stats *rx_stats = this_cpu_ptr(queue->info->rx_stats);
 900        int packets_dropped = 0;
 901        struct sk_buff *skb;
 902
 903        while ((skb = __skb_dequeue(rxq)) != NULL) {
 904                int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 905
 906                if (pull_to > skb_headlen(skb))
 907                        __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 908
 909                /* Ethernet work: Delayed to here as it peeks at the header. */
 910                skb->protocol = eth_type_trans(skb, queue->info->netdev);
 911                skb_reset_network_header(skb);
 912
 913                if (checksum_setup(queue->info->netdev, skb)) {
 914                        kfree_skb(skb);
 915                        packets_dropped++;
 916                        queue->info->netdev->stats.rx_errors++;
 917                        continue;
 918                }
 919
 920                u64_stats_update_begin(&rx_stats->syncp);
 921                rx_stats->packets++;
 922                rx_stats->bytes += skb->len;
 923                u64_stats_update_end(&rx_stats->syncp);
 924
 925                /* Pass it up. */
 926                napi_gro_receive(&queue->napi, skb);
 927        }
 928
 929        return packets_dropped;
 930}
 931
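/* NAPI poll handler: consume RX responses, assemble skbs and refill the ring. */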
 932static int xennet_poll(struct napi_struct *napi, int budget)
 933{
 934        struct netfront_queue *queue = container_of(napi, struct netfront_queue, napi);
 935        struct net_device *dev = queue->info->netdev;
 936        struct sk_buff *skb;
 937        struct netfront_rx_info rinfo;
 938        struct xen_netif_rx_response *rx = &rinfo.rx;
 939        struct xen_netif_extra_info *extras = rinfo.extras;
 940        RING_IDX i, rp;
 941        int work_done;
 942        struct sk_buff_head rxq;
 943        struct sk_buff_head errq;
 944        struct sk_buff_head tmpq;
 945        int err;
 946
 947        spin_lock(&queue->rx_lock);
 948
 949        skb_queue_head_init(&rxq);
 950        skb_queue_head_init(&errq);
 951        skb_queue_head_init(&tmpq);
 952
 953        rp = queue->rx.sring->rsp_prod;
 954        rmb(); /* Ensure we see queued responses up to 'rp'. */
 955
 956        i = queue->rx.rsp_cons;
 957        work_done = 0;
 958        while ((i != rp) && (work_done < budget)) {
 959                memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx));
 960                memset(extras, 0, sizeof(rinfo.extras));
 961
 962                err = xennet_get_responses(queue, &rinfo, rp, &tmpq);
 963
 964                if (unlikely(err)) {
 965err:
 966                        while ((skb = __skb_dequeue(&tmpq)))
 967                                __skb_queue_tail(&errq, skb);
 968                        dev->stats.rx_errors++;
 969                        i = queue->rx.rsp_cons;
 970                        continue;
 971                }
 972
 973                skb = __skb_dequeue(&tmpq);
 974
 975                if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
 976                        struct xen_netif_extra_info *gso;
 977                        gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
 978
 979                        if (unlikely(xennet_set_skb_gso(skb, gso))) {
 980                                __skb_queue_head(&tmpq, skb);
 981                                queue->rx.rsp_cons += skb_queue_len(&tmpq);
 982                                goto err;
 983                        }
 984                }
 985
 986                NETFRONT_SKB_CB(skb)->pull_to = rx->status;
 987                if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
 988                        NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;
 989
 990                skb_shinfo(skb)->frags[0].page_offset = rx->offset;
 991                skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
 992                skb->data_len = rx->status;
 993                skb->len += rx->status;
 994
 995                i = xennet_fill_frags(queue, skb, &tmpq);
 996
 997                if (rx->flags & XEN_NETRXF_csum_blank)
 998                        skb->ip_summed = CHECKSUM_PARTIAL;
 999                else if (rx->flags & XEN_NETRXF_data_validated)
1000                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1001
1002                __skb_queue_tail(&rxq, skb);
1003
1004                queue->rx.rsp_cons = ++i;
1005                work_done++;
1006        }
1007
1008        __skb_queue_purge(&errq);
1009
1010        work_done -= handle_incoming_queue(queue, &rxq);
1011
1012        xennet_alloc_rx_buffers(queue);
1013
1014        if (work_done < budget) {
1015                int more_to_do = 0;
1016
1017                napi_complete(napi);
1018
1019                RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do);
1020                if (more_to_do)
1021                        napi_schedule(napi);
1022        }
1023
1024        spin_unlock(&queue->rx_lock);
1025
1026        return work_done;
1027}
1028
1029static int xennet_change_mtu(struct net_device *dev, int mtu)
1030{
1031        int max = xennet_can_sg(dev) ? XEN_NETIF_MAX_TX_SIZE : ETH_DATA_LEN;
1032
1033        if (mtu > max)
1034                return -EINVAL;
1035        dev->mtu = mtu;
1036        return 0;
1037}
1038
1039static void xennet_get_stats64(struct net_device *dev,
1040                               struct rtnl_link_stats64 *tot)
1041{
1042        struct netfront_info *np = netdev_priv(dev);
1043        int cpu;
1044
1045        for_each_possible_cpu(cpu) {
1046                struct netfront_stats *rx_stats = per_cpu_ptr(np->rx_stats, cpu);
1047                struct netfront_stats *tx_stats = per_cpu_ptr(np->tx_stats, cpu);
1048                u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
1049                unsigned int start;
1050
1051                do {
1052                        start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
1053                        tx_packets = tx_stats->packets;
1054                        tx_bytes = tx_stats->bytes;
1055                } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
1056
1057                do {
1058                        start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
1059                        rx_packets = rx_stats->packets;
1060                        rx_bytes = rx_stats->bytes;
1061                } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
1062
1063                tot->rx_packets += rx_packets;
1064                tot->tx_packets += tx_packets;
1065                tot->rx_bytes   += rx_bytes;
1066                tot->tx_bytes   += tx_bytes;
1067        }
1068
1069        tot->rx_errors  = dev->stats.rx_errors;
1070        tot->tx_dropped = dev->stats.tx_dropped;
1071}
1072
1073static void xennet_release_tx_bufs(struct netfront_queue *queue)
1074{
1075        struct sk_buff *skb;
1076        int i;
1077
1078        for (i = 0; i < NET_TX_RING_SIZE; i++) {
1079                /* Skip over entries which are actually freelist references */
1080                if (skb_entry_is_link(&queue->tx_skbs[i]))
1081                        continue;
1082
1083                skb = queue->tx_skbs[i].skb;
1084                get_page(queue->grant_tx_page[i]);
1085                gnttab_end_foreign_access(queue->grant_tx_ref[i],
1086                                          GNTMAP_readonly,
1087                                          (unsigned long)page_address(queue->grant_tx_page[i]));
1088                queue->grant_tx_page[i] = NULL;
1089                queue->grant_tx_ref[i] = GRANT_INVALID_REF;
1090                add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i);
1091                dev_kfree_skb_irq(skb);
1092        }
1093}
1094
1095static void xennet_release_rx_bufs(struct netfront_queue *queue)
1096{
1097        int id, ref;
1098
1099        spin_lock_bh(&queue->rx_lock);
1100
1101        for (id = 0; id < NET_RX_RING_SIZE; id++) {
1102                struct sk_buff *skb;
1103                struct page *page;
1104
1105                skb = queue->rx_skbs[id];
1106                if (!skb)
1107                        continue;
1108
1109                ref = queue->grant_rx_ref[id];
1110                if (ref == GRANT_INVALID_REF)
1111                        continue;
1112
1113                page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
1114
1115                /* gnttab_end_foreign_access() needs a page ref until
1116                 * foreign access is ended (which may be deferred).
1117                 */
1118                get_page(page);
1119                gnttab_end_foreign_access(ref, 0,
1120                                          (unsigned long)page_address(page));
1121                queue->grant_rx_ref[id] = GRANT_INVALID_REF;
1122
1123                kfree_skb(skb);
1124        }
1125
1126        spin_unlock_bh(&queue->rx_lock);
1127}
1128
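/* Mask out offload features the backend does not advertise in xenstore. */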
1129static netdev_features_t xennet_fix_features(struct net_device *dev,
1130        netdev_features_t features)
1131{
1132        struct netfront_info *np = netdev_priv(dev);
1133        int val;
1134
1135        if (features & NETIF_F_SG) {
1136                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
1137                                 "%d", &val) < 0)
1138                        val = 0;
1139
1140                if (!val)
1141                        features &= ~NETIF_F_SG;
1142        }
1143
1144        if (features & NETIF_F_IPV6_CSUM) {
1145                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1146                                 "feature-ipv6-csum-offload", "%d", &val) < 0)
1147                        val = 0;
1148
1149                if (!val)
1150                        features &= ~NETIF_F_IPV6_CSUM;
1151        }
1152
1153        if (features & NETIF_F_TSO) {
1154                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1155                                 "feature-gso-tcpv4", "%d", &val) < 0)
1156                        val = 0;
1157
1158                if (!val)
1159                        features &= ~NETIF_F_TSO;
1160        }
1161
1162        if (features & NETIF_F_TSO6) {
1163                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1164                                 "feature-gso-tcpv6", "%d", &val) < 0)
1165                        val = 0;
1166
1167                if (!val)
1168                        features &= ~NETIF_F_TSO6;
1169        }
1170
1171        return features;
1172}
1173
1174static int xennet_set_features(struct net_device *dev,
1175        netdev_features_t features)
1176{
1177        if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
1178                netdev_info(dev, "Reducing MTU because no SG offload\n");
1179                dev->mtu = ETH_DATA_LEN;
1180        }
1181
1182        return 0;
1183}
1184
1185static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
1186{
1187        struct netfront_queue *queue = dev_id;
1188        unsigned long flags;
1189
1190        spin_lock_irqsave(&queue->tx_lock, flags);
1191        xennet_tx_buf_gc(queue);
1192        spin_unlock_irqrestore(&queue->tx_lock, flags);
1193
1194        return IRQ_HANDLED;
1195}
1196
1197static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
1198{
1199        struct netfront_queue *queue = dev_id;
1200        struct net_device *dev = queue->info->netdev;
1201
1202        if (likely(netif_carrier_ok(dev) &&
1203                   RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
1204                napi_schedule(&queue->napi);
1205
1206        return IRQ_HANDLED;
1207}
1208
1209static irqreturn_t xennet_interrupt(int irq, void *dev_id)
1210{
1211        xennet_tx_interrupt(irq, dev_id);
1212        xennet_rx_interrupt(irq, dev_id);
1213        return IRQ_HANDLED;
1214}
1215
1216#ifdef CONFIG_NET_POLL_CONTROLLER
1217static void xennet_poll_controller(struct net_device *dev)
1218{
1219        /* Poll each queue */
1220        struct netfront_info *info = netdev_priv(dev);
1221        unsigned int num_queues = dev->real_num_tx_queues;
1222        unsigned int i;
1223        for (i = 0; i < num_queues; ++i)
1224                xennet_interrupt(0, &info->queues[i]);
1225}
1226#endif
1227
1228static const struct net_device_ops xennet_netdev_ops = {
1229        .ndo_size            = sizeof(struct net_device_ops),
1230        .ndo_open            = xennet_open,
1231        .ndo_stop            = xennet_close,
1232        .ndo_start_xmit      = xennet_start_xmit,
1233        .extended.ndo_change_mtu = xennet_change_mtu,
1234        .ndo_get_stats64     = xennet_get_stats64,
1235        .ndo_set_mac_address = eth_mac_addr,
1236        .ndo_validate_addr   = eth_validate_addr,
1237        .ndo_fix_features    = xennet_fix_features,
1238        .ndo_set_features    = xennet_set_features,
1239        .ndo_select_queue    = xennet_select_queue,
1240#ifdef CONFIG_NET_POLL_CONTROLLER
1241        .ndo_poll_controller = xennet_poll_controller,
1242#endif
1243};
1244
1245static void xennet_free_netdev(struct net_device *netdev)
1246{
1247        struct netfront_info *np = netdev_priv(netdev);
1248
1249        free_percpu(np->rx_stats);
1250        free_percpu(np->tx_stats);
1251        free_netdev(netdev);
1252}
1253
1254static struct net_device *xennet_create_dev(struct xenbus_device *dev)
1255{
1256        int err;
1257        struct net_device *netdev;
1258        struct netfront_info *np;
1259
1260        netdev = alloc_etherdev_mq(sizeof(struct netfront_info), xennet_max_queues);
1261        if (!netdev)
1262                return ERR_PTR(-ENOMEM);
1263
1264        np                   = netdev_priv(netdev);
1265        np->xbdev            = dev;
1266
1267        np->queues = NULL;
1268
1269        err = -ENOMEM;
1270        np->rx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1271        if (np->rx_stats == NULL)
1272                goto exit;
1273        np->tx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1274        if (np->tx_stats == NULL)
1275                goto exit;
1276
1277        netdev->netdev_ops      = &xennet_netdev_ops;
1278
1279        netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
1280                                  NETIF_F_GSO_ROBUST;
1281        netdev->hw_features     = NETIF_F_SG |
1282                                  NETIF_F_IPV6_CSUM |
1283                                  NETIF_F_TSO | NETIF_F_TSO6;
1284
1285        /*
1286         * Assume that all hw features are available for now. This set
1287         * will be adjusted by the call to netdev_update_features() in
1288         * xennet_connect() which is the earliest point where we can
1289         * negotiate with the backend regarding supported features.
1290         */
1291        netdev->features |= netdev->hw_features;
1292
1293        SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops);
1294        netdev->extended->min_mtu = ETH_MIN_MTU;
1295        netdev->extended->max_mtu = XEN_NETIF_MAX_TX_SIZE;
1296        SET_NETDEV_DEV(netdev, &dev->dev);
1297
1298        np->netdev = netdev;
1299
1300        netif_carrier_off(netdev);
1301
1302        xenbus_switch_state(dev, XenbusStateInitialising);
1303        wait_event(module_wq,
1304                   xenbus_read_driver_state(dev->otherend) !=
1305                   XenbusStateClosed &&
1306                   xenbus_read_driver_state(dev->otherend) !=
1307                   XenbusStateUnknown);
1308        return netdev;
1309
1310 exit:
1311        xennet_free_netdev(netdev);
1312        return ERR_PTR(err);
1313}
1314
1315/**
1316 * Entry point to this code when a new device is created.  Allocate the basic
1317 * structures and the ring buffers for communication with the backend, and
1318 * inform the backend of the appropriate details for those.
1319 */
1320static int netfront_probe(struct xenbus_device *dev,
1321                          const struct xenbus_device_id *id)
1322{
1323        int err;
1324        struct net_device *netdev;
1325        struct netfront_info *info;
1326
1327        netdev = xennet_create_dev(dev);
1328        if (IS_ERR(netdev)) {
1329                err = PTR_ERR(netdev);
1330                xenbus_dev_fatal(dev, err, "creating netdev");
1331                return err;
1332        }
1333
1334        info = netdev_priv(netdev);
1335        dev_set_drvdata(&dev->dev, info);
1336#ifdef CONFIG_SYSFS
1337        info->netdev->sysfs_groups[0] = &xennet_dev_group;
1338#endif
1339
1340        return 0;
1341}
1342
1343static void xennet_end_access(int ref, void *page)
1344{
1345        /* This frees the page as a side-effect */
1346        if (ref != GRANT_INVALID_REF)
1347                gnttab_end_foreign_access(ref, 0, (unsigned long)page);
1348}
1349
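/*
 * Tear down the connection to the backend: unbind event channels, wait for
 * in-flight NAPI processing, release outstanding grants and free the shared
 * rings.
 */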
1350static void xennet_disconnect_backend(struct netfront_info *info)
1351{
1352        unsigned int i = 0;
1353        unsigned int num_queues = info->netdev->real_num_tx_queues;
1354
1355        netif_carrier_off(info->netdev);
1356
1357        for (i = 0; i < num_queues && info->queues; ++i) {
1358                struct netfront_queue *queue = &info->queues[i];
1359
1360                del_timer_sync(&queue->rx_refill_timer);
1361
1362                if (queue->tx_irq && (queue->tx_irq == queue->rx_irq))
1363                        unbind_from_irqhandler(queue->tx_irq, queue);
1364                if (queue->tx_irq && (queue->tx_irq != queue->rx_irq)) {
1365                        unbind_from_irqhandler(queue->tx_irq, queue);
1366                        unbind_from_irqhandler(queue->rx_irq, queue);
1367                }
1368                queue->tx_evtchn = queue->rx_evtchn = 0;
1369                queue->tx_irq = queue->rx_irq = 0;
1370
1371                if (netif_running(info->netdev))
1372                        napi_synchronize(&queue->napi);
1373
1374                xennet_release_tx_bufs(queue);
1375                xennet_release_rx_bufs(queue);
1376                gnttab_free_grant_references(queue->gref_tx_head);
1377                gnttab_free_grant_references(queue->gref_rx_head);
1378
1379                /* End access and free the pages */
1380                xennet_end_access(queue->tx_ring_ref, queue->tx.sring);
1381                xennet_end_access(queue->rx_ring_ref, queue->rx.sring);
1382
1383                queue->tx_ring_ref = GRANT_INVALID_REF;
1384                queue->rx_ring_ref = GRANT_INVALID_REF;
1385                queue->tx.sring = NULL;
1386                queue->rx.sring = NULL;
1387        }
1388}
1389
1390/**
1391 * We are reconnecting to the backend, due to a suspend/resume, or a backend
1392 * driver restart.  We tear down our netif structure and recreate it, but
1393 * leave the device-layer structures intact so that this is transparent to the
1394 * rest of the kernel.
1395 */
1396static int netfront_resume(struct xenbus_device *dev)
1397{
1398        struct netfront_info *info = dev_get_drvdata(&dev->dev);
1399
1400        dev_dbg(&dev->dev, "%s\n", dev->nodename);
1401
1402        xennet_disconnect_backend(info);
1403        return 0;
1404}
1405
1406static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
1407{
1408        char *s, *e, *macstr;
1409        int i;
1410
1411        macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
1412        if (IS_ERR(macstr))
1413                return PTR_ERR(macstr);
1414
1415        for (i = 0; i < ETH_ALEN; i++) {
1416                mac[i] = simple_strtoul(s, &e, 16);
1417                if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
1418                        kfree(macstr);
1419                        return -ENOENT;
1420                }
1421                s = e+1;
1422        }
1423
1424        kfree(macstr);
1425        return 0;
1426}
1427
1428static int setup_netfront_single(struct netfront_queue *queue)
1429{
1430        int err;
1431
1432        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1433        if (err < 0)
1434                goto fail;
1435
1436        err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
1437                                        xennet_interrupt,
1438                                        0, queue->info->netdev->name, queue);
1439        if (err < 0)
1440                goto bind_fail;
1441        queue->rx_evtchn = queue->tx_evtchn;
1442        queue->rx_irq = queue->tx_irq = err;
1443
1444        return 0;
1445
1446bind_fail:
1447        xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1448        queue->tx_evtchn = 0;
1449fail:
1450        return err;
1451}
1452
1453static int setup_netfront_split(struct netfront_queue *queue)
1454{
1455        int err;
1456
1457        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1458        if (err < 0)
1459                goto fail;
1460        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->rx_evtchn);
1461        if (err < 0)
1462                goto alloc_rx_evtchn_fail;
1463
1464        snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
1465                 "%s-tx", queue->name);
1466        err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
1467                                        xennet_tx_interrupt,
1468                                        0, queue->tx_irq_name, queue);
1469        if (err < 0)
1470                goto bind_tx_fail;
1471        queue->tx_irq = err;
1472
1473        snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
1474                 "%s-rx", queue->name);
1475        err = bind_evtchn_to_irqhandler(queue->rx_evtchn,
1476                                        xennet_rx_interrupt,
1477                                        0, queue->rx_irq_name, queue);
1478        if (err < 0)
1479                goto bind_rx_fail;
1480        queue->rx_irq = err;
1481
1482        return 0;
1483
1484bind_rx_fail:
1485        unbind_from_irqhandler(queue->tx_irq, queue);
1486        queue->tx_irq = 0;
1487bind_tx_fail:
1488        xenbus_free_evtchn(queue->info->xbdev, queue->rx_evtchn);
1489        queue->rx_evtchn = 0;
1490alloc_rx_evtchn_fail:
1491        xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1492        queue->tx_evtchn = 0;
1493fail:
1494        return err;
1495}
1496
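/*
 * Allocate and grant the shared TX/RX rings and bind the event channel(s),
 * preferring split event channels when requested and falling back to a
 * single one.
 */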
1497static int setup_netfront(struct xenbus_device *dev,
1498                        struct netfront_queue *queue, unsigned int feature_split_evtchn)
1499{
1500        struct xen_netif_tx_sring *txs;
1501        struct xen_netif_rx_sring *rxs;
1502        int err;
1503
1504        queue->tx_ring_ref = GRANT_INVALID_REF;
1505        queue->rx_ring_ref = GRANT_INVALID_REF;
1506        queue->rx.sring = NULL;
1507        queue->tx.sring = NULL;
1508
1509        txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1510        if (!txs) {
1511                err = -ENOMEM;
1512                xenbus_dev_fatal(dev, err, "allocating tx ring page");
1513                goto fail;
1514        }
1515        SHARED_RING_INIT(txs);
1516        FRONT_RING_INIT(&queue->tx, txs, PAGE_SIZE);
1517
1518        err = xenbus_grant_ring(dev, virt_to_mfn(txs));
1519        if (err < 0)
1520                goto grant_tx_ring_fail;
1521        queue->tx_ring_ref = err;
1522
1523        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1524        if (!rxs) {
1525                err = -ENOMEM;
1526                xenbus_dev_fatal(dev, err, "allocating rx ring page");
1527                goto alloc_rx_ring_fail;
1528        }
1529        SHARED_RING_INIT(rxs);
1530        FRONT_RING_INIT(&queue->rx, rxs, PAGE_SIZE);
1531
1532        err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
1533        if (err < 0)
1534                goto grant_rx_ring_fail;
1535        queue->rx_ring_ref = err;
1536
1537        if (feature_split_evtchn)
1538                err = setup_netfront_split(queue);
1539        /* Set up a single event channel if either
1540         *  a) feature-split-event-channels == 0, or
1541         *  b) feature-split-event-channels == 1 but the split setup failed.
1542         */
1543        if (!feature_split_evtchn || err)
1544                err = setup_netfront_single(queue);
1545
1546        if (err)
1547                goto alloc_evtchn_fail;
1548
1549        return 0;
1550
1551        /* If we fail to setup netfront, it is safe to just revoke access to
1552         * granted pages because backend is not accessing it at this point.
1553         */
1554alloc_evtchn_fail:
1555        gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0);
1556grant_rx_ring_fail:
1557        free_page((unsigned long)rxs);
1558alloc_rx_ring_fail:
1559        gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0);
1560grant_tx_ring_fail:
1561        free_page((unsigned long)txs);
1562fail:
1563        return err;
1564}
1565
1566/* Queue-specific initialisation
1567 * This used to be done in xennet_create_dev() but must now
1568 * be run per-queue.
1569 */
1570static int xennet_init_queue(struct netfront_queue *queue)
1571{
1572        unsigned short i;
1573        int err = 0;
1574        char *devid;
1575
1576        spin_lock_init(&queue->tx_lock);
1577        spin_lock_init(&queue->rx_lock);
1578
1579        setup_timer(&queue->rx_refill_timer, rx_refill_timeout,
1580                    (unsigned long)queue);
1581
1582        devid = strrchr(queue->info->xbdev->nodename, '/') + 1;
1583        snprintf(queue->name, sizeof(queue->name), "vif%s-q%u",
1584                 devid, queue->id);
1585
1586        /* Initialise tx_skbs as a free chain containing every entry. */
1587        queue->tx_skb_freelist = 0;
1588        for (i = 0; i < NET_TX_RING_SIZE; i++) {
1589                skb_entry_set_link(&queue->tx_skbs[i], i+1);
1590                queue->grant_tx_ref[i] = GRANT_INVALID_REF;
1591                queue->grant_tx_page[i] = NULL;
1592        }
1593
1594        /* Clear out rx_skbs */
1595        for (i = 0; i < NET_RX_RING_SIZE; i++) {
1596                queue->rx_skbs[i] = NULL;
1597                queue->grant_rx_ref[i] = GRANT_INVALID_REF;
1598        }
1599
1600        /* A grant for every tx ring slot */
1601        if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
1602                                          &queue->gref_tx_head) < 0) {
1603                pr_alert("can't alloc tx grant refs\n");
1604                err = -ENOMEM;
1605                goto exit;
1606        }
1607
1608        /* A grant for every rx ring slot */
1609        if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
1610                                          &queue->gref_rx_head) < 0) {
1611                pr_alert("can't alloc rx grant refs\n");
1612                err = -ENOMEM;
1613                goto exit_free_tx;
1614        }
1615
1616        return 0;
1617
1618 exit_free_tx:
1619        gnttab_free_grant_references(queue->gref_tx_head);
1620 exit:
1621        return err;
1622}
1623
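/*
 * Illustrative XenStore layout produced by write_queue_xenstore_keys()
 * (the frontend node name is only a placeholder here; the toolstack
 * assigns the real one):
 *
 *   flat (single queue):      <nodename>/tx-ring-ref
 *                             <nodename>/rx-ring-ref
 *                             <nodename>/event-channel[-tx,-rx]
 *   hierarchical (per queue): <nodename>/queue-0/tx-ring-ref
 *                             <nodename>/queue-0/rx-ring-ref
 *                             <nodename>/queue-0/event-channel[-tx,-rx]
 */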
1624static int write_queue_xenstore_keys(struct netfront_queue *queue,
1625                           struct xenbus_transaction *xbt, int write_hierarchical)
1626{
1627        /* Write the queue-specific keys into XenStore in the traditional
1628         * flat way for a single queue, or under per-queue subkeys for
1629         * multiple queues.
1630         */
1631        struct xenbus_device *dev = queue->info->xbdev;
1632        int err;
1633        const char *message;
1634        char *path;
1635        size_t pathsize;
1636
1637        /* Choose the correct place to write the keys */
1638        if (write_hierarchical) {
1639                pathsize = strlen(dev->nodename) + 10;
1640                path = kzalloc(pathsize, GFP_KERNEL);
1641                if (!path) {
1642                        err = -ENOMEM;
1643                        message = "out of memory while writing ring references";
1644                        goto error;
1645                }
1646                snprintf(path, pathsize, "%s/queue-%u",
1647                                dev->nodename, queue->id);
1648        } else {
1649                path = (char *)dev->nodename;
1650        }
1651
1652        /* Write ring references */
1653        err = xenbus_printf(*xbt, path, "tx-ring-ref", "%u",
1654                        queue->tx_ring_ref);
1655        if (err) {
1656                message = "writing tx-ring-ref";
1657                goto error;
1658        }
1659
1660        err = xenbus_printf(*xbt, path, "rx-ring-ref", "%u",
1661                        queue->rx_ring_ref);
1662        if (err) {
1663                message = "writing rx-ring-ref";
1664                goto error;
1665        }
1666
1667        /* Write the event channels, taking into account both the shared
1668         * and the split event channel scenarios.
1669         */
1670        if (queue->tx_evtchn == queue->rx_evtchn) {
1671                /* Shared event channel */
1672                err = xenbus_printf(*xbt, path,
1673                                "event-channel", "%u", queue->tx_evtchn);
1674                if (err) {
1675                        message = "writing event-channel";
1676                        goto error;
1677                }
1678        } else {
1679                /* Split event channels */
1680                err = xenbus_printf(*xbt, path,
1681                                "event-channel-tx", "%u", queue->tx_evtchn);
1682                if (err) {
1683                        message = "writing event-channel-tx";
1684                        goto error;
1685                }
1686
1687                err = xenbus_printf(*xbt, path,
1688                                "event-channel-rx", "%u", queue->rx_evtchn);
1689                if (err) {
1690                        message = "writing event-channel-rx";
1691                        goto error;
1692                }
1693        }
1694
1695        if (write_hierarchical)
1696                kfree(path);
1697        return 0;
1698
1699error:
1700        if (write_hierarchical)
1701                kfree(path);
1702        xenbus_dev_fatal(dev, err, "%s", message);
1703        return err;
1704}
1705
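/*
 * Tear down every queue: disable and delete its NAPI instance, then free
 * the queue array.  All callers in this file hold the rtnl lock around
 * this call.
 */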
1706static void xennet_destroy_queues(struct netfront_info *info)
1707{
1708        unsigned int i;
1709
1710        for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
1711                struct netfront_queue *queue = &info->queues[i];
1712
1713                if (netif_running(info->netdev))
1714                        napi_disable(&queue->napi);
1715                netif_napi_del(&queue->napi);
1716        }
1717
1718        kfree(info->queues);
1719        info->queues = NULL;
1720}
1721
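/*
 * Allocate and initialise *num_queues queues and add a NAPI instance for
 * each.  If a queue fails to initialise, *num_queues is lowered to the
 * number actually created; ending up with zero queues is a fatal error.
 */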
1722static int xennet_create_queues(struct netfront_info *info,
1723                                unsigned int *num_queues)
1724{
1725        unsigned int i;
1726        int ret;
1727
1728        info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue),
1729                               GFP_KERNEL);
1730        if (!info->queues)
1731                return -ENOMEM;
1732
1733        for (i = 0; i < *num_queues; i++) {
1734                struct netfront_queue *queue = &info->queues[i];
1735
1736                queue->id = i;
1737                queue->info = info;
1738
1739                ret = xennet_init_queue(queue);
1740                if (ret < 0) {
1741                        dev_warn(&info->xbdev->dev,
1742                                 "only created %u queues\n", i);
1743                        *num_queues = i;
1744                        break;
1745                }
1746
1747                netif_napi_add(queue->info->netdev, &queue->napi,
1748                               xennet_poll, 64);
1749                if (netif_running(info->netdev))
1750                        napi_enable(&queue->napi);
1751        }
1752
1753        netif_set_real_num_tx_queues(info->netdev, *num_queues);
1754
1755        if (*num_queues == 0) {
1756                dev_err(&info->xbdev->dev, "no queues\n");
1757                return -EINVAL;
1758        }
1759        return 0;
1760}
1761
1762/* Common code used when first setting up, and when resuming. */
1763static int talk_to_netback(struct xenbus_device *dev,
1764                           struct netfront_info *info)
1765{
1766        const char *message;
1767        struct xenbus_transaction xbt;
1768        int err;
1769        unsigned int feature_split_evtchn;
1770        unsigned int i = 0;
1771        unsigned int max_queues = 0;
1772        struct netfront_queue *queue = NULL;
1773        unsigned int num_queues = 1;
1774
1775        info->netdev->irq = 0;
1776
1777        /* Check if backend supports multiple queues */
1778        err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1779                           "multi-queue-max-queues", "%u", &max_queues);
1780        if (err < 0)
1781                max_queues = 1;
1782        num_queues = min(max_queues, xennet_max_queues);
1783
1784        /* Check feature-split-event-channels */
1785        err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1786                           "feature-split-event-channels", "%u",
1787                           &feature_split_evtchn);
1788        if (err < 0)
1789                feature_split_evtchn = 0;
1790
1791        /* Read mac addr. */
1792        err = xen_net_read_mac(dev, info->netdev->dev_addr);
1793        if (err) {
1794                xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
1795                goto out_unlocked;
1796        }
1797
1798        rtnl_lock();
1799        if (info->queues)
1800                xennet_destroy_queues(info);
1801
1802        err = xennet_create_queues(info, &num_queues);
1803        if (err < 0) {
1804                xenbus_dev_fatal(dev, err, "creating queues");
1805                kfree(info->queues);
1806                info->queues = NULL;
1807                goto out;
1808        }
1809        rtnl_unlock();
1810
1811        /* Create shared ring, alloc event channel -- for each queue */
1812        for (i = 0; i < num_queues; ++i) {
1813                queue = &info->queues[i];
1814                err = setup_netfront(dev, queue, feature_split_evtchn);
1815                if (err)
1816                        goto destroy_ring;
1817        }
1818
1819again:
1820        err = xenbus_transaction_start(&xbt);
1821        if (err) {
1822                xenbus_dev_fatal(dev, err, "starting transaction");
1823                goto destroy_ring;
1824        }
1825
1826        if (xenbus_exists(XBT_NIL,
1827                          info->xbdev->otherend, "multi-queue-max-queues")) {
1828                /* Write the number of queues */
1829                err = xenbus_printf(xbt, dev->nodename,
1830                                    "multi-queue-num-queues", "%u", num_queues);
1831                if (err) {
1832                        message = "writing multi-queue-num-queues";
1833                        goto abort_transaction_no_dev_fatal;
1834                }
1835        }
1836
1837        if (num_queues == 1) {
1838                err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0); /* flat */
1839                if (err)
1840                        goto abort_transaction_no_dev_fatal;
1841        } else {
1842                /* Write the keys for each queue */
1843                for (i = 0; i < num_queues; ++i) {
1844                        queue = &info->queues[i];
1845                        err = write_queue_xenstore_keys(queue, &xbt, 1); /* hierarchical */
1846                        if (err)
1847                                goto abort_transaction_no_dev_fatal;
1848                }
1849        }
1850
1851        /* The remaining keys are not queue-specific */
1852        err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
1853                            1);
1854        if (err) {
1855                message = "writing request-rx-copy";
1856                goto abort_transaction;
1857        }
1858
1859        err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
1860        if (err) {
1861                message = "writing feature-rx-notify";
1862                goto abort_transaction;
1863        }
1864
1865        err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
1866        if (err) {
1867                message = "writing feature-sg";
1868                goto abort_transaction;
1869        }
1870
1871        err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
1872        if (err) {
1873                message = "writing feature-gso-tcpv4";
1874                goto abort_transaction;
1875        }
1876
1877        err = xenbus_write(xbt, dev->nodename, "feature-gso-tcpv6", "1");
1878        if (err) {
1879                message = "writing feature-gso-tcpv6";
1880                goto abort_transaction;
1881        }
1882
1883        err = xenbus_write(xbt, dev->nodename, "feature-ipv6-csum-offload",
1884                           "1");
1885        if (err) {
1886                message = "writing feature-ipv6-csum-offload";
1887                goto abort_transaction;
1888        }
1889
1890        err = xenbus_transaction_end(xbt, 0);
1891        if (err) {
1892                if (err == -EAGAIN)
1893                        goto again;
1894                xenbus_dev_fatal(dev, err, "completing transaction");
1895                goto destroy_ring;
1896        }
1897
1898        return 0;
1899
1900 abort_transaction:
1901        xenbus_dev_fatal(dev, err, "%s", message);
1902abort_transaction_no_dev_fatal:
1903        xenbus_transaction_end(xbt, 1);
1904 destroy_ring:
1905        xennet_disconnect_backend(info);
1906        rtnl_lock();
1907        xennet_destroy_queues(info);
1908 out:
1909        rtnl_unlock();
1910out_unlocked:
1911        device_unregister(&dev->dev);
1912        return err;
1913}
1914
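/*
 * (Re)connect to the backend: require the RX-copy receive path,
 * renegotiate rings and event channels via talk_to_netback(), register
 * the net device on first connect, and then kick every queue so that any
 * pending TX completions and RX buffers are processed.
 */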
1915static int xennet_connect(struct net_device *dev)
1916{
1917        struct netfront_info *np = netdev_priv(dev);
1918        unsigned int num_queues = 0;
1919        int err;
1920        unsigned int feature_rx_copy;
1921        unsigned int j = 0;
1922        struct netfront_queue *queue = NULL;
1923
1924        err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1925                           "feature-rx-copy", "%u", &feature_rx_copy);
1926        if (err != 1)
1927                feature_rx_copy = 0;
1928
1929        if (!feature_rx_copy) {
1930                dev_info(&dev->dev,
1931                         "backend does not support copying receive path\n");
1932                return -ENODEV;
1933        }
1934
1935        err = talk_to_netback(np->xbdev, np);
1936        if (err)
1937                return err;
1938
1939        /* talk_to_netback() sets the correct number of queues */
1940        num_queues = dev->real_num_tx_queues;
1941
1942        if (dev->reg_state == NETREG_UNINITIALIZED) {
1943                err = register_netdev(dev);
1944                if (err) {
1945                        pr_warn("%s: register_netdev err=%d\n", __func__, err);
1946                        device_unregister(&np->xbdev->dev);
1947                        return err;
1948                }
1949        }
1950
1951        rtnl_lock();
1952        netdev_update_features(dev);
1953        rtnl_unlock();
1954
1955        /*
1956         * All public and private state should now be sane.  Get
1957         * ready to start sending and receiving packets and give the driver
1958         * domain a kick because we've probably just requeued some
1959         * packets.
1960         */
1961        netif_carrier_on(np->netdev);
1962        for (j = 0; j < num_queues; ++j) {
1963                queue = &np->queues[j];
1964
1965                notify_remote_via_irq(queue->tx_irq);
1966                if (queue->tx_irq != queue->rx_irq)
1967                        notify_remote_via_irq(queue->rx_irq);
1968
1969                spin_lock_irq(&queue->tx_lock);
1970                xennet_tx_buf_gc(queue);
1971                spin_unlock_irq(&queue->tx_lock);
1972
1973                spin_lock_bh(&queue->rx_lock);
1974                xennet_alloc_rx_buffers(queue);
1975                spin_unlock_bh(&queue->rx_lock);
1976        }
1977
1978        return 0;
1979}
1980
1981/*
1982 * Callback received when the backend's state changes.
1983 */
1984static void netback_changed(struct xenbus_device *dev,
1985                            enum xenbus_state backend_state)
1986{
1987        struct netfront_info *np = dev_get_drvdata(&dev->dev);
1988        struct net_device *netdev = np->netdev;
1989
1990        dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
1991
1992        wake_up_all(&module_wq);
1993
1994        switch (backend_state) {
1995        case XenbusStateInitialising:
1996        case XenbusStateInitialised:
1997        case XenbusStateReconfiguring:
1998        case XenbusStateReconfigured:
1999        case XenbusStateUnknown:
2000                break;
2001
2002        case XenbusStateInitWait:
2003                if (dev->state != XenbusStateInitialising)
2004                        break;
2005                if (xennet_connect(netdev) != 0)
2006                        break;
2007                xenbus_switch_state(dev, XenbusStateConnected);
2008                break;
2009
2010        case XenbusStateConnected:
2011                netdev_notify_peers(netdev);
2012                break;
2013
2014        case XenbusStateClosed:
2015                if (dev->state == XenbusStateClosed)
2016                        break;
2017                /* Missed the backend's CLOSING state -- fallthrough */
2018        case XenbusStateClosing:
2019                xenbus_frontend_closed(dev);
2020                break;
2021        }
2022}
2023
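/*
 * Driver-private statistics exported through ethtool.  Each entry names a
 * counter and records its byte offset inside struct netfront_info; the
 * counters show up in the output of, e.g., "ethtool -S <ifname>" (the
 * interface name here is only an example).
 */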
2024static const struct xennet_stat {
2025        char name[ETH_GSTRING_LEN];
2026        u16 offset;
2027} xennet_stats[] = {
2028        {
2029                "rx_gso_checksum_fixup",
2030                offsetof(struct netfront_info, rx_gso_checksum_fixup)
2031        },
2032};
2033
2034static int xennet_get_sset_count(struct net_device *dev, int string_set)
2035{
2036        switch (string_set) {
2037        case ETH_SS_STATS:
2038                return ARRAY_SIZE(xennet_stats);
2039        default:
2040                return -EINVAL;
2041        }
2042}
2043
2044static void xennet_get_ethtool_stats(struct net_device *dev,
2045                                     struct ethtool_stats *stats, u64 *data)
2046{
2047        void *np = netdev_priv(dev);
2048        int i;
2049
2050        for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2051                data[i] = atomic_read((atomic_t *)(np + xennet_stats[i].offset));
2052}
2053
2054static void xennet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
2055{
2056        int i;
2057
2058        switch (stringset) {
2059        case ETH_SS_STATS:
2060                for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2061                        memcpy(data + i * ETH_GSTRING_LEN,
2062                               xennet_stats[i].name, ETH_GSTRING_LEN);
2063                break;
2064        }
2065}
2066
2067static const struct ethtool_ops xennet_ethtool_ops =
2068{
2069        .get_link = ethtool_op_get_link,
2070
2071        .get_sset_count = xennet_get_sset_count,
2072        .get_ethtool_stats = xennet_get_ethtool_stats,
2073        .get_strings = xennet_get_strings,
2074};
2075
2076#ifdef CONFIG_SYSFS
2077static ssize_t show_rxbuf(struct device *dev,
2078                          struct device_attribute *attr, char *buf)
2079{
2080        return sprintf(buf, "%lu\n", NET_RX_RING_SIZE);
2081}
2082
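/*
 * Legacy writable attribute: the input is still parsed and validated,
 * but the receive buffer sizing is fixed at NET_RX_RING_SIZE, so the
 * value written here is otherwise ignored.
 */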
2083static ssize_t store_rxbuf(struct device *dev,
2084                           struct device_attribute *attr,
2085                           const char *buf, size_t len)
2086{
2087        char *endp;
2088        unsigned long target;
2089
2090        if (!capable(CAP_NET_ADMIN))
2091                return -EPERM;
2092
2093        target = simple_strtoul(buf, &endp, 0);
2094        if (endp == buf)
2095                return -EBADMSG;
2096
2097        /* rxbuf_min and rxbuf_max are no longer configurable. */
2098
2099        return len;
2100}
2101
2102static DEVICE_ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf);
2103static DEVICE_ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf);
2104static DEVICE_ATTR(rxbuf_cur, S_IRUGO, show_rxbuf, NULL);
2105
2106static struct attribute *xennet_dev_attrs[] = {
2107        &dev_attr_rxbuf_min.attr,
2108        &dev_attr_rxbuf_max.attr,
2109        &dev_attr_rxbuf_cur.attr,
2110        NULL
2111};
2112
2113static const struct attribute_group xennet_dev_group = {
2114        .attrs = xennet_dev_attrs
2115};
2116#endif /* CONFIG_SYSFS */
2117
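/* Match xenbus devices of type "vif", i.e. one entry per virtual network interface. */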
2118static const struct xenbus_device_id netfront_ids[] = {
2119        { "vif" },
2120        { "" }
2121};
2122
2123
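/*
 * Device removal: switch the frontend to Closing and then Closed, waiting
 * on module_wq for the backend to follow (or disappear) at each step,
 * before disconnecting from the backend and unregistering the net device.
 */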
2124static int xennet_remove(struct xenbus_device *dev)
2125{
2126        struct netfront_info *info = dev_get_drvdata(&dev->dev);
2127
2128        dev_dbg(&dev->dev, "%s\n", dev->nodename);
2129
2130        if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) {
2131                xenbus_switch_state(dev, XenbusStateClosing);
2132                wait_event(module_wq,
2133                           xenbus_read_driver_state(dev->otherend) ==
2134                           XenbusStateClosing ||
2135                           xenbus_read_driver_state(dev->otherend) ==
2136                           XenbusStateUnknown);
2137
2138                xenbus_switch_state(dev, XenbusStateClosed);
2139                wait_event(module_wq,
2140                           xenbus_read_driver_state(dev->otherend) ==
2141                           XenbusStateClosed ||
2142                           xenbus_read_driver_state(dev->otherend) ==
2143                           XenbusStateUnknown);
2144        }
2145
2146        xennet_disconnect_backend(info);
2147
2148        if (info->netdev->reg_state == NETREG_REGISTERED)
2149                unregister_netdev(info->netdev);
2150
2151        if (info->queues) {
2152                rtnl_lock();
2153                xennet_destroy_queues(info);
2154                rtnl_unlock();
2155        }
2156        xennet_free_netdev(info->netdev);
2157
2158        return 0;
2159}
2160
2161static DEFINE_XENBUS_DRIVER(netfront, ,
2162        .probe = netfront_probe,
2163        .remove = xennet_remove,
2164        .resume = netfront_resume,
2165        .otherend_changed = netback_changed,
2166);
2167
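/*
 * Module initialisation.  max_queues caps the number of queues per
 * interface; it can typically be set as "xen_netfront.max_queues=<n>" on
 * the kernel command line or as a modprobe option (the parameter prefix
 * is assumed from the usual module naming).  When left at 0, the default
 * computed below (min of online CPUs and MAX_QUEUES_DEFAULT) is used.
 */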
2168static int __init netif_init(void)
2169{
2170        if (!xen_domain())
2171                return -ENODEV;
2172
2173        if (!xen_has_pv_nic_devices())
2174                return -ENODEV;
2175
2176        pr_info("Initialising Xen virtual ethernet driver\n");
2177
2178        /* Allow as many queues as there are CPUs, but at most
2179         * MAX_QUEUES_DEFAULT (8), if the user has not specified a value.
2180         */
2181        if (xennet_max_queues == 0)
2182                xennet_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,
2183                                          num_online_cpus());
2184
2185        return xenbus_register_frontend(&netfront_driver);
2186}
2187module_init(netif_init);
2188
2189
2190static void __exit netif_exit(void)
2191{
2192        xenbus_unregister_driver(&netfront_driver);
2193}
2194module_exit(netif_exit);
2195
2196MODULE_DESCRIPTION("Xen virtual network device frontend");
2197MODULE_LICENSE("GPL");
2198MODULE_ALIAS("xen:vif");
2199MODULE_ALIAS("xennet");
2200