linux/drivers/net/xen-netfront.c
<<
>>
Prefs
   1/*
   2 * Virtual network driver for conversing with remote driver backends.
   3 *
   4 * Copyright (c) 2002-2005, K A Fraser
   5 * Copyright (c) 2005, XenSource Ltd
   6 *
   7 * This program is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU General Public License version 2
   9 * as published by the Free Software Foundation; or, when distributed
  10 * separately from the Linux kernel or incorporated into other
  11 * software packages, subject to the following license:
  12 *
  13 * Permission is hereby granted, free of charge, to any person obtaining a copy
  14 * of this source file (the "Software"), to deal in the Software without
  15 * restriction, including without limitation the rights to use, copy, modify,
  16 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  17 * and to permit persons to whom the Software is furnished to do so, subject to
  18 * the following conditions:
  19 *
  20 * The above copyright notice and this permission notice shall be included in
  21 * all copies or substantial portions of the Software.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  29 * IN THE SOFTWARE.
  30 */
  31
  32#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  33
  34#include <linux/module.h>
  35#include <linux/kernel.h>
  36#include <linux/netdevice.h>
  37#include <linux/etherdevice.h>
  38#include <linux/skbuff.h>
  39#include <linux/ethtool.h>
  40#include <linux/if_ether.h>
  41#include <net/tcp.h>
  42#include <linux/udp.h>
  43#include <linux/moduleparam.h>
  44#include <linux/mm.h>
  45#include <linux/slab.h>
  46#include <net/ip.h>
  47
  48#include <xen/xen.h>
  49#include <xen/xenbus.h>
  50#include <xen/events.h>
  51#include <xen/page.h>
  52#include <xen/platform_pci.h>
  53#include <xen/grant_table.h>
  54
  55#include <xen/interface/io/netif.h>
  56#include <xen/interface/memory.h>
  57#include <xen/interface/grant_table.h>
  58
  59/* Module parameters */
  /*
   * Exposed as the "max_queues" module parameter (mode 0644).
   * NOTE(review): where this limit is applied, and how a value of 0 is
   * treated, is not visible in this part of the file.
   */
  60static unsigned int xennet_max_queues;
  61module_param_named(max_queues, xennet_max_queues, uint, 0644);
  62MODULE_PARM_DESC(max_queues,
  63                 "Maximum number of queues per virtual interface");
  64
  /* Forward declaration; presumably defined later in the file (not visible here). */
  65static const struct ethtool_ops xennet_ethtool_ops;
  66
  /*
   * Driver-private data kept in the skb control buffer (skb->cb).
   * pull_to: linear-area length that xennet_fill_frags() pulls the skb up to
   * when it has run out of frag slots.
   */
  67struct netfront_cb {
  68        int pull_to;
  69};
  70
  /* View an skb's control buffer as a struct netfront_cb. */
  71#define NETFRONT_SKB_CB(skb)    ((struct netfront_cb *)((skb)->cb))
  72
  /* Linear-area size (before NET_IP_ALIGN) requested for every rx skb. */
  73#define RX_COPY_THRESHOLD 256
  74
  /* Stored in grant_{tx,rx}_ref[] to mark a slot that holds no grant. */
  75#define GRANT_INVALID_REF       0
  76
  /* Entry counts of the tx/rx rings, computed for a ring of XEN_PAGE_SIZE bytes. */
  77#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE)
  78#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, XEN_PAGE_SIZE)
  79
  80/* Minimum number of Rx slots (includes slot for GSO metadata). */
  81#define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1)
  82
  83/* Queue name is interface name with "-qNNN" appended */
  84#define QUEUE_NAME_SIZE (IFNAMSIZ + 6)
  85
  86/* IRQ name is queue name with "-tx" or "-rx" appended */
  87#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
  88
  /*
   * Packet and byte counters. struct netfront_info holds per-CPU instances;
   * updates are bracketed with u64_stats_update_begin()/end() on syncp.
   */
  89struct netfront_stats {
  90        u64                     packets;
  91        u64                     bytes;
  92        struct u64_stats_sync   syncp;
  93};
  94
  /* Forward declaration: each queue points back at its owning netfront_info. */
  95struct netfront_info;
  96
  /* State of one tx/rx queue pair belonging to a virtual interface. */
  97struct netfront_queue {
  98        unsigned int id; /* Queue ID, 0-based */
  99        char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
 100        struct netfront_info *info;
 101
            /* Scheduled from xennet_open() and from the rx refill timer. */
 102        struct napi_struct napi;
 103
 104        /* Split event channels support, tx_* == rx_* when using
 105         * single event channel.
 106         */
 107        unsigned int tx_evtchn, rx_evtchn;
 108        unsigned int tx_irq, rx_irq;
 109        /* Only used when split event channels support is enabled */
 110        char tx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-tx */
 111        char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */
 112
            /* Held (irqsave) by xennet_start_xmit() around all tx ring work. */
 113        spinlock_t   tx_lock;
 114        struct xen_netif_tx_front_ring tx;
 115        int tx_ring_ref;
 116
 117        /*
 118         * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
 119         * are linked from tx_skb_freelist through skb_entry.link.
 120         *
 121         *  NB. Freelist index entries are always going to be less than
 122         *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
 123         *  greater than PAGE_OFFSET: we use this property to distinguish
 124         *  them.
 125         */
 126        union skb_entry {
 127                struct sk_buff *skb;
 128                unsigned long link;
 129        } tx_skbs[NET_TX_RING_SIZE];
            /* Pool head that tx grant references are claimed from / released to. */
 130        grant_ref_t gref_tx_head;
            /* Per tx id: grant reference and page of the outstanding request. */
 131        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
 132        struct page *grant_tx_page[NET_TX_RING_SIZE];
            /* Index of the first free tx_skbs[] entry. */
 133        unsigned tx_skb_freelist;
 134
 135        spinlock_t   rx_lock ____cacheline_aligned_in_smp;
 136        struct xen_netif_rx_front_ring rx;
 137        int rx_ring_ref;
 138
            /* Armed by xennet_alloc_rx_buffers() when too few requests are posted. */
 139        struct timer_list rx_refill_timer;
 140
            /* Both arrays are indexed by xennet_rxidx(ring index). */
 141        struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
 142        grant_ref_t gref_rx_head;
 143        grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
 144};
 145
 /* Per-interface driver state; obtained from the net_device via netdev_priv(). */
 146struct netfront_info {
 147        struct list_head list;
 148        struct net_device *netdev;
 149
            /* xbdev->otherend_id is the domain that buffers are granted to. */
 150        struct xenbus_device *xbdev;
 151
 152        /* Multi-queue support */
            /* Array indexed by queue id. */
 153        struct netfront_queue *queues;
 154
 155        /* Statistics */
 156        struct netfront_stats __percpu *rx_stats;
 157        struct netfront_stats __percpu *tx_stats;
 158
            /* Incremented by checksum_setup() for GSO skbs that are not CHECKSUM_PARTIAL. */
 159        atomic_t rx_gso_checksum_fixup;
 160};
 161
 /*
  * One rx response together with the extra-info segments attached to it.
  * xennet_get_extras() stores an extra of type T at extras[T - 1].
  */
 162struct netfront_rx_info {
 163        struct xen_netif_rx_response rx;
 164        struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
 165};
 166
 167static void skb_entry_set_link(union skb_entry *list, unsigned short id)
 168{
 169        list->link = id;
 170}
 171
 172static int skb_entry_is_link(const union skb_entry *list)
 173{
 174        BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
 175        return (unsigned long)list->skb < PAGE_OFFSET;
 176}
 177
 178/*
 179 * Access macros for acquiring freeing slots in tx_skbs[].
 180 */
 181
 182static void add_id_to_freelist(unsigned *head, union skb_entry *list,
 183                               unsigned short id)
 184{
 185        skb_entry_set_link(&list[id], *head);
 186        *head = id;
 187}
 188
 189static unsigned short get_id_from_freelist(unsigned *head,
 190                                           union skb_entry *list)
 191{
 192        unsigned int id = *head;
 193        *head = list[id].link;
 194        return id;
 195}
 196
 197static int xennet_rxidx(RING_IDX idx)
 198{
 199        return idx & (NET_RX_RING_SIZE - 1);
 200}
 201
 202static struct sk_buff *xennet_get_rx_skb(struct netfront_queue *queue,
 203                                         RING_IDX ri)
 204{
 205        int i = xennet_rxidx(ri);
 206        struct sk_buff *skb = queue->rx_skbs[i];
 207        queue->rx_skbs[i] = NULL;
 208        return skb;
 209}
 210
 211static grant_ref_t xennet_get_rx_ref(struct netfront_queue *queue,
 212                                            RING_IDX ri)
 213{
 214        int i = xennet_rxidx(ri);
 215        grant_ref_t ref = queue->grant_rx_ref[i];
 216        queue->grant_rx_ref[i] = GRANT_INVALID_REF;
 217        return ref;
 218}
 219
 /* Forward declaration, compiled only with CONFIG_SYSFS; the definition is
  * not in this part of the file.
  */
 220#ifdef CONFIG_SYSFS
 221static const struct attribute_group xennet_dev_group;
 222#endif
 223
 224static bool xennet_can_sg(struct net_device *dev)
 225{
 226        return dev->features & NETIF_F_SG;
 227}
 228
 229
 230static void rx_refill_timeout(unsigned long data)
 231{
 232        struct netfront_queue *queue = (struct netfront_queue *)data;
 233        napi_schedule(&queue->napi);
 234}
 235
 236static int netfront_tx_slot_available(struct netfront_queue *queue)
 237{
 238        return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) <
 239                (NET_TX_RING_SIZE - MAX_SKB_FRAGS - 2);
 240}
 241
 242static void xennet_maybe_wake_tx(struct netfront_queue *queue)
 243{
 244        struct net_device *dev = queue->info->netdev;
 245        struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, queue->id);
 246
 247        if (unlikely(netif_tx_queue_stopped(dev_queue)) &&
 248            netfront_tx_slot_available(queue) &&
 249            likely(netif_running(dev)))
 250                netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
 251}
 252
 253
 254static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
 255{
 256        struct sk_buff *skb;
 257        struct page *page;
 258
 259        skb = __netdev_alloc_skb(queue->info->netdev,
 260                                 RX_COPY_THRESHOLD + NET_IP_ALIGN,
 261                                 GFP_ATOMIC | __GFP_NOWARN);
 262        if (unlikely(!skb))
 263                return NULL;
 264
 265        page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 266        if (!page) {
 267                kfree_skb(skb);
 268                return NULL;
 269        }
 270        skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
 271
 272        /* Align ip header to a 16 bytes boundary */
 273        skb_reserve(skb, NET_IP_ALIGN);
 274        skb->dev = queue->info->netdev;
 275
 276        return skb;
 277}
 278
 279
 280static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
 281{
 282        RING_IDX req_prod = queue->rx.req_prod_pvt;
 283        int notify;
 284
 285        if (unlikely(!netif_carrier_ok(queue->info->netdev)))
 286                return;
 287
 288        for (req_prod = queue->rx.req_prod_pvt;
 289             req_prod - queue->rx.rsp_cons < NET_RX_RING_SIZE;
 290             req_prod++) {
 291                struct sk_buff *skb;
 292                unsigned short id;
 293                grant_ref_t ref;
 294                struct page *page;
 295                struct xen_netif_rx_request *req;
 296
 297                skb = xennet_alloc_one_rx_buffer(queue);
 298                if (!skb)
 299                        break;
 300
 301                id = xennet_rxidx(req_prod);
 302
 303                BUG_ON(queue->rx_skbs[id]);
 304                queue->rx_skbs[id] = skb;
 305
 306                ref = gnttab_claim_grant_reference(&queue->gref_rx_head);
 307                BUG_ON((signed short)ref < 0);
 308                queue->grant_rx_ref[id] = ref;
 309
 310                page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
 311
 312                req = RING_GET_REQUEST(&queue->rx, req_prod);
 313                gnttab_page_grant_foreign_access_ref_one(ref,
 314                                                         queue->info->xbdev->otherend_id,
 315                                                         page,
 316                                                         0);
 317                req->id = id;
 318                req->gref = ref;
 319        }
 320
 321        queue->rx.req_prod_pvt = req_prod;
 322
 323        /* Not enough requests? Try again later. */
 324        if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN) {
 325                mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10));
 326                return;
 327        }
 328
 329        wmb();          /* barrier so backend seens requests */
 330
 331        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->rx, notify);
 332        if (notify)
 333                notify_remote_via_irq(queue->rx_irq);
 334}
 335
 336static int xennet_open(struct net_device *dev)
 337{
 338        struct netfront_info *np = netdev_priv(dev);
 339        unsigned int num_queues = dev->real_num_tx_queues;
 340        unsigned int i = 0;
 341        struct netfront_queue *queue = NULL;
 342
 343        for (i = 0; i < num_queues; ++i) {
 344                queue = &np->queues[i];
 345                napi_enable(&queue->napi);
 346
 347                spin_lock_bh(&queue->rx_lock);
 348                if (netif_carrier_ok(dev)) {
 349                        xennet_alloc_rx_buffers(queue);
 350                        queue->rx.sring->rsp_event = queue->rx.rsp_cons + 1;
 351                        if (RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))
 352                                napi_schedule(&queue->napi);
 353                }
 354                spin_unlock_bh(&queue->rx_lock);
 355        }
 356
 357        netif_tx_start_all_queues(dev);
 358
 359        return 0;
 360}
 361
 362static void xennet_tx_buf_gc(struct netfront_queue *queue)
 363{
 364        RING_IDX cons, prod;
 365        unsigned short id;
 366        struct sk_buff *skb;
 367        bool more_to_do;
 368
 369        BUG_ON(!netif_carrier_ok(queue->info->netdev));
 370
 371        do {
 372                prod = queue->tx.sring->rsp_prod;
 373                rmb(); /* Ensure we see responses up to 'rp'. */
 374
 375                for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
 376                        struct xen_netif_tx_response *txrsp;
 377
 378                        txrsp = RING_GET_RESPONSE(&queue->tx, cons);
 379                        if (txrsp->status == XEN_NETIF_RSP_NULL)
 380                                continue;
 381
 382                        id  = txrsp->id;
 383                        skb = queue->tx_skbs[id].skb;
 384                        if (unlikely(gnttab_query_foreign_access(
 385                                queue->grant_tx_ref[id]) != 0)) {
 386                                pr_alert("%s: warning -- grant still in use by backend domain\n",
 387                                         __func__);
 388                                BUG();
 389                        }
 390                        gnttab_end_foreign_access_ref(
 391                                queue->grant_tx_ref[id], GNTMAP_readonly);
 392                        gnttab_release_grant_reference(
 393                                &queue->gref_tx_head, queue->grant_tx_ref[id]);
 394                        queue->grant_tx_ref[id] = GRANT_INVALID_REF;
 395                        queue->grant_tx_page[id] = NULL;
 396                        add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id);
 397                        dev_kfree_skb_irq(skb);
 398                }
 399
 400                queue->tx.rsp_cons = prod;
 401
 402                RING_FINAL_CHECK_FOR_RESPONSES(&queue->tx, more_to_do);
 403        } while (more_to_do);
 404
 405        xennet_maybe_wake_tx(queue);
 406}
 407
 /*
  * Context handed (as the void *data argument) to the per-grant callbacks
  * xennet_tx_setup_grant() and xennet_make_one_txreq().
  */
 408struct xennet_gnttab_make_txreq {
 409        struct netfront_queue *queue;
 410        struct sk_buff *skb;
 411        struct page *page;
 412        struct xen_netif_tx_request *tx; /* Last request */
            /* Sum of tx->size over the requests created with this context. */
 413        unsigned int size;
 414};
 415
 416static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset,
 417                                  unsigned int len, void *data)
 418{
 419        struct xennet_gnttab_make_txreq *info = data;
 420        unsigned int id;
 421        struct xen_netif_tx_request *tx;
 422        grant_ref_t ref;
 423        /* convenient aliases */
 424        struct page *page = info->page;
 425        struct netfront_queue *queue = info->queue;
 426        struct sk_buff *skb = info->skb;
 427
 428        id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
 429        tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
 430        ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
 431        BUG_ON((signed short)ref < 0);
 432
 433        gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
 434                                        gfn, GNTMAP_readonly);
 435
 436        queue->tx_skbs[id].skb = skb;
 437        queue->grant_tx_page[id] = page;
 438        queue->grant_tx_ref[id] = ref;
 439
 440        tx->id = id;
 441        tx->gref = ref;
 442        tx->offset = offset;
 443        tx->size = len;
 444        tx->flags = 0;
 445
 446        info->tx = tx;
 447        info->size += tx->size;
 448}
 449
 450static struct xen_netif_tx_request *xennet_make_first_txreq(
 451        struct netfront_queue *queue, struct sk_buff *skb,
 452        struct page *page, unsigned int offset, unsigned int len)
 453{
 454        struct xennet_gnttab_make_txreq info = {
 455                .queue = queue,
 456                .skb = skb,
 457                .page = page,
 458                .size = 0,
 459        };
 460
 461        gnttab_for_one_grant(page, offset, len, xennet_tx_setup_grant, &info);
 462
 463        return info.tx;
 464}
 465
 466static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset,
 467                                  unsigned int len, void *data)
 468{
 469        struct xennet_gnttab_make_txreq *info = data;
 470
 471        info->tx->flags |= XEN_NETTXF_more_data;
 472        skb_get(info->skb);
 473        xennet_tx_setup_grant(gfn, offset, len, data);
 474}
 475
 476static struct xen_netif_tx_request *xennet_make_txreqs(
 477        struct netfront_queue *queue, struct xen_netif_tx_request *tx,
 478        struct sk_buff *skb, struct page *page,
 479        unsigned int offset, unsigned int len)
 480{
 481        struct xennet_gnttab_make_txreq info = {
 482                .queue = queue,
 483                .skb = skb,
 484                .tx = tx,
 485        };
 486
 487        /* Skip unused frames from start of page */
 488        page += offset >> PAGE_SHIFT;
 489        offset &= ~PAGE_MASK;
 490
 491        while (len) {
 492                info.page = page;
 493                info.size = 0;
 494
 495                gnttab_foreach_grant_in_range(page, offset, len,
 496                                              xennet_make_one_txreq,
 497                                              &info);
 498
 499                page++;
 500                offset = 0;
 501                len -= info.size;
 502        }
 503
 504        return info.tx;
 505}
 506
 507/*
 508 * Count how many ring slots are required to send this skb. Each frag
 509 * might be a compound page.
 510 */
 511static int xennet_count_skb_slots(struct sk_buff *skb)
 512{
 513        int i, frags = skb_shinfo(skb)->nr_frags;
 514        int slots;
 515
 516        slots = gnttab_count_grant(offset_in_page(skb->data),
 517                                   skb_headlen(skb));
 518
 519        for (i = 0; i < frags; i++) {
 520                skb_frag_t *frag = skb_shinfo(skb)->frags + i;
 521                unsigned long size = skb_frag_size(frag);
 522                unsigned long offset = frag->page_offset;
 523
 524                /* Skip unused frames from start of page */
 525                offset &= ~PAGE_MASK;
 526
 527                slots += gnttab_count_grant(offset, size);
 528        }
 529
 530        return slots;
 531}
 532
 533static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
 534                               void *accel_priv, select_queue_fallback_t fallback)
 535{
 536        unsigned int num_queues = dev->real_num_tx_queues;
 537        u32 hash;
 538        u16 queue_idx;
 539
 540        /* First, check if there is only one queue */
 541        if (num_queues == 1) {
 542                queue_idx = 0;
 543        } else {
 544                hash = skb_get_hash(skb);
 545                queue_idx = hash % num_queues;
 546        }
 547
 548        return queue_idx;
 549}
 550
 /* Slot budget used by xennet_start_xmit(): skbs needing more than
  * MAX_XEN_SKB_FRAGS + 1 slots are linearized first.
  */
 551#define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
 552
    /*
     * Queue one skb for transmission on the queue given by its queue mapping.
     *
     * The skb is dropped (tx_dropped incremented, skb freed) when there are no
     * queues, when skb->len exceeds XEN_NETIF_MAX_TX_SIZE, when linearizing an
     * over-fragmented skb fails, when the carrier is off, when more than one
     * slot is needed without scatter-gather, or when the skb would still need
     * software GSO. Otherwise requests are built for the linear area and every
     * frag, pushed to the backend, completed slots are reclaimed and the tx
     * queue is stopped if the ring is nearly full.
     *
     * Returns NETDEV_TX_OK in all cases.
     */
 553static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 554{
 555        struct netfront_info *np = netdev_priv(dev);
 556        struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
 557        struct xen_netif_tx_request *tx, *first_tx;
 558        unsigned int i;
 559        int notify;
 560        int slots;
 561        struct page *page;
 562        unsigned int offset;
 563        unsigned int len;
 564        unsigned long flags;
 565        struct netfront_queue *queue = NULL;
 566        unsigned int num_queues = dev->real_num_tx_queues;
 567        u16 queue_index;
 568
 569        /* Drop the packet if no queues are set up */
 570        if (num_queues < 1)
 571                goto drop;
 572        /* Determine which queue to transmit this SKB on */
 573        queue_index = skb_get_queue_mapping(skb);
 574        queue = &np->queues[queue_index];
 575
 576        /* If skb->len is too big for wire format, drop skb and alert
 577         * user about misconfiguration.
 578         */
 579        if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) {
 580                net_alert_ratelimited(
 581                        "xennet: skb->len = %u, too big for wire format\n",
 582                        skb->len);
 583                goto drop;
 584        }
 585
 586        slots = xennet_count_skb_slots(skb);
 587        if (unlikely(slots > MAX_XEN_SKB_FRAGS + 1)) {
 588                net_dbg_ratelimited("xennet: skb rides the rocket: %d slots, %d bytes\n",
 589                                    slots, skb->len);
 590                if (skb_linearize(skb))
 591                        goto drop;
 592        }
 593
            /* Location and length of the linear area. */
 594        page = virt_to_page(skb->data);
 595        offset = offset_in_page(skb->data);
 596        len = skb_headlen(skb);
 597
 598        spin_lock_irqsave(&queue->tx_lock, flags);
 599
 600        if (unlikely(!netif_carrier_ok(dev) ||
 601                     (slots > 1 && !xennet_can_sg(dev)) ||
 602                     netif_needs_gso(skb, netif_skb_features(skb)))) {
 603                spin_unlock_irqrestore(&queue->tx_lock, flags);
 604                goto drop;
 605        }
 606
 607        /* First request for the linear area. */
 608        first_tx = tx = xennet_make_first_txreq(queue, skb,
 609                                                page, offset, len);
            /* Step past what the first request covered. */
 610        offset += tx->size;
 611        if (offset == PAGE_SIZE) {
 612                page++;
 613                offset = 0;
 614        }
 615        len -= tx->size;
 616
 617        if (skb->ip_summed == CHECKSUM_PARTIAL)
 618                /* local packet? */
 619                tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
 620        else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
 621                /* remote but checksummed. */
 622                tx->flags |= XEN_NETTXF_data_validated;
 623
 624        /* Optional extra info after the first request. */
 625        if (skb_shinfo(skb)->gso_size) {
 626                struct xen_netif_extra_info *gso;
 627
                    /* The extra-info segment occupies a tx ring slot of its own. */
 628                gso = (struct xen_netif_extra_info *)
 629                        RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
 630
 631                tx->flags |= XEN_NETTXF_extra_info;
 632
 633                gso->u.gso.size = skb_shinfo(skb)->gso_size;
 634                gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
 635                        XEN_NETIF_GSO_TYPE_TCPV6 :
 636                        XEN_NETIF_GSO_TYPE_TCPV4;
 637                gso->u.gso.pad = 0;
 638                gso->u.gso.features = 0;
 639
 640                gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
 641                gso->flags = 0;
 642        }
 643
 644        /* Requests for the rest of the linear area. */
 645        tx = xennet_make_txreqs(queue, tx, skb, page, offset, len);
 646
 647        /* Requests for all the frags. */
 648        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 649                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 650                tx = xennet_make_txreqs(queue, tx, skb,
 651                                        skb_frag_page(frag), frag->page_offset,
 652                                        skb_frag_size(frag));
 653        }
 654
 655        /* First request has the packet length. */
 656        first_tx->size = skb->len;
 657
 658        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
 659        if (notify)
 660                notify_remote_via_irq(queue->tx_irq);
 661
 662        u64_stats_update_begin(&tx_stats->syncp);
 663        tx_stats->bytes += skb->len;
 664        tx_stats->packets++;
 665        u64_stats_update_end(&tx_stats->syncp);
 666
 667        /* Note: It is not safe to access skb after xennet_tx_buf_gc()! */
 668        xennet_tx_buf_gc(queue);
 669
 670        if (!netfront_tx_slot_available(queue))
 671                netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
 672
 673        spin_unlock_irqrestore(&queue->tx_lock, flags);
 674
 675        return NETDEV_TX_OK;
 676
 677 drop:
            /* Reached with tx_lock not held. */
 678        dev->stats.tx_dropped++;
 679        dev_kfree_skb_any(skb);
 680        return NETDEV_TX_OK;
 681}
 682
 683static int xennet_close(struct net_device *dev)
 684{
 685        struct netfront_info *np = netdev_priv(dev);
 686        unsigned int num_queues = dev->real_num_tx_queues;
 687        unsigned int i;
 688        struct netfront_queue *queue;
 689        netif_tx_stop_all_queues(np->netdev);
 690        for (i = 0; i < num_queues; ++i) {
 691                queue = &np->queues[i];
 692                napi_disable(&queue->napi);
 693        }
 694        return 0;
 695}
 696
 697static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
 698                                grant_ref_t ref)
 699{
 700        int new = xennet_rxidx(queue->rx.req_prod_pvt);
 701
 702        BUG_ON(queue->rx_skbs[new]);
 703        queue->rx_skbs[new] = skb;
 704        queue->grant_rx_ref[new] = ref;
 705        RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->id = new;
 706        RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->gref = ref;
 707        queue->rx.req_prod_pvt++;
 708}
 709
 710static int xennet_get_extras(struct netfront_queue *queue,
 711                             struct xen_netif_extra_info *extras,
 712                             RING_IDX rp)
 713
 714{
 715        struct xen_netif_extra_info *extra;
 716        struct device *dev = &queue->info->netdev->dev;
 717        RING_IDX cons = queue->rx.rsp_cons;
 718        int err = 0;
 719
 720        do {
 721                struct sk_buff *skb;
 722                grant_ref_t ref;
 723
 724                if (unlikely(cons + 1 == rp)) {
 725                        if (net_ratelimit())
 726                                dev_warn(dev, "Missing extra info\n");
 727                        err = -EBADR;
 728                        break;
 729                }
 730
 731                extra = (struct xen_netif_extra_info *)
 732                        RING_GET_RESPONSE(&queue->rx, ++cons);
 733
 734                if (unlikely(!extra->type ||
 735                             extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
 736                        if (net_ratelimit())
 737                                dev_warn(dev, "Invalid extra type: %d\n",
 738                                        extra->type);
 739                        err = -EINVAL;
 740                } else {
 741                        memcpy(&extras[extra->type - 1], extra,
 742                               sizeof(*extra));
 743                }
 744
 745                skb = xennet_get_rx_skb(queue, cons);
 746                ref = xennet_get_rx_ref(queue, cons);
 747                xennet_move_rx_slot(queue, skb, ref);
 748        } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
 749
 750        queue->rx.rsp_cons = cons;
 751        return err;
 752}
 753
 /*
  * Gather the rx buffers that make up one packet, starting at rsp_cons.
  *
  * @rinfo->rx must already hold a copy of the first response. Extra-info
  * segments, if flagged, are consumed first. Each data slot is validated,
  * its grant is ended and released, and its skb is appended to @list; the
  * walk continues while XEN_NETRXF_more_data is set and stops at @rp.
  *
  * Returns 0 on success or a negative errno (-EINVAL, -ENOENT, -E2BIG, or
  * the error from xennet_get_extras()). On error rsp_cons is advanced past
  * the slots consumed here; on success it is left for the caller to update.
  */
 754static int xennet_get_responses(struct netfront_queue *queue,
 755                                struct netfront_rx_info *rinfo, RING_IDX rp,
 756                                struct sk_buff_head *list)
 757{
 758        struct xen_netif_rx_response *rx = &rinfo->rx;
 759        struct xen_netif_extra_info *extras = rinfo->extras;
 760        struct device *dev = &queue->info->netdev->dev;
 761        RING_IDX cons = queue->rx.rsp_cons;
 762        struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
 763        grant_ref_t ref = xennet_get_rx_ref(queue, cons);
            /* One extra slot is allowed when the first response is no larger
             * than RX_COPY_THRESHOLD.
             */
 764        int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
 765        int slots = 1;
 766        int err = 0;
 767        unsigned long ret;
 768
 769        if (rx->flags & XEN_NETRXF_extra_info) {
 770                err = xennet_get_extras(queue, extras, rp);
                    /* xennet_get_extras() moved rsp_cons past the extras. */
 771                cons = queue->rx.rsp_cons;
 772        }
 773
 774        for (;;) {
 775                if (unlikely(rx->status < 0 ||
 776                             rx->offset + rx->status > XEN_PAGE_SIZE)) {
 777                        if (net_ratelimit())
 778                                dev_warn(dev, "rx->offset: %u, size: %d\n",
 779                                         rx->offset, rx->status);
                            /* Bad slot: re-post its buffer instead of queueing it. */
 780                        xennet_move_rx_slot(queue, skb, ref);
 781                        err = -EINVAL;
 782                        goto next;
 783                }
 784
 785                /*
 786                 * This definitely indicates a bug, either in this driver or in
 787                 * the backend driver. In future this should flag the bad
 788                 * situation to the system controller to reboot the backend.
 789                 */
 790                if (ref == GRANT_INVALID_REF) {
 791                        if (net_ratelimit())
 792                                dev_warn(dev, "Bad rx response id %d.\n",
 793                                         rx->id);
 794                        err = -EINVAL;
 795                        goto next;
 796                }
 797
 798                ret = gnttab_end_foreign_access_ref(ref, 0);
 799                BUG_ON(!ret);
 800
 801                gnttab_release_grant_reference(&queue->gref_rx_head, ref);
 802
 803                __skb_queue_tail(list, skb);
 804
 805next:
 806                if (!(rx->flags & XEN_NETRXF_more_data))
 807                        break;
 808
 809                if (cons + slots == rp) {
 810                        if (net_ratelimit())
 811                                dev_warn(dev, "Need more slots\n");
 812                        err = -ENOENT;
 813                        break;
 814                }
 815
                    /* From here on rx points into the shared ring, not at rinfo->rx. */
 816                rx = RING_GET_RESPONSE(&queue->rx, cons + slots);
 817                skb = xennet_get_rx_skb(queue, cons + slots);
 818                ref = xennet_get_rx_ref(queue, cons + slots);
 819                slots++;
 820        }
 821
 822        if (unlikely(slots > max)) {
 823                if (net_ratelimit())
 824                        dev_warn(dev, "Too many slots\n");
 825                err = -E2BIG;
 826        }
 827
 828        if (unlikely(err))
 829                queue->rx.rsp_cons = cons + slots;
 830
 831        return err;
 832}
 833
 834static int xennet_set_skb_gso(struct sk_buff *skb,
 835                              struct xen_netif_extra_info *gso)
 836{
 837        if (!gso->u.gso.size) {
 838                if (net_ratelimit())
 839                        pr_warn("GSO size must not be zero\n");
 840                return -EINVAL;
 841        }
 842
 843        if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
 844            gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
 845                if (net_ratelimit())
 846                        pr_warn("Bad GSO type %d\n", gso->u.gso.type);
 847                return -EINVAL;
 848        }
 849
 850        skb_shinfo(skb)->gso_size = gso->u.gso.size;
 851        skb_shinfo(skb)->gso_type =
 852                (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
 853                SKB_GSO_TCPV4 :
 854                SKB_GSO_TCPV6;
 855
 856        /* Header must be checked, and gso_segs computed. */
 857        skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 858        skb_shinfo(skb)->gso_segs = 0;
 859
 860        return 0;
 861}
 862
 863static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
 864                                  struct sk_buff *skb,
 865                                  struct sk_buff_head *list)
 866{
 867        struct skb_shared_info *shinfo = skb_shinfo(skb);
 868        RING_IDX cons = queue->rx.rsp_cons;
 869        struct sk_buff *nskb;
 870
 871        while ((nskb = __skb_dequeue(list))) {
 872                struct xen_netif_rx_response *rx =
 873                        RING_GET_RESPONSE(&queue->rx, ++cons);
 874                skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
 875
 876                if (shinfo->nr_frags == MAX_SKB_FRAGS) {
 877                        unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 878
 879                        BUG_ON(pull_to <= skb_headlen(skb));
 880                        __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 881                }
 882                BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS);
 883
 884                skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag),
 885                                rx->offset, rx->status, PAGE_SIZE);
 886
 887                skb_shinfo(nskb)->nr_frags = 0;
 888                kfree_skb(nskb);
 889        }
 890
 891        return cons;
 892}
 893
 894static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
 895{
 896        bool recalculate_partial_csum = false;
 897
 898        /*
 899         * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
 900         * peers can fail to set NETRXF_csum_blank when sending a GSO
 901         * frame. In this case force the SKB to CHECKSUM_PARTIAL and
 902         * recalculate the partial checksum.
 903         */
 904        if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
 905                struct netfront_info *np = netdev_priv(dev);
 906                atomic_inc(&np->rx_gso_checksum_fixup);
 907                skb->ip_summed = CHECKSUM_PARTIAL;
 908                recalculate_partial_csum = true;
 909        }
 910
 911        /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
 912        if (skb->ip_summed != CHECKSUM_PARTIAL)
 913                return 0;
 914
 915        return skb_checksum_setup(skb, recalculate_partial_csum);
 916}
 917
 918static int handle_incoming_queue(struct netfront_queue *queue,
 919                                 struct sk_buff_head *rxq)
 920{
 921        struct netfront_stats *rx_stats = this_cpu_ptr(queue->info->rx_stats);
 922        int packets_dropped = 0;
 923        struct sk_buff *skb;
 924
 925        while ((skb = __skb_dequeue(rxq)) != NULL) {
 926                int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 927
 928                if (pull_to > skb_headlen(skb))
 929                        __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 930
 931                /* Ethernet work: Delayed to here as it peeks the header. */
 932                skb->protocol = eth_type_trans(skb, queue->info->netdev);
 933                skb_reset_network_header(skb);
 934
 935                if (checksum_setup(queue->info->netdev, skb)) {
 936                        kfree_skb(skb);
 937                        packets_dropped++;
 938                        queue->info->netdev->stats.rx_errors++;
 939                        continue;
 940                }
 941
 942                u64_stats_update_begin(&rx_stats->syncp);
 943                rx_stats->packets++;
 944                rx_stats->bytes += skb->len;
 945                u64_stats_update_end(&rx_stats->syncp);
 946
 947                /* Pass it up. */
 948                napi_gro_receive(&queue->napi, skb);
 949        }
 950
 951        return packets_dropped;
 952}
 953
 954static int xennet_poll(struct napi_struct *napi, int budget)
 955{
 956        struct netfront_queue *queue = container_of(napi, struct netfront_queue, napi);
 957        struct net_device *dev = queue->info->netdev;
 958        struct sk_buff *skb;
 959        struct netfront_rx_info rinfo;
 960        struct xen_netif_rx_response *rx = &rinfo.rx;
 961        struct xen_netif_extra_info *extras = rinfo.extras;
 962        RING_IDX i, rp;
 963        int work_done;
 964        struct sk_buff_head rxq;
 965        struct sk_buff_head errq;
 966        struct sk_buff_head tmpq;
 967        int err;
 968
 969        spin_lock(&queue->rx_lock);
 970
 971        skb_queue_head_init(&rxq);
 972        skb_queue_head_init(&errq);
 973        skb_queue_head_init(&tmpq);
 974
 975        rp = queue->rx.sring->rsp_prod;
 976        rmb(); /* Ensure we see queued responses up to 'rp'. */
 977
 978        i = queue->rx.rsp_cons;
 979        work_done = 0;
 980        while ((i != rp) && (work_done < budget)) {
 981                memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx));
 982                memset(extras, 0, sizeof(rinfo.extras));
 983
 984                err = xennet_get_responses(queue, &rinfo, rp, &tmpq);
 985
 986                if (unlikely(err)) {
 987err:
 988                        while ((skb = __skb_dequeue(&tmpq)))
 989                                __skb_queue_tail(&errq, skb);
 990                        dev->stats.rx_errors++;
 991                        i = queue->rx.rsp_cons;
 992                        continue;
 993                }
 994
 995                skb = __skb_dequeue(&tmpq);
 996
 997                if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
 998                        struct xen_netif_extra_info *gso;
 999                        gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1000
1001                        if (unlikely(xennet_set_skb_gso(skb, gso))) {
1002                                __skb_queue_head(&tmpq, skb);
1003                                queue->rx.rsp_cons += skb_queue_len(&tmpq);
1004                                goto err;
1005                        }
1006                }
1007
1008                NETFRONT_SKB_CB(skb)->pull_to = rx->status;
1009                if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
1010                        NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;
1011
1012                skb_shinfo(skb)->frags[0].page_offset = rx->offset;
1013                skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
1014                skb->data_len = rx->status;
1015                skb->len += rx->status;
1016
1017                i = xennet_fill_frags(queue, skb, &tmpq);
1018
1019                if (rx->flags & XEN_NETRXF_csum_blank)
1020                        skb->ip_summed = CHECKSUM_PARTIAL;
1021                else if (rx->flags & XEN_NETRXF_data_validated)
1022                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1023
1024                __skb_queue_tail(&rxq, skb);
1025
1026                queue->rx.rsp_cons = ++i;
1027                work_done++;
1028        }
1029
1030        __skb_queue_purge(&errq);
1031
1032        work_done -= handle_incoming_queue(queue, &rxq);
1033
1034        xennet_alloc_rx_buffers(queue);
1035
1036        if (work_done < budget) {
1037                int more_to_do = 0;
1038
1039                napi_complete(napi);
1040
1041                RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do);
1042                if (more_to_do)
1043                        napi_schedule(napi);
1044        }
1045
1046        spin_unlock(&queue->rx_lock);
1047
1048        return work_done;
1049}
1050
1051static int xennet_change_mtu(struct net_device *dev, int mtu)
1052{
1053        int max = xennet_can_sg(dev) ? XEN_NETIF_MAX_TX_SIZE : ETH_DATA_LEN;
1054
1055        if (mtu > max)
1056                return -EINVAL;
1057        dev->mtu = mtu;
1058        return 0;
1059}
1060
1061static struct rtnl_link_stats64 *xennet_get_stats64(struct net_device *dev,
1062                                                    struct rtnl_link_stats64 *tot)
1063{
1064        struct netfront_info *np = netdev_priv(dev);
1065        int cpu;
1066
1067        for_each_possible_cpu(cpu) {
1068                struct netfront_stats *rx_stats = per_cpu_ptr(np->rx_stats, cpu);
1069                struct netfront_stats *tx_stats = per_cpu_ptr(np->tx_stats, cpu);
1070                u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
1071                unsigned int start;
1072
1073                do {
1074                        start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
1075                        tx_packets = tx_stats->packets;
1076                        tx_bytes = tx_stats->bytes;
1077                } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
1078
1079                do {
1080                        start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
1081                        rx_packets = rx_stats->packets;
1082                        rx_bytes = rx_stats->bytes;
1083                } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
1084
1085                tot->rx_packets += rx_packets;
1086                tot->tx_packets += tx_packets;
1087                tot->rx_bytes   += rx_bytes;
1088                tot->tx_bytes   += tx_bytes;
1089        }
1090
1091        tot->rx_errors  = dev->stats.rx_errors;
1092        tot->tx_dropped = dev->stats.tx_dropped;
1093
1094        return tot;
1095}
1096
1097static void xennet_release_tx_bufs(struct netfront_queue *queue)
1098{
1099        struct sk_buff *skb;
1100        int i;
1101
1102        for (i = 0; i < NET_TX_RING_SIZE; i++) {
1103                /* Skip over entries which are actually freelist references */
1104                if (skb_entry_is_link(&queue->tx_skbs[i]))
1105                        continue;
1106
1107                skb = queue->tx_skbs[i].skb;
1108                get_page(queue->grant_tx_page[i]);
1109                gnttab_end_foreign_access(queue->grant_tx_ref[i],
1110                                          GNTMAP_readonly,
1111                                          (unsigned long)page_address(queue->grant_tx_page[i]));
1112                queue->grant_tx_page[i] = NULL;
1113                queue->grant_tx_ref[i] = GRANT_INVALID_REF;
1114                add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i);
1115                dev_kfree_skb_irq(skb);
1116        }
1117}
1118
1119static void xennet_release_rx_bufs(struct netfront_queue *queue)
1120{
1121        int id, ref;
1122
1123        spin_lock_bh(&queue->rx_lock);
1124
1125        for (id = 0; id < NET_RX_RING_SIZE; id++) {
1126                struct sk_buff *skb;
1127                struct page *page;
1128
1129                skb = queue->rx_skbs[id];
1130                if (!skb)
1131                        continue;
1132
1133                ref = queue->grant_rx_ref[id];
1134                if (ref == GRANT_INVALID_REF)
1135                        continue;
1136
1137                page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
1138
1139                /* gnttab_end_foreign_access() needs a page ref until
1140                 * foreign access is ended (which may be deferred).
1141                 */
1142                get_page(page);
1143                gnttab_end_foreign_access(ref, 0,
1144                                          (unsigned long)page_address(page));
1145                queue->grant_rx_ref[id] = GRANT_INVALID_REF;
1146
1147                kfree_skb(skb);
1148        }
1149
1150        spin_unlock_bh(&queue->rx_lock);
1151}
1152
1153static netdev_features_t xennet_fix_features(struct net_device *dev,
1154        netdev_features_t features)
1155{
1156        struct netfront_info *np = netdev_priv(dev);
1157        int val;
1158
1159        if (features & NETIF_F_SG) {
1160                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
1161                                 "%d", &val) < 0)
1162                        val = 0;
1163
1164                if (!val)
1165                        features &= ~NETIF_F_SG;
1166        }
1167
1168        if (features & NETIF_F_IPV6_CSUM) {
1169                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1170                                 "feature-ipv6-csum-offload", "%d", &val) < 0)
1171                        val = 0;
1172
1173                if (!val)
1174                        features &= ~NETIF_F_IPV6_CSUM;
1175        }
1176
1177        if (features & NETIF_F_TSO) {
1178                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1179                                 "feature-gso-tcpv4", "%d", &val) < 0)
1180                        val = 0;
1181
1182                if (!val)
1183                        features &= ~NETIF_F_TSO;
1184        }
1185
1186        if (features & NETIF_F_TSO6) {
1187                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1188                                 "feature-gso-tcpv6", "%d", &val) < 0)
1189                        val = 0;
1190
1191                if (!val)
1192                        features &= ~NETIF_F_TSO6;
1193        }
1194
1195        return features;
1196}
1197
1198static int xennet_set_features(struct net_device *dev,
1199        netdev_features_t features)
1200{
1201        if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
1202                netdev_info(dev, "Reducing MTU because no SG offload");
1203                dev->mtu = ETH_DATA_LEN;
1204        }
1205
1206        return 0;
1207}
1208
1209static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
1210{
1211        struct netfront_queue *queue = dev_id;
1212        unsigned long flags;
1213
1214        spin_lock_irqsave(&queue->tx_lock, flags);
1215        xennet_tx_buf_gc(queue);
1216        spin_unlock_irqrestore(&queue->tx_lock, flags);
1217
1218        return IRQ_HANDLED;
1219}
1220
1221static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
1222{
1223        struct netfront_queue *queue = dev_id;
1224        struct net_device *dev = queue->info->netdev;
1225
1226        if (likely(netif_carrier_ok(dev) &&
1227                   RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
1228                napi_schedule(&queue->napi);
1229
1230        return IRQ_HANDLED;
1231}
1232
1233static irqreturn_t xennet_interrupt(int irq, void *dev_id)
1234{
1235        xennet_tx_interrupt(irq, dev_id);
1236        xennet_rx_interrupt(irq, dev_id);
1237        return IRQ_HANDLED;
1238}
1239
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * ndo_poll_controller: invoke the combined interrupt handler on each of
 * the device's active TX queues.
 */
static void xennet_poll_controller(struct net_device *dev)
{
	struct netfront_info *info = netdev_priv(dev);
	unsigned int num_queues = dev->real_num_tx_queues;
	unsigned int q = 0;

	/* Poll each queue */
	while (q < num_queues) {
		xennet_interrupt(0, &info->queues[q]);
		++q;
	}
}
#endif
1251
1252static const struct net_device_ops xennet_netdev_ops = {
1253        .ndo_open            = xennet_open,
1254        .ndo_stop            = xennet_close,
1255        .ndo_start_xmit      = xennet_start_xmit,
1256        .ndo_change_mtu      = xennet_change_mtu,
1257        .ndo_get_stats64     = xennet_get_stats64,
1258        .ndo_set_mac_address = eth_mac_addr,
1259        .ndo_validate_addr   = eth_validate_addr,
1260        .ndo_fix_features    = xennet_fix_features,
1261        .ndo_set_features    = xennet_set_features,
1262        .ndo_select_queue    = xennet_select_queue,
1263#ifdef CONFIG_NET_POLL_CONTROLLER
1264        .ndo_poll_controller = xennet_poll_controller,
1265#endif
1266};
1267
1268static void xennet_free_netdev(struct net_device *netdev)
1269{
1270        struct netfront_info *np = netdev_priv(netdev);
1271
1272        free_percpu(np->rx_stats);
1273        free_percpu(np->tx_stats);
1274        free_netdev(netdev);
1275}
1276
1277static struct net_device *xennet_create_dev(struct xenbus_device *dev)
1278{
1279        int err;
1280        struct net_device *netdev;
1281        struct netfront_info *np;
1282
1283        netdev = alloc_etherdev_mq(sizeof(struct netfront_info), xennet_max_queues);
1284        if (!netdev)
1285                return ERR_PTR(-ENOMEM);
1286
1287        np                   = netdev_priv(netdev);
1288        np->xbdev            = dev;
1289
1290        np->queues = NULL;
1291
1292        err = -ENOMEM;
1293        np->rx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1294        if (np->rx_stats == NULL)
1295                goto exit;
1296        np->tx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1297        if (np->tx_stats == NULL)
1298                goto exit;
1299
1300        netdev->netdev_ops      = &xennet_netdev_ops;
1301
1302        netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
1303                                  NETIF_F_GSO_ROBUST;
1304        netdev->hw_features     = NETIF_F_SG |
1305                                  NETIF_F_IPV6_CSUM |
1306                                  NETIF_F_TSO | NETIF_F_TSO6;
1307
1308        /*
1309         * Assume that all hw features are available for now. This set
1310         * will be adjusted by the call to netdev_update_features() in
1311         * xennet_connect() which is the earliest point where we can
1312         * negotiate with the backend regarding supported features.
1313         */
1314        netdev->features |= netdev->hw_features;
1315
1316        netdev->ethtool_ops = &xennet_ethtool_ops;
1317        SET_NETDEV_DEV(netdev, &dev->dev);
1318
1319        np->netdev = netdev;
1320
1321        netif_carrier_off(netdev);
1322
1323        return netdev;
1324
1325 exit:
1326        xennet_free_netdev(netdev);
1327        return ERR_PTR(err);
1328}
1329
1330/**
1331 * Entry point to this code when a new device is created.  Allocate the basic
1332 * structures and the ring buffers for communication with the backend, and
1333 * inform the backend of the appropriate details for those.
1334 */
1335static int netfront_probe(struct xenbus_device *dev,
1336                          const struct xenbus_device_id *id)
1337{
1338        int err;
1339        struct net_device *netdev;
1340        struct netfront_info *info;
1341
1342        netdev = xennet_create_dev(dev);
1343        if (IS_ERR(netdev)) {
1344                err = PTR_ERR(netdev);
1345                xenbus_dev_fatal(dev, err, "creating netdev");
1346                return err;
1347        }
1348
1349        info = netdev_priv(netdev);
1350        dev_set_drvdata(&dev->dev, info);
1351#ifdef CONFIG_SYSFS
1352        info->netdev->sysfs_groups[0] = &xennet_dev_group;
1353#endif
1354        err = register_netdev(info->netdev);
1355        if (err) {
1356                pr_warn("%s: register_netdev err=%d\n", __func__, err);
1357                goto fail;
1358        }
1359
1360        return 0;
1361
1362 fail:
1363        xennet_free_netdev(netdev);
1364        dev_set_drvdata(&dev->dev, NULL);
1365        return err;
1366}
1367
1368static void xennet_end_access(int ref, void *page)
1369{
1370        /* This frees the page as a side-effect */
1371        if (ref != GRANT_INVALID_REF)
1372                gnttab_end_foreign_access(ref, 0, (unsigned long)page);
1373}
1374
1375static void xennet_disconnect_backend(struct netfront_info *info)
1376{
1377        unsigned int i = 0;
1378        unsigned int num_queues = info->netdev->real_num_tx_queues;
1379
1380        netif_carrier_off(info->netdev);
1381
1382        for (i = 0; i < num_queues && info->queues; ++i) {
1383                struct netfront_queue *queue = &info->queues[i];
1384
1385                if (queue->tx_irq && (queue->tx_irq == queue->rx_irq))
1386                        unbind_from_irqhandler(queue->tx_irq, queue);
1387                if (queue->tx_irq && (queue->tx_irq != queue->rx_irq)) {
1388                        unbind_from_irqhandler(queue->tx_irq, queue);
1389                        unbind_from_irqhandler(queue->rx_irq, queue);
1390                }
1391                queue->tx_evtchn = queue->rx_evtchn = 0;
1392                queue->tx_irq = queue->rx_irq = 0;
1393
1394                if (netif_running(info->netdev))
1395                        napi_synchronize(&queue->napi);
1396
1397                xennet_release_tx_bufs(queue);
1398                xennet_release_rx_bufs(queue);
1399                gnttab_free_grant_references(queue->gref_tx_head);
1400                gnttab_free_grant_references(queue->gref_rx_head);
1401
1402                /* End access and free the pages */
1403                xennet_end_access(queue->tx_ring_ref, queue->tx.sring);
1404                xennet_end_access(queue->rx_ring_ref, queue->rx.sring);
1405
1406                queue->tx_ring_ref = GRANT_INVALID_REF;
1407                queue->rx_ring_ref = GRANT_INVALID_REF;
1408                queue->tx.sring = NULL;
1409                queue->rx.sring = NULL;
1410        }
1411}
1412
1413/**
1414 * We are reconnecting to the backend, due to a suspend/resume, or a backend
1415 * driver restart.  We tear down our netif structure and recreate it, but
1416 * leave the device-layer structures intact so that this is transparent to the
1417 * rest of the kernel.
1418 */
1419static int netfront_resume(struct xenbus_device *dev)
1420{
1421        struct netfront_info *info = dev_get_drvdata(&dev->dev);
1422
1423        dev_dbg(&dev->dev, "%s\n", dev->nodename);
1424
1425        xennet_disconnect_backend(info);
1426        return 0;
1427}
1428
1429static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
1430{
1431        char *s, *e, *macstr;
1432        int i;
1433
1434        macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
1435        if (IS_ERR(macstr))
1436                return PTR_ERR(macstr);
1437
1438        for (i = 0; i < ETH_ALEN; i++) {
1439                mac[i] = simple_strtoul(s, &e, 16);
1440                if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
1441                        kfree(macstr);
1442                        return -ENOENT;
1443                }
1444                s = e+1;
1445        }
1446
1447        kfree(macstr);
1448        return 0;
1449}
1450
1451static int setup_netfront_single(struct netfront_queue *queue)
1452{
1453        int err;
1454
1455        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1456        if (err < 0)
1457                goto fail;
1458
1459        err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
1460                                        xennet_interrupt,
1461                                        0, queue->info->netdev->name, queue);
1462        if (err < 0)
1463                goto bind_fail;
1464        queue->rx_evtchn = queue->tx_evtchn;
1465        queue->rx_irq = queue->tx_irq = err;
1466
1467        return 0;
1468
1469bind_fail:
1470        xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1471        queue->tx_evtchn = 0;
1472fail:
1473        return err;
1474}
1475
1476static int setup_netfront_split(struct netfront_queue *queue)
1477{
1478        int err;
1479
1480        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1481        if (err < 0)
1482                goto fail;
1483        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->rx_evtchn);
1484        if (err < 0)
1485                goto alloc_rx_evtchn_fail;
1486
1487        snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
1488                 "%s-tx", queue->name);
1489        err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
1490                                        xennet_tx_interrupt,
1491                                        0, queue->tx_irq_name, queue);
1492        if (err < 0)
1493                goto bind_tx_fail;
1494        queue->tx_irq = err;
1495
1496        snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
1497                 "%s-rx", queue->name);
1498        err = bind_evtchn_to_irqhandler(queue->rx_evtchn,
1499                                        xennet_rx_interrupt,
1500                                        0, queue->rx_irq_name, queue);
1501        if (err < 0)
1502                goto bind_rx_fail;
1503        queue->rx_irq = err;
1504
1505        return 0;
1506
1507bind_rx_fail:
1508        unbind_from_irqhandler(queue->tx_irq, queue);
1509        queue->tx_irq = 0;
1510bind_tx_fail:
1511        xenbus_free_evtchn(queue->info->xbdev, queue->rx_evtchn);
1512        queue->rx_evtchn = 0;
1513alloc_rx_evtchn_fail:
1514        xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1515        queue->tx_evtchn = 0;
1516fail:
1517        return err;
1518}
1519
1520static int setup_netfront(struct xenbus_device *dev,
1521                        struct netfront_queue *queue, unsigned int feature_split_evtchn)
1522{
1523        struct xen_netif_tx_sring *txs;
1524        struct xen_netif_rx_sring *rxs;
1525        grant_ref_t gref;
1526        int err;
1527
1528        queue->tx_ring_ref = GRANT_INVALID_REF;
1529        queue->rx_ring_ref = GRANT_INVALID_REF;
1530        queue->rx.sring = NULL;
1531        queue->tx.sring = NULL;
1532
1533        txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1534        if (!txs) {
1535                err = -ENOMEM;
1536                xenbus_dev_fatal(dev, err, "allocating tx ring page");
1537                goto fail;
1538        }
1539        SHARED_RING_INIT(txs);
1540        FRONT_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
1541
1542        err = xenbus_grant_ring(dev, txs, 1, &gref);
1543        if (err < 0)
1544                goto grant_tx_ring_fail;
1545        queue->tx_ring_ref = gref;
1546
1547        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1548        if (!rxs) {
1549                err = -ENOMEM;
1550                xenbus_dev_fatal(dev, err, "allocating rx ring page");
1551                goto alloc_rx_ring_fail;
1552        }
1553        SHARED_RING_INIT(rxs);
1554        FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
1555
1556        err = xenbus_grant_ring(dev, rxs, 1, &gref);
1557        if (err < 0)
1558                goto grant_rx_ring_fail;
1559        queue->rx_ring_ref = gref;
1560
1561        if (feature_split_evtchn)
1562                err = setup_netfront_split(queue);
1563        /* setup single event channel if
1564         *  a) feature-split-event-channels == 0
1565         *  b) feature-split-event-channels == 1 but failed to setup
1566         */
1567        if (!feature_split_evtchn || (feature_split_evtchn && err))
1568                err = setup_netfront_single(queue);
1569
1570        if (err)
1571                goto alloc_evtchn_fail;
1572
1573        return 0;
1574
1575        /* If we fail to setup netfront, it is safe to just revoke access to
1576         * granted pages because backend is not accessing it at this point.
1577         */
1578alloc_evtchn_fail:
1579        gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0);
1580grant_rx_ring_fail:
1581        free_page((unsigned long)rxs);
1582alloc_rx_ring_fail:
1583        gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0);
1584grant_tx_ring_fail:
1585        free_page((unsigned long)txs);
1586fail:
1587        return err;
1588}
1589
1590/* Queue-specific initialisation
1591 * This used to be done in xennet_create_dev() but must now
1592 * be run per-queue.
1593 */
1594static int xennet_init_queue(struct netfront_queue *queue)
1595{
1596        unsigned short i;
1597        int err = 0;
1598
1599        spin_lock_init(&queue->tx_lock);
1600        spin_lock_init(&queue->rx_lock);
1601
1602        setup_timer(&queue->rx_refill_timer, rx_refill_timeout,
1603                    (unsigned long)queue);
1604
1605        snprintf(queue->name, sizeof(queue->name), "%s-q%u",
1606                 queue->info->netdev->name, queue->id);
1607
1608        /* Initialise tx_skbs as a free chain containing every entry. */
1609        queue->tx_skb_freelist = 0;
1610        for (i = 0; i < NET_TX_RING_SIZE; i++) {
1611                skb_entry_set_link(&queue->tx_skbs[i], i+1);
1612                queue->grant_tx_ref[i] = GRANT_INVALID_REF;
1613                queue->grant_tx_page[i] = NULL;
1614        }
1615
1616        /* Clear out rx_skbs */
1617        for (i = 0; i < NET_RX_RING_SIZE; i++) {
1618                queue->rx_skbs[i] = NULL;
1619                queue->grant_rx_ref[i] = GRANT_INVALID_REF;
1620        }
1621
1622        /* A grant for every tx ring slot */
1623        if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
1624                                          &queue->gref_tx_head) < 0) {
1625                pr_alert("can't alloc tx grant refs\n");
1626                err = -ENOMEM;
1627                goto exit;
1628        }
1629
1630        /* A grant for every rx ring slot */
1631        if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
1632                                          &queue->gref_rx_head) < 0) {
1633                pr_alert("can't alloc rx grant refs\n");
1634                err = -ENOMEM;
1635                goto exit_free_tx;
1636        }
1637
1638        return 0;
1639
1640 exit_free_tx:
1641        gnttab_free_grant_references(queue->gref_tx_head);
1642 exit:
1643        return err;
1644}
1645
1646static int write_queue_xenstore_keys(struct netfront_queue *queue,
1647                           struct xenbus_transaction *xbt, int write_hierarchical)
1648{
1649        /* Write the queue-specific keys into XenStore in the traditional
1650         * way for a single queue, or in a queue subkeys for multiple
1651         * queues.
1652         */
1653        struct xenbus_device *dev = queue->info->xbdev;
1654        int err;
1655        const char *message;
1656        char *path;
1657        size_t pathsize;
1658
1659        /* Choose the correct place to write the keys */
1660        if (write_hierarchical) {
1661                pathsize = strlen(dev->nodename) + 10;
1662                path = kzalloc(pathsize, GFP_KERNEL);
1663                if (!path) {
1664                        err = -ENOMEM;
1665                        message = "out of memory while writing ring references";
1666                        goto error;
1667                }
1668                snprintf(path, pathsize, "%s/queue-%u",
1669                                dev->nodename, queue->id);
1670        } else {
1671                path = (char *)dev->nodename;
1672        }
1673
1674        /* Write ring references */
1675        err = xenbus_printf(*xbt, path, "tx-ring-ref", "%u",
1676                        queue->tx_ring_ref);
1677        if (err) {
1678                message = "writing tx-ring-ref";
1679                goto error;
1680        }
1681
1682        err = xenbus_printf(*xbt, path, "rx-ring-ref", "%u",
1683                        queue->rx_ring_ref);
1684        if (err) {
1685                message = "writing rx-ring-ref";
1686                goto error;
1687        }
1688
1689        /* Write event channels; taking into account both shared
1690         * and split event channel scenarios.
1691         */
1692        if (queue->tx_evtchn == queue->rx_evtchn) {
1693                /* Shared event channel */
1694                err = xenbus_printf(*xbt, path,
1695                                "event-channel", "%u", queue->tx_evtchn);
1696                if (err) {
1697                        message = "writing event-channel";
1698                        goto error;
1699                }
1700        } else {
1701                /* Split event channels */
1702                err = xenbus_printf(*xbt, path,
1703                                "event-channel-tx", "%u", queue->tx_evtchn);
1704                if (err) {
1705                        message = "writing event-channel-tx";
1706                        goto error;
1707                }
1708
1709                err = xenbus_printf(*xbt, path,
1710                                "event-channel-rx", "%u", queue->rx_evtchn);
1711                if (err) {
1712                        message = "writing event-channel-rx";
1713                        goto error;
1714                }
1715        }
1716
1717        if (write_hierarchical)
1718                kfree(path);
1719        return 0;
1720
1721error:
1722        if (write_hierarchical)
1723                kfree(path);
1724        xenbus_dev_fatal(dev, err, "%s", message);
1725        return err;
1726}
1727
1728static void xennet_destroy_queues(struct netfront_info *info)
1729{
1730        unsigned int i;
1731
1732        rtnl_lock();
1733
1734        for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
1735                struct netfront_queue *queue = &info->queues[i];
1736
1737                if (netif_running(info->netdev))
1738                        napi_disable(&queue->napi);
1739                del_timer_sync(&queue->rx_refill_timer);
1740                netif_napi_del(&queue->napi);
1741        }
1742
1743        rtnl_unlock();
1744
1745        kfree(info->queues);
1746        info->queues = NULL;
1747}
1748
1749static int xennet_create_queues(struct netfront_info *info,
1750                                unsigned int *num_queues)
1751{
1752        unsigned int i;
1753        int ret;
1754
1755        info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue),
1756                               GFP_KERNEL);
1757        if (!info->queues)
1758                return -ENOMEM;
1759
1760        rtnl_lock();
1761
1762        for (i = 0; i < *num_queues; i++) {
1763                struct netfront_queue *queue = &info->queues[i];
1764
1765                queue->id = i;
1766                queue->info = info;
1767
1768                ret = xennet_init_queue(queue);
1769                if (ret < 0) {
1770                        dev_warn(&info->netdev->dev,
1771                                 "only created %d queues\n", i);
1772                        *num_queues = i;
1773                        break;
1774                }
1775
1776                netif_napi_add(queue->info->netdev, &queue->napi,
1777                               xennet_poll, 64);
1778                if (netif_running(info->netdev))
1779                        napi_enable(&queue->napi);
1780        }
1781
1782        netif_set_real_num_tx_queues(info->netdev, *num_queues);
1783
1784        rtnl_unlock();
1785
1786        if (*num_queues == 0) {
1787                dev_err(&info->netdev->dev, "no queues\n");
1788                return -EINVAL;
1789        }
1790        return 0;
1791}
1792
1793/* Common code used when first setting up, and when resuming. */
1794static int talk_to_netback(struct xenbus_device *dev,
1795                           struct netfront_info *info)
1796{
1797        const char *message;
1798        struct xenbus_transaction xbt;
1799        int err;
1800        unsigned int feature_split_evtchn;
1801        unsigned int i = 0;
1802        unsigned int max_queues = 0;
1803        struct netfront_queue *queue = NULL;
1804        unsigned int num_queues = 1;
1805
1806        info->netdev->irq = 0;
1807
1808        /* Check if backend supports multiple queues */
1809        err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1810                           "multi-queue-max-queues", "%u", &max_queues);
1811        if (err < 0)
1812                max_queues = 1;
1813        num_queues = min(max_queues, xennet_max_queues);
1814
1815        /* Check feature-split-event-channels */
1816        err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1817                           "feature-split-event-channels", "%u",
1818                           &feature_split_evtchn);
1819        if (err < 0)
1820                feature_split_evtchn = 0;
1821
1822        /* Read mac addr. */
1823        err = xen_net_read_mac(dev, info->netdev->dev_addr);
1824        if (err) {
1825                xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
1826                goto out;
1827        }
1828
1829        if (info->queues)
1830                xennet_destroy_queues(info);
1831
1832        err = xennet_create_queues(info, &num_queues);
1833        if (err < 0)
1834                goto destroy_ring;
1835
1836        /* Create shared ring, alloc event channel -- for each queue */
1837        for (i = 0; i < num_queues; ++i) {
1838                queue = &info->queues[i];
1839                err = setup_netfront(dev, queue, feature_split_evtchn);
1840                if (err) {
1841                        /* setup_netfront() will tidy up the current
1842                         * queue on error, but we need to clean up
1843                         * those already allocated.
1844                         */
1845                        if (i > 0) {
1846                                rtnl_lock();
1847                                netif_set_real_num_tx_queues(info->netdev, i);
1848                                rtnl_unlock();
1849                                goto destroy_ring;
1850                        } else {
1851                                goto out;
1852                        }
1853                }
1854        }
1855
1856again:
1857        err = xenbus_transaction_start(&xbt);
1858        if (err) {
1859                xenbus_dev_fatal(dev, err, "starting transaction");
1860                goto destroy_ring;
1861        }
1862
1863        if (xenbus_exists(XBT_NIL,
1864                          info->xbdev->otherend, "multi-queue-max-queues")) {
1865                /* Write the number of queues */
1866                err = xenbus_printf(xbt, dev->nodename,
1867                                    "multi-queue-num-queues", "%u", num_queues);
1868                if (err) {
1869                        message = "writing multi-queue-num-queues";
1870                        goto abort_transaction_no_dev_fatal;
1871                }
1872        }
1873
1874        if (num_queues == 1) {
1875                err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0); /* flat */
1876                if (err)
1877                        goto abort_transaction_no_dev_fatal;
1878        } else {
1879                /* Write the keys for each queue */
1880                for (i = 0; i < num_queues; ++i) {
1881                        queue = &info->queues[i];
1882                        err = write_queue_xenstore_keys(queue, &xbt, 1); /* hierarchical */
1883                        if (err)
1884                                goto abort_transaction_no_dev_fatal;
1885                }
1886        }
1887
1888        /* The remaining keys are not queue-specific */
1889        err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
1890                            1);
1891        if (err) {
1892                message = "writing request-rx-copy";
1893                goto abort_transaction;
1894        }
1895
1896        err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
1897        if (err) {
1898                message = "writing feature-rx-notify";
1899                goto abort_transaction;
1900        }
1901
1902        err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
1903        if (err) {
1904                message = "writing feature-sg";
1905                goto abort_transaction;
1906        }
1907
1908        err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
1909        if (err) {
1910                message = "writing feature-gso-tcpv4";
1911                goto abort_transaction;
1912        }
1913
1914        err = xenbus_write(xbt, dev->nodename, "feature-gso-tcpv6", "1");
1915        if (err) {
1916                message = "writing feature-gso-tcpv6";
1917                goto abort_transaction;
1918        }
1919
1920        err = xenbus_write(xbt, dev->nodename, "feature-ipv6-csum-offload",
1921                           "1");
1922        if (err) {
1923                message = "writing feature-ipv6-csum-offload";
1924                goto abort_transaction;
1925        }
1926
1927        err = xenbus_transaction_end(xbt, 0);
1928        if (err) {
1929                if (err == -EAGAIN)
1930                        goto again;
1931                xenbus_dev_fatal(dev, err, "completing transaction");
1932                goto destroy_ring;
1933        }
1934
1935        return 0;
1936
1937 abort_transaction:
1938        xenbus_dev_fatal(dev, err, "%s", message);
1939abort_transaction_no_dev_fatal:
1940        xenbus_transaction_end(xbt, 1);
1941 destroy_ring:
1942        xennet_disconnect_backend(info);
1943        kfree(info->queues);
1944        info->queues = NULL;
1945 out:
1946        return err;
1947}
1948
1949static int xennet_connect(struct net_device *dev)
1950{
1951        struct netfront_info *np = netdev_priv(dev);
1952        unsigned int num_queues = 0;
1953        int err;
1954        unsigned int feature_rx_copy;
1955        unsigned int j = 0;
1956        struct netfront_queue *queue = NULL;
1957
1958        err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1959                           "feature-rx-copy", "%u", &feature_rx_copy);
1960        if (err != 1)
1961                feature_rx_copy = 0;
1962
1963        if (!feature_rx_copy) {
1964                dev_info(&dev->dev,
1965                         "backend does not support copying receive path\n");
1966                return -ENODEV;
1967        }
1968
1969        err = talk_to_netback(np->xbdev, np);
1970        if (err)
1971                return err;
1972
1973        /* talk_to_netback() sets the correct number of queues */
1974        num_queues = dev->real_num_tx_queues;
1975
1976        rtnl_lock();
1977        netdev_update_features(dev);
1978        rtnl_unlock();
1979
1980        /*
1981         * All public and private state should now be sane.  Get
1982         * ready to start sending and receiving packets and give the driver
1983         * domain a kick because we've probably just requeued some
1984         * packets.
1985         */
1986        netif_carrier_on(np->netdev);
1987        for (j = 0; j < num_queues; ++j) {
1988                queue = &np->queues[j];
1989
1990                notify_remote_via_irq(queue->tx_irq);
1991                if (queue->tx_irq != queue->rx_irq)
1992                        notify_remote_via_irq(queue->rx_irq);
1993
1994                spin_lock_irq(&queue->tx_lock);
1995                xennet_tx_buf_gc(queue);
1996                spin_unlock_irq(&queue->tx_lock);
1997
1998                spin_lock_bh(&queue->rx_lock);
1999                xennet_alloc_rx_buffers(queue);
2000                spin_unlock_bh(&queue->rx_lock);
2001        }
2002
2003        return 0;
2004}
2005
2006/**
2007 * Callback received when the backend's state changes.
2008 */
2009static void netback_changed(struct xenbus_device *dev,
2010                            enum xenbus_state backend_state)
2011{
2012        struct netfront_info *np = dev_get_drvdata(&dev->dev);
2013        struct net_device *netdev = np->netdev;
2014
2015        dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
2016
2017        switch (backend_state) {
2018        case XenbusStateInitialising:
2019        case XenbusStateInitialised:
2020        case XenbusStateReconfiguring:
2021        case XenbusStateReconfigured:
2022        case XenbusStateUnknown:
2023                break;
2024
2025        case XenbusStateInitWait:
2026                if (dev->state != XenbusStateInitialising)
2027                        break;
2028                if (xennet_connect(netdev) != 0)
2029                        break;
2030                xenbus_switch_state(dev, XenbusStateConnected);
2031                break;
2032
2033        case XenbusStateConnected:
2034                netdev_notify_peers(netdev);
2035                break;
2036
2037        case XenbusStateClosed:
2038                if (dev->state == XenbusStateClosed)
2039                        break;
2040                /* Missed the backend's CLOSING state -- fallthrough */
2041        case XenbusStateClosing:
2042                xenbus_frontend_closed(dev);
2043                break;
2044        }
2045}
2046
2047static const struct xennet_stat {
2048        char name[ETH_GSTRING_LEN];
2049        u16 offset;
2050} xennet_stats[] = {
2051        {
2052                "rx_gso_checksum_fixup",
2053                offsetof(struct netfront_info, rx_gso_checksum_fixup)
2054        },
2055};
2056
2057static int xennet_get_sset_count(struct net_device *dev, int string_set)
2058{
2059        switch (string_set) {
2060        case ETH_SS_STATS:
2061                return ARRAY_SIZE(xennet_stats);
2062        default:
2063                return -EINVAL;
2064        }
2065}
2066
2067static void xennet_get_ethtool_stats(struct net_device *dev,
2068                                     struct ethtool_stats *stats, u64 * data)
2069{
2070        void *np = netdev_priv(dev);
2071        int i;
2072
2073        for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2074                data[i] = atomic_read((atomic_t *)(np + xennet_stats[i].offset));
2075}
2076
2077static void xennet_get_strings(struct net_device *dev, u32 stringset, u8 * data)
2078{
2079        int i;
2080
2081        switch (stringset) {
2082        case ETH_SS_STATS:
2083                for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2084                        memcpy(data + i * ETH_GSTRING_LEN,
2085                               xennet_stats[i].name, ETH_GSTRING_LEN);
2086                break;
2087        }
2088}
2089
2090static const struct ethtool_ops xennet_ethtool_ops =
2091{
2092        .get_link = ethtool_op_get_link,
2093
2094        .get_sset_count = xennet_get_sset_count,
2095        .get_ethtool_stats = xennet_get_ethtool_stats,
2096        .get_strings = xennet_get_strings,
2097};
2098
2099#ifdef CONFIG_SYSFS
2100static ssize_t show_rxbuf(struct device *dev,
2101                          struct device_attribute *attr, char *buf)
2102{
2103        return sprintf(buf, "%lu\n", NET_RX_RING_SIZE);
2104}
2105
2106static ssize_t store_rxbuf(struct device *dev,
2107                           struct device_attribute *attr,
2108                           const char *buf, size_t len)
2109{
2110        char *endp;
2111        unsigned long target;
2112
2113        if (!capable(CAP_NET_ADMIN))
2114                return -EPERM;
2115
2116        target = simple_strtoul(buf, &endp, 0);
2117        if (endp == buf)
2118                return -EBADMSG;
2119
2120        /* rxbuf_min and rxbuf_max are no longer configurable. */
2121
2122        return len;
2123}
2124
2125static DEVICE_ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf);
2126static DEVICE_ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf);
2127static DEVICE_ATTR(rxbuf_cur, S_IRUGO, show_rxbuf, NULL);
2128
2129static struct attribute *xennet_dev_attrs[] = {
2130        &dev_attr_rxbuf_min.attr,
2131        &dev_attr_rxbuf_max.attr,
2132        &dev_attr_rxbuf_cur.attr,
2133        NULL
2134};
2135
2136static const struct attribute_group xennet_dev_group = {
2137        .attrs = xennet_dev_attrs
2138};
2139#endif /* CONFIG_SYSFS */
2140
2141static int xennet_remove(struct xenbus_device *dev)
2142{
2143        struct netfront_info *info = dev_get_drvdata(&dev->dev);
2144
2145        dev_dbg(&dev->dev, "%s\n", dev->nodename);
2146
2147        xennet_disconnect_backend(info);
2148
2149        unregister_netdev(info->netdev);
2150
2151        if (info->queues)
2152                xennet_destroy_queues(info);
2153        xennet_free_netdev(info->netdev);
2154
2155        return 0;
2156}
2157
2158static const struct xenbus_device_id netfront_ids[] = {
2159        { "vif" },
2160        { "" }
2161};
2162
2163static struct xenbus_driver netfront_driver = {
2164        .ids = netfront_ids,
2165        .probe = netfront_probe,
2166        .remove = xennet_remove,
2167        .resume = netfront_resume,
2168        .otherend_changed = netback_changed,
2169};
2170
2171static int __init netif_init(void)
2172{
2173        if (!xen_domain())
2174                return -ENODEV;
2175
2176        if (!xen_has_pv_nic_devices())
2177                return -ENODEV;
2178
2179        pr_info("Initialising Xen virtual ethernet driver\n");
2180
2181        /* Allow as many queues as there are CPUs if user has not
2182         * specified a value.
2183         */
2184        if (xennet_max_queues == 0)
2185                xennet_max_queues = num_online_cpus();
2186
2187        return xenbus_register_frontend(&netfront_driver);
2188}
2189module_init(netif_init);
2190
2191
2192static void __exit netif_exit(void)
2193{
2194        xenbus_unregister_driver(&netfront_driver);
2195}
2196module_exit(netif_exit);
2197
/* Module metadata: description, license and module aliases. */
2198MODULE_DESCRIPTION("Xen virtual network device frontend");
2199MODULE_LICENSE("GPL");
2200MODULE_ALIAS("xen:vif");
2201MODULE_ALIAS("xennet");
2202