linux/drivers/net/xen-netfront.c
   1/*
   2 * Virtual network driver for conversing with remote driver backends.
   3 *
   4 * Copyright (c) 2002-2005, K A Fraser
   5 * Copyright (c) 2005, XenSource Ltd
   6 *
   7 * This program is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU General Public License version 2
   9 * as published by the Free Software Foundation; or, when distributed
  10 * separately from the Linux kernel or incorporated into other
  11 * software packages, subject to the following license:
  12 *
  13 * Permission is hereby granted, free of charge, to any person obtaining a copy
  14 * of this source file (the "Software"), to deal in the Software without
  15 * restriction, including without limitation the rights to use, copy, modify,
  16 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  17 * and to permit persons to whom the Software is furnished to do so, subject to
  18 * the following conditions:
  19 *
  20 * The above copyright notice and this permission notice shall be included in
  21 * all copies or substantial portions of the Software.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  29 * IN THE SOFTWARE.
  30 */
  31
  32#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  33
  34#include <linux/module.h>
  35#include <linux/kernel.h>
  36#include <linux/netdevice.h>
  37#include <linux/etherdevice.h>
  38#include <linux/skbuff.h>
  39#include <linux/ethtool.h>
  40#include <linux/if_ether.h>
  41#include <net/tcp.h>
  42#include <linux/udp.h>
  43#include <linux/moduleparam.h>
  44#include <linux/mm.h>
  45#include <linux/slab.h>
  46#include <net/ip.h>
  47
  48#include <xen/xen.h>
  49#include <xen/xenbus.h>
  50#include <xen/events.h>
  51#include <xen/page.h>
  52#include <xen/platform_pci.h>
  53#include <xen/grant_table.h>
  54
  55#include <xen/interface/io/netif.h>
  56#include <xen/interface/memory.h>
  57#include <xen/interface/grant_table.h>
  58
  59/* Module parameters */
  60static unsigned int xennet_max_queues;
  61module_param_named(max_queues, xennet_max_queues, uint, 0644);
  62MODULE_PARM_DESC(max_queues,
  63                 "Maximum number of queues per virtual interface");
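
/*
 * Example usage (assuming the usual module-parameter plumbing): the limit
 * can be given at load time ("modprobe xen-netfront max_queues=4"), on the
 * kernel command line ("xen_netfront.max_queues=4"), or, since the
 * parameter is declared with mode 0644, written later through
 * /sys/module/xen_netfront/parameters/max_queues.
 */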
  64
  65static const struct ethtool_ops xennet_ethtool_ops;
  66
  67struct netfront_cb {
  68        int pull_to;
  69};
  70
  71#define NETFRONT_SKB_CB(skb)    ((struct netfront_cb *)((skb)->cb))
  72
  73#define RX_COPY_THRESHOLD 256
  74
  75#define GRANT_INVALID_REF       0
  76
  77#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE)
  78#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, XEN_PAGE_SIZE)
  79
  80/* Minimum number of Rx slots (includes slot for GSO metadata). */
  81#define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1)
  82
  83/* Queue name is interface name with "-qNNN" appended */
  84#define QUEUE_NAME_SIZE (IFNAMSIZ + 6)
  85
  86/* IRQ name is queue name with "-tx" or "-rx" appended */
  87#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
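
/*
 * For example, queue 0 of "eth0" is named "eth0-q0"; with split event
 * channels its interrupts are requested as "eth0-q0-tx" and "eth0-q0-rx"
 * (see xennet_init_queue() and setup_netfront_split() below).
 */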
  88
  89struct netfront_stats {
  90        u64                     packets;
  91        u64                     bytes;
  92        struct u64_stats_sync   syncp;
  93};
  94
  95struct netfront_info;
  96
  97struct netfront_queue {
  98        unsigned int id; /* Queue ID, 0-based */
  99        char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
 100        struct netfront_info *info;
 101
 102        struct napi_struct napi;
 103
 104        /* Split event channels support, tx_* == rx_* when using
 105         * single event channel.
 106         */
 107        unsigned int tx_evtchn, rx_evtchn;
 108        unsigned int tx_irq, rx_irq;
 109        /* Only used when split event channels support is enabled */
 110        char tx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-tx */
 111        char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */
 112
 113        spinlock_t   tx_lock;
 114        struct xen_netif_tx_front_ring tx;
 115        int tx_ring_ref;
 116
 117        /*
 118         * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
 119         * are linked from tx_skb_freelist through skb_entry.link.
 120         *
  121         *  NB. Freelist index entries are always less than PAGE_OFFSET,
  122         *  whereas pointers to skbs are always equal to or greater than
  123         *  PAGE_OFFSET: we use this property to distinguish
 124         *  them.
 125         */
 126        union skb_entry {
 127                struct sk_buff *skb;
 128                unsigned long link;
 129        } tx_skbs[NET_TX_RING_SIZE];
 130        grant_ref_t gref_tx_head;
 131        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
 132        struct page *grant_tx_page[NET_TX_RING_SIZE];
 133        unsigned tx_skb_freelist;
 134
 135        spinlock_t   rx_lock ____cacheline_aligned_in_smp;
 136        struct xen_netif_rx_front_ring rx;
 137        int rx_ring_ref;
 138
 139        struct timer_list rx_refill_timer;
 140
 141        struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
 142        grant_ref_t gref_rx_head;
 143        grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
 144};
 145
 146struct netfront_info {
 147        struct list_head list;
 148        struct net_device *netdev;
 149
 150        struct xenbus_device *xbdev;
 151
 152        /* Multi-queue support */
 153        struct netfront_queue *queues;
 154
 155        /* Statistics */
 156        struct netfront_stats __percpu *rx_stats;
 157        struct netfront_stats __percpu *tx_stats;
 158
 159        atomic_t rx_gso_checksum_fixup;
 160};
 161
 162struct netfront_rx_info {
 163        struct xen_netif_rx_response rx;
 164        struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
 165};
 166
 167static void skb_entry_set_link(union skb_entry *list, unsigned short id)
 168{
 169        list->link = id;
 170}
 171
 172static int skb_entry_is_link(const union skb_entry *list)
 173{
 174        BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
 175        return (unsigned long)list->skb < PAGE_OFFSET;
 176}
 177
 178/*
  179 * Access helpers for acquiring and freeing slots in tx_skbs[].
 180 */
 181
 182static void add_id_to_freelist(unsigned *head, union skb_entry *list,
 183                               unsigned short id)
 184{
 185        skb_entry_set_link(&list[id], *head);
 186        *head = id;
 187}
 188
 189static unsigned short get_id_from_freelist(unsigned *head,
 190                                           union skb_entry *list)
 191{
 192        unsigned int id = *head;
 193        *head = list[id].link;
 194        return id;
 195}
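
/*
 * A minimal sketch (not part of the driver) of how the helpers above fit
 * together, assuming a small local array; the driver itself chains all
 * NET_TX_RING_SIZE entries this way in xennet_init_queue().
 */
static void __maybe_unused tx_freelist_example(void)
{
        union skb_entry entries[4];
        unsigned int head = 0;
        unsigned short i, id;

        /* Chain the entries: entries[i].link = i + 1, head starts at 0. */
        for (i = 0; i < 4; i++)
                skb_entry_set_link(&entries[i], i + 1);

        /* Claim the first free slot (index 0); head advances to 1. */
        id = get_id_from_freelist(&head, entries);

        /* Return it; index 0 becomes the head of the freelist again. */
        add_id_to_freelist(&head, entries, id);
}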
 196
 197static int xennet_rxidx(RING_IDX idx)
 198{
 199        return idx & (NET_RX_RING_SIZE - 1);
 200}
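
/*
 * NET_RX_RING_SIZE is a power of two, so the mask above is just a cheap
 * "idx % NET_RX_RING_SIZE"; with the usual 256-entry ring, for example,
 * ring index 260 maps to slot 4.
 */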
 201
 202static struct sk_buff *xennet_get_rx_skb(struct netfront_queue *queue,
 203                                         RING_IDX ri)
 204{
 205        int i = xennet_rxidx(ri);
 206        struct sk_buff *skb = queue->rx_skbs[i];
 207        queue->rx_skbs[i] = NULL;
 208        return skb;
 209}
 210
 211static grant_ref_t xennet_get_rx_ref(struct netfront_queue *queue,
 212                                            RING_IDX ri)
 213{
 214        int i = xennet_rxidx(ri);
 215        grant_ref_t ref = queue->grant_rx_ref[i];
 216        queue->grant_rx_ref[i] = GRANT_INVALID_REF;
 217        return ref;
 218}
 219
 220#ifdef CONFIG_SYSFS
 221static const struct attribute_group xennet_dev_group;
 222#endif
 223
 224static bool xennet_can_sg(struct net_device *dev)
 225{
 226        return dev->features & NETIF_F_SG;
 227}
 228
 229
 230static void rx_refill_timeout(unsigned long data)
 231{
 232        struct netfront_queue *queue = (struct netfront_queue *)data;
 233        napi_schedule(&queue->napi);
 234}
 235
 236static int netfront_tx_slot_available(struct netfront_queue *queue)
 237{
 238        return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) <
 239                (NET_TX_RING_SIZE - MAX_SKB_FRAGS - 2);
 240}
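
/*
 * Slots are reported as available only while more than MAX_SKB_FRAGS + 2
 * of them are free, so a worst-case skb (one request for the linear area,
 * one per frag, plus a GSO extra-info slot) can always be queued in one go.
 */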
 241
 242static void xennet_maybe_wake_tx(struct netfront_queue *queue)
 243{
 244        struct net_device *dev = queue->info->netdev;
 245        struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, queue->id);
 246
 247        if (unlikely(netif_tx_queue_stopped(dev_queue)) &&
 248            netfront_tx_slot_available(queue) &&
 249            likely(netif_running(dev)))
 250                netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
 251}
 252
 253
 254static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
 255{
 256        struct sk_buff *skb;
 257        struct page *page;
 258
 259        skb = __netdev_alloc_skb(queue->info->netdev,
 260                                 RX_COPY_THRESHOLD + NET_IP_ALIGN,
 261                                 GFP_ATOMIC | __GFP_NOWARN);
 262        if (unlikely(!skb))
 263                return NULL;
 264
 265        page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 266        if (!page) {
 267                kfree_skb(skb);
 268                return NULL;
 269        }
 270        skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
 271
  272        /* Align the IP header to a 16-byte boundary */
 273        skb_reserve(skb, NET_IP_ALIGN);
 274        skb->dev = queue->info->netdev;
 275
 276        return skb;
 277}
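
/*
 * Each rx buffer is therefore a small linear head (RX_COPY_THRESHOLD bytes
 * plus alignment) with one full page attached as frag 0; it is that page
 * which gets granted to the backend in xennet_alloc_rx_buffers().
 */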
 278
 279
 280static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
 281{
 282        RING_IDX req_prod = queue->rx.req_prod_pvt;
 283        int notify;
 284
 285        if (unlikely(!netif_carrier_ok(queue->info->netdev)))
 286                return;
 287
 288        for (req_prod = queue->rx.req_prod_pvt;
 289             req_prod - queue->rx.rsp_cons < NET_RX_RING_SIZE;
 290             req_prod++) {
 291                struct sk_buff *skb;
 292                unsigned short id;
 293                grant_ref_t ref;
 294                struct page *page;
 295                struct xen_netif_rx_request *req;
 296
 297                skb = xennet_alloc_one_rx_buffer(queue);
 298                if (!skb)
 299                        break;
 300
 301                id = xennet_rxidx(req_prod);
 302
 303                BUG_ON(queue->rx_skbs[id]);
 304                queue->rx_skbs[id] = skb;
 305
 306                ref = gnttab_claim_grant_reference(&queue->gref_rx_head);
 307                WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
 308                queue->grant_rx_ref[id] = ref;
 309
 310                page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
 311
 312                req = RING_GET_REQUEST(&queue->rx, req_prod);
 313                gnttab_page_grant_foreign_access_ref_one(ref,
 314                                                         queue->info->xbdev->otherend_id,
 315                                                         page,
 316                                                         0);
 317                req->id = id;
 318                req->gref = ref;
 319        }
 320
 321        queue->rx.req_prod_pvt = req_prod;
 322
 323        /* Not enough requests? Try again later. */
 324        if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN) {
 325                mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10));
 326                return;
 327        }
 328
  329        wmb();          /* barrier so backend sees requests */
 330
 331        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->rx, notify);
 332        if (notify)
 333                notify_remote_via_irq(queue->rx_irq);
 334}
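
/*
 * If allocation stalls before NET_RX_SLOTS_MIN requests are outstanding,
 * rx_refill_timer retries roughly every 100 ms (HZ/10) by scheduling NAPI,
 * which ends up back here from xennet_poll(); otherwise the new requests
 * are pushed and the backend is notified only if it asked to be.
 */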
 335
 336static int xennet_open(struct net_device *dev)
 337{
 338        struct netfront_info *np = netdev_priv(dev);
 339        unsigned int num_queues = dev->real_num_tx_queues;
 340        unsigned int i = 0;
 341        struct netfront_queue *queue = NULL;
 342
 343        for (i = 0; i < num_queues; ++i) {
 344                queue = &np->queues[i];
 345                napi_enable(&queue->napi);
 346
 347                spin_lock_bh(&queue->rx_lock);
 348                if (netif_carrier_ok(dev)) {
 349                        xennet_alloc_rx_buffers(queue);
 350                        queue->rx.sring->rsp_event = queue->rx.rsp_cons + 1;
 351                        if (RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))
 352                                napi_schedule(&queue->napi);
 353                }
 354                spin_unlock_bh(&queue->rx_lock);
 355        }
 356
 357        netif_tx_start_all_queues(dev);
 358
 359        return 0;
 360}
 361
 362static void xennet_tx_buf_gc(struct netfront_queue *queue)
 363{
 364        RING_IDX cons, prod;
 365        unsigned short id;
 366        struct sk_buff *skb;
 367        bool more_to_do;
 368
 369        BUG_ON(!netif_carrier_ok(queue->info->netdev));
 370
 371        do {
 372                prod = queue->tx.sring->rsp_prod;
 373                rmb(); /* Ensure we see responses up to 'rp'. */
 374
 375                for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
 376                        struct xen_netif_tx_response *txrsp;
 377
 378                        txrsp = RING_GET_RESPONSE(&queue->tx, cons);
 379                        if (txrsp->status == XEN_NETIF_RSP_NULL)
 380                                continue;
 381
 382                        id  = txrsp->id;
 383                        skb = queue->tx_skbs[id].skb;
 384                        if (unlikely(gnttab_query_foreign_access(
 385                                queue->grant_tx_ref[id]) != 0)) {
 386                                pr_alert("%s: warning -- grant still in use by backend domain\n",
 387                                         __func__);
 388                                BUG();
 389                        }
 390                        gnttab_end_foreign_access_ref(
 391                                queue->grant_tx_ref[id], GNTMAP_readonly);
 392                        gnttab_release_grant_reference(
 393                                &queue->gref_tx_head, queue->grant_tx_ref[id]);
 394                        queue->grant_tx_ref[id] = GRANT_INVALID_REF;
 395                        queue->grant_tx_page[id] = NULL;
 396                        add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id);
 397                        dev_kfree_skb_irq(skb);
 398                }
 399
 400                queue->tx.rsp_cons = prod;
 401
 402                RING_FINAL_CHECK_FOR_RESPONSES(&queue->tx, more_to_do);
 403        } while (more_to_do);
 404
 405        xennet_maybe_wake_tx(queue);
 406}
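
/*
 * The outer loop matters: RING_FINAL_CHECK_FOR_RESPONSES() re-arms the
 * response event and reports whether further responses arrived while the
 * batch above was being processed, so completions cannot be lost in the
 * window before the next interrupt.
 */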
 407
 408struct xennet_gnttab_make_txreq {
 409        struct netfront_queue *queue;
 410        struct sk_buff *skb;
 411        struct page *page;
 412        struct xen_netif_tx_request *tx; /* Last request */
 413        unsigned int size;
 414};
 415
 416static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset,
 417                                  unsigned int len, void *data)
 418{
 419        struct xennet_gnttab_make_txreq *info = data;
 420        unsigned int id;
 421        struct xen_netif_tx_request *tx;
 422        grant_ref_t ref;
 423        /* convenient aliases */
 424        struct page *page = info->page;
 425        struct netfront_queue *queue = info->queue;
 426        struct sk_buff *skb = info->skb;
 427
 428        id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
 429        tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
 430        ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
 431        WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
 432
 433        gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
 434                                        gfn, GNTMAP_readonly);
 435
 436        queue->tx_skbs[id].skb = skb;
 437        queue->grant_tx_page[id] = page;
 438        queue->grant_tx_ref[id] = ref;
 439
 440        tx->id = id;
 441        tx->gref = ref;
 442        tx->offset = offset;
 443        tx->size = len;
 444        tx->flags = 0;
 445
 446        info->tx = tx;
 447        info->size += tx->size;
 448}
 449
 450static struct xen_netif_tx_request *xennet_make_first_txreq(
 451        struct netfront_queue *queue, struct sk_buff *skb,
 452        struct page *page, unsigned int offset, unsigned int len)
 453{
 454        struct xennet_gnttab_make_txreq info = {
 455                .queue = queue,
 456                .skb = skb,
 457                .page = page,
 458                .size = 0,
 459        };
 460
 461        gnttab_for_one_grant(page, offset, len, xennet_tx_setup_grant, &info);
 462
 463        return info.tx;
 464}
 465
 466static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset,
 467                                  unsigned int len, void *data)
 468{
 469        struct xennet_gnttab_make_txreq *info = data;
 470
 471        info->tx->flags |= XEN_NETTXF_more_data;
 472        skb_get(info->skb);
 473        xennet_tx_setup_grant(gfn, offset, len, data);
 474}
 475
 476static struct xen_netif_tx_request *xennet_make_txreqs(
 477        struct netfront_queue *queue, struct xen_netif_tx_request *tx,
 478        struct sk_buff *skb, struct page *page,
 479        unsigned int offset, unsigned int len)
 480{
 481        struct xennet_gnttab_make_txreq info = {
 482                .queue = queue,
 483                .skb = skb,
 484                .tx = tx,
 485        };
 486
 487        /* Skip unused frames from start of page */
 488        page += offset >> PAGE_SHIFT;
 489        offset &= ~PAGE_MASK;
 490
 491        while (len) {
 492                info.page = page;
 493                info.size = 0;
 494
 495                gnttab_foreach_grant_in_range(page, offset, len,
 496                                              xennet_make_one_txreq,
 497                                              &info);
 498
 499                page++;
 500                offset = 0;
 501                len -= info.size;
 502        }
 503
 504        return info.tx;
 505}
 506
 507/*
 508 * Count how many ring slots are required to send this skb. Each frag
 509 * might be a compound page.
 510 */
 511static int xennet_count_skb_slots(struct sk_buff *skb)
 512{
 513        int i, frags = skb_shinfo(skb)->nr_frags;
 514        int slots;
 515
 516        slots = gnttab_count_grant(offset_in_page(skb->data),
 517                                   skb_headlen(skb));
 518
 519        for (i = 0; i < frags; i++) {
 520                skb_frag_t *frag = skb_shinfo(skb)->frags + i;
 521                unsigned long size = skb_frag_size(frag);
 522                unsigned long offset = frag->page_offset;
 523
 524                /* Skip unused frames from start of page */
 525                offset &= ~PAGE_MASK;
 526
 527                slots += gnttab_count_grant(offset, size);
 528        }
 529
 530        return slots;
 531}
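
/*
 * Worked example with 4 KiB grants: a 1400-byte linear area starting 200
 * bytes into a page fits in one slot, while a 7000-byte frag starting at
 * offset 300 crosses one grant boundary and therefore needs two slots.
 */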
 532
 533static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
 534                               void *accel_priv, select_queue_fallback_t fallback)
 535{
 536        unsigned int num_queues = dev->real_num_tx_queues;
 537        u32 hash;
 538        u16 queue_idx;
 539
 540        /* First, check if there is only one queue */
 541        if (num_queues == 1) {
 542                queue_idx = 0;
 543        } else {
 544                hash = skb_get_hash(skb);
 545                queue_idx = hash % num_queues;
 546        }
 547
 548        return queue_idx;
 549}
 550
 551#define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
 552
 553static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 554{
 555        struct netfront_info *np = netdev_priv(dev);
 556        struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
 557        struct xen_netif_tx_request *tx, *first_tx;
 558        unsigned int i;
 559        int notify;
 560        int slots;
 561        struct page *page;
 562        unsigned int offset;
 563        unsigned int len;
 564        unsigned long flags;
 565        struct netfront_queue *queue = NULL;
 566        unsigned int num_queues = dev->real_num_tx_queues;
 567        u16 queue_index;
 568        struct sk_buff *nskb;
 569
 570        /* Drop the packet if no queues are set up */
 571        if (num_queues < 1)
 572                goto drop;
 573        /* Determine which queue to transmit this SKB on */
 574        queue_index = skb_get_queue_mapping(skb);
 575        queue = &np->queues[queue_index];
 576
 577        /* If skb->len is too big for wire format, drop skb and alert
 578         * user about misconfiguration.
 579         */
 580        if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) {
 581                net_alert_ratelimited(
 582                        "xennet: skb->len = %u, too big for wire format\n",
 583                        skb->len);
 584                goto drop;
 585        }
 586
 587        slots = xennet_count_skb_slots(skb);
 588        if (unlikely(slots > MAX_XEN_SKB_FRAGS + 1)) {
 589                net_dbg_ratelimited("xennet: skb rides the rocket: %d slots, %d bytes\n",
 590                                    slots, skb->len);
 591                if (skb_linearize(skb))
 592                        goto drop;
 593        }
 594
 595        page = virt_to_page(skb->data);
 596        offset = offset_in_page(skb->data);
 597
 598        /* The first req should be at least ETH_HLEN size or the packet will be
 599         * dropped by netback.
 600         */
 601        if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
 602                nskb = skb_copy(skb, GFP_ATOMIC);
 603                if (!nskb)
 604                        goto drop;
 605                dev_kfree_skb_any(skb);
 606                skb = nskb;
 607                page = virt_to_page(skb->data);
 608                offset = offset_in_page(skb->data);
 609        }
 610
 611        len = skb_headlen(skb);
 612
 613        spin_lock_irqsave(&queue->tx_lock, flags);
 614
 615        if (unlikely(!netif_carrier_ok(dev) ||
 616                     (slots > 1 && !xennet_can_sg(dev)) ||
 617                     netif_needs_gso(skb, netif_skb_features(skb)))) {
 618                spin_unlock_irqrestore(&queue->tx_lock, flags);
 619                goto drop;
 620        }
 621
 622        /* First request for the linear area. */
 623        first_tx = tx = xennet_make_first_txreq(queue, skb,
 624                                                page, offset, len);
 625        offset += tx->size;
 626        if (offset == PAGE_SIZE) {
 627                page++;
 628                offset = 0;
 629        }
 630        len -= tx->size;
 631
 632        if (skb->ip_summed == CHECKSUM_PARTIAL)
 633                /* local packet? */
 634                tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
 635        else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
 636                /* remote but checksummed. */
 637                tx->flags |= XEN_NETTXF_data_validated;
 638
 639        /* Optional extra info after the first request. */
 640        if (skb_shinfo(skb)->gso_size) {
 641                struct xen_netif_extra_info *gso;
 642
 643                gso = (struct xen_netif_extra_info *)
 644                        RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
 645
 646                tx->flags |= XEN_NETTXF_extra_info;
 647
 648                gso->u.gso.size = skb_shinfo(skb)->gso_size;
 649                gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
 650                        XEN_NETIF_GSO_TYPE_TCPV6 :
 651                        XEN_NETIF_GSO_TYPE_TCPV4;
 652                gso->u.gso.pad = 0;
 653                gso->u.gso.features = 0;
 654
 655                gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
 656                gso->flags = 0;
 657        }
 658
 659        /* Requests for the rest of the linear area. */
 660        tx = xennet_make_txreqs(queue, tx, skb, page, offset, len);
 661
 662        /* Requests for all the frags. */
 663        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 664                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 665                tx = xennet_make_txreqs(queue, tx, skb,
 666                                        skb_frag_page(frag), frag->page_offset,
 667                                        skb_frag_size(frag));
 668        }
 669
 670        /* First request has the packet length. */
 671        first_tx->size = skb->len;
 672
 673        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
 674        if (notify)
 675                notify_remote_via_irq(queue->tx_irq);
 676
 677        u64_stats_update_begin(&tx_stats->syncp);
 678        tx_stats->bytes += skb->len;
 679        tx_stats->packets++;
 680        u64_stats_update_end(&tx_stats->syncp);
 681
 682        /* Note: It is not safe to access skb after xennet_tx_buf_gc()! */
 683        xennet_tx_buf_gc(queue);
 684
 685        if (!netfront_tx_slot_available(queue))
 686                netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
 687
 688        spin_unlock_irqrestore(&queue->tx_lock, flags);
 689
 690        return NETDEV_TX_OK;
 691
 692 drop:
 693        dev->stats.tx_dropped++;
 694        dev_kfree_skb_any(skb);
 695        return NETDEV_TX_OK;
 696}
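
/*
 * Resulting request layout for one skb: the first request describes the
 * start of the linear area but carries the total skb->len in its size
 * field, an optional GSO extra-info slot follows it, and every additional
 * chunk of the linear area and of each frag gets its own request, with
 * XEN_NETTXF_more_data set on the request before it.
 */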
 697
 698static int xennet_close(struct net_device *dev)
 699{
 700        struct netfront_info *np = netdev_priv(dev);
 701        unsigned int num_queues = dev->real_num_tx_queues;
 702        unsigned int i;
 703        struct netfront_queue *queue;
 704        netif_tx_stop_all_queues(np->netdev);
 705        for (i = 0; i < num_queues; ++i) {
 706                queue = &np->queues[i];
 707                napi_disable(&queue->napi);
 708        }
 709        return 0;
 710}
 711
 712static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
 713                                grant_ref_t ref)
 714{
 715        int new = xennet_rxidx(queue->rx.req_prod_pvt);
 716
 717        BUG_ON(queue->rx_skbs[new]);
 718        queue->rx_skbs[new] = skb;
 719        queue->grant_rx_ref[new] = ref;
 720        RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->id = new;
 721        RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->gref = ref;
 722        queue->rx.req_prod_pvt++;
 723}
 724
 725static int xennet_get_extras(struct netfront_queue *queue,
 726                             struct xen_netif_extra_info *extras,
 727                             RING_IDX rp)
 728
 729{
 730        struct xen_netif_extra_info *extra;
 731        struct device *dev = &queue->info->netdev->dev;
 732        RING_IDX cons = queue->rx.rsp_cons;
 733        int err = 0;
 734
 735        do {
 736                struct sk_buff *skb;
 737                grant_ref_t ref;
 738
 739                if (unlikely(cons + 1 == rp)) {
 740                        if (net_ratelimit())
 741                                dev_warn(dev, "Missing extra info\n");
 742                        err = -EBADR;
 743                        break;
 744                }
 745
 746                extra = (struct xen_netif_extra_info *)
 747                        RING_GET_RESPONSE(&queue->rx, ++cons);
 748
 749                if (unlikely(!extra->type ||
 750                             extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
 751                        if (net_ratelimit())
 752                                dev_warn(dev, "Invalid extra type: %d\n",
 753                                        extra->type);
 754                        err = -EINVAL;
 755                } else {
 756                        memcpy(&extras[extra->type - 1], extra,
 757                               sizeof(*extra));
 758                }
 759
 760                skb = xennet_get_rx_skb(queue, cons);
 761                ref = xennet_get_rx_ref(queue, cons);
 762                xennet_move_rx_slot(queue, skb, ref);
 763        } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
 764
 765        queue->rx.rsp_cons = cons;
 766        return err;
 767}
 768
 769static int xennet_get_responses(struct netfront_queue *queue,
 770                                struct netfront_rx_info *rinfo, RING_IDX rp,
 771                                struct sk_buff_head *list)
 772{
 773        struct xen_netif_rx_response *rx = &rinfo->rx;
 774        struct xen_netif_extra_info *extras = rinfo->extras;
 775        struct device *dev = &queue->info->netdev->dev;
 776        RING_IDX cons = queue->rx.rsp_cons;
 777        struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
 778        grant_ref_t ref = xennet_get_rx_ref(queue, cons);
 779        int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
 780        int slots = 1;
 781        int err = 0;
 782        unsigned long ret;
 783
 784        if (rx->flags & XEN_NETRXF_extra_info) {
 785                err = xennet_get_extras(queue, extras, rp);
 786                cons = queue->rx.rsp_cons;
 787        }
 788
 789        for (;;) {
 790                if (unlikely(rx->status < 0 ||
 791                             rx->offset + rx->status > XEN_PAGE_SIZE)) {
 792                        if (net_ratelimit())
 793                                dev_warn(dev, "rx->offset: %u, size: %d\n",
 794                                         rx->offset, rx->status);
 795                        xennet_move_rx_slot(queue, skb, ref);
 796                        err = -EINVAL;
 797                        goto next;
 798                }
 799
 800                /*
 801                 * This definitely indicates a bug, either in this driver or in
 802                 * the backend driver. In future this should flag the bad
 803                 * situation to the system controller to reboot the backend.
 804                 */
 805                if (ref == GRANT_INVALID_REF) {
 806                        if (net_ratelimit())
 807                                dev_warn(dev, "Bad rx response id %d.\n",
 808                                         rx->id);
 809                        err = -EINVAL;
 810                        goto next;
 811                }
 812
 813                ret = gnttab_end_foreign_access_ref(ref, 0);
 814                BUG_ON(!ret);
 815
 816                gnttab_release_grant_reference(&queue->gref_rx_head, ref);
 817
 818                __skb_queue_tail(list, skb);
 819
 820next:
 821                if (!(rx->flags & XEN_NETRXF_more_data))
 822                        break;
 823
 824                if (cons + slots == rp) {
 825                        if (net_ratelimit())
 826                                dev_warn(dev, "Need more slots\n");
 827                        err = -ENOENT;
 828                        break;
 829                }
 830
 831                rx = RING_GET_RESPONSE(&queue->rx, cons + slots);
 832                skb = xennet_get_rx_skb(queue, cons + slots);
 833                ref = xennet_get_rx_ref(queue, cons + slots);
 834                slots++;
 835        }
 836
 837        if (unlikely(slots > max)) {
 838                if (net_ratelimit())
 839                        dev_warn(dev, "Too many slots\n");
 840                err = -E2BIG;
 841        }
 842
 843        if (unlikely(err))
 844                queue->rx.rsp_cons = cons + slots;
 845
 846        return err;
 847}
 848
 849static int xennet_set_skb_gso(struct sk_buff *skb,
 850                              struct xen_netif_extra_info *gso)
 851{
 852        if (!gso->u.gso.size) {
 853                if (net_ratelimit())
 854                        pr_warn("GSO size must not be zero\n");
 855                return -EINVAL;
 856        }
 857
 858        if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
 859            gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
 860                if (net_ratelimit())
 861                        pr_warn("Bad GSO type %d\n", gso->u.gso.type);
 862                return -EINVAL;
 863        }
 864
 865        skb_shinfo(skb)->gso_size = gso->u.gso.size;
 866        skb_shinfo(skb)->gso_type =
 867                (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
 868                SKB_GSO_TCPV4 :
 869                SKB_GSO_TCPV6;
 870
 871        /* Header must be checked, and gso_segs computed. */
 872        skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 873        skb_shinfo(skb)->gso_segs = 0;
 874
 875        return 0;
 876}
 877
 878static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
 879                                  struct sk_buff *skb,
 880                                  struct sk_buff_head *list)
 881{
 882        struct skb_shared_info *shinfo = skb_shinfo(skb);
 883        RING_IDX cons = queue->rx.rsp_cons;
 884        struct sk_buff *nskb;
 885
 886        while ((nskb = __skb_dequeue(list))) {
 887                struct xen_netif_rx_response *rx =
 888                        RING_GET_RESPONSE(&queue->rx, ++cons);
 889                skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
 890
 891                if (shinfo->nr_frags == MAX_SKB_FRAGS) {
 892                        unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 893
 894                        BUG_ON(pull_to <= skb_headlen(skb));
 895                        __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 896                }
 897                BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS);
 898
 899                skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag),
 900                                rx->offset, rx->status, PAGE_SIZE);
 901
 902                skb_shinfo(nskb)->nr_frags = 0;
 903                kfree_skb(nskb);
 904        }
 905
 906        return cons;
 907}
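
/*
 * Each skb dequeued from the list donates the page backing its first frag
 * to the head skb; clearing nr_frags beforehand stops kfree_skb() from
 * releasing the page reference that was just handed over.
 */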
 908
 909static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
 910{
 911        bool recalculate_partial_csum = false;
 912
 913        /*
 914         * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
 915         * peers can fail to set NETRXF_csum_blank when sending a GSO
 916         * frame. In this case force the SKB to CHECKSUM_PARTIAL and
 917         * recalculate the partial checksum.
 918         */
 919        if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
 920                struct netfront_info *np = netdev_priv(dev);
 921                atomic_inc(&np->rx_gso_checksum_fixup);
 922                skb->ip_summed = CHECKSUM_PARTIAL;
 923                recalculate_partial_csum = true;
 924        }
 925
 926        /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
 927        if (skb->ip_summed != CHECKSUM_PARTIAL)
 928                return 0;
 929
 930        return skb_checksum_setup(skb, recalculate_partial_csum);
 931}
 932
 933static int handle_incoming_queue(struct netfront_queue *queue,
 934                                 struct sk_buff_head *rxq)
 935{
 936        struct netfront_stats *rx_stats = this_cpu_ptr(queue->info->rx_stats);
 937        int packets_dropped = 0;
 938        struct sk_buff *skb;
 939
 940        while ((skb = __skb_dequeue(rxq)) != NULL) {
 941                int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 942
 943                if (pull_to > skb_headlen(skb))
 944                        __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 945
 946                /* Ethernet work: Delayed to here as it peeks the header. */
 947                skb->protocol = eth_type_trans(skb, queue->info->netdev);
 948                skb_reset_network_header(skb);
 949
 950                if (checksum_setup(queue->info->netdev, skb)) {
 951                        kfree_skb(skb);
 952                        packets_dropped++;
 953                        queue->info->netdev->stats.rx_errors++;
 954                        continue;
 955                }
 956
 957                u64_stats_update_begin(&rx_stats->syncp);
 958                rx_stats->packets++;
 959                rx_stats->bytes += skb->len;
 960                u64_stats_update_end(&rx_stats->syncp);
 961
 962                /* Pass it up. */
 963                napi_gro_receive(&queue->napi, skb);
 964        }
 965
 966        return packets_dropped;
 967}
 968
 969static int xennet_poll(struct napi_struct *napi, int budget)
 970{
 971        struct netfront_queue *queue = container_of(napi, struct netfront_queue, napi);
 972        struct net_device *dev = queue->info->netdev;
 973        struct sk_buff *skb;
 974        struct netfront_rx_info rinfo;
 975        struct xen_netif_rx_response *rx = &rinfo.rx;
 976        struct xen_netif_extra_info *extras = rinfo.extras;
 977        RING_IDX i, rp;
 978        int work_done;
 979        struct sk_buff_head rxq;
 980        struct sk_buff_head errq;
 981        struct sk_buff_head tmpq;
 982        int err;
 983
 984        spin_lock(&queue->rx_lock);
 985
 986        skb_queue_head_init(&rxq);
 987        skb_queue_head_init(&errq);
 988        skb_queue_head_init(&tmpq);
 989
 990        rp = queue->rx.sring->rsp_prod;
 991        rmb(); /* Ensure we see queued responses up to 'rp'. */
 992
 993        i = queue->rx.rsp_cons;
 994        work_done = 0;
 995        while ((i != rp) && (work_done < budget)) {
 996                memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx));
 997                memset(extras, 0, sizeof(rinfo.extras));
 998
 999                err = xennet_get_responses(queue, &rinfo, rp, &tmpq);
1000
1001                if (unlikely(err)) {
1002err:
1003                        while ((skb = __skb_dequeue(&tmpq)))
1004                                __skb_queue_tail(&errq, skb);
1005                        dev->stats.rx_errors++;
1006                        i = queue->rx.rsp_cons;
1007                        continue;
1008                }
1009
1010                skb = __skb_dequeue(&tmpq);
1011
1012                if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1013                        struct xen_netif_extra_info *gso;
1014                        gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1015
1016                        if (unlikely(xennet_set_skb_gso(skb, gso))) {
1017                                __skb_queue_head(&tmpq, skb);
1018                                queue->rx.rsp_cons += skb_queue_len(&tmpq);
1019                                goto err;
1020                        }
1021                }
1022
1023                NETFRONT_SKB_CB(skb)->pull_to = rx->status;
1024                if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
1025                        NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;
1026
1027                skb_shinfo(skb)->frags[0].page_offset = rx->offset;
1028                skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
1029                skb->data_len = rx->status;
1030                skb->len += rx->status;
1031
1032                i = xennet_fill_frags(queue, skb, &tmpq);
1033
1034                if (rx->flags & XEN_NETRXF_csum_blank)
1035                        skb->ip_summed = CHECKSUM_PARTIAL;
1036                else if (rx->flags & XEN_NETRXF_data_validated)
1037                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1038
1039                __skb_queue_tail(&rxq, skb);
1040
1041                queue->rx.rsp_cons = ++i;
1042                work_done++;
1043        }
1044
1045        __skb_queue_purge(&errq);
1046
1047        work_done -= handle_incoming_queue(queue, &rxq);
1048
1049        xennet_alloc_rx_buffers(queue);
1050
1051        if (work_done < budget) {
1052                int more_to_do = 0;
1053
1054                napi_complete(napi);
1055
1056                RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do);
1057                if (more_to_do)
1058                        napi_schedule(napi);
1059        }
1060
1061        spin_unlock(&queue->rx_lock);
1062
1063        return work_done;
1064}
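
/*
 * As on the tx side, napi_complete() is followed by a final check: if the
 * backend produced more responses before the ring event was re-armed,
 * RING_FINAL_CHECK_FOR_RESPONSES() notices and NAPI is rescheduled
 * immediately rather than waiting for the next interrupt.
 */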
1065
1066static int xennet_change_mtu(struct net_device *dev, int mtu)
1067{
1068        int max = xennet_can_sg(dev) ? XEN_NETIF_MAX_TX_SIZE : ETH_DATA_LEN;
1069
1070        if (mtu > max)
1071                return -EINVAL;
1072        dev->mtu = mtu;
1073        return 0;
1074}
1075
1076static struct rtnl_link_stats64 *xennet_get_stats64(struct net_device *dev,
1077                                                    struct rtnl_link_stats64 *tot)
1078{
1079        struct netfront_info *np = netdev_priv(dev);
1080        int cpu;
1081
1082        for_each_possible_cpu(cpu) {
1083                struct netfront_stats *rx_stats = per_cpu_ptr(np->rx_stats, cpu);
1084                struct netfront_stats *tx_stats = per_cpu_ptr(np->tx_stats, cpu);
1085                u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
1086                unsigned int start;
1087
1088                do {
1089                        start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
1090                        tx_packets = tx_stats->packets;
1091                        tx_bytes = tx_stats->bytes;
1092                } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
1093
1094                do {
1095                        start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
1096                        rx_packets = rx_stats->packets;
1097                        rx_bytes = rx_stats->bytes;
1098                } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
1099
1100                tot->rx_packets += rx_packets;
1101                tot->tx_packets += tx_packets;
1102                tot->rx_bytes   += rx_bytes;
1103                tot->tx_bytes   += tx_bytes;
1104        }
1105
1106        tot->rx_errors  = dev->stats.rx_errors;
1107        tot->tx_dropped = dev->stats.tx_dropped;
1108
1109        return tot;
1110}
1111
1112static void xennet_release_tx_bufs(struct netfront_queue *queue)
1113{
1114        struct sk_buff *skb;
1115        int i;
1116
1117        for (i = 0; i < NET_TX_RING_SIZE; i++) {
1118                /* Skip over entries which are actually freelist references */
1119                if (skb_entry_is_link(&queue->tx_skbs[i]))
1120                        continue;
1121
1122                skb = queue->tx_skbs[i].skb;
1123                get_page(queue->grant_tx_page[i]);
1124                gnttab_end_foreign_access(queue->grant_tx_ref[i],
1125                                          GNTMAP_readonly,
1126                                          (unsigned long)page_address(queue->grant_tx_page[i]));
1127                queue->grant_tx_page[i] = NULL;
1128                queue->grant_tx_ref[i] = GRANT_INVALID_REF;
1129                add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i);
1130                dev_kfree_skb_irq(skb);
1131        }
1132}
1133
1134static void xennet_release_rx_bufs(struct netfront_queue *queue)
1135{
1136        int id, ref;
1137
1138        spin_lock_bh(&queue->rx_lock);
1139
1140        for (id = 0; id < NET_RX_RING_SIZE; id++) {
1141                struct sk_buff *skb;
1142                struct page *page;
1143
1144                skb = queue->rx_skbs[id];
1145                if (!skb)
1146                        continue;
1147
1148                ref = queue->grant_rx_ref[id];
1149                if (ref == GRANT_INVALID_REF)
1150                        continue;
1151
1152                page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
1153
1154                /* gnttab_end_foreign_access() needs a page ref until
1155                 * foreign access is ended (which may be deferred).
1156                 */
1157                get_page(page);
1158                gnttab_end_foreign_access(ref, 0,
1159                                          (unsigned long)page_address(page));
1160                queue->grant_rx_ref[id] = GRANT_INVALID_REF;
1161
1162                kfree_skb(skb);
1163        }
1164
1165        spin_unlock_bh(&queue->rx_lock);
1166}
1167
1168static netdev_features_t xennet_fix_features(struct net_device *dev,
1169        netdev_features_t features)
1170{
1171        struct netfront_info *np = netdev_priv(dev);
1172        int val;
1173
1174        if (features & NETIF_F_SG) {
1175                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
1176                                 "%d", &val) < 0)
1177                        val = 0;
1178
1179                if (!val)
1180                        features &= ~NETIF_F_SG;
1181        }
1182
1183        if (features & NETIF_F_IPV6_CSUM) {
1184                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1185                                 "feature-ipv6-csum-offload", "%d", &val) < 0)
1186                        val = 0;
1187
1188                if (!val)
1189                        features &= ~NETIF_F_IPV6_CSUM;
1190        }
1191
1192        if (features & NETIF_F_TSO) {
1193                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1194                                 "feature-gso-tcpv4", "%d", &val) < 0)
1195                        val = 0;
1196
1197                if (!val)
1198                        features &= ~NETIF_F_TSO;
1199        }
1200
1201        if (features & NETIF_F_TSO6) {
1202                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1203                                 "feature-gso-tcpv6", "%d", &val) < 0)
1204                        val = 0;
1205
1206                if (!val)
1207                        features &= ~NETIF_F_TSO6;
1208        }
1209
1210        return features;
1211}
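
/*
 * The four lookups above share one pattern; a possible helper is sketched
 * below ("xennet_backend_feature" is a hypothetical name, not used
 * elsewhere in this file).
 */
static int __maybe_unused xennet_backend_feature(struct netfront_info *np,
                                                 const char *node)
{
        int val;

        /* A missing or unreadable key is treated as "not supported". */
        if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, node, "%d", &val) < 0)
                val = 0;

        return val;
}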
1212
1213static int xennet_set_features(struct net_device *dev,
1214        netdev_features_t features)
1215{
1216        if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
 1217                netdev_info(dev, "Reducing MTU because no SG offload\n");
1218                dev->mtu = ETH_DATA_LEN;
1219        }
1220
1221        return 0;
1222}
1223
1224static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
1225{
1226        struct netfront_queue *queue = dev_id;
1227        unsigned long flags;
1228
1229        spin_lock_irqsave(&queue->tx_lock, flags);
1230        xennet_tx_buf_gc(queue);
1231        spin_unlock_irqrestore(&queue->tx_lock, flags);
1232
1233        return IRQ_HANDLED;
1234}
1235
1236static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
1237{
1238        struct netfront_queue *queue = dev_id;
1239        struct net_device *dev = queue->info->netdev;
1240
1241        if (likely(netif_carrier_ok(dev) &&
1242                   RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
1243                napi_schedule(&queue->napi);
1244
1245        return IRQ_HANDLED;
1246}
1247
1248static irqreturn_t xennet_interrupt(int irq, void *dev_id)
1249{
1250        xennet_tx_interrupt(irq, dev_id);
1251        xennet_rx_interrupt(irq, dev_id);
1252        return IRQ_HANDLED;
1253}
1254
1255#ifdef CONFIG_NET_POLL_CONTROLLER
1256static void xennet_poll_controller(struct net_device *dev)
1257{
1258        /* Poll each queue */
1259        struct netfront_info *info = netdev_priv(dev);
1260        unsigned int num_queues = dev->real_num_tx_queues;
1261        unsigned int i;
1262        for (i = 0; i < num_queues; ++i)
1263                xennet_interrupt(0, &info->queues[i]);
1264}
1265#endif
1266
1267static const struct net_device_ops xennet_netdev_ops = {
1268        .ndo_open            = xennet_open,
1269        .ndo_stop            = xennet_close,
1270        .ndo_start_xmit      = xennet_start_xmit,
1271        .ndo_change_mtu      = xennet_change_mtu,
1272        .ndo_get_stats64     = xennet_get_stats64,
1273        .ndo_set_mac_address = eth_mac_addr,
1274        .ndo_validate_addr   = eth_validate_addr,
1275        .ndo_fix_features    = xennet_fix_features,
1276        .ndo_set_features    = xennet_set_features,
1277        .ndo_select_queue    = xennet_select_queue,
1278#ifdef CONFIG_NET_POLL_CONTROLLER
1279        .ndo_poll_controller = xennet_poll_controller,
1280#endif
1281};
1282
1283static void xennet_free_netdev(struct net_device *netdev)
1284{
1285        struct netfront_info *np = netdev_priv(netdev);
1286
1287        free_percpu(np->rx_stats);
1288        free_percpu(np->tx_stats);
1289        free_netdev(netdev);
1290}
1291
1292static struct net_device *xennet_create_dev(struct xenbus_device *dev)
1293{
1294        int err;
1295        struct net_device *netdev;
1296        struct netfront_info *np;
1297
1298        netdev = alloc_etherdev_mq(sizeof(struct netfront_info), xennet_max_queues);
1299        if (!netdev)
1300                return ERR_PTR(-ENOMEM);
1301
1302        np                   = netdev_priv(netdev);
1303        np->xbdev            = dev;
1304
1305        np->queues = NULL;
1306
1307        err = -ENOMEM;
1308        np->rx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1309        if (np->rx_stats == NULL)
1310                goto exit;
1311        np->tx_stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1312        if (np->tx_stats == NULL)
1313                goto exit;
1314
1315        netdev->netdev_ops      = &xennet_netdev_ops;
1316
1317        netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
1318                                  NETIF_F_GSO_ROBUST;
1319        netdev->hw_features     = NETIF_F_SG |
1320                                  NETIF_F_IPV6_CSUM |
1321                                  NETIF_F_TSO | NETIF_F_TSO6;
1322
1323        /*
1324         * Assume that all hw features are available for now. This set
1325         * will be adjusted by the call to netdev_update_features() in
1326         * xennet_connect() which is the earliest point where we can
1327         * negotiate with the backend regarding supported features.
1328         */
1329        netdev->features |= netdev->hw_features;
1330
1331        netdev->ethtool_ops = &xennet_ethtool_ops;
1332        SET_NETDEV_DEV(netdev, &dev->dev);
1333
1334        np->netdev = netdev;
1335
1336        netif_carrier_off(netdev);
1337
1338        return netdev;
1339
1340 exit:
1341        xennet_free_netdev(netdev);
1342        return ERR_PTR(err);
1343}
1344
1345/**
1346 * Entry point to this code when a new device is created.  Allocate the basic
1347 * structures and the ring buffers for communication with the backend, and
1348 * inform the backend of the appropriate details for those.
1349 */
1350static int netfront_probe(struct xenbus_device *dev,
1351                          const struct xenbus_device_id *id)
1352{
1353        int err;
1354        struct net_device *netdev;
1355        struct netfront_info *info;
1356
1357        netdev = xennet_create_dev(dev);
1358        if (IS_ERR(netdev)) {
1359                err = PTR_ERR(netdev);
1360                xenbus_dev_fatal(dev, err, "creating netdev");
1361                return err;
1362        }
1363
1364        info = netdev_priv(netdev);
1365        dev_set_drvdata(&dev->dev, info);
1366#ifdef CONFIG_SYSFS
1367        info->netdev->sysfs_groups[0] = &xennet_dev_group;
1368#endif
1369        err = register_netdev(info->netdev);
1370        if (err) {
1371                pr_warn("%s: register_netdev err=%d\n", __func__, err);
1372                goto fail;
1373        }
1374
1375        return 0;
1376
1377 fail:
1378        xennet_free_netdev(netdev);
1379        dev_set_drvdata(&dev->dev, NULL);
1380        return err;
1381}
1382
1383static void xennet_end_access(int ref, void *page)
1384{
1385        /* This frees the page as a side-effect */
1386        if (ref != GRANT_INVALID_REF)
1387                gnttab_end_foreign_access(ref, 0, (unsigned long)page);
1388}
1389
1390static void xennet_disconnect_backend(struct netfront_info *info)
1391{
1392        unsigned int i = 0;
1393        unsigned int num_queues = info->netdev->real_num_tx_queues;
1394
1395        netif_carrier_off(info->netdev);
1396
1397        for (i = 0; i < num_queues && info->queues; ++i) {
1398                struct netfront_queue *queue = &info->queues[i];
1399
1400                if (queue->tx_irq && (queue->tx_irq == queue->rx_irq))
1401                        unbind_from_irqhandler(queue->tx_irq, queue);
1402                if (queue->tx_irq && (queue->tx_irq != queue->rx_irq)) {
1403                        unbind_from_irqhandler(queue->tx_irq, queue);
1404                        unbind_from_irqhandler(queue->rx_irq, queue);
1405                }
1406                queue->tx_evtchn = queue->rx_evtchn = 0;
1407                queue->tx_irq = queue->rx_irq = 0;
1408
1409                if (netif_running(info->netdev))
1410                        napi_synchronize(&queue->napi);
1411
1412                xennet_release_tx_bufs(queue);
1413                xennet_release_rx_bufs(queue);
1414                gnttab_free_grant_references(queue->gref_tx_head);
1415                gnttab_free_grant_references(queue->gref_rx_head);
1416
1417                /* End access and free the pages */
1418                xennet_end_access(queue->tx_ring_ref, queue->tx.sring);
1419                xennet_end_access(queue->rx_ring_ref, queue->rx.sring);
1420
1421                queue->tx_ring_ref = GRANT_INVALID_REF;
1422                queue->rx_ring_ref = GRANT_INVALID_REF;
1423                queue->tx.sring = NULL;
1424                queue->rx.sring = NULL;
1425        }
1426}
1427
1428/**
1429 * We are reconnecting to the backend, due to a suspend/resume, or a backend
1430 * driver restart.  We tear down our netif structure and recreate it, but
1431 * leave the device-layer structures intact so that this is transparent to the
1432 * rest of the kernel.
1433 */
1434static int netfront_resume(struct xenbus_device *dev)
1435{
1436        struct netfront_info *info = dev_get_drvdata(&dev->dev);
1437
1438        dev_dbg(&dev->dev, "%s\n", dev->nodename);
1439
1440        xennet_disconnect_backend(info);
1441        return 0;
1442}
1443
1444static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
1445{
1446        char *s, *e, *macstr;
1447        int i;
1448
1449        macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
1450        if (IS_ERR(macstr))
1451                return PTR_ERR(macstr);
1452
1453        for (i = 0; i < ETH_ALEN; i++) {
1454                mac[i] = simple_strtoul(s, &e, 16);
1455                if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
1456                        kfree(macstr);
1457                        return -ENOENT;
1458                }
1459                s = e+1;
1460        }
1461
1462        kfree(macstr);
1463        return 0;
1464}
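
/*
 * The "mac" node holds the usual colon-separated form, e.g.
 * "00:16:3e:4a:b9:01"; anything that does not parse as exactly six hex
 * octets is rejected with -ENOENT.
 */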
1465
1466static int setup_netfront_single(struct netfront_queue *queue)
1467{
1468        int err;
1469
1470        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1471        if (err < 0)
1472                goto fail;
1473
1474        err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
1475                                        xennet_interrupt,
1476                                        0, queue->info->netdev->name, queue);
1477        if (err < 0)
1478                goto bind_fail;
1479        queue->rx_evtchn = queue->tx_evtchn;
1480        queue->rx_irq = queue->tx_irq = err;
1481
1482        return 0;
1483
1484bind_fail:
1485        xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1486        queue->tx_evtchn = 0;
1487fail:
1488        return err;
1489}
1490
1491static int setup_netfront_split(struct netfront_queue *queue)
1492{
1493        int err;
1494
1495        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1496        if (err < 0)
1497                goto fail;
1498        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->rx_evtchn);
1499        if (err < 0)
1500                goto alloc_rx_evtchn_fail;
1501
1502        snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
1503                 "%s-tx", queue->name);
1504        err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
1505                                        xennet_tx_interrupt,
1506                                        0, queue->tx_irq_name, queue);
1507        if (err < 0)
1508                goto bind_tx_fail;
1509        queue->tx_irq = err;
1510
1511        snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
1512                 "%s-rx", queue->name);
1513        err = bind_evtchn_to_irqhandler(queue->rx_evtchn,
1514                                        xennet_rx_interrupt,
1515                                        0, queue->rx_irq_name, queue);
1516        if (err < 0)
1517                goto bind_rx_fail;
1518        queue->rx_irq = err;
1519
1520        return 0;
1521
1522bind_rx_fail:
1523        unbind_from_irqhandler(queue->tx_irq, queue);
1524        queue->tx_irq = 0;
1525bind_tx_fail:
1526        xenbus_free_evtchn(queue->info->xbdev, queue->rx_evtchn);
1527        queue->rx_evtchn = 0;
1528alloc_rx_evtchn_fail:
1529        xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1530        queue->tx_evtchn = 0;
1531fail:
1532        return err;
1533}
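
/*
 * Sketch of the resulting interrupt setup (an explanatory note, not from
 * the original source): with a single event channel, tx_evtchn == rx_evtchn
 * and one handler, xennet_interrupt(), services both directions; with split
 * event channels each direction gets its own channel and IRQ, registered as
 * "<queue-name>-tx" and "<queue-name>-rx", so the two show up as separate
 * entries in /proc/interrupts and can have their affinity set independently.
 */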
1534
1535static int setup_netfront(struct xenbus_device *dev,
1536                        struct netfront_queue *queue, unsigned int feature_split_evtchn)
1537{
1538        struct xen_netif_tx_sring *txs;
1539        struct xen_netif_rx_sring *rxs;
1540        grant_ref_t gref;
1541        int err;
1542
1543        queue->tx_ring_ref = GRANT_INVALID_REF;
1544        queue->rx_ring_ref = GRANT_INVALID_REF;
1545        queue->rx.sring = NULL;
1546        queue->tx.sring = NULL;
1547
1548        txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1549        if (!txs) {
1550                err = -ENOMEM;
1551                xenbus_dev_fatal(dev, err, "allocating tx ring page");
1552                goto fail;
1553        }
1554        SHARED_RING_INIT(txs);
1555        FRONT_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
1556
1557        err = xenbus_grant_ring(dev, txs, 1, &gref);
1558        if (err < 0)
1559                goto grant_tx_ring_fail;
1560        queue->tx_ring_ref = gref;
1561
1562        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1563        if (!rxs) {
1564                err = -ENOMEM;
1565                xenbus_dev_fatal(dev, err, "allocating rx ring page");
1566                goto alloc_rx_ring_fail;
1567        }
1568        SHARED_RING_INIT(rxs);
1569        FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
1570
1571        err = xenbus_grant_ring(dev, rxs, 1, &gref);
1572        if (err < 0)
1573                goto grant_rx_ring_fail;
1574        queue->rx_ring_ref = gref;
1575
1576        if (feature_split_evtchn)
1577                err = setup_netfront_split(queue);
1578        /* Set up a single event channel if
1579         *  a) feature-split-event-channels == 0, or
1580         *  b) feature-split-event-channels == 1 but setting it up failed.
1581         */
1582        if (!feature_split_evtchn || err)
1583                err = setup_netfront_single(queue);
1584
1585        if (err)
1586                goto alloc_evtchn_fail;
1587
1588        return 0;
1589
1590        /* If we fail to set up netfront, it is safe to just revoke access to
1591         * granted pages because the backend is not accessing them at this point.
1592         */
1593alloc_evtchn_fail:
1594        gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0);
1595grant_rx_ring_fail:
1596        free_page((unsigned long)rxs);
1597alloc_rx_ring_fail:
1598        gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0);
1599grant_tx_ring_fail:
1600        free_page((unsigned long)txs);
1601fail:
1602        return err;
1603}
1604
1605/* Queue-specific initialisation
1606 * This used to be done in xennet_create_dev() but must now
1607 * be run per-queue.
1608 */
1609static int xennet_init_queue(struct netfront_queue *queue)
1610{
1611        unsigned short i;
1612        int err = 0;
1613
1614        spin_lock_init(&queue->tx_lock);
1615        spin_lock_init(&queue->rx_lock);
1616
1617        setup_timer(&queue->rx_refill_timer, rx_refill_timeout,
1618                    (unsigned long)queue);
1619
1620        snprintf(queue->name, sizeof(queue->name), "%s-q%u",
1621                 queue->info->netdev->name, queue->id);
1622
1623        /* Initialise tx_skbs as a free chain containing every entry. */
1624        queue->tx_skb_freelist = 0;
1625        for (i = 0; i < NET_TX_RING_SIZE; i++) {
1626                skb_entry_set_link(&queue->tx_skbs[i], i+1);
1627                queue->grant_tx_ref[i] = GRANT_INVALID_REF;
1628                queue->grant_tx_page[i] = NULL;
1629        }
1630
1631        /* Clear out rx_skbs */
1632        for (i = 0; i < NET_RX_RING_SIZE; i++) {
1633                queue->rx_skbs[i] = NULL;
1634                queue->grant_rx_ref[i] = GRANT_INVALID_REF;
1635        }
1636
1637        /* A grant for every tx ring slot */
1638        if (gnttab_alloc_grant_references(NET_TX_RING_SIZE,
1639                                          &queue->gref_tx_head) < 0) {
1640                pr_alert("can't alloc tx grant refs\n");
1641                err = -ENOMEM;
1642                goto exit;
1643        }
1644
1645        /* A grant for every rx ring slot */
1646        if (gnttab_alloc_grant_references(NET_RX_RING_SIZE,
1647                                          &queue->gref_rx_head) < 0) {
1648                pr_alert("can't alloc rx grant refs\n");
1649                err = -ENOMEM;
1650                goto exit_free_tx;
1651        }
1652
1653        return 0;
1654
1655 exit_free_tx:
1656        gnttab_free_grant_references(queue->gref_tx_head);
1657 exit:
1658        return err;
1659}
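
/*
 * Explanatory note: tx_skbs doubles as a free list of tx ring slot ids.
 * Each entry is initialised above to link to the next one and
 * tx_skb_freelist records the head index, so claiming and releasing a tx
 * slot elsewhere in the driver is a constant-time push/pop rather than a
 * search for a free entry.
 */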
1660
1661static int write_queue_xenstore_keys(struct netfront_queue *queue,
1662                           struct xenbus_transaction *xbt, int write_hierarchical)
1663{
1664        /* Write the queue-specific keys into XenStore in the traditional
1665         * way for a single queue, or under per-queue subkeys when there
1666         * are multiple queues.
1667         */
1668        struct xenbus_device *dev = queue->info->xbdev;
1669        int err;
1670        const char *message;
1671        char *path;
1672        size_t pathsize;
1673
1674        /* Choose the correct place to write the keys */
1675        if (write_hierarchical) {
1676                pathsize = strlen(dev->nodename) + 10;
1677                path = kzalloc(pathsize, GFP_KERNEL);
1678                if (!path) {
1679                        err = -ENOMEM;
1680                        message = "out of memory while writing ring references";
1681                        goto error;
1682                }
1683                snprintf(path, pathsize, "%s/queue-%u",
1684                                dev->nodename, queue->id);
1685        } else {
1686                path = (char *)dev->nodename;
1687        }
1688
1689        /* Write ring references */
1690        err = xenbus_printf(*xbt, path, "tx-ring-ref", "%u",
1691                        queue->tx_ring_ref);
1692        if (err) {
1693                message = "writing tx-ring-ref";
1694                goto error;
1695        }
1696
1697        err = xenbus_printf(*xbt, path, "rx-ring-ref", "%u",
1698                        queue->rx_ring_ref);
1699        if (err) {
1700                message = "writing rx-ring-ref";
1701                goto error;
1702        }
1703
1704        /* Write event channels, taking into account both the shared
1705         * and the split event channel scenarios.
1706         */
1707        if (queue->tx_evtchn == queue->rx_evtchn) {
1708                /* Shared event channel */
1709                err = xenbus_printf(*xbt, path,
1710                                "event-channel", "%u", queue->tx_evtchn);
1711                if (err) {
1712                        message = "writing event-channel";
1713                        goto error;
1714                }
1715        } else {
1716                /* Split event channels */
1717                err = xenbus_printf(*xbt, path,
1718                                "event-channel-tx", "%u", queue->tx_evtchn);
1719                if (err) {
1720                        message = "writing event-channel-tx";
1721                        goto error;
1722                }
1723
1724                err = xenbus_printf(*xbt, path,
1725                                "event-channel-rx", "%u", queue->rx_evtchn);
1726                if (err) {
1727                        message = "writing event-channel-rx";
1728                        goto error;
1729                }
1730        }
1731
1732        if (write_hierarchical)
1733                kfree(path);
1734        return 0;
1735
1736error:
1737        if (write_hierarchical)
1738                kfree(path);
1739        xenbus_dev_fatal(dev, err, "%s", message);
1740        return err;
1741}
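
/*
 * Example of the resulting XenStore layout (illustrative; the actual device
 * path depends on the toolstack).  With a single queue the keys are written
 * flat under the frontend node, e.g.:
 *
 *   <nodename>/tx-ring-ref    = "<grant ref>"
 *   <nodename>/rx-ring-ref    = "<grant ref>"
 *   <nodename>/event-channel  = "<port>"          (shared channel case)
 *
 * With multiple queues each queue gets its own subdirectory:
 *
 *   <nodename>/queue-0/tx-ring-ref
 *   <nodename>/queue-0/event-channel-tx
 *   <nodename>/queue-0/event-channel-rx
 *   <nodename>/queue-1/...
 */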
1742
1743static void xennet_destroy_queues(struct netfront_info *info)
1744{
1745        unsigned int i;
1746
1747        rtnl_lock();
1748
1749        for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
1750                struct netfront_queue *queue = &info->queues[i];
1751
1752                if (netif_running(info->netdev))
1753                        napi_disable(&queue->napi);
1754                del_timer_sync(&queue->rx_refill_timer);
1755                netif_napi_del(&queue->napi);
1756        }
1757
1758        rtnl_unlock();
1759
1760        kfree(info->queues);
1761        info->queues = NULL;
1762}
1763
1764static int xennet_create_queues(struct netfront_info *info,
1765                                unsigned int *num_queues)
1766{
1767        unsigned int i;
1768        int ret;
1769
1770        info->queues = kcalloc(*num_queues, sizeof(struct netfront_queue),
1771                               GFP_KERNEL);
1772        if (!info->queues)
1773                return -ENOMEM;
1774
1775        rtnl_lock();
1776
1777        for (i = 0; i < *num_queues; i++) {
1778                struct netfront_queue *queue = &info->queues[i];
1779
1780                queue->id = i;
1781                queue->info = info;
1782
1783                ret = xennet_init_queue(queue);
1784                if (ret < 0) {
1785                        dev_warn(&info->netdev->dev,
1786                                 "only created %u queues\n", i);
1787                        *num_queues = i;
1788                        break;
1789                }
1790
1791                netif_napi_add(queue->info->netdev, &queue->napi,
1792                               xennet_poll, 64);
1793                if (netif_running(info->netdev))
1794                        napi_enable(&queue->napi);
1795        }
1796
1797        netif_set_real_num_tx_queues(info->netdev, *num_queues);
1798
1799        rtnl_unlock();
1800
1801        if (*num_queues == 0) {
1802                dev_err(&info->netdev->dev, "no queues\n");
1803                return -EINVAL;
1804        }
1805        return 0;
1806}
1807
1808/* Common code used when first setting up, and when resuming. */
1809static int talk_to_netback(struct xenbus_device *dev,
1810                           struct netfront_info *info)
1811{
1812        const char *message;
1813        struct xenbus_transaction xbt;
1814        int err;
1815        unsigned int feature_split_evtchn;
1816        unsigned int i = 0;
1817        unsigned int max_queues = 0;
1818        struct netfront_queue *queue = NULL;
1819        unsigned int num_queues = 1;
1820
1821        info->netdev->irq = 0;
1822
1823        /* Check if backend supports multiple queues */
1824        err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1825                           "multi-queue-max-queues", "%u", &max_queues);
1826        if (err < 0)
1827                max_queues = 1;
1828        num_queues = min(max_queues, xennet_max_queues);
1829
1830        /* Check feature-split-event-channels */
1831        err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1832                           "feature-split-event-channels", "%u",
1833                           &feature_split_evtchn);
1834        if (err < 0)
1835                feature_split_evtchn = 0;
1836
1837        /* Read mac addr. */
1838        err = xen_net_read_mac(dev, info->netdev->dev_addr);
1839        if (err) {
1840                xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
1841                goto out;
1842        }
1843
1844        if (info->queues)
1845                xennet_destroy_queues(info);
1846
1847        err = xennet_create_queues(info, &num_queues);
1848        if (err < 0)
1849                goto destroy_ring;
1850
1851        /* Create shared ring, alloc event channel -- for each queue */
1852        for (i = 0; i < num_queues; ++i) {
1853                queue = &info->queues[i];
1854                err = setup_netfront(dev, queue, feature_split_evtchn);
1855                if (err) {
1856                        /* setup_netfront() will tidy up the current
1857                         * queue on error, but we need to clean up
1858                         * those already allocated.
1859                         */
1860                        if (i > 0) {
1861                                rtnl_lock();
1862                                netif_set_real_num_tx_queues(info->netdev, i);
1863                                rtnl_unlock();
1864                                goto destroy_ring;
1865                        } else {
1866                                goto out;
1867                        }
1868                }
1869        }
1870
1871again:
1872        err = xenbus_transaction_start(&xbt);
1873        if (err) {
1874                xenbus_dev_fatal(dev, err, "starting transaction");
1875                goto destroy_ring;
1876        }
1877
1878        if (xenbus_exists(XBT_NIL,
1879                          info->xbdev->otherend, "multi-queue-max-queues")) {
1880                /* Write the number of queues */
1881                err = xenbus_printf(xbt, dev->nodename,
1882                                    "multi-queue-num-queues", "%u", num_queues);
1883                if (err) {
1884                        message = "writing multi-queue-num-queues";
1885                        goto abort_transaction_no_dev_fatal;
1886                }
1887        }
1888
1889        if (num_queues == 1) {
1890                err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0); /* flat */
1891                if (err)
1892                        goto abort_transaction_no_dev_fatal;
1893        } else {
1894                /* Write the keys for each queue */
1895                for (i = 0; i < num_queues; ++i) {
1896                        queue = &info->queues[i];
1897                        err = write_queue_xenstore_keys(queue, &xbt, 1); /* hierarchical */
1898                        if (err)
1899                                goto abort_transaction_no_dev_fatal;
1900                }
1901        }
1902
1903        /* The remaining keys are not queue-specific */
1904        err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
1905                            1);
1906        if (err) {
1907                message = "writing request-rx-copy";
1908                goto abort_transaction;
1909        }
1910
1911        err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
1912        if (err) {
1913                message = "writing feature-rx-notify";
1914                goto abort_transaction;
1915        }
1916
1917        err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
1918        if (err) {
1919                message = "writing feature-sg";
1920                goto abort_transaction;
1921        }
1922
1923        err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
1924        if (err) {
1925                message = "writing feature-gso-tcpv4";
1926                goto abort_transaction;
1927        }
1928
1929        err = xenbus_write(xbt, dev->nodename, "feature-gso-tcpv6", "1");
1930        if (err) {
1931                message = "writing feature-gso-tcpv6";
1932                goto abort_transaction;
1933        }
1934
1935        err = xenbus_write(xbt, dev->nodename, "feature-ipv6-csum-offload",
1936                           "1");
1937        if (err) {
1938                message = "writing feature-ipv6-csum-offload";
1939                goto abort_transaction;
1940        }
1941
1942        err = xenbus_transaction_end(xbt, 0);
1943        if (err) {
1944                if (err == -EAGAIN)
1945                        goto again;
1946                xenbus_dev_fatal(dev, err, "completing transaction");
1947                goto destroy_ring;
1948        }
1949
1950        return 0;
1951
1952 abort_transaction:
1953        xenbus_dev_fatal(dev, err, "%s", message);
1954abort_transaction_no_dev_fatal:
1955        xenbus_transaction_end(xbt, 1);
1956 destroy_ring:
1957        xennet_disconnect_backend(info);
1958        kfree(info->queues);
1959        info->queues = NULL;
1960 out:
1961        return err;
1962}
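
/*
 * Worked example of the queue negotiation above (numbers are made up): if
 * the backend advertises multi-queue-max-queues = 8 and the frontend's own
 * limit is 4, then num_queues = min(8, 4) = 4 queues are created; if the
 * backend does not expose the key at all, max_queues falls back to 1 and a
 * single queue is used.
 */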
1963
1964static int xennet_connect(struct net_device *dev)
1965{
1966        struct netfront_info *np = netdev_priv(dev);
1967        unsigned int num_queues = 0;
1968        int err;
1969        unsigned int feature_rx_copy;
1970        unsigned int j = 0;
1971        struct netfront_queue *queue = NULL;
1972
1973        err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1974                           "feature-rx-copy", "%u", &feature_rx_copy);
1975        if (err != 1)
1976                feature_rx_copy = 0;
1977
1978        if (!feature_rx_copy) {
1979                dev_info(&dev->dev,
1980                         "backend does not support copying receive path\n");
1981                return -ENODEV;
1982        }
1983
1984        err = talk_to_netback(np->xbdev, np);
1985        if (err)
1986                return err;
1987
1988        /* talk_to_netback() sets the correct number of queues */
1989        num_queues = dev->real_num_tx_queues;
1990
1991        rtnl_lock();
1992        netdev_update_features(dev);
1993        rtnl_unlock();
1994
1995        /*
1996         * All public and private state should now be sane.  Get
1997         * ready to start sending and receiving packets and give the driver
1998         * domain a kick because we've probably just requeued some
1999         * packets.
2000         */
2001        netif_carrier_on(np->netdev);
2002        for (j = 0; j < num_queues; ++j) {
2003                queue = &np->queues[j];
2004
2005                notify_remote_via_irq(queue->tx_irq);
2006                if (queue->tx_irq != queue->rx_irq)
2007                        notify_remote_via_irq(queue->rx_irq);
2008
2009                spin_lock_irq(&queue->tx_lock);
2010                xennet_tx_buf_gc(queue);
2011                spin_unlock_irq(&queue->tx_lock);
2012
2013                spin_lock_bh(&queue->rx_lock);
2014                xennet_alloc_rx_buffers(queue);
2015                spin_unlock_bh(&queue->rx_lock);
2016        }
2017
2018        return 0;
2019}
2020
2021/*
2022 * Callback received when the backend's state changes.
2023 */
2024static void netback_changed(struct xenbus_device *dev,
2025                            enum xenbus_state backend_state)
2026{
2027        struct netfront_info *np = dev_get_drvdata(&dev->dev);
2028        struct net_device *netdev = np->netdev;
2029
2030        dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
2031
2032        switch (backend_state) {
2033        case XenbusStateInitialising:
2034        case XenbusStateInitialised:
2035        case XenbusStateReconfiguring:
2036        case XenbusStateReconfigured:
2037        case XenbusStateUnknown:
2038                break;
2039
2040        case XenbusStateInitWait:
2041                if (dev->state != XenbusStateInitialising)
2042                        break;
2043                if (xennet_connect(netdev) != 0)
2044                        break;
2045                xenbus_switch_state(dev, XenbusStateConnected);
2046                break;
2047
2048        case XenbusStateConnected:
2049                netdev_notify_peers(netdev);
2050                break;
2051
2052        case XenbusStateClosed:
2053                if (dev->state == XenbusStateClosed)
2054                        break;
2055                /* Missed the backend's CLOSING state -- fallthrough */
2056        case XenbusStateClosing:
2057                xenbus_frontend_closed(dev);
2058                break;
2059        }
2060}
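
/*
 * Rough sketch of the handshake driven by the callback above (frontend
 * actions on the left, triggering backend states in parentheses); this is
 * an explanatory summary of the switch statement, not original driver
 * documentation:
 *
 *   XenbusStateInitialising   (backend reaches InitWait)
 *       -> xennet_connect()   rings, event channels, XenStore keys set up
 *       -> XenbusStateConnected
 *   (backend Connected)       netdev_notify_peers() asks the stack to
 *                             re-announce the interface (typically via
 *                             gratuitous ARP)
 *   (backend Closing/Closed)  xenbus_frontend_closed()
 */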
2061
2062static const struct xennet_stat {
2063        char name[ETH_GSTRING_LEN];
2064        u16 offset;
2065} xennet_stats[] = {
2066        {
2067                "rx_gso_checksum_fixup",
2068                offsetof(struct netfront_info, rx_gso_checksum_fixup)
2069        },
2070};
2071
2072static int xennet_get_sset_count(struct net_device *dev, int string_set)
2073{
2074        switch (string_set) {
2075        case ETH_SS_STATS:
2076                return ARRAY_SIZE(xennet_stats);
2077        default:
2078                return -EINVAL;
2079        }
2080}
2081
2082static void xennet_get_ethtool_stats(struct net_device *dev,
2083                                     struct ethtool_stats *stats, u64 * data)
2084{
2085        void *np = netdev_priv(dev);
2086        int i;
2087
2088        for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2089                data[i] = atomic_read((atomic_t *)(np + xennet_stats[i].offset));
2090}
2091
2092static void xennet_get_strings(struct net_device *dev, u32 stringset, u8 * data)
2093{
2094        int i;
2095
2096        switch (stringset) {
2097        case ETH_SS_STATS:
2098                for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2099                        memcpy(data + i * ETH_GSTRING_LEN,
2100                               xennet_stats[i].name, ETH_GSTRING_LEN);
2101                break;
2102        }
2103}
2104
2105static const struct ethtool_ops xennet_ethtool_ops =
2106{
2107        .get_link = ethtool_op_get_link,
2108
2109        .get_sset_count = xennet_get_sset_count,
2110        .get_ethtool_stats = xennet_get_ethtool_stats,
2111        .get_strings = xennet_get_strings,
2112};
2113
2114#ifdef CONFIG_SYSFS
2115static ssize_t show_rxbuf(struct device *dev,
2116                          struct device_attribute *attr, char *buf)
2117{
2118        return sprintf(buf, "%lu\n", NET_RX_RING_SIZE);
2119}
2120
2121static ssize_t store_rxbuf(struct device *dev,
2122                           struct device_attribute *attr,
2123                           const char *buf, size_t len)
2124{
2125        char *endp;
2126        unsigned long target;
2127
2128        if (!capable(CAP_NET_ADMIN))
2129                return -EPERM;
2130
2131        target = simple_strtoul(buf, &endp, 0);
2132        if (endp == buf)
2133                return -EBADMSG;
2134
2135        /* rxbuf_min and rxbuf_max are no longer configurable. */
2136
2137        return len;
2138}
2139
2140static DEVICE_ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf);
2141static DEVICE_ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf, store_rxbuf);
2142static DEVICE_ATTR(rxbuf_cur, S_IRUGO, show_rxbuf, NULL);
2143
2144static struct attribute *xennet_dev_attrs[] = {
2145        &dev_attr_rxbuf_min.attr,
2146        &dev_attr_rxbuf_max.attr,
2147        &dev_attr_rxbuf_cur.attr,
2148        NULL
2149};
2150
2151static const struct attribute_group xennet_dev_group = {
2152        .attrs = xennet_dev_attrs
2153};
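
/*
 * Usage note (illustrative): assuming xennet_dev_group is attached to the
 * net device when the interface is registered (that happens outside this
 * excerpt), all three attributes report the fixed receive ring size, so
 * reading e.g. /sys/class/net/<iface>/rxbuf_cur returns NET_RX_RING_SIZE
 * (256 with 4 KiB Xen pages), and writes to rxbuf_min/rxbuf_max are
 * accepted but ignored, as the comment in store_rxbuf() notes.
 */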
2154#endif /* CONFIG_SYSFS */
2155
2156static int xennet_remove(struct xenbus_device *dev)
2157{
2158        struct netfront_info *info = dev_get_drvdata(&dev->dev);
2159
2160        dev_dbg(&dev->dev, "%s\n", dev->nodename);
2161
2162        xennet_disconnect_backend(info);
2163
2164        unregister_netdev(info->netdev);
2165
2166        if (info->queues)
2167                xennet_destroy_queues(info);
2168        xennet_free_netdev(info->netdev);
2169
2170        return 0;
2171}
2172
2173static const struct xenbus_device_id netfront_ids[] = {
2174        { "vif" },
2175        { "" }
2176};
2177
2178static struct xenbus_driver netfront_driver = {
2179        .ids = netfront_ids,
2180        .probe = netfront_probe,
2181        .remove = xennet_remove,
2182        .resume = netfront_resume,
2183        .otherend_changed = netback_changed,
2184};
2185
2186static int __init netif_init(void)
2187{
2188        if (!xen_domain())
2189                return -ENODEV;
2190
2191        if (!xen_has_pv_nic_devices())
2192                return -ENODEV;
2193
2194        pr_info("Initialising Xen virtual ethernet driver\n");
2195
2196        /* Allow as many queues as there are CPUs if the user has not
2197         * specified a value.
2198         */
2199        if (xennet_max_queues == 0)
2200                xennet_max_queues = num_online_cpus();
2201
2202        return xenbus_register_frontend(&netfront_driver);
2203}
2204module_init(netif_init);
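
/*
 * Usage note (illustrative): the xennet_max_queues limit defaulted above is
 * exposed as the driver's "max_queues" module parameter, so a guest
 * administrator can cap the number of queues explicitly, e.g. with
 * xen-netfront.max_queues=2 on the kernel command line for a built-in
 * driver or "modprobe xen-netfront max_queues=2" when it is a module; left
 * unset, it defaults to the number of online CPUs as set in netif_init().
 */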
2205
2206
2207static void __exit netif_exit(void)
2208{
2209        xenbus_unregister_driver(&netfront_driver);
2210}
2211module_exit(netif_exit);
2212
2213MODULE_DESCRIPTION("Xen virtual network device frontend");
2214MODULE_LICENSE("GPL");
2215MODULE_ALIAS("xen:vif");
2216MODULE_ALIAS("xennet");
2217