linux/drivers/net/xen-netfront.c
   1/*
   2 * Virtual network driver for conversing with remote driver backends.
   3 *
   4 * Copyright (c) 2002-2005, K A Fraser
   5 * Copyright (c) 2005, XenSource Ltd
   6 *
   7 * This program is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU General Public License version 2
   9 * as published by the Free Software Foundation; or, when distributed
  10 * separately from the Linux kernel or incorporated into other
  11 * software packages, subject to the following license:
  12 *
  13 * Permission is hereby granted, free of charge, to any person obtaining a copy
  14 * of this source file (the "Software"), to deal in the Software without
  15 * restriction, including without limitation the rights to use, copy, modify,
  16 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  17 * and to permit persons to whom the Software is furnished to do so, subject to
  18 * the following conditions:
  19 *
  20 * The above copyright notice and this permission notice shall be included in
  21 * all copies or substantial portions of the Software.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  29 * IN THE SOFTWARE.
  30 */
  31
  32#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  33
  34#include <linux/module.h>
  35#include <linux/kernel.h>
  36#include <linux/netdevice.h>
  37#include <linux/etherdevice.h>
  38#include <linux/skbuff.h>
  39#include <linux/ethtool.h>
  40#include <linux/if_ether.h>
  41#include <net/tcp.h>
  42#include <linux/udp.h>
  43#include <linux/moduleparam.h>
  44#include <linux/mm.h>
  45#include <linux/slab.h>
  46#include <net/ip.h>
  47
  48#include <asm/xen/page.h>
  49#include <xen/xen.h>
  50#include <xen/xenbus.h>
  51#include <xen/events.h>
  52#include <xen/page.h>
  53#include <xen/platform_pci.h>
  54#include <xen/grant_table.h>
  55
  56#include <xen/interface/io/netif.h>
  57#include <xen/interface/memory.h>
  58#include <xen/interface/grant_table.h>
  59
  60/* Module parameters */
  61static unsigned int xennet_max_queues;
  62module_param_named(max_queues, xennet_max_queues, uint, 0644);
  63MODULE_PARM_DESC(max_queues,
  64                 "Maximum number of queues per virtual interface");
  65
  66static const struct ethtool_ops xennet_ethtool_ops;
  67
  68struct netfront_cb {
  69        int pull_to;
  70};
  71
  72#define NETFRONT_SKB_CB(skb)    ((struct netfront_cb *)((skb)->cb))
  73
  74#define RX_COPY_THRESHOLD 256
  75
  76#define GRANT_INVALID_REF       0
  77
  78#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
  79#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
  80#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
  81
  82/* Queue name is interface name with "-qNNN" appended */
  83#define QUEUE_NAME_SIZE (IFNAMSIZ + 6)
  84
  85/* IRQ name is queue name with "-tx" or "-rx" appended */
  86#define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
  87
  88struct netfront_stats {
  89        u64                     rx_packets;
  90        u64                     tx_packets;
  91        u64                     rx_bytes;
  92        u64                     tx_bytes;
  93        struct u64_stats_sync   syncp;
  94};
  95
  96struct netfront_info;
  97
  98struct netfront_queue {
  99        unsigned int id; /* Queue ID, 0-based */
 100        char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
 101        struct netfront_info *info;
 102
 103        struct napi_struct napi;
 104
 105        /* Split event channels support, tx_* == rx_* when using
 106         * single event channel.
 107         */
 108        unsigned int tx_evtchn, rx_evtchn;
 109        unsigned int tx_irq, rx_irq;
 110        /* Only used when split event channels support is enabled */
 111        char tx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-tx */
 112        char rx_irq_name[IRQ_NAME_SIZE]; /* DEVNAME-qN-rx */
 113
 114        spinlock_t   tx_lock;
 115        struct xen_netif_tx_front_ring tx;
 116        int tx_ring_ref;
 117
 118        /*
 119         * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
 120         * are linked from tx_skb_freelist through skb_entry.link.
 121         *
 122         *  NB. Freelist index entries are always going to be less than
  123         *  PAGE_OFFSET, whereas pointers to skbs will always be equal to or
 124         *  greater than PAGE_OFFSET: we use this property to distinguish
 125         *  them.
 126         */
 127        union skb_entry {
 128                struct sk_buff *skb;
 129                unsigned long link;
 130        } tx_skbs[NET_TX_RING_SIZE];
 131        grant_ref_t gref_tx_head;
 132        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
 133        struct page *grant_tx_page[NET_TX_RING_SIZE];
 134        unsigned tx_skb_freelist;
 135
 136        spinlock_t   rx_lock ____cacheline_aligned_in_smp;
 137        struct xen_netif_rx_front_ring rx;
 138        int rx_ring_ref;
 139
 140        /* Receive-ring batched refills. */
 141#define RX_MIN_TARGET 8
 142#define RX_DFL_MIN_TARGET 64
 143#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
 144        unsigned rx_min_target, rx_max_target, rx_target;
 145        struct sk_buff_head rx_batch;
 146
 147        struct timer_list rx_refill_timer;
 148
 149        struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
 150        grant_ref_t gref_rx_head;
 151        grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
 152
 153        unsigned long rx_pfn_array[NET_RX_RING_SIZE];
 154        struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
 155        struct mmu_update rx_mmu[NET_RX_RING_SIZE];
 156};
 157
 158struct netfront_info {
 159        struct list_head list;
 160        struct net_device *netdev;
 161
 162        struct xenbus_device *xbdev;
 163
 164        /* Multi-queue support */
 165        struct netfront_queue *queues;
 166
 167        /* Statistics */
 168        struct netfront_stats __percpu *stats;
 169
 170        atomic_t rx_gso_checksum_fixup;
 171};
 172
 173struct netfront_rx_info {
 174        struct xen_netif_rx_response rx;
 175        struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
 176};
 177
 178static void skb_entry_set_link(union skb_entry *list, unsigned short id)
 179{
 180        list->link = id;
 181}
 182
 183static int skb_entry_is_link(const union skb_entry *list)
 184{
 185        BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
 186        return (unsigned long)list->skb < PAGE_OFFSET;
 187}
 188
 189/*
  190 * Helper functions for acquiring/freeing slots in tx_skbs[].
 191 */
 192
 193static void add_id_to_freelist(unsigned *head, union skb_entry *list,
 194                               unsigned short id)
 195{
 196        skb_entry_set_link(&list[id], *head);
 197        *head = id;
 198}
 199
 200static unsigned short get_id_from_freelist(unsigned *head,
 201                                           union skb_entry *list)
 202{
 203        unsigned int id = *head;
 204        *head = list[id].link;
 205        return id;
 206}
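
/*
 * Editorial note, not part of the original file: the two helpers above keep a
 * LIFO free list threaded through tx_skbs[] itself.  A minimal usage sketch,
 * assuming a queue whose freelist has already been initialised:
 *
 *	unsigned short id;
 *
 *	id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
 *	queue->tx_skbs[id].skb = skb;       // slot now holds a real pointer
 *	...
 *	add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id);
 *
 * Because kernel pointers are >= PAGE_OFFSET while slot indices are small
 * integers, skb_entry_is_link() can tell a free (link) entry from an in-use
 * (skb) entry without a separate tag field.
 */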
 207
 208static int xennet_rxidx(RING_IDX idx)
 209{
 210        return idx & (NET_RX_RING_SIZE - 1);
 211}
 212
 213static struct sk_buff *xennet_get_rx_skb(struct netfront_queue *queue,
 214                                         RING_IDX ri)
 215{
 216        int i = xennet_rxidx(ri);
 217        struct sk_buff *skb = queue->rx_skbs[i];
 218        queue->rx_skbs[i] = NULL;
 219        return skb;
 220}
 221
 222static grant_ref_t xennet_get_rx_ref(struct netfront_queue *queue,
 223                                            RING_IDX ri)
 224{
 225        int i = xennet_rxidx(ri);
 226        grant_ref_t ref = queue->grant_rx_ref[i];
 227        queue->grant_rx_ref[i] = GRANT_INVALID_REF;
 228        return ref;
 229}
 230
 231#ifdef CONFIG_SYSFS
 232static int xennet_sysfs_addif(struct net_device *netdev);
 233static void xennet_sysfs_delif(struct net_device *netdev);
 234#else /* !CONFIG_SYSFS */
 235#define xennet_sysfs_addif(dev) (0)
 236#define xennet_sysfs_delif(dev) do { } while (0)
 237#endif
 238
 239static bool xennet_can_sg(struct net_device *dev)
 240{
 241        return dev->features & NETIF_F_SG;
 242}
 243
 244
 245static void rx_refill_timeout(unsigned long data)
 246{
 247        struct netfront_queue *queue = (struct netfront_queue *)data;
 248        napi_schedule(&queue->napi);
 249}
 250
 251static int netfront_tx_slot_available(struct netfront_queue *queue)
 252{
 253        return (queue->tx.req_prod_pvt - queue->tx.rsp_cons) <
 254                (TX_MAX_TARGET - MAX_SKB_FRAGS - 2);
 255}
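
/*
 * Editorial note, not part of the original file: the MAX_SKB_FRAGS + 2
 * headroom above appears to be sized for the most slots a single packet is
 * expected to consume: xennet_start_xmit() linearizes any skb that would need
 * more than MAX_SKB_FRAGS + 1 data slots, and a GSO packet adds one extra-info
 * slot on top of that.  The transmit queue is therefore only woken once at
 * least that many slots are free again.
 */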
 256
 257static void xennet_maybe_wake_tx(struct netfront_queue *queue)
 258{
 259        struct net_device *dev = queue->info->netdev;
 260        struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, queue->id);
 261
 262        if (unlikely(netif_tx_queue_stopped(dev_queue)) &&
 263            netfront_tx_slot_available(queue) &&
 264            likely(netif_running(dev)))
 265                netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
 266}
 267
 268static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
 269{
 270        unsigned short id;
 271        struct sk_buff *skb;
 272        struct page *page;
 273        int i, batch_target, notify;
 274        RING_IDX req_prod = queue->rx.req_prod_pvt;
 275        grant_ref_t ref;
 276        unsigned long pfn;
 277        void *vaddr;
 278        struct xen_netif_rx_request *req;
 279
 280        if (unlikely(!netif_carrier_ok(queue->info->netdev)))
 281                return;
 282
 283        /*
 284         * Allocate skbuffs greedily, even though we batch updates to the
 285         * receive ring. This creates a less bursty demand on the memory
 286         * allocator, so should reduce the chance of failed allocation requests
  287         * both for ourselves and for other kernel subsystems.
 288         */
 289        batch_target = queue->rx_target - (req_prod - queue->rx.rsp_cons);
 290        for (i = skb_queue_len(&queue->rx_batch); i < batch_target; i++) {
 291                skb = __netdev_alloc_skb(queue->info->netdev,
 292                                         RX_COPY_THRESHOLD + NET_IP_ALIGN,
 293                                         GFP_ATOMIC | __GFP_NOWARN);
 294                if (unlikely(!skb))
 295                        goto no_skb;
 296
  297                /* Align the IP header to a 16-byte boundary */
 298                skb_reserve(skb, NET_IP_ALIGN);
 299
 300                page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 301                if (!page) {
 302                        kfree_skb(skb);
 303no_skb:
 304                        /* Could not allocate any skbuffs. Try again later. */
 305                        mod_timer(&queue->rx_refill_timer,
 306                                  jiffies + (HZ/10));
 307
 308                        /* Any skbuffs queued for refill? Force them out. */
 309                        if (i != 0)
 310                                goto refill;
 311                        break;
 312                }
 313
 314                skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
 315                __skb_queue_tail(&queue->rx_batch, skb);
 316        }
 317
 318        /* Is the batch large enough to be worthwhile? */
 319        if (i < (queue->rx_target/2)) {
 320                if (req_prod > queue->rx.sring->req_prod)
 321                        goto push;
 322                return;
 323        }
 324
 325        /* Adjust our fill target if we risked running out of buffers. */
 326        if (((req_prod - queue->rx.sring->rsp_prod) < (queue->rx_target / 4)) &&
 327            ((queue->rx_target *= 2) > queue->rx_max_target))
 328                queue->rx_target = queue->rx_max_target;
 329
 330 refill:
 331        for (i = 0; ; i++) {
 332                skb = __skb_dequeue(&queue->rx_batch);
 333                if (skb == NULL)
 334                        break;
 335
 336                skb->dev = queue->info->netdev;
 337
 338                id = xennet_rxidx(req_prod + i);
 339
 340                BUG_ON(queue->rx_skbs[id]);
 341                queue->rx_skbs[id] = skb;
 342
 343                ref = gnttab_claim_grant_reference(&queue->gref_rx_head);
 344                BUG_ON((signed short)ref < 0);
 345                queue->grant_rx_ref[id] = ref;
 346
 347                pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));
 348                vaddr = page_address(skb_frag_page(&skb_shinfo(skb)->frags[0]));
 349
 350                req = RING_GET_REQUEST(&queue->rx, req_prod + i);
 351                gnttab_grant_foreign_access_ref(ref,
 352                                                queue->info->xbdev->otherend_id,
 353                                                pfn_to_mfn(pfn),
 354                                                0);
 355
 356                req->id = id;
 357                req->gref = ref;
 358        }
 359
  360        wmb();          /* barrier so backend sees requests */
 361
 362        /* Above is a suitable barrier to ensure backend will see requests. */
 363        queue->rx.req_prod_pvt = req_prod + i;
 364 push:
 365        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->rx, notify);
 366        if (notify)
 367                notify_remote_via_irq(queue->rx_irq);
 368}
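
/*
 * Editorial note, not part of the original file: rx_target is tuned
 * adaptively.  The function above doubles it (clamped to rx_max_target)
 * whenever fewer than a quarter of the target is still outstanding in the
 * ring, while xennet_poll() decrements it (clamped to rx_min_target) when an
 * event arrives with most of the posted buffers still unconsumed.  With the
 * defaults above (RX_DFL_MIN_TARGET = 64, RX_MAX_TARGET capped at 256) a
 * traffic burst walks the target 64 -> 128 -> 256, and idle periods walk it
 * back down one slot at a time.
 */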
 369
 370static int xennet_open(struct net_device *dev)
 371{
 372        struct netfront_info *np = netdev_priv(dev);
 373        unsigned int num_queues = dev->real_num_tx_queues;
 374        unsigned int i = 0;
 375        struct netfront_queue *queue = NULL;
 376
 377        for (i = 0; i < num_queues; ++i) {
 378                queue = &np->queues[i];
 379                napi_enable(&queue->napi);
 380
 381                spin_lock_bh(&queue->rx_lock);
 382                if (netif_carrier_ok(dev)) {
 383                        xennet_alloc_rx_buffers(queue);
 384                        queue->rx.sring->rsp_event = queue->rx.rsp_cons + 1;
 385                        if (RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))
 386                                napi_schedule(&queue->napi);
 387                }
 388                spin_unlock_bh(&queue->rx_lock);
 389        }
 390
 391        netif_tx_start_all_queues(dev);
 392
 393        return 0;
 394}
 395
 396static void xennet_tx_buf_gc(struct netfront_queue *queue)
 397{
 398        RING_IDX cons, prod;
 399        unsigned short id;
 400        struct sk_buff *skb;
 401
 402        BUG_ON(!netif_carrier_ok(queue->info->netdev));
 403
 404        do {
 405                prod = queue->tx.sring->rsp_prod;
 406                rmb(); /* Ensure we see responses up to 'rp'. */
 407
 408                for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
 409                        struct xen_netif_tx_response *txrsp;
 410
 411                        txrsp = RING_GET_RESPONSE(&queue->tx, cons);
 412                        if (txrsp->status == XEN_NETIF_RSP_NULL)
 413                                continue;
 414
 415                        id  = txrsp->id;
 416                        skb = queue->tx_skbs[id].skb;
 417                        if (unlikely(gnttab_query_foreign_access(
 418                                queue->grant_tx_ref[id]) != 0)) {
 419                                pr_alert("%s: warning -- grant still in use by backend domain\n",
 420                                         __func__);
 421                                BUG();
 422                        }
 423                        gnttab_end_foreign_access_ref(
 424                                queue->grant_tx_ref[id], GNTMAP_readonly);
 425                        gnttab_release_grant_reference(
 426                                &queue->gref_tx_head, queue->grant_tx_ref[id]);
 427                        queue->grant_tx_ref[id] = GRANT_INVALID_REF;
 428                        queue->grant_tx_page[id] = NULL;
 429                        add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id);
 430                        dev_kfree_skb_irq(skb);
 431                }
 432
 433                queue->tx.rsp_cons = prod;
 434
 435                /*
 436                 * Set a new event, then check for race with update of tx_cons.
 437                 * Note that it is essential to schedule a callback, no matter
 438                 * how few buffers are pending. Even if there is space in the
 439                 * transmit ring, higher layers may be blocked because too much
 440                 * data is outstanding: in such cases notification from Xen is
 441                 * likely to be the only kick that we'll get.
 442                 */
 443                queue->tx.sring->rsp_event =
 444                        prod + ((queue->tx.sring->req_prod - prod) >> 1) + 1;
 445                mb();           /* update shared area */
 446        } while ((cons == prod) && (prod != queue->tx.sring->rsp_prod));
 447
 448        xennet_maybe_wake_tx(queue);
 449}
 450
 451static void xennet_make_frags(struct sk_buff *skb, struct netfront_queue *queue,
 452                              struct xen_netif_tx_request *tx)
 453{
 454        char *data = skb->data;
 455        unsigned long mfn;
 456        RING_IDX prod = queue->tx.req_prod_pvt;
 457        int frags = skb_shinfo(skb)->nr_frags;
 458        unsigned int offset = offset_in_page(data);
 459        unsigned int len = skb_headlen(skb);
 460        unsigned int id;
 461        grant_ref_t ref;
 462        int i;
 463
 464        /* While the header overlaps a page boundary (including being
  465           larger than a page), split it into page-sized chunks. */
 466        while (len > PAGE_SIZE - offset) {
 467                tx->size = PAGE_SIZE - offset;
 468                tx->flags |= XEN_NETTXF_more_data;
 469                len -= tx->size;
 470                data += tx->size;
 471                offset = 0;
 472
 473                id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
 474                queue->tx_skbs[id].skb = skb_get(skb);
 475                tx = RING_GET_REQUEST(&queue->tx, prod++);
 476                tx->id = id;
 477                ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
 478                BUG_ON((signed short)ref < 0);
 479
 480                mfn = virt_to_mfn(data);
 481                gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
 482                                                mfn, GNTMAP_readonly);
 483
 484                queue->grant_tx_page[id] = virt_to_page(data);
 485                tx->gref = queue->grant_tx_ref[id] = ref;
 486                tx->offset = offset;
 487                tx->size = len;
 488                tx->flags = 0;
 489        }
 490
 491        /* Grant backend access to each skb fragment page. */
 492        for (i = 0; i < frags; i++) {
 493                skb_frag_t *frag = skb_shinfo(skb)->frags + i;
 494                struct page *page = skb_frag_page(frag);
 495
 496                len = skb_frag_size(frag);
 497                offset = frag->page_offset;
 498
  499                /* Data must not cross a (compound) page boundary. */
 500                BUG_ON(len + offset > PAGE_SIZE<<compound_order(page));
 501
 502                /* Skip unused frames from start of page */
 503                page += offset >> PAGE_SHIFT;
 504                offset &= ~PAGE_MASK;
 505
 506                while (len > 0) {
 507                        unsigned long bytes;
 508
 509                        BUG_ON(offset >= PAGE_SIZE);
 510
 511                        bytes = PAGE_SIZE - offset;
 512                        if (bytes > len)
 513                                bytes = len;
 514
 515                        tx->flags |= XEN_NETTXF_more_data;
 516
 517                        id = get_id_from_freelist(&queue->tx_skb_freelist,
 518                                                  queue->tx_skbs);
 519                        queue->tx_skbs[id].skb = skb_get(skb);
 520                        tx = RING_GET_REQUEST(&queue->tx, prod++);
 521                        tx->id = id;
 522                        ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
 523                        BUG_ON((signed short)ref < 0);
 524
 525                        mfn = pfn_to_mfn(page_to_pfn(page));
 526                        gnttab_grant_foreign_access_ref(ref,
 527                                                        queue->info->xbdev->otherend_id,
 528                                                        mfn, GNTMAP_readonly);
 529
 530                        queue->grant_tx_page[id] = page;
 531                        tx->gref = queue->grant_tx_ref[id] = ref;
 532                        tx->offset = offset;
 533                        tx->size = bytes;
 534                        tx->flags = 0;
 535
 536                        offset += bytes;
 537                        len -= bytes;
 538
 539                        /* Next frame */
 540                        if (offset == PAGE_SIZE && len) {
 541                                BUG_ON(!PageCompound(page));
 542                                page++;
 543                                offset = 0;
 544                        }
 545                }
 546        }
 547
 548        queue->tx.req_prod_pvt = prod;
 549}
 550
 551/*
 552 * Count how many ring slots are required to send the frags of this
 553 * skb. Each frag might be a compound page.
 554 */
 555static int xennet_count_skb_frag_slots(struct sk_buff *skb)
 556{
 557        int i, frags = skb_shinfo(skb)->nr_frags;
 558        int pages = 0;
 559
 560        for (i = 0; i < frags; i++) {
 561                skb_frag_t *frag = skb_shinfo(skb)->frags + i;
 562                unsigned long size = skb_frag_size(frag);
 563                unsigned long offset = frag->page_offset;
 564
 565                /* Skip unused frames from start of page */
 566                offset &= ~PAGE_MASK;
 567
 568                pages += PFN_UP(offset + size);
 569        }
 570
 571        return pages;
 572}
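
/*
 * Editorial note, not part of the original file: a worked example of the
 * count above, assuming 4 KiB pages.  A fragment of 8000 bytes starting at
 * in-page offset 100 needs PFN_UP(100 + 8000) = 2 ring slots, whereas the
 * same fragment starting at offset 200 needs PFN_UP(200 + 8000) = 3, since
 * its final byte spills into a third page.  The total returned here is added
 * to the slots needed for the linear header in xennet_start_xmit().
 */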
 573
 574static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
 575                               void *accel_priv, select_queue_fallback_t fallback)
 576{
 577        unsigned int num_queues = dev->real_num_tx_queues;
 578        u32 hash;
 579        u16 queue_idx;
 580
 581        /* First, check if there is only one queue */
 582        if (num_queues == 1) {
 583                queue_idx = 0;
 584        } else {
 585                hash = skb_get_hash(skb);
 586                queue_idx = hash % num_queues;
 587        }
 588
 589        return queue_idx;
 590}
 591
 592static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 593{
 594        unsigned short id;
 595        struct netfront_info *np = netdev_priv(dev);
 596        struct netfront_stats *stats = this_cpu_ptr(np->stats);
 597        struct xen_netif_tx_request *tx;
 598        char *data = skb->data;
 599        RING_IDX i;
 600        grant_ref_t ref;
 601        unsigned long mfn;
 602        int notify;
 603        int slots;
 604        unsigned int offset = offset_in_page(data);
 605        unsigned int len = skb_headlen(skb);
 606        unsigned long flags;
 607        struct netfront_queue *queue = NULL;
 608        unsigned int num_queues = dev->real_num_tx_queues;
 609        u16 queue_index;
 610
 611        /* Drop the packet if no queues are set up */
 612        if (num_queues < 1)
 613                goto drop;
 614        /* Determine which queue to transmit this SKB on */
 615        queue_index = skb_get_queue_mapping(skb);
 616        queue = &np->queues[queue_index];
 617
 618        /* If skb->len is too big for wire format, drop skb and alert
 619         * user about misconfiguration.
 620         */
 621        if (unlikely(skb->len > XEN_NETIF_MAX_TX_SIZE)) {
 622                net_alert_ratelimited(
 623                        "xennet: skb->len = %u, too big for wire format\n",
 624                        skb->len);
 625                goto drop;
 626        }
 627
 628        slots = DIV_ROUND_UP(offset + len, PAGE_SIZE) +
 629                xennet_count_skb_frag_slots(skb);
 630        if (unlikely(slots > MAX_SKB_FRAGS + 1)) {
 631                net_dbg_ratelimited("xennet: skb rides the rocket: %d slots, %d bytes\n",
 632                                    slots, skb->len);
 633                if (skb_linearize(skb))
 634                        goto drop;
 635        }
 636
 637        spin_lock_irqsave(&queue->tx_lock, flags);
 638
 639        if (unlikely(!netif_carrier_ok(dev) ||
 640                     (slots > 1 && !xennet_can_sg(dev)) ||
 641                     netif_needs_gso(skb, netif_skb_features(skb)))) {
 642                spin_unlock_irqrestore(&queue->tx_lock, flags);
 643                goto drop;
 644        }
 645
 646        i = queue->tx.req_prod_pvt;
 647
 648        id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
 649        queue->tx_skbs[id].skb = skb;
 650
 651        tx = RING_GET_REQUEST(&queue->tx, i);
 652
 653        tx->id   = id;
 654        ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
 655        BUG_ON((signed short)ref < 0);
 656        mfn = virt_to_mfn(data);
 657        gnttab_grant_foreign_access_ref(
 658                ref, queue->info->xbdev->otherend_id, mfn, GNTMAP_readonly);
 659        queue->grant_tx_page[id] = virt_to_page(data);
 660        tx->gref = queue->grant_tx_ref[id] = ref;
 661        tx->offset = offset;
 662        tx->size = len;
 663
 664        tx->flags = 0;
 665        if (skb->ip_summed == CHECKSUM_PARTIAL)
 666                /* local packet? */
 667                tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
 668        else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
 669                /* remote but checksummed. */
 670                tx->flags |= XEN_NETTXF_data_validated;
 671
 672        if (skb_shinfo(skb)->gso_size) {
 673                struct xen_netif_extra_info *gso;
 674
 675                gso = (struct xen_netif_extra_info *)
 676                        RING_GET_REQUEST(&queue->tx, ++i);
 677
 678                tx->flags |= XEN_NETTXF_extra_info;
 679
 680                gso->u.gso.size = skb_shinfo(skb)->gso_size;
 681                gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
 682                        XEN_NETIF_GSO_TYPE_TCPV6 :
 683                        XEN_NETIF_GSO_TYPE_TCPV4;
 684                gso->u.gso.pad = 0;
 685                gso->u.gso.features = 0;
 686
 687                gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
 688                gso->flags = 0;
 689        }
 690
 691        queue->tx.req_prod_pvt = i + 1;
 692
 693        xennet_make_frags(skb, queue, tx);
 694        tx->size = skb->len;
 695
 696        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
 697        if (notify)
 698                notify_remote_via_irq(queue->tx_irq);
 699
 700        u64_stats_update_begin(&stats->syncp);
 701        stats->tx_bytes += skb->len;
 702        stats->tx_packets++;
 703        u64_stats_update_end(&stats->syncp);
 704
 705        /* Note: It is not safe to access skb after xennet_tx_buf_gc()! */
 706        xennet_tx_buf_gc(queue);
 707
 708        if (!netfront_tx_slot_available(queue))
 709                netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
 710
 711        spin_unlock_irqrestore(&queue->tx_lock, flags);
 712
 713        return NETDEV_TX_OK;
 714
 715 drop:
 716        dev->stats.tx_dropped++;
 717        dev_kfree_skb_any(skb);
 718        return NETDEV_TX_OK;
 719}
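
/*
 * Editorial note, not part of the original file: for one skb the transmit
 * path above produces, in ring order:
 *
 *	slot 0:      the linear header; once the fragments have been queued
 *	             its size field is rewritten to skb->len, the total
 *	             packet length
 *	slot 1:      an optional XEN_NETIF_EXTRA_TYPE_GSO extra-info segment
 *	             (flagged by XEN_NETTXF_extra_info on slot 0)
 *	slots 2..n:  one request per page spanned by the rest of the header
 *	             and by each fragment, built in xennet_make_frags()
 *
 * Every data request except the last carries XEN_NETTXF_more_data.
 */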
 720
 721static int xennet_close(struct net_device *dev)
 722{
 723        struct netfront_info *np = netdev_priv(dev);
 724        unsigned int num_queues = dev->real_num_tx_queues;
 725        unsigned int i;
 726        struct netfront_queue *queue;
 727        netif_tx_stop_all_queues(np->netdev);
 728        for (i = 0; i < num_queues; ++i) {
 729                queue = &np->queues[i];
 730                napi_disable(&queue->napi);
 731        }
 732        return 0;
 733}
 734
 735static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
 736                                grant_ref_t ref)
 737{
 738        int new = xennet_rxidx(queue->rx.req_prod_pvt);
 739
 740        BUG_ON(queue->rx_skbs[new]);
 741        queue->rx_skbs[new] = skb;
 742        queue->grant_rx_ref[new] = ref;
 743        RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->id = new;
 744        RING_GET_REQUEST(&queue->rx, queue->rx.req_prod_pvt)->gref = ref;
 745        queue->rx.req_prod_pvt++;
 746}
 747
 748static int xennet_get_extras(struct netfront_queue *queue,
 749                             struct xen_netif_extra_info *extras,
 750                             RING_IDX rp)
 751
 752{
 753        struct xen_netif_extra_info *extra;
 754        struct device *dev = &queue->info->netdev->dev;
 755        RING_IDX cons = queue->rx.rsp_cons;
 756        int err = 0;
 757
 758        do {
 759                struct sk_buff *skb;
 760                grant_ref_t ref;
 761
 762                if (unlikely(cons + 1 == rp)) {
 763                        if (net_ratelimit())
 764                                dev_warn(dev, "Missing extra info\n");
 765                        err = -EBADR;
 766                        break;
 767                }
 768
 769                extra = (struct xen_netif_extra_info *)
 770                        RING_GET_RESPONSE(&queue->rx, ++cons);
 771
 772                if (unlikely(!extra->type ||
 773                             extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
 774                        if (net_ratelimit())
 775                                dev_warn(dev, "Invalid extra type: %d\n",
 776                                        extra->type);
 777                        err = -EINVAL;
 778                } else {
 779                        memcpy(&extras[extra->type - 1], extra,
 780                               sizeof(*extra));
 781                }
 782
 783                skb = xennet_get_rx_skb(queue, cons);
 784                ref = xennet_get_rx_ref(queue, cons);
 785                xennet_move_rx_slot(queue, skb, ref);
 786        } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
 787
 788        queue->rx.rsp_cons = cons;
 789        return err;
 790}
 791
 792static int xennet_get_responses(struct netfront_queue *queue,
 793                                struct netfront_rx_info *rinfo, RING_IDX rp,
 794                                struct sk_buff_head *list)
 795{
 796        struct xen_netif_rx_response *rx = &rinfo->rx;
 797        struct xen_netif_extra_info *extras = rinfo->extras;
 798        struct device *dev = &queue->info->netdev->dev;
 799        RING_IDX cons = queue->rx.rsp_cons;
 800        struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
 801        grant_ref_t ref = xennet_get_rx_ref(queue, cons);
 802        int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
 803        int slots = 1;
 804        int err = 0;
 805        unsigned long ret;
 806
 807        if (rx->flags & XEN_NETRXF_extra_info) {
 808                err = xennet_get_extras(queue, extras, rp);
 809                cons = queue->rx.rsp_cons;
 810        }
 811
 812        for (;;) {
 813                if (unlikely(rx->status < 0 ||
 814                             rx->offset + rx->status > PAGE_SIZE)) {
 815                        if (net_ratelimit())
 816                                dev_warn(dev, "rx->offset: %x, size: %u\n",
 817                                         rx->offset, rx->status);
 818                        xennet_move_rx_slot(queue, skb, ref);
 819                        err = -EINVAL;
 820                        goto next;
 821                }
 822
 823                /*
 824                 * This definitely indicates a bug, either in this driver or in
 825                 * the backend driver. In future this should flag the bad
 826                 * situation to the system controller to reboot the backend.
 827                 */
 828                if (ref == GRANT_INVALID_REF) {
 829                        if (net_ratelimit())
 830                                dev_warn(dev, "Bad rx response id %d.\n",
 831                                         rx->id);
 832                        err = -EINVAL;
 833                        goto next;
 834                }
 835
 836                ret = gnttab_end_foreign_access_ref(ref, 0);
 837                BUG_ON(!ret);
 838
 839                gnttab_release_grant_reference(&queue->gref_rx_head, ref);
 840
 841                __skb_queue_tail(list, skb);
 842
 843next:
 844                if (!(rx->flags & XEN_NETRXF_more_data))
 845                        break;
 846
 847                if (cons + slots == rp) {
 848                        if (net_ratelimit())
 849                                dev_warn(dev, "Need more slots\n");
 850                        err = -ENOENT;
 851                        break;
 852                }
 853
 854                rx = RING_GET_RESPONSE(&queue->rx, cons + slots);
 855                skb = xennet_get_rx_skb(queue, cons + slots);
 856                ref = xennet_get_rx_ref(queue, cons + slots);
 857                slots++;
 858        }
 859
 860        if (unlikely(slots > max)) {
 861                if (net_ratelimit())
 862                        dev_warn(dev, "Too many slots\n");
 863                err = -E2BIG;
 864        }
 865
 866        if (unlikely(err))
 867                queue->rx.rsp_cons = cons + slots;
 868
 869        return err;
 870}
 871
 872static int xennet_set_skb_gso(struct sk_buff *skb,
 873                              struct xen_netif_extra_info *gso)
 874{
 875        if (!gso->u.gso.size) {
 876                if (net_ratelimit())
 877                        pr_warn("GSO size must not be zero\n");
 878                return -EINVAL;
 879        }
 880
 881        if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
 882            gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
 883                if (net_ratelimit())
 884                        pr_warn("Bad GSO type %d\n", gso->u.gso.type);
 885                return -EINVAL;
 886        }
 887
 888        skb_shinfo(skb)->gso_size = gso->u.gso.size;
 889        skb_shinfo(skb)->gso_type =
 890                (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
 891                SKB_GSO_TCPV4 :
 892                SKB_GSO_TCPV6;
 893
 894        /* Header must be checked, and gso_segs computed. */
 895        skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 896        skb_shinfo(skb)->gso_segs = 0;
 897
 898        return 0;
 899}
 900
 901static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
 902                                  struct sk_buff *skb,
 903                                  struct sk_buff_head *list)
 904{
 905        struct skb_shared_info *shinfo = skb_shinfo(skb);
 906        RING_IDX cons = queue->rx.rsp_cons;
 907        struct sk_buff *nskb;
 908
 909        while ((nskb = __skb_dequeue(list))) {
 910                struct xen_netif_rx_response *rx =
 911                        RING_GET_RESPONSE(&queue->rx, ++cons);
 912                skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
 913
 914                if (shinfo->nr_frags == MAX_SKB_FRAGS) {
 915                        unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 916
 917                        BUG_ON(pull_to <= skb_headlen(skb));
 918                        __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 919                }
 920                BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS);
 921
 922                skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag),
 923                                rx->offset, rx->status, PAGE_SIZE);
 924
 925                skb_shinfo(nskb)->nr_frags = 0;
 926                kfree_skb(nskb);
 927        }
 928
 929        return cons;
 930}
 931
 932static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
 933{
 934        bool recalculate_partial_csum = false;
 935
 936        /*
 937         * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
 938         * peers can fail to set NETRXF_csum_blank when sending a GSO
 939         * frame. In this case force the SKB to CHECKSUM_PARTIAL and
 940         * recalculate the partial checksum.
 941         */
 942        if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
 943                struct netfront_info *np = netdev_priv(dev);
 944                atomic_inc(&np->rx_gso_checksum_fixup);
 945                skb->ip_summed = CHECKSUM_PARTIAL;
 946                recalculate_partial_csum = true;
 947        }
 948
 949        /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
 950        if (skb->ip_summed != CHECKSUM_PARTIAL)
 951                return 0;
 952
 953        return skb_checksum_setup(skb, recalculate_partial_csum);
 954}
 955
 956static int handle_incoming_queue(struct netfront_queue *queue,
 957                                 struct sk_buff_head *rxq)
 958{
 959        struct netfront_stats *stats = this_cpu_ptr(queue->info->stats);
 960        int packets_dropped = 0;
 961        struct sk_buff *skb;
 962
 963        while ((skb = __skb_dequeue(rxq)) != NULL) {
 964                int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 965
 966                if (pull_to > skb_headlen(skb))
 967                        __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 968
  969                /* Ethernet work: delayed to here as it peeks at the header. */
 970                skb->protocol = eth_type_trans(skb, queue->info->netdev);
 971                skb_reset_network_header(skb);
 972
 973                if (checksum_setup(queue->info->netdev, skb)) {
 974                        kfree_skb(skb);
 975                        packets_dropped++;
 976                        queue->info->netdev->stats.rx_errors++;
 977                        continue;
 978                }
 979
 980                u64_stats_update_begin(&stats->syncp);
 981                stats->rx_packets++;
 982                stats->rx_bytes += skb->len;
 983                u64_stats_update_end(&stats->syncp);
 984
 985                /* Pass it up. */
 986                napi_gro_receive(&queue->napi, skb);
 987        }
 988
 989        return packets_dropped;
 990}
 991
 992static int xennet_poll(struct napi_struct *napi, int budget)
 993{
 994        struct netfront_queue *queue = container_of(napi, struct netfront_queue, napi);
 995        struct net_device *dev = queue->info->netdev;
 996        struct sk_buff *skb;
 997        struct netfront_rx_info rinfo;
 998        struct xen_netif_rx_response *rx = &rinfo.rx;
 999        struct xen_netif_extra_info *extras = rinfo.extras;
1000        RING_IDX i, rp;
1001        int work_done;
1002        struct sk_buff_head rxq;
1003        struct sk_buff_head errq;
1004        struct sk_buff_head tmpq;
1005        unsigned long flags;
1006        int err;
1007
1008        spin_lock(&queue->rx_lock);
1009
1010        skb_queue_head_init(&rxq);
1011        skb_queue_head_init(&errq);
1012        skb_queue_head_init(&tmpq);
1013
1014        rp = queue->rx.sring->rsp_prod;
1015        rmb(); /* Ensure we see queued responses up to 'rp'. */
1016
1017        i = queue->rx.rsp_cons;
1018        work_done = 0;
1019        while ((i != rp) && (work_done < budget)) {
1020                memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx));
1021                memset(extras, 0, sizeof(rinfo.extras));
1022
1023                err = xennet_get_responses(queue, &rinfo, rp, &tmpq);
1024
1025                if (unlikely(err)) {
1026err:
1027                        while ((skb = __skb_dequeue(&tmpq)))
1028                                __skb_queue_tail(&errq, skb);
1029                        dev->stats.rx_errors++;
1030                        i = queue->rx.rsp_cons;
1031                        continue;
1032                }
1033
1034                skb = __skb_dequeue(&tmpq);
1035
1036                if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1037                        struct xen_netif_extra_info *gso;
1038                        gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1039
1040                        if (unlikely(xennet_set_skb_gso(skb, gso))) {
1041                                __skb_queue_head(&tmpq, skb);
1042                                queue->rx.rsp_cons += skb_queue_len(&tmpq);
1043                                goto err;
1044                        }
1045                }
1046
1047                NETFRONT_SKB_CB(skb)->pull_to = rx->status;
1048                if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
1049                        NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;
1050
1051                skb_shinfo(skb)->frags[0].page_offset = rx->offset;
1052                skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
1053                skb->data_len = rx->status;
1054                skb->len += rx->status;
1055
1056                i = xennet_fill_frags(queue, skb, &tmpq);
1057
1058                if (rx->flags & XEN_NETRXF_csum_blank)
1059                        skb->ip_summed = CHECKSUM_PARTIAL;
1060                else if (rx->flags & XEN_NETRXF_data_validated)
1061                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1062
1063                __skb_queue_tail(&rxq, skb);
1064
1065                queue->rx.rsp_cons = ++i;
1066                work_done++;
1067        }
1068
1069        __skb_queue_purge(&errq);
1070
1071        work_done -= handle_incoming_queue(queue, &rxq);
1072
1073        /* If we get a callback with very few responses, reduce fill target. */
1074        /* NB. Note exponential increase, linear decrease. */
1075        if (((queue->rx.req_prod_pvt - queue->rx.sring->rsp_prod) >
1076             ((3*queue->rx_target) / 4)) &&
1077            (--queue->rx_target < queue->rx_min_target))
1078                queue->rx_target = queue->rx_min_target;
1079
1080        xennet_alloc_rx_buffers(queue);
1081
1082        if (work_done < budget) {
1083                int more_to_do = 0;
1084
1085                napi_gro_flush(napi, false);
1086
1087                local_irq_save(flags);
1088
1089                RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do);
1090                if (!more_to_do)
1091                        __napi_complete(napi);
1092
1093                local_irq_restore(flags);
1094        }
1095
1096        spin_unlock(&queue->rx_lock);
1097
1098        return work_done;
1099}
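
/*
 * Editorial note, not part of the original file: this follows the usual NAPI
 * contract.  At most 'budget' responses are consumed per poll; only when the
 * ring is drained within budget does the driver re-arm backend notifications
 * via RING_FINAL_CHECK_FOR_RESPONSES() and complete NAPI.  Returning
 * work_done == budget keeps the queue on the poll list, so processing
 * continues without waiting for another event.
 */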
1100
1101static int xennet_change_mtu(struct net_device *dev, int mtu)
1102{
1103        int max = xennet_can_sg(dev) ?
1104                XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER : ETH_DATA_LEN;
1105
1106        if (mtu > max)
1107                return -EINVAL;
1108        dev->mtu = mtu;
1109        return 0;
1110}
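
/*
 * Editorial note, not part of the original file: with scatter-gather
 * negotiated, the ceiling above is XEN_NETIF_MAX_TX_SIZE (0xffff) minus
 * MAX_TCP_HEADER, i.e. a little under 64 KiB; without SG it falls back to the
 * standard ETH_DATA_LEN of 1500 bytes.
 */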
1111
1112static struct rtnl_link_stats64 *xennet_get_stats64(struct net_device *dev,
1113                                                    struct rtnl_link_stats64 *tot)
1114{
1115        struct netfront_info *np = netdev_priv(dev);
1116        int cpu;
1117
1118        for_each_possible_cpu(cpu) {
1119                struct netfront_stats *stats = per_cpu_ptr(np->stats, cpu);
1120                u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
1121                unsigned int start;
1122
1123                do {
1124                        start = u64_stats_fetch_begin_irq(&stats->syncp);
1125
1126                        rx_packets = stats->rx_packets;
1127                        tx_packets = stats->tx_packets;
1128                        rx_bytes = stats->rx_bytes;
1129                        tx_bytes = stats->tx_bytes;
1130                } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
1131
1132                tot->rx_packets += rx_packets;
1133                tot->tx_packets += tx_packets;
1134                tot->rx_bytes   += rx_bytes;
1135                tot->tx_bytes   += tx_bytes;
1136        }
1137
1138        tot->rx_errors  = dev->stats.rx_errors;
1139        tot->tx_dropped = dev->stats.tx_dropped;
1140
1141        return tot;
1142}
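
/*
 * Editorial note, not part of the original file: the fetch_begin/retry loop
 * above pairs with the u64_stats_update_begin()/_end() calls on the transmit
 * and receive paths.  On 64-bit kernels the sequence counter is essentially
 * free; on 32-bit kernels it lets the reader retry instead of taking a lock,
 * so a consistent 64-bit snapshot is obtained per CPU before the totals are
 * summed.
 */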
1143
1144static void xennet_release_tx_bufs(struct netfront_queue *queue)
1145{
1146        struct sk_buff *skb;
1147        int i;
1148
1149        for (i = 0; i < NET_TX_RING_SIZE; i++) {
1150                /* Skip over entries which are actually freelist references */
1151                if (skb_entry_is_link(&queue->tx_skbs[i]))
1152                        continue;
1153
1154                skb = queue->tx_skbs[i].skb;
1155                get_page(queue->grant_tx_page[i]);
1156                gnttab_end_foreign_access(queue->grant_tx_ref[i],
1157                                          GNTMAP_readonly,
1158                                          (unsigned long)page_address(queue->grant_tx_page[i]));
1159                queue->grant_tx_page[i] = NULL;
1160                queue->grant_tx_ref[i] = GRANT_INVALID_REF;
1161                add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i);
1162                dev_kfree_skb_irq(skb);
1163        }
1164}
1165
1166static void xennet_release_rx_bufs(struct netfront_queue *queue)
1167{
1168        int id, ref;
1169
1170        spin_lock_bh(&queue->rx_lock);
1171
1172        for (id = 0; id < NET_RX_RING_SIZE; id++) {
1173                struct sk_buff *skb;
1174                struct page *page;
1175
1176                skb = queue->rx_skbs[id];
1177                if (!skb)
1178                        continue;
1179
1180                ref = queue->grant_rx_ref[id];
1181                if (ref == GRANT_INVALID_REF)
1182                        continue;
1183
1184                page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
1185
1186                /* gnttab_end_foreign_access() needs a page ref until
1187                 * foreign access is ended (which may be deferred).
1188                 */
1189                get_page(page);
1190                gnttab_end_foreign_access(ref, 0,
1191                                          (unsigned long)page_address(page));
1192                queue->grant_rx_ref[id] = GRANT_INVALID_REF;
1193
1194                kfree_skb(skb);
1195        }
1196
1197        spin_unlock_bh(&queue->rx_lock);
1198}
1199
1200static netdev_features_t xennet_fix_features(struct net_device *dev,
1201        netdev_features_t features)
1202{
1203        struct netfront_info *np = netdev_priv(dev);
1204        int val;
1205
1206        if (features & NETIF_F_SG) {
1207                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
1208                                 "%d", &val) < 0)
1209                        val = 0;
1210
1211                if (!val)
1212                        features &= ~NETIF_F_SG;
1213        }
1214
1215        if (features & NETIF_F_IPV6_CSUM) {
1216                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1217                                 "feature-ipv6-csum-offload", "%d", &val) < 0)
1218                        val = 0;
1219
1220                if (!val)
1221                        features &= ~NETIF_F_IPV6_CSUM;
1222        }
1223
1224        if (features & NETIF_F_TSO) {
1225                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1226                                 "feature-gso-tcpv4", "%d", &val) < 0)
1227                        val = 0;
1228
1229                if (!val)
1230                        features &= ~NETIF_F_TSO;
1231        }
1232
1233        if (features & NETIF_F_TSO6) {
1234                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1235                                 "feature-gso-tcpv6", "%d", &val) < 0)
1236                        val = 0;
1237
1238                if (!val)
1239                        features &= ~NETIF_F_TSO6;
1240        }
1241
1242        return features;
1243}
1244
1245static int xennet_set_features(struct net_device *dev,
1246        netdev_features_t features)
1247{
1248        if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
 1249                netdev_info(dev, "Reducing MTU because no SG offload\n");
1250                dev->mtu = ETH_DATA_LEN;
1251        }
1252
1253        return 0;
1254}
1255
1256static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
1257{
1258        struct netfront_queue *queue = dev_id;
1259        unsigned long flags;
1260
1261        spin_lock_irqsave(&queue->tx_lock, flags);
1262        xennet_tx_buf_gc(queue);
1263        spin_unlock_irqrestore(&queue->tx_lock, flags);
1264
1265        return IRQ_HANDLED;
1266}
1267
1268static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
1269{
1270        struct netfront_queue *queue = dev_id;
1271        struct net_device *dev = queue->info->netdev;
1272
1273        if (likely(netif_carrier_ok(dev) &&
1274                   RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
1275                napi_schedule(&queue->napi);
1276
1277        return IRQ_HANDLED;
1278}
1279
1280static irqreturn_t xennet_interrupt(int irq, void *dev_id)
1281{
1282        xennet_tx_interrupt(irq, dev_id);
1283        xennet_rx_interrupt(irq, dev_id);
1284        return IRQ_HANDLED;
1285}
1286
1287#ifdef CONFIG_NET_POLL_CONTROLLER
1288static void xennet_poll_controller(struct net_device *dev)
1289{
1290        /* Poll each queue */
1291        struct netfront_info *info = netdev_priv(dev);
1292        unsigned int num_queues = dev->real_num_tx_queues;
1293        unsigned int i;
1294        for (i = 0; i < num_queues; ++i)
1295                xennet_interrupt(0, &info->queues[i]);
1296}
1297#endif
1298
1299static const struct net_device_ops xennet_netdev_ops = {
1300        .ndo_open            = xennet_open,
1301        .ndo_stop            = xennet_close,
1302        .ndo_start_xmit      = xennet_start_xmit,
1303        .ndo_change_mtu      = xennet_change_mtu,
1304        .ndo_get_stats64     = xennet_get_stats64,
1305        .ndo_set_mac_address = eth_mac_addr,
1306        .ndo_validate_addr   = eth_validate_addr,
1307        .ndo_fix_features    = xennet_fix_features,
1308        .ndo_set_features    = xennet_set_features,
1309        .ndo_select_queue    = xennet_select_queue,
1310#ifdef CONFIG_NET_POLL_CONTROLLER
1311        .ndo_poll_controller = xennet_poll_controller,
1312#endif
1313};
1314
1315static struct net_device *xennet_create_dev(struct xenbus_device *dev)
1316{
1317        int err;
1318        struct net_device *netdev;
1319        struct netfront_info *np;
1320
1321        netdev = alloc_etherdev_mq(sizeof(struct netfront_info), xennet_max_queues);
1322        if (!netdev)
1323                return ERR_PTR(-ENOMEM);
1324
1325        np                   = netdev_priv(netdev);
1326        np->xbdev            = dev;
1327
1328        /* No need to use rtnl_lock() before the call below as it
1329         * happens before register_netdev().
1330         */
1331        netif_set_real_num_tx_queues(netdev, 0);
1332        np->queues = NULL;
1333
1334        err = -ENOMEM;
1335        np->stats = netdev_alloc_pcpu_stats(struct netfront_stats);
1336        if (np->stats == NULL)
1337                goto exit;
1338
1339        netdev->netdev_ops      = &xennet_netdev_ops;
1340
1341        netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
1342                                  NETIF_F_GSO_ROBUST;
1343        netdev->hw_features     = NETIF_F_SG |
1344                                  NETIF_F_IPV6_CSUM |
1345                                  NETIF_F_TSO | NETIF_F_TSO6;
1346
1347        /*
1348         * Assume that all hw features are available for now. This set
1349         * will be adjusted by the call to netdev_update_features() in
1350         * xennet_connect() which is the earliest point where we can
1351         * negotiate with the backend regarding supported features.
1352         */
1353        netdev->features |= netdev->hw_features;
1354
1355        netdev->ethtool_ops = &xennet_ethtool_ops;
1356        SET_NETDEV_DEV(netdev, &dev->dev);
1357
1358        netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER);
1359
1360        np->netdev = netdev;
1361
1362        netif_carrier_off(netdev);
1363
1364        return netdev;
1365
1366 exit:
1367        free_netdev(netdev);
1368        return ERR_PTR(err);
1369}
1370
1371/**
1372 * Entry point to this code when a new device is created.  Allocate the basic
1373 * structures and the ring buffers for communication with the backend, and
1374 * inform the backend of the appropriate details for those.
1375 */
1376static int netfront_probe(struct xenbus_device *dev,
1377                          const struct xenbus_device_id *id)
1378{
1379        int err;
1380        struct net_device *netdev;
1381        struct netfront_info *info;
1382
1383        netdev = xennet_create_dev(dev);
1384        if (IS_ERR(netdev)) {
1385                err = PTR_ERR(netdev);
1386                xenbus_dev_fatal(dev, err, "creating netdev");
1387                return err;
1388        }
1389
1390        info = netdev_priv(netdev);
1391        dev_set_drvdata(&dev->dev, info);
1392
1393        err = register_netdev(info->netdev);
1394        if (err) {
1395                pr_warn("%s: register_netdev err=%d\n", __func__, err);
1396                goto fail;
1397        }
1398
1399        err = xennet_sysfs_addif(info->netdev);
1400        if (err) {
1401                unregister_netdev(info->netdev);
1402                pr_warn("%s: add sysfs failed err=%d\n", __func__, err);
1403                goto fail;
1404        }
1405
1406        return 0;
1407
1408 fail:
1409        free_netdev(netdev);
1410        dev_set_drvdata(&dev->dev, NULL);
1411        return err;
1412}
1413
1414static void xennet_end_access(int ref, void *page)
1415{
1416        /* This frees the page as a side-effect */
1417        if (ref != GRANT_INVALID_REF)
1418                gnttab_end_foreign_access(ref, 0, (unsigned long)page);
1419}
1420
1421static void xennet_disconnect_backend(struct netfront_info *info)
1422{
1423        unsigned int i = 0;
1424        unsigned int num_queues = info->netdev->real_num_tx_queues;
1425
1426        netif_carrier_off(info->netdev);
1427
1428        for (i = 0; i < num_queues; ++i) {
1429                struct netfront_queue *queue = &info->queues[i];
1430
1431                if (queue->tx_irq && (queue->tx_irq == queue->rx_irq))
1432                        unbind_from_irqhandler(queue->tx_irq, queue);
1433                if (queue->tx_irq && (queue->tx_irq != queue->rx_irq)) {
1434                        unbind_from_irqhandler(queue->tx_irq, queue);
1435                        unbind_from_irqhandler(queue->rx_irq, queue);
1436                }
1437                queue->tx_evtchn = queue->rx_evtchn = 0;
1438                queue->tx_irq = queue->rx_irq = 0;
1439
1440                napi_synchronize(&queue->napi);
1441
1442                xennet_release_tx_bufs(queue);
1443                xennet_release_rx_bufs(queue);
1444                gnttab_free_grant_references(queue->gref_tx_head);
1445                gnttab_free_grant_references(queue->gref_rx_head);
1446
1447                /* End access and free the pages */
1448                xennet_end_access(queue->tx_ring_ref, queue->tx.sring);
1449                xennet_end_access(queue->rx_ring_ref, queue->rx.sring);
1450
1451                queue->tx_ring_ref = GRANT_INVALID_REF;
1452                queue->rx_ring_ref = GRANT_INVALID_REF;
1453                queue->tx.sring = NULL;
1454                queue->rx.sring = NULL;
1455        }
1456}
1457
1458/**
1459 * We are reconnecting to the backend, due to a suspend/resume, or a backend
1460 * driver restart.  We tear down our netif structure and recreate it, but
1461 * leave the device-layer structures intact so that this is transparent to the
1462 * rest of the kernel.
1463 */
1464static int netfront_resume(struct xenbus_device *dev)
1465{
1466        struct netfront_info *info = dev_get_drvdata(&dev->dev);
1467
1468        dev_dbg(&dev->dev, "%s\n", dev->nodename);
1469
1470        xennet_disconnect_backend(info);
1471        return 0;
1472}
1473
1474static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
1475{
1476        char *s, *e, *macstr;
1477        int i;
1478
1479        macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
1480        if (IS_ERR(macstr))
1481                return PTR_ERR(macstr);
1482
1483        for (i = 0; i < ETH_ALEN; i++) {
1484                mac[i] = simple_strtoul(s, &e, 16);
1485                if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
1486                        kfree(macstr);
1487                        return -ENOENT;
1488                }
1489                s = e+1;
1490        }
1491
1492        kfree(macstr);
1493        return 0;
1494}
1495
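/*
 * Single event channel mode: allocate one event channel for the queue and
 * bind xennet_interrupt() to it, so TX and RX notifications share an IRQ.
 */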
1496static int setup_netfront_single(struct netfront_queue *queue)
1497{
1498        int err;
1499
1500        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1501        if (err < 0)
1502                goto fail;
1503
1504        err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
1505                                        xennet_interrupt,
1506                                        0, queue->info->netdev->name, queue);
1507        if (err < 0)
1508                goto bind_fail;
1509        queue->rx_evtchn = queue->tx_evtchn;
1510        queue->rx_irq = queue->tx_irq = err;
1511
1512        return 0;
1513
1514bind_fail:
1515        xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1516        queue->tx_evtchn = 0;
1517fail:
1518        return err;
1519}
1520
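/*
 * Split event channel mode: allocate separate TX and RX event channels
 * and bind xennet_tx_interrupt()/xennet_rx_interrupt() to them, using
 * the "<queue>-tx" and "<queue>-rx" IRQ names.
 */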
1521static int setup_netfront_split(struct netfront_queue *queue)
1522{
1523        int err;
1524
1525        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->tx_evtchn);
1526        if (err < 0)
1527                goto fail;
1528        err = xenbus_alloc_evtchn(queue->info->xbdev, &queue->rx_evtchn);
1529        if (err < 0)
1530                goto alloc_rx_evtchn_fail;
1531
1532        snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
1533                 "%s-tx", queue->name);
1534        err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
1535                                        xennet_tx_interrupt,
1536                                        0, queue->tx_irq_name, queue);
1537        if (err < 0)
1538                goto bind_tx_fail;
1539        queue->tx_irq = err;
1540
1541        snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
1542                 "%s-rx", queue->name);
1543        err = bind_evtchn_to_irqhandler(queue->rx_evtchn,
1544                                        xennet_rx_interrupt,
1545                                        0, queue->rx_irq_name, queue);
1546        if (err < 0)
1547                goto bind_rx_fail;
1548        queue->rx_irq = err;
1549
1550        return 0;
1551
1552bind_rx_fail:
1553        unbind_from_irqhandler(queue->tx_irq, queue);
1554        queue->tx_irq = 0;
1555bind_tx_fail:
1556        xenbus_free_evtchn(queue->info->xbdev, queue->rx_evtchn);
1557        queue->rx_evtchn = 0;
1558alloc_rx_evtchn_fail:
1559        xenbus_free_evtchn(queue->info->xbdev, queue->tx_evtchn);
1560        queue->tx_evtchn = 0;
1561fail:
1562        return err;
1563}
1564
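/*
 * Allocate and grant the shared TX and RX rings for one queue and set up
 * its event channel(s): split channels if the backend advertised
 * feature-split-event-channels, falling back to a single shared channel
 * otherwise (or if the split setup fails).
 */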
1565static int setup_netfront(struct xenbus_device *dev,
1566                        struct netfront_queue *queue, unsigned int feature_split_evtchn)
1567{
1568        struct xen_netif_tx_sring *txs;
1569        struct xen_netif_rx_sring *rxs;
1570        int err;
1571
1572        queue->tx_ring_ref = GRANT_INVALID_REF;
1573        queue->rx_ring_ref = GRANT_INVALID_REF;
1574        queue->rx.sring = NULL;
1575        queue->tx.sring = NULL;
1576
1577        txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1578        if (!txs) {
1579                err = -ENOMEM;
1580                xenbus_dev_fatal(dev, err, "allocating tx ring page");
1581                goto fail;
1582        }
1583        SHARED_RING_INIT(txs);
1584        FRONT_RING_INIT(&queue->tx, txs, PAGE_SIZE);
1585
1586        err = xenbus_grant_ring(dev, virt_to_mfn(txs));
1587        if (err < 0)
1588                goto grant_tx_ring_fail;
1589        queue->tx_ring_ref = err;
1590
1591        rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
1592        if (!rxs) {
1593                err = -ENOMEM;
1594                xenbus_dev_fatal(dev, err, "allocating rx ring page");
1595                goto alloc_rx_ring_fail;
1596        }
1597        SHARED_RING_INIT(rxs);
1598        FRONT_RING_INIT(&queue->rx, rxs, PAGE_SIZE);
1599
1600        err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
1601        if (err < 0)
1602                goto grant_rx_ring_fail;
1603        queue->rx_ring_ref = err;
1604
1605        if (feature_split_evtchn)
1606                err = setup_netfront_split(queue);
1607        /* Set up a single event channel if
1608         *  a) feature-split-event-channels == 0, or
1609         *  b) feature-split-event-channels == 1 but the split setup failed.
1610         */
1611        if (!feature_split_evtchn || err)
1612                err = setup_netfront_single(queue);
1613
1614        if (err)
1615                goto alloc_evtchn_fail;
1616
1617        return 0;
1618
1619        /* If we fail to set up netfront, it is safe to just revoke access to
1620         * the granted pages because the backend is not accessing them yet.
1621         */
1622alloc_evtchn_fail:
1623        gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0);
1624grant_rx_ring_fail:
1625        free_page((unsigned long)rxs);
1626alloc_rx_ring_fail:
1627        gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0);
1628grant_tx_ring_fail:
1629        free_page((unsigned long)txs);
1630fail:
1631        return err;
1632}
1633
1634/* Queue-specific initialisation
1635 * This used to be done in xennet_create_dev() but must now
1636 * be run per-queue.
1637 */
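/*
 * The per-queue state set up here covers the TX/RX locks, the RX batch
 * queue, fill targets and refill timer, the TX skb free-list chain and
 * the grant-reference pools for both rings.
 */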
1638static int xennet_init_queue(struct netfront_queue *queue)
1639{
1640        unsigned short i;
1641        int err = 0;
1642
1643        spin_lock_init(&queue->tx_lock);
1644        spin_lock_init(&queue->rx_lock);
1645
1646        skb_queue_head_init(&queue->rx_batch);
1647        queue->rx_target     = RX_DFL_MIN_TARGET;
1648        queue->rx_min_target = RX_DFL_MIN_TARGET;
1649        queue->rx_max_target = RX_MAX_TARGET;
1650
1651        init_timer(&queue->rx_refill_timer);
1652        queue->rx_refill_timer.data = (unsigned long)queue;
1653        queue->rx_refill_timer.function = rx_refill_timeout;
1654
1655        snprintf(queue->name, sizeof(queue->name), "%s-q%u",
1656                 queue->info->netdev->name, queue->id);
1657
1658        /* Initialise tx_skbs as a free chain containing every entry. */
1659        queue->tx_skb_freelist = 0;
1660        for (i = 0; i < NET_TX_RING_SIZE; i++) {
1661                skb_entry_set_link(&queue->tx_skbs[i], i+1);
1662                queue->grant_tx_ref[i] = GRANT_INVALID_REF;
1663                queue->grant_tx_page[i] = NULL;
1664        }
1665
1666        /* Clear out rx_skbs */
1667        for (i = 0; i < NET_RX_RING_SIZE; i++) {
1668                queue->rx_skbs[i] = NULL;
1669                queue->grant_rx_ref[i] = GRANT_INVALID_REF;
1670        }
1671
1672        /* A grant for every tx ring slot */
1673        if (gnttab_alloc_grant_references(TX_MAX_TARGET,
1674                                          &queue->gref_tx_head) < 0) {
1675                pr_alert("can't alloc tx grant refs\n");
1676                err = -ENOMEM;
1677                goto exit;
1678        }
1679
1680        /* A grant for every rx ring slot */
1681        if (gnttab_alloc_grant_references(RX_MAX_TARGET,
1682                                          &queue->gref_rx_head) < 0) {
1683                pr_alert("can't alloc rx grant refs\n");
1684                err = -ENOMEM;
1685                goto exit_free_tx;
1686        }
1687
1688        return 0;
1689
1690 exit_free_tx:
1691        gnttab_free_grant_references(queue->gref_tx_head);
1692 exit:
1693        return err;
1694}
1695
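/*
 * Example of the resulting XenStore layout (illustrative; with split
 * event channels, event-channel-tx/-rx are written instead):
 *
 *   flat (single queue):         hierarchical (multiple queues):
 *     <nodename>/tx-ring-ref       <nodename>/queue-0/tx-ring-ref
 *     <nodename>/rx-ring-ref       <nodename>/queue-0/rx-ring-ref
 *     <nodename>/event-channel     <nodename>/queue-0/event-channel
 */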
1696static int write_queue_xenstore_keys(struct netfront_queue *queue,
1697                           struct xenbus_transaction *xbt, int write_hierarchical)
1698{
1699        /* Write the queue-specific keys into XenStore in the traditional
1700         * way for a single queue, or under per-queue subkeys for multiple
1701         * queues.
1702         */
1703        struct xenbus_device *dev = queue->info->xbdev;
1704        int err;
1705        const char *message;
1706        char *path;
1707        size_t pathsize;
1708
1709        /* Choose the correct place to write the keys */
1710        if (write_hierarchical) {
1711                pathsize = strlen(dev->nodename) + 10;
1712                path = kzalloc(pathsize, GFP_KERNEL);
1713                if (!path) {
1714                        err = -ENOMEM;
1715                        message = "out of memory while writing ring references";
1716                        goto error;
1717                }
1718                snprintf(path, pathsize, "%s/queue-%u",
1719                                dev->nodename, queue->id);
1720        } else {
1721                path = (char *)dev->nodename;
1722        }
1723
1724        /* Write ring references */
1725        err = xenbus_printf(*xbt, path, "tx-ring-ref", "%u",
1726                        queue->tx_ring_ref);
1727        if (err) {
1728                message = "writing tx-ring-ref";
1729                goto error;
1730        }
1731
1732        err = xenbus_printf(*xbt, path, "rx-ring-ref", "%u",
1733                        queue->rx_ring_ref);
1734        if (err) {
1735                message = "writing rx-ring-ref";
1736                goto error;
1737        }
1738
1739        /* Write the event channels, covering both the shared and the
1740         * split event channel configurations.
1741         */
1742        if (queue->tx_evtchn == queue->rx_evtchn) {
1743                /* Shared event channel */
1744                err = xenbus_printf(*xbt, path,
1745                                "event-channel", "%u", queue->tx_evtchn);
1746                if (err) {
1747                        message = "writing event-channel";
1748                        goto error;
1749                }
1750        } else {
1751                /* Split event channels */
1752                err = xenbus_printf(*xbt, path,
1753                                "event-channel-tx", "%u", queue->tx_evtchn);
1754                if (err) {
1755                        message = "writing event-channel-tx";
1756                        goto error;
1757                }
1758
1759                err = xenbus_printf(*xbt, path,
1760                                "event-channel-rx", "%u", queue->rx_evtchn);
1761                if (err) {
1762                        message = "writing event-channel-rx";
1763                        goto error;
1764                }
1765        }
1766
1767        if (write_hierarchical)
1768                kfree(path);
1769        return 0;
1770
1771error:
1772        if (write_hierarchical)
1773                kfree(path);
1774        xenbus_dev_fatal(dev, err, "%s", message);
1775        return err;
1776}
1777
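/*
 * Undo xennet_create_queues(): disable and delete each queue's NAPI
 * context under the RTNL lock, then free the queue array.
 */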
1778static void xennet_destroy_queues(struct netfront_info *info)
1779{
1780        unsigned int i;
1781
1782        rtnl_lock();
1783
1784        for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
1785                struct netfront_queue *queue = &info->queues[i];
1786
1787                if (netif_running(info->netdev))
1788                        napi_disable(&queue->napi);
1789                netif_napi_del(&queue->napi);
1790        }
1791
1792        rtnl_unlock();
1793
1794        kfree(info->queues);
1795        info->queues = NULL;
1796}
1797
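/*
 * Allocate and initialise num_queues netfront_queue structures and
 * register a NAPI context for each.  If a queue fails to initialise,
 * the real number of TX queues is trimmed to those that succeeded.
 */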
1798static int xennet_create_queues(struct netfront_info *info,
1799                                unsigned int num_queues)
1800{
1801        unsigned int i;
1802        int ret;
1803
1804        info->queues = kcalloc(num_queues, sizeof(struct netfront_queue),
1805                               GFP_KERNEL);
1806        if (!info->queues)
1807                return -ENOMEM;
1808
1809        rtnl_lock();
1810
1811        for (i = 0; i < num_queues; i++) {
1812                struct netfront_queue *queue = &info->queues[i];
1813
1814                queue->id = i;
1815                queue->info = info;
1816
1817                ret = xennet_init_queue(queue);
1818                if (ret < 0) {
1819                        dev_warn(&info->netdev->dev,
1820                                 "only created %u queues\n", i);
1821                        num_queues = i;
1822                        break;
1823                }
1824
1825                netif_napi_add(queue->info->netdev, &queue->napi,
1826                               xennet_poll, 64);
1827                if (netif_running(info->netdev))
1828                        napi_enable(&queue->napi);
1829        }
1830
1831        netif_set_real_num_tx_queues(info->netdev, num_queues);
1832
1833        rtnl_unlock();
1834
1835        if (num_queues == 0) {
1836                dev_err(&info->netdev->dev, "no queues\n");
1837                return -EINVAL;
1838        }
1839        return 0;
1840}
1841
1842/* Common code used when first setting up and when resuming. */
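/*
 * Read multi-queue-max-queues and feature-split-event-channels from the
 * backend, (re)create the queues, set up their rings and event channels,
 * and publish the ring references, event channels and frontend features
 * in a single xenbus transaction (retried on -EAGAIN).
 */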
1843static int talk_to_netback(struct xenbus_device *dev,
1844                           struct netfront_info *info)
1845{
1846        const char *message;
1847        struct xenbus_transaction xbt;
1848        int err;
1849        unsigned int feature_split_evtchn;
1850        unsigned int i = 0;
1851        unsigned int max_queues = 0;
1852        struct netfront_queue *queue = NULL;
1853        unsigned int num_queues = 1;
1854
1855        info->netdev->irq = 0;
1856
1857        /* Check if backend supports multiple queues */
1858        err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1859                           "multi-queue-max-queues", "%u", &max_queues);
1860        if (err < 0)
1861                max_queues = 1;
1862        num_queues = min(max_queues, xennet_max_queues);
1863
1864        /* Check feature-split-event-channels */
1865        err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1866                           "feature-split-event-channels", "%u",
1867                           &feature_split_evtchn);
1868        if (err < 0)
1869                feature_split_evtchn = 0;
1870
1871        /* Read the MAC address. */
1872        err = xen_net_read_mac(dev, info->netdev->dev_addr);
1873        if (err) {
1874                xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
1875                goto out;
1876        }
1877
1878        if (info->queues)
1879                xennet_destroy_queues(info);
1880
1881        err = xennet_create_queues(info, num_queues);
1882        if (err < 0)
1883                goto destroy_ring;
1884
1885        /* Create shared ring, alloc event channel -- for each queue */
1886        for (i = 0; i < num_queues; ++i) {
1887                queue = &info->queues[i];
1888                err = setup_netfront(dev, queue, feature_split_evtchn);
1889                if (err) {
1890                        /* setup_netfront() will tidy up the current
1891                         * queue on error, but we need to clean up
1892                         * those already allocated.
1893                         */
1894                        if (i > 0) {
1895                                rtnl_lock();
1896                                netif_set_real_num_tx_queues(info->netdev, i);
1897                                rtnl_unlock();
1898                                goto destroy_ring;
1899                        } else {
1900                                goto out;
1901                        }
1902                }
1903        }
1904
1905again:
1906        err = xenbus_transaction_start(&xbt);
1907        if (err) {
1908                xenbus_dev_fatal(dev, err, "starting transaction");
1909                goto destroy_ring;
1910        }
1911
1912        if (num_queues == 1) {
1913                err = write_queue_xenstore_keys(&info->queues[0], &xbt, 0); /* flat */
1914                if (err)
1915                        goto abort_transaction_no_dev_fatal;
1916        } else {
1917                /* Write the number of queues */
1918                err = xenbus_printf(xbt, dev->nodename, "multi-queue-num-queues",
1919                                    "%u", num_queues);
1920                if (err) {
1921                        message = "writing multi-queue-num-queues";
1922                        goto abort_transaction_no_dev_fatal;
1923                }
1924
1925                /* Write the keys for each queue */
1926                for (i = 0; i < num_queues; ++i) {
1927                        queue = &info->queues[i];
1928                        err = write_queue_xenstore_keys(queue, &xbt, 1); /* hierarchical */
1929                        if (err)
1930                                goto abort_transaction_no_dev_fatal;
1931                }
1932        }
1933
1934        /* The remaining keys are not queue-specific */
1935        err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
1936                            1);
1937        if (err) {
1938                message = "writing request-rx-copy";
1939                goto abort_transaction;
1940        }
1941
1942        err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
1943        if (err) {
1944                message = "writing feature-rx-notify";
1945                goto abort_transaction;
1946        }
1947
1948        err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
1949        if (err) {
1950                message = "writing feature-sg";
1951                goto abort_transaction;
1952        }
1953
1954        err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
1955        if (err) {
1956                message = "writing feature-gso-tcpv4";
1957                goto abort_transaction;
1958        }
1959
1960        err = xenbus_write(xbt, dev->nodename, "feature-gso-tcpv6", "1");
1961        if (err) {
1962                message = "writing feature-gso-tcpv6";
1963                goto abort_transaction;
1964        }
1965
1966        err = xenbus_write(xbt, dev->nodename, "feature-ipv6-csum-offload",
1967                           "1");
1968        if (err) {
1969                message = "writing feature-ipv6-csum-offload";
1970                goto abort_transaction;
1971        }
1972
1973        err = xenbus_transaction_end(xbt, 0);
1974        if (err) {
1975                if (err == -EAGAIN)
1976                        goto again;
1977                xenbus_dev_fatal(dev, err, "completing transaction");
1978                goto destroy_ring;
1979        }
1980
1981        return 0;
1982
1983 abort_transaction:
1984        xenbus_dev_fatal(dev, err, "%s", message);
1985abort_transaction_no_dev_fatal:
1986        xenbus_transaction_end(xbt, 1);
1987 destroy_ring:
1988        xennet_disconnect_backend(info);
1989        kfree(info->queues);
1990        info->queues = NULL;
1991        rtnl_lock();
1992        netif_set_real_num_tx_queues(info->netdev, 0);
1993        rtnl_unlock();
1994 out:
1995        return err;
1996}
1997
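/*
 * Bring the device up against a (re)connected backend: require
 * feature-rx-copy, renegotiate via talk_to_netback(), refresh the netdev
 * features and then kick every queue so stale TX completions are reaped
 * and the RX rings are refilled.
 */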
1998static int xennet_connect(struct net_device *dev)
1999{
2000        struct netfront_info *np = netdev_priv(dev);
2001        unsigned int num_queues = 0;
2002        int err;
2003        unsigned int feature_rx_copy;
2004        unsigned int j = 0;
2005        struct netfront_queue *queue = NULL;
2006
2007        err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
2008                           "feature-rx-copy", "%u", &feature_rx_copy);
2009        if (err != 1)
2010                feature_rx_copy = 0;
2011
2012        if (!feature_rx_copy) {
2013                dev_info(&dev->dev,
2014                         "backend does not support copying receive path\n");
2015                return -ENODEV;
2016        }
2017
2018        err = talk_to_netback(np->xbdev, np);
2019        if (err)
2020                return err;
2021
2022        /* talk_to_netback() sets the correct number of queues */
2023        num_queues = dev->real_num_tx_queues;
2024
2025        rtnl_lock();
2026        netdev_update_features(dev);
2027        rtnl_unlock();
2028
2029        /*
2030         * All public and private state should now be sane.  Get
2031         * ready to start sending and receiving packets and give the driver
2032         * domain a kick because we've probably just requeued some
2033         * packets.
2034         */
2035        netif_carrier_on(np->netdev);
2036        for (j = 0; j < num_queues; ++j) {
2037                queue = &np->queues[j];
2038
2039                notify_remote_via_irq(queue->tx_irq);
2040                if (queue->tx_irq != queue->rx_irq)
2041                        notify_remote_via_irq(queue->rx_irq);
2042
2043                spin_lock_irq(&queue->tx_lock);
2044                xennet_tx_buf_gc(queue);
2045                spin_unlock_irq(&queue->tx_lock);
2046
2047                spin_lock_bh(&queue->rx_lock);
2048                xennet_alloc_rx_buffers(queue);
2049                spin_unlock_bh(&queue->rx_lock);
2050        }
2051
2052        return 0;
2053}
2054
2055/*
2056 * Callback received when the backend's state changes.
2057 */
2058static void netback_changed(struct xenbus_device *dev,
2059                            enum xenbus_state backend_state)
2060{
2061        struct netfront_info *np = dev_get_drvdata(&dev->dev);
2062        struct net_device *netdev = np->netdev;
2063
2064        dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
2065
2066        switch (backend_state) {
2067        case XenbusStateInitialising:
2068        case XenbusStateInitialised:
2069        case XenbusStateReconfiguring:
2070        case XenbusStateReconfigured:
2071        case XenbusStateUnknown:
2072                break;
2073
2074        case XenbusStateInitWait:
2075                if (dev->state != XenbusStateInitialising)
2076                        break;
2077                if (xennet_connect(netdev) != 0)
2078                        break;
2079                xenbus_switch_state(dev, XenbusStateConnected);
2080                break;
2081
2082        case XenbusStateConnected:
2083                netdev_notify_peers(netdev);
2084                break;
2085
2086        case XenbusStateClosed:
2087                if (dev->state == XenbusStateClosed)
2088                        break;
2089                /* Missed the backend's CLOSING state -- fallthrough */
2090        case XenbusStateClosing:
2091                xenbus_frontend_closed(dev);
2092                break;
2093        }
2094}
2095
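/*
 * Each entry maps an ethtool statistics string to the byte offset of the
 * corresponding counter within struct netfront_info; the counters are
 * read as atomic_t values by xennet_get_ethtool_stats().
 */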
2096static const struct xennet_stat {
2097        char name[ETH_GSTRING_LEN];
2098        u16 offset;
2099} xennet_stats[] = {
2100        {
2101                "rx_gso_checksum_fixup",
2102                offsetof(struct netfront_info, rx_gso_checksum_fixup)
2103        },
2104};
2105
2106static int xennet_get_sset_count(struct net_device *dev, int string_set)
2107{
2108        switch (string_set) {
2109        case ETH_SS_STATS:
2110                return ARRAY_SIZE(xennet_stats);
2111        default:
2112                return -EINVAL;
2113        }
2114}
2115
2116static void xennet_get_ethtool_stats(struct net_device *dev,
2117                                     struct ethtool_stats *stats, u64 *data)
2118{
2119        void *np = netdev_priv(dev);
2120        int i;
2121
2122        for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2123                data[i] = atomic_read((atomic_t *)(np + xennet_stats[i].offset));
2124}
2125
2126static void xennet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
2127{
2128        int i;
2129
2130        switch (stringset) {
2131        case ETH_SS_STATS:
2132                for (i = 0; i < ARRAY_SIZE(xennet_stats); i++)
2133                        memcpy(data + i * ETH_GSTRING_LEN,
2134                               xennet_stats[i].name, ETH_GSTRING_LEN);
2135                break;
2136        }
2137}
2138
2139static const struct ethtool_ops xennet_ethtool_ops =
2140{
2141        .get_link = ethtool_op_get_link,
2142
2143        .get_sset_count = xennet_get_sset_count,
2144        .get_ethtool_stats = xennet_get_ethtool_stats,
2145        .get_strings = xennet_get_strings,
2146};
2147
2148#ifdef CONFIG_SYSFS
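/*
 * The rxbuf_min, rxbuf_max and rxbuf_cur attributes appear in the
 * net_device's sysfs directory (e.g. /sys/class/net/<iface>/rxbuf_min)
 * and read or tune the RX buffer fill targets across all queues; the
 * store handlers clamp values to [RX_MIN_TARGET, RX_MAX_TARGET].
 */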
2149static ssize_t show_rxbuf_min(struct device *dev,
2150                              struct device_attribute *attr, char *buf)
2151{
2152        struct net_device *netdev = to_net_dev(dev);
2153        struct netfront_info *info = netdev_priv(netdev);
2154        unsigned int num_queues = netdev->real_num_tx_queues;
2155
2156        if (num_queues)
2157                return sprintf(buf, "%u\n", info->queues[0].rx_min_target);
2158        else
2159                return sprintf(buf, "%u\n", RX_MIN_TARGET);
2160}
2161
2162static ssize_t store_rxbuf_min(struct device *dev,
2163                               struct device_attribute *attr,
2164                               const char *buf, size_t len)
2165{
2166        struct net_device *netdev = to_net_dev(dev);
2167        struct netfront_info *np = netdev_priv(netdev);
2168        unsigned int num_queues = netdev->real_num_tx_queues;
2169        char *endp;
2170        unsigned long target;
2171        unsigned int i;
2172        struct netfront_queue *queue;
2173
2174        if (!capable(CAP_NET_ADMIN))
2175                return -EPERM;
2176
2177        target = simple_strtoul(buf, &endp, 0);
2178        if (endp == buf)
2179                return -EBADMSG;
2180
2181        if (target < RX_MIN_TARGET)
2182                target = RX_MIN_TARGET;
2183        if (target > RX_MAX_TARGET)
2184                target = RX_MAX_TARGET;
2185
2186        for (i = 0; i < num_queues; ++i) {
2187                queue = &np->queues[i];
2188                spin_lock_bh(&queue->rx_lock);
2189                if (target > queue->rx_max_target)
2190                        queue->rx_max_target = target;
2191                queue->rx_min_target = target;
2192                if (target > queue->rx_target)
2193                        queue->rx_target = target;
2194
2195                xennet_alloc_rx_buffers(queue);
2196
2197                spin_unlock_bh(&queue->rx_lock);
2198        }
2199        return len;
2200}
2201
2202static ssize_t show_rxbuf_max(struct device *dev,
2203                              struct device_attribute *attr, char *buf)
2204{
2205        struct net_device *netdev = to_net_dev(dev);
2206        struct netfront_info *info = netdev_priv(netdev);
2207        unsigned int num_queues = netdev->real_num_tx_queues;
2208
2209        if (num_queues)
2210                return sprintf(buf, "%u\n", info->queues[0].rx_max_target);
2211        else
2212                return sprintf(buf, "%u\n", RX_MAX_TARGET);
2213}
2214
2215static ssize_t store_rxbuf_max(struct device *dev,
2216                               struct device_attribute *attr,
2217                               const char *buf, size_t len)
2218{
2219        struct net_device *netdev = to_net_dev(dev);
2220        struct netfront_info *np = netdev_priv(netdev);
2221        unsigned int num_queues = netdev->real_num_tx_queues;
2222        char *endp;
2223        unsigned long target;
2224        unsigned int i = 0;
2225        struct netfront_queue *queue = NULL;
2226
2227        if (!capable(CAP_NET_ADMIN))
2228                return -EPERM;
2229
2230        target = simple_strtoul(buf, &endp, 0);
2231        if (endp == buf)
2232                return -EBADMSG;
2233
2234        if (target < RX_MIN_TARGET)
2235                target = RX_MIN_TARGET;
2236        if (target > RX_MAX_TARGET)
2237                target = RX_MAX_TARGET;
2238
2239        for (i = 0; i < num_queues; ++i) {
2240                queue = &np->queues[i];
2241                spin_lock_bh(&queue->rx_lock);
2242                if (target < queue->rx_min_target)
2243                        queue->rx_min_target = target;
2244                queue->rx_max_target = target;
2245                if (target < queue->rx_target)
2246                        queue->rx_target = target;
2247
2248                xennet_alloc_rx_buffers(queue);
2249
2250                spin_unlock_bh(&queue->rx_lock);
2251        }
2252        return len;
2253}
2254
2255static ssize_t show_rxbuf_cur(struct device *dev,
2256                              struct device_attribute *attr, char *buf)
2257{
2258        struct net_device *netdev = to_net_dev(dev);
2259        struct netfront_info *info = netdev_priv(netdev);
2260        unsigned int num_queues = netdev->real_num_tx_queues;
2261
2262        if (num_queues)
2263                return sprintf(buf, "%u\n", info->queues[0].rx_target);
2264        else
2265                return sprintf(buf, "0\n");
2266}
2267
2268static struct device_attribute xennet_attrs[] = {
2269        __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
2270        __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
2271        __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
2272};
2273
2274static int xennet_sysfs_addif(struct net_device *netdev)
2275{
2276        int i;
2277        int err;
2278
2279        for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
2280                err = device_create_file(&netdev->dev,
2281                                           &xennet_attrs[i]);
2282                if (err)
2283                        goto fail;
2284        }
2285        return 0;
2286
2287 fail:
2288        while (--i >= 0)
2289                device_remove_file(&netdev->dev, &xennet_attrs[i]);
2290        return err;
2291}
2292
2293static void xennet_sysfs_delif(struct net_device *netdev)
2294{
2295        int i;
2296
2297        for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++)
2298                device_remove_file(&netdev->dev, &xennet_attrs[i]);
2299}
2300
2301#endif /* CONFIG_SYSFS */
2302
2303static const struct xenbus_device_id netfront_ids[] = {
2304        { "vif" },
2305        { "" }
2306};
2307
2308
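/*
 * Device teardown: disconnect from the backend, remove the sysfs
 * attributes, unregister the net_device, stop the per-queue RX refill
 * timers and free the queues, the stats and the net_device itself.
 */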
2309static int xennet_remove(struct xenbus_device *dev)
2310{
2311        struct netfront_info *info = dev_get_drvdata(&dev->dev);
2312        unsigned int num_queues = info->netdev->real_num_tx_queues;
2313        struct netfront_queue *queue = NULL;
2314        unsigned int i = 0;
2315
2316        dev_dbg(&dev->dev, "%s\n", dev->nodename);
2317
2318        xennet_disconnect_backend(info);
2319
2320        xennet_sysfs_delif(info->netdev);
2321
2322        unregister_netdev(info->netdev);
2323
2324        for (i = 0; i < num_queues; ++i) {
2325                queue = &info->queues[i];
2326                del_timer_sync(&queue->rx_refill_timer);
2327        }
2328
2329        if (num_queues) {
2330                kfree(info->queues);
2331                info->queues = NULL;
2332        }
2333
2334        free_percpu(info->stats);
2335
2336        free_netdev(info->netdev);
2337
2338        return 0;
2339}
2340
2341static DEFINE_XENBUS_DRIVER(netfront, ,
2342        .probe = netfront_probe,
2343        .remove = xennet_remove,
2344        .resume = netfront_resume,
2345        .otherend_changed = netback_changed,
2346);
2347
2348static int __init netif_init(void)
2349{
2350        if (!xen_domain())
2351                return -ENODEV;
2352
2353        if (!xen_has_pv_nic_devices())
2354                return -ENODEV;
2355
2356        pr_info("Initialising Xen virtual ethernet driver\n");
2357
2358        /* Allow as many queues as there are CPUs, by default */
2359        xennet_max_queues = num_online_cpus();
2360
2361        return xenbus_register_frontend(&netfront_driver);
2362}
2363module_init(netif_init);
2364
2365
2366static void __exit netif_exit(void)
2367{
2368        xenbus_unregister_driver(&netfront_driver);
2369}
2370module_exit(netif_exit);
2371
2372MODULE_DESCRIPTION("Xen virtual network device frontend");
2373MODULE_LICENSE("GPL");
2374MODULE_ALIAS("xen:vif");
2375MODULE_ALIAS("xennet");
2376