linux/drivers/net/xen-netback/netback.c
   1/*
   2 * Back-end of the driver for virtual network devices. This portion of the
   3 * driver exports a 'unified' network-device interface that can be accessed
   4 * by any operating system that implements a compatible front end. A
   5 * reference front-end implementation can be found in:
   6 *  drivers/net/xen-netfront.c
   7 *
   8 * Copyright (c) 2002-2005, K A Fraser
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public License version 2
  12 * as published by the Free Software Foundation; or, when distributed
  13 * separately from the Linux kernel or incorporated into other
  14 * software packages, subject to the following license:
  15 *
  16 * Permission is hereby granted, free of charge, to any person obtaining a copy
  17 * of this source file (the "Software"), to deal in the Software without
  18 * restriction, including without limitation the rights to use, copy, modify,
  19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  20 * and to permit persons to whom the Software is furnished to do so, subject to
  21 * the following conditions:
  22 *
  23 * The above copyright notice and this permission notice shall be included in
  24 * all copies or substantial portions of the Software.
  25 *
  26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  32 * IN THE SOFTWARE.
  33 */
  34
  35#include "common.h"
  36
  37#include <linux/kthread.h>
  38#include <linux/if_vlan.h>
  39#include <linux/udp.h>
  40
  41#include <net/tcp.h>
  42
  43#include <xen/xen.h>
  44#include <xen/events.h>
  45#include <xen/interface/memory.h>
  46
  47#include <asm/xen/hypercall.h>
  48#include <asm/xen/page.h>
  49
  50/*
  51 * This is the maximum number of slots a skb can use. If a guest sends
  52 * a skb which exceeds this limit it is considered malicious.
  53 */
  54#define FATAL_SKB_SLOTS_DEFAULT 20
  55static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
  56module_param(fatal_skb_slots, uint, 0444);
  57
  58/*
  59 * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating
  60 * the maximum number of slots a valid packet can use. Currently this
  61 * value is defined to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to
  62 * be supported by all backends.
  63 */
  64#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN
  65
  66typedef unsigned int pending_ring_idx_t;
  67#define INVALID_PENDING_RING_IDX (~0U)
  68
  69struct pending_tx_info {
  70        struct xen_netif_tx_request req; /* coalesced tx request */
  71        struct xenvif *vif;
  72        pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
  73                                  * if it is head of one or more tx
  74                                  * reqs
  75                                  */
  76};
  77
  78struct netbk_rx_meta {
  79        int id;
  80        int size;
  81        int gso_size;
  82};
  83
  84#define MAX_PENDING_REQS 256
  85
  86/* Discriminate from any valid pending_idx value. */
  87#define INVALID_PENDING_IDX 0xFFFF
  88
  89#define MAX_BUFFER_OFFSET PAGE_SIZE
  90
  91/* extra field used in struct page */
  92union page_ext {
  93        struct {
  94#if BITS_PER_LONG < 64
  95#define IDX_WIDTH   8
  96#define GROUP_WIDTH (BITS_PER_LONG - IDX_WIDTH)
  97                unsigned int group:GROUP_WIDTH;
  98                unsigned int idx:IDX_WIDTH;
  99#else
 100                unsigned int group, idx;
 101#endif
 102        } e;
 103        void *mapping;
 104};
 105
 106struct xen_netbk {
 107        wait_queue_head_t wq;
 108        struct task_struct *task;
 109
 110        struct sk_buff_head rx_queue;
 111        struct sk_buff_head tx_queue;
 112
 113        struct timer_list net_timer;
 114
 115        struct page *mmap_pages[MAX_PENDING_REQS];
 116
 117        pending_ring_idx_t pending_prod;
 118        pending_ring_idx_t pending_cons;
 119        struct list_head net_schedule_list;
 120
 121        /* Protect the net_schedule_list in netif. */
 122        spinlock_t net_schedule_list_lock;
 123
 124        atomic_t netfront_count;
 125
 126        struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
 127        /* Coalescing tx requests before copying makes the number of grant
 128         * copy ops greater than or equal to the number of slots required.
 129         * In the worst case a tx request consumes 2 gnttab_copy ops.
 130         */
 131        struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
 132
 133        u16 pending_ring[MAX_PENDING_REQS];
 134
 135        /*
 136         * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
 137         * head/fragment page uses 2 copy operations because it
 138         * straddles two buffers in the frontend.
 139         */
 140        struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
 141        struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
 142};
 143
 144static struct xen_netbk *xen_netbk;
 145static int xen_netbk_group_nr;
 146
 147/*
 148 * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of
 149 * one or more merged tx requests, otherwise it is the continuation of
 150 * previous tx request.
 151 */
 152static inline int pending_tx_is_head(struct xen_netbk *netbk, RING_IDX idx)
 153{
 154        return netbk->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
 155}
 156
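    /*
     * Attach a new vif to the netback group currently serving the fewest
     * frontends, so load is spread evenly across the netback kthreads.
     */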
 157void xen_netbk_add_xenvif(struct xenvif *vif)
 158{
 159        int i;
 160        int min_netfront_count;
 161        int min_group = 0;
 162        struct xen_netbk *netbk;
 163
 164        min_netfront_count = atomic_read(&xen_netbk[0].netfront_count);
 165        for (i = 0; i < xen_netbk_group_nr; i++) {
 166                int netfront_count = atomic_read(&xen_netbk[i].netfront_count);
 167                if (netfront_count < min_netfront_count) {
 168                        min_group = i;
 169                        min_netfront_count = netfront_count;
 170                }
 171        }
 172
 173        netbk = &xen_netbk[min_group];
 174
 175        vif->netbk = netbk;
 176        atomic_inc(&netbk->netfront_count);
 177}
 178
 179void xen_netbk_remove_xenvif(struct xenvif *vif)
 180{
 181        struct xen_netbk *netbk = vif->netbk;
 182        vif->netbk = NULL;
 183        atomic_dec(&netbk->netfront_count);
 184}
 185
 186static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
 187                                  u8 status);
 188static void make_tx_response(struct xenvif *vif,
 189                             struct xen_netif_tx_request *txp,
 190                             s8       st);
 191static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
 192                                             u16      id,
 193                                             s8       st,
 194                                             u16      offset,
 195                                             u16      size,
 196                                             u16      flags);
 197
 198static inline unsigned long idx_to_pfn(struct xen_netbk *netbk,
 199                                       u16 idx)
 200{
 201        return page_to_pfn(netbk->mmap_pages[idx]);
 202}
 203
 204static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk,
 205                                         u16 idx)
 206{
 207        return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx));
 208}
 209
 210/* extra field used in struct page */
 211static inline void set_page_ext(struct page *pg, struct xen_netbk *netbk,
 212                                unsigned int idx)
 213{
 214        unsigned int group = netbk - xen_netbk;
 215        union page_ext ext = { .e = { .group = group + 1, .idx = idx } };
 216
 217        BUILD_BUG_ON(sizeof(ext) > sizeof(ext.mapping));
 218        pg->mapping = ext.mapping;
 219}
 220
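    /*
     * Decode the (group, idx) cookie that set_page_ext() stored in
     * page->mapping.  Returns 1 only if the page really is one of our
     * pending-tx mmap_pages, 0 otherwise.
     */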
 221static int get_page_ext(struct page *pg,
 222                        unsigned int *pgroup, unsigned int *pidx)
 223{
 224        union page_ext ext = { .mapping = pg->mapping };
 225        struct xen_netbk *netbk;
 226        unsigned int group, idx;
 227
 228        group = ext.e.group - 1;
 229
 230        if (group >= xen_netbk_group_nr)
 231                return 0;
 232
 233        netbk = &xen_netbk[group];
 234
 235        idx = ext.e.idx;
 236
 237        if (idx >= MAX_PENDING_REQS)
 238                return 0;
 239
 240        if (netbk->mmap_pages[idx] != pg)
 241                return 0;
 242
 243        *pgroup = group;
 244        *pidx = idx;
 245
 246        return 1;
 247}
 248
 249/*
 250 * This is the amount of packet we copy rather than map, so that the
 251 * guest can't fiddle with the contents of the headers while we do
 252 * packet processing on them (netfilter, routing, etc).
 253 */
 254#define PKT_PROT_LEN    (ETH_HLEN + \
 255                         VLAN_HLEN + \
 256                         sizeof(struct iphdr) + MAX_IPOPTLEN + \
 257                         sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
 258
 259static u16 frag_get_pending_idx(skb_frag_t *frag)
 260{
 261        return (u16)frag->page_offset;
 262}
 263
 264static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx)
 265{
 266        frag->page_offset = pending_idx;
 267}
 268
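    /* Turn a free-running counter into a pending ring index; this relies on
     * MAX_PENDING_REQS being a power of two.
     */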
 269static inline pending_ring_idx_t pending_index(unsigned i)
 270{
 271        return i & (MAX_PENDING_REQS-1);
 272}
 273
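    /* Number of pending tx slots currently in use (consumed but not yet
     * released back to the pending ring).
     */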
 274static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk)
 275{
 276        return MAX_PENDING_REQS -
 277                netbk->pending_prod + netbk->pending_cons;
 278}
 279
 280static void xen_netbk_kick_thread(struct xen_netbk *netbk)
 281{
 282        wake_up(&netbk->wq);
 283}
 284
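    /*
     * Worst-case number of rx ring slots needed to receive one skb on this
     * vif: enough for an MTU-sized linear area, plus the frags and an
     * extra-info slot when SG/GSO is enabled.
     */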
 285static int max_required_rx_slots(struct xenvif *vif)
 286{
 287        int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
 288
 289        /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
 290        if (vif->can_sg || vif->gso || vif->gso_prefix)
 291                max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
 292
 293        return max;
 294}
 295
 296int xen_netbk_rx_ring_full(struct xenvif *vif)
 297{
 298        RING_IDX peek   = vif->rx_req_cons_peek;
 299        RING_IDX needed = max_required_rx_slots(vif);
 300
 301        return ((vif->rx.sring->req_prod - peek) < needed) ||
 302               ((vif->rx.rsp_prod_pvt + XEN_NETIF_RX_RING_SIZE - peek) < needed);
 303}
 304
 305int xen_netbk_must_stop_queue(struct xenvif *vif)
 306{
 307        if (!xen_netbk_rx_ring_full(vif))
 308                return 0;
 309
 310        vif->rx.sring->req_event = vif->rx_req_cons_peek +
 311                max_required_rx_slots(vif);
 312        mb(); /* request notification /then/ check the queue */
 313
 314        return xen_netbk_rx_ring_full(vif);
 315}
 316
 317/*
 318 * Returns true if we should start a new receive buffer instead of
 319 * adding 'size' bytes to a buffer which currently contains 'offset'
 320 * bytes.
 321 */
 322static bool start_new_rx_buffer(int offset, unsigned long size, int head)
 323{
 324        /* simple case: we have completely filled the current buffer. */
 325        if (offset == MAX_BUFFER_OFFSET)
 326                return true;
 327
 328        /*
 329         * complex case: start a fresh buffer if the current frag
 330         * would overflow the current buffer but only if:
 331         *     (i)   this frag would fit completely in the next buffer
 332         * and (ii)  there is already some data in the current buffer
 333         * and (iii) this is not the head buffer.
 334         *
 335         * Where:
 336         * - (i) stops us splitting a frag into two copies
 337         *   unless the frag is too large for a single buffer.
 338         * - (ii) stops us from leaving a buffer pointlessly empty.
 339         * - (iii) stops us leaving the first buffer
 340         *   empty. Strictly speaking this is already covered
 341         *   by (ii) but is explicitly checked because
 342         *   netfront relies on the first buffer being
 343         *   non-empty and can crash otherwise.
 344         *
 345         * This means we will effectively linearise small
 346         * frags but do not needlessly split large buffers
 347         * into multiple copies, tending to give large frags
 348         * their own buffers as before.
 349         */
 350        if ((offset + size > MAX_BUFFER_OFFSET) &&
 351            (size <= MAX_BUFFER_OFFSET) && offset && !head)
 352                return true;
 353
 354        return false;
 355}
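
    /*
     * For example, with MAX_BUFFER_OFFSET == PAGE_SIZE == 4096: a 2000-byte
     * chunk arriving while a non-head buffer already holds 3000 bytes starts
     * a fresh buffer (it would overflow the current one but fits whole in an
     * empty one), whereas the same chunk arriving at offset 0 is simply
     * copied into the current buffer.
     */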
 356
 357struct xenvif_count_slot_state {
 358        unsigned long copy_off;
 359        bool head;
 360};
 361
 362unsigned int xenvif_count_frag_slots(struct xenvif *vif,
 363                                     unsigned long offset, unsigned long size,
 364                                     struct xenvif_count_slot_state *state)
 365{
 366        unsigned count = 0;
 367
 368        offset &= ~PAGE_MASK;
 369
 370        while (size > 0) {
 371                unsigned long bytes;
 372
 373                bytes = PAGE_SIZE - offset;
 374
 375                if (bytes > size)
 376                        bytes = size;
 377
 378                if (start_new_rx_buffer(state->copy_off, bytes, state->head)) {
 379                        count++;
 380                        state->copy_off = 0;
 381                }
 382
 383                if (state->copy_off + bytes > MAX_BUFFER_OFFSET)
 384                        bytes = MAX_BUFFER_OFFSET - state->copy_off;
 385
 386                state->copy_off += bytes;
 387
 388                offset += bytes;
 389                size -= bytes;
 390
 391                if (offset == PAGE_SIZE)
 392                        offset = 0;
 393
 394                state->head = false;
 395        }
 396
 397        return count;
 398}
 399
 400/*
 401 * Figure out how many ring slots we're going to need to send @skb to
 402 * the guest. This function is essentially a dry run of
 403 * netbk_gop_frag_copy.
 404 */
 405unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
 406{
 407        struct xenvif_count_slot_state state;
 408        unsigned int count;
 409        unsigned char *data;
 410        unsigned i;
 411
 412        state.head = true;
 413        state.copy_off = 0;
 414
 415        /* Slot for the first (partial) page of data. */
 416        count = 1;
 417
 418        /* Need a slot for the GSO prefix for GSO extra data? */
 419        if (skb_shinfo(skb)->gso_size)
 420                count++;
 421
 422        data = skb->data;
 423        while (data < skb_tail_pointer(skb)) {
 424                unsigned long offset = offset_in_page(data);
 425                unsigned long size = PAGE_SIZE - offset;
 426
 427                if (data + size > skb_tail_pointer(skb))
 428                        size = skb_tail_pointer(skb) - data;
 429
 430                count += xenvif_count_frag_slots(vif, offset, size, &state);
 431
 432                data += size;
 433        }
 434
 435        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 436                unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
 437                unsigned long offset = skb_shinfo(skb)->frags[i].page_offset;
 438
 439                count += xenvif_count_frag_slots(vif, offset, size, &state);
 440        }
 441        return count;
 442}
 443
 444struct netrx_pending_operations {
 445        unsigned copy_prod, copy_cons;
 446        unsigned meta_prod, meta_cons;
 447        struct gnttab_copy *copy;
 448        struct netbk_rx_meta *meta;
 449        int copy_off;
 450        grant_ref_t copy_gref;
 451};
 452
 453static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,
 454                                                struct netrx_pending_operations *npo)
 455{
 456        struct netbk_rx_meta *meta;
 457        struct xen_netif_rx_request *req;
 458
 459        req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
 460
 461        meta = npo->meta + npo->meta_prod++;
 462        meta->gso_size = 0;
 463        meta->size = 0;
 464        meta->id = req->id;
 465
 466        npo->copy_off = 0;
 467        npo->copy_gref = req->gref;
 468
 469        return meta;
 470}
 471
 472/*
 473 * Set up the grant copy operations needed to transfer this fragment
 474 * into the frontend's receive buffers.
 475 */
 476static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 477                                struct netrx_pending_operations *npo,
 478                                struct page *page, unsigned long size,
 479                                unsigned long offset, int *head)
 480{
 481        struct gnttab_copy *copy_gop;
 482        struct netbk_rx_meta *meta;
 483        /*
 484         * These variables are used iff get_page_ext returns true,
 485         * in which case they are guaranteed to be initialized.
 486         */
 487        unsigned int uninitialized_var(group), uninitialized_var(idx);
 488        int foreign = get_page_ext(page, &group, &idx);
 489        unsigned long bytes;
 490
 491        /* Data must not cross a page boundary. */
 492        BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));
 493
 494        meta = npo->meta + npo->meta_prod - 1;
 495
 496        /* Skip unused frames from start of page */
 497        page += offset >> PAGE_SHIFT;
 498        offset &= ~PAGE_MASK;
 499
 500        while (size > 0) {
 501                BUG_ON(offset >= PAGE_SIZE);
 502                BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
 503
 504                bytes = PAGE_SIZE - offset;
 505
 506                if (bytes > size)
 507                        bytes = size;
 508
 509                if (start_new_rx_buffer(npo->copy_off, bytes, *head)) {
 510                        /*
 511                         * Netfront requires there to be some data in the head
 512                         * buffer.
 513                         */
 514                        BUG_ON(*head);
 515
 516                        meta = get_next_rx_buffer(vif, npo);
 517                }
 518
 519                if (npo->copy_off + bytes > MAX_BUFFER_OFFSET)
 520                        bytes = MAX_BUFFER_OFFSET - npo->copy_off;
 521
 522                copy_gop = npo->copy + npo->copy_prod++;
 523                copy_gop->flags = GNTCOPY_dest_gref;
 524                if (foreign) {
 525                        struct xen_netbk *netbk = &xen_netbk[group];
 526                        struct pending_tx_info *src_pend;
 527
 528                        src_pend = &netbk->pending_tx_info[idx];
 529
 530                        copy_gop->source.domid = src_pend->vif->domid;
 531                        copy_gop->source.u.ref = src_pend->req.gref;
 532                        copy_gop->flags |= GNTCOPY_source_gref;
 533                } else {
 534                        void *vaddr = page_address(page);
 535                        copy_gop->source.domid = DOMID_SELF;
 536                        copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
 537                }
 538                copy_gop->source.offset = offset;
 539                copy_gop->dest.domid = vif->domid;
 540
 541                copy_gop->dest.offset = npo->copy_off;
 542                copy_gop->dest.u.ref = npo->copy_gref;
 543                copy_gop->len = bytes;
 544
 545                npo->copy_off += bytes;
 546                meta->size += bytes;
 547
 548                offset += bytes;
 549                size -= bytes;
 550
 551                /* Next frame */
 552                if (offset == PAGE_SIZE && size) {
 553                        BUG_ON(!PageCompound(page));
 554                        page++;
 555                        offset = 0;
 556                }
 557
 558                /* Leave a gap for the GSO descriptor. */
 559                if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
 560                        vif->rx.req_cons++;
 561
 562                *head = 0; /* There must be something in this buffer now. */
 563
 564        }
 565}
 566
 567/*
 568 * Prepare an SKB to be transmitted to the frontend.
 569 *
 570 * This function is responsible for allocating grant operations, meta
 571 * structures, etc.
 572 *
 573 * It returns the number of meta structures consumed. The number of
 574 * ring slots used is always equal to the number of meta slots used
 575 * plus the number of GSO descriptors used. Currently, we use either
 576 * zero GSO descriptors (for non-GSO packets) or one descriptor (for
 577 * frontend-side LRO).
 578 */
 579static int netbk_gop_skb(struct sk_buff *skb,
 580                         struct netrx_pending_operations *npo)
 581{
 582        struct xenvif *vif = netdev_priv(skb->dev);
 583        int nr_frags = skb_shinfo(skb)->nr_frags;
 584        int i;
 585        struct xen_netif_rx_request *req;
 586        struct netbk_rx_meta *meta;
 587        unsigned char *data;
 588        int head = 1;
 589        int old_meta_prod;
 590
 591        old_meta_prod = npo->meta_prod;
 592
 593        /* Set up a GSO prefix descriptor, if necessary */
 594        if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
 595                req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
 596                meta = npo->meta + npo->meta_prod++;
 597                meta->gso_size = skb_shinfo(skb)->gso_size;
 598                meta->size = 0;
 599                meta->id = req->id;
 600        }
 601
 602        req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
 603        meta = npo->meta + npo->meta_prod++;
 604
 605        if (!vif->gso_prefix)
 606                meta->gso_size = skb_shinfo(skb)->gso_size;
 607        else
 608                meta->gso_size = 0;
 609
 610        meta->size = 0;
 611        meta->id = req->id;
 612        npo->copy_off = 0;
 613        npo->copy_gref = req->gref;
 614
 615        data = skb->data;
 616        while (data < skb_tail_pointer(skb)) {
 617                unsigned int offset = offset_in_page(data);
 618                unsigned int len = PAGE_SIZE - offset;
 619
 620                if (data + len > skb_tail_pointer(skb))
 621                        len = skb_tail_pointer(skb) - data;
 622
 623                netbk_gop_frag_copy(vif, skb, npo,
 624                                    virt_to_page(data), len, offset, &head);
 625                data += len;
 626        }
 627
 628        for (i = 0; i < nr_frags; i++) {
 629                netbk_gop_frag_copy(vif, skb, npo,
 630                                    skb_frag_page(&skb_shinfo(skb)->frags[i]),
 631                                    skb_frag_size(&skb_shinfo(skb)->frags[i]),
 632                                    skb_shinfo(skb)->frags[i].page_offset,
 633                                    &head);
 634        }
 635
 636        return npo->meta_prod - old_meta_prod;
 637}
 638
 639/*
 640 * This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
 641 * used to set up the operations on the top of
 642 * netrx_pending_operations, which have since been done.  Check that
 643 * they didn't give any errors and advance over them.
 644 */
 645static int netbk_check_gop(struct xenvif *vif, int nr_meta_slots,
 646                           struct netrx_pending_operations *npo)
 647{
 648        struct gnttab_copy     *copy_op;
 649        int status = XEN_NETIF_RSP_OKAY;
 650        int i;
 651
 652        for (i = 0; i < nr_meta_slots; i++) {
 653                copy_op = npo->copy + npo->copy_cons++;
 654                if (copy_op->status != GNTST_okay) {
 655                        netdev_dbg(vif->dev,
 656                                   "Bad status %d from copy to DOM%d.\n",
 657                                   copy_op->status, vif->domid);
 658                        status = XEN_NETIF_RSP_ERROR;
 659                }
 660        }
 661
 662        return status;
 663}
 664
 665static void netbk_add_frag_responses(struct xenvif *vif, int status,
 666                                     struct netbk_rx_meta *meta,
 667                                     int nr_meta_slots)
 668{
 669        int i;
 670        unsigned long offset;
 671
 672        /* No fragments used */
 673        if (nr_meta_slots <= 1)
 674                return;
 675
 676        nr_meta_slots--;
 677
 678        for (i = 0; i < nr_meta_slots; i++) {
 679                int flags;
 680                if (i == nr_meta_slots - 1)
 681                        flags = 0;
 682                else
 683                        flags = XEN_NETRXF_more_data;
 684
 685                offset = 0;
 686                make_rx_response(vif, meta[i].id, status, offset,
 687                                 meta[i].size, flags);
 688        }
 689}
 690
 691struct skb_cb_overlay {
 692        int meta_slots_used;
 693};
 694
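    /*
     * Drain the rx_queue: build grant copy operations for each queued skb,
     * issue them as a single batch, then fill in the matching responses
     * (including any GSO extra-info slots) on each vif's rx ring and notify
     * the frontends that asked for an event.
     */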
 695static void xen_netbk_rx_action(struct xen_netbk *netbk)
 696{
 697        struct xenvif *vif = NULL, *tmp;
 698        s8 status;
 699        u16 flags;
 700        struct xen_netif_rx_response *resp;
 701        struct sk_buff_head rxq;
 702        struct sk_buff *skb;
 703        LIST_HEAD(notify);
 704        int ret;
 705        int nr_frags;
 706        int count;
 707        unsigned long offset;
 708        struct skb_cb_overlay *sco;
 709
 710        struct netrx_pending_operations npo = {
 711                .copy  = netbk->grant_copy_op,
 712                .meta  = netbk->meta,
 713        };
 714
 715        skb_queue_head_init(&rxq);
 716
 717        count = 0;
 718
 719        while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) {
 720                vif = netdev_priv(skb->dev);
 721                nr_frags = skb_shinfo(skb)->nr_frags;
 722
 723                sco = (struct skb_cb_overlay *)skb->cb;
 724                sco->meta_slots_used = netbk_gop_skb(skb, &npo);
 725
 726                count += nr_frags + 1;
 727
 728                __skb_queue_tail(&rxq, skb);
 729
 730                /* Filled the batch queue? */
 731                /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
 732                if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
 733                        break;
 734        }
 735
 736        BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));
 737
 738        if (!npo.copy_prod)
 739                return;
 740
 741        BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
 742        gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod);
 743
 744        while ((skb = __skb_dequeue(&rxq)) != NULL) {
 745                sco = (struct skb_cb_overlay *)skb->cb;
 746
 747                vif = netdev_priv(skb->dev);
 748
 749                if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
 750                        resp = RING_GET_RESPONSE(&vif->rx,
 751                                                vif->rx.rsp_prod_pvt++);
 752
 753                        resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
 754
 755                        resp->offset = netbk->meta[npo.meta_cons].gso_size;
 756                        resp->id = netbk->meta[npo.meta_cons].id;
 757                        resp->status = sco->meta_slots_used;
 758
 759                        npo.meta_cons++;
 760                        sco->meta_slots_used--;
 761                }
 762
 763
 764                vif->dev->stats.tx_bytes += skb->len;
 765                vif->dev->stats.tx_packets++;
 766
 767                status = netbk_check_gop(vif, sco->meta_slots_used, &npo);
 768
 769                if (sco->meta_slots_used == 1)
 770                        flags = 0;
 771                else
 772                        flags = XEN_NETRXF_more_data;
 773
 774                if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
 775                        flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
 776                else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
 777                        /* remote but checksummed. */
 778                        flags |= XEN_NETRXF_data_validated;
 779
 780                offset = 0;
 781                resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id,
 782                                        status, offset,
 783                                        netbk->meta[npo.meta_cons].size,
 784                                        flags);
 785
 786                if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
 787                        struct xen_netif_extra_info *gso =
 788                                (struct xen_netif_extra_info *)
 789                                RING_GET_RESPONSE(&vif->rx,
 790                                                  vif->rx.rsp_prod_pvt++);
 791
 792                        resp->flags |= XEN_NETRXF_extra_info;
 793
 794                        gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;
 795                        gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
 796                        gso->u.gso.pad = 0;
 797                        gso->u.gso.features = 0;
 798
 799                        gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
 800                        gso->flags = 0;
 801                }
 802
 803                netbk_add_frag_responses(vif, status,
 804                                         netbk->meta + npo.meta_cons + 1,
 805                                         sco->meta_slots_used);
 806
 807                RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
 808
 809                xenvif_notify_tx_completion(vif);
 810
 811                if (ret && list_empty(&vif->notify_list))
 812                        list_add_tail(&vif->notify_list, &notify);
 813                else
 814                        xenvif_put(vif);
 815                npo.meta_cons += sco->meta_slots_used;
 816                dev_kfree_skb(skb);
 817        }
 818
 819        list_for_each_entry_safe(vif, tmp, &notify, notify_list) {
 820                notify_remote_via_irq(vif->irq);
 821                list_del_init(&vif->notify_list);
 822                xenvif_put(vif);
 823        }
 824
 825        /* More work to do? */
 826        if (!skb_queue_empty(&netbk->rx_queue) &&
 827                        !timer_pending(&netbk->net_timer))
 828                xen_netbk_kick_thread(netbk);
 829}
 830
 831void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)
 832{
 833        struct xen_netbk *netbk = vif->netbk;
 834
 835        skb_queue_tail(&netbk->rx_queue, skb);
 836
 837        xen_netbk_kick_thread(netbk);
 838}
 839
 840static void xen_netbk_alarm(unsigned long data)
 841{
 842        struct xen_netbk *netbk = (struct xen_netbk *)data;
 843        xen_netbk_kick_thread(netbk);
 844}
 845
 846static int __on_net_schedule_list(struct xenvif *vif)
 847{
 848        return !list_empty(&vif->schedule_list);
 849}
 850
 851/* Must be called with net_schedule_list_lock held */
 852static void remove_from_net_schedule_list(struct xenvif *vif)
 853{
 854        if (likely(__on_net_schedule_list(vif))) {
 855                list_del_init(&vif->schedule_list);
 856                xenvif_put(vif);
 857        }
 858}
 859
 860static struct xenvif *poll_net_schedule_list(struct xen_netbk *netbk)
 861{
 862        struct xenvif *vif = NULL;
 863
 864        spin_lock_irq(&netbk->net_schedule_list_lock);
 865        if (list_empty(&netbk->net_schedule_list))
 866                goto out;
 867
 868        vif = list_first_entry(&netbk->net_schedule_list,
 869                               struct xenvif, schedule_list);
 870        if (!vif)
 871                goto out;
 872
 873        xenvif_get(vif);
 874
 875        remove_from_net_schedule_list(vif);
 876out:
 877        spin_unlock_irq(&netbk->net_schedule_list_lock);
 878        return vif;
 879}
 880
 881void xen_netbk_schedule_xenvif(struct xenvif *vif)
 882{
 883        unsigned long flags;
 884        struct xen_netbk *netbk = vif->netbk;
 885
 886        if (__on_net_schedule_list(vif))
 887                goto kick;
 888
 889        spin_lock_irqsave(&netbk->net_schedule_list_lock, flags);
 890        if (!__on_net_schedule_list(vif) &&
 891            likely(xenvif_schedulable(vif))) {
 892                list_add_tail(&vif->schedule_list, &netbk->net_schedule_list);
 893                xenvif_get(vif);
 894        }
 895        spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags);
 896
 897kick:
 898        smp_mb();
 899        if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) &&
 900            !list_empty(&netbk->net_schedule_list))
 901                xen_netbk_kick_thread(netbk);
 902}
 903
 904void xen_netbk_deschedule_xenvif(struct xenvif *vif)
 905{
 906        struct xen_netbk *netbk = vif->netbk;
 907        spin_lock_irq(&netbk->net_schedule_list_lock);
 908        remove_from_net_schedule_list(vif);
 909        spin_unlock_irq(&netbk->net_schedule_list_lock);
 910}
 911
 912void xen_netbk_check_rx_xenvif(struct xenvif *vif)
 913{
 914        int more_to_do;
 915
 916        RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
 917
 918        if (more_to_do)
 919                xen_netbk_schedule_xenvif(vif);
 920}
 921
 922static void tx_add_credit(struct xenvif *vif)
 923{
 924        unsigned long max_burst, max_credit;
 925
 926        /*
 927         * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
 928         * Otherwise the interface can seize up due to insufficient credit.
 929         */
 930        max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size;
 931        max_burst = min(max_burst, 131072UL);
 932        max_burst = max(max_burst, vif->credit_bytes);
 933
 934        /* Take care that adding a new chunk of credit doesn't wrap to zero. */
 935        max_credit = vif->remaining_credit + vif->credit_bytes;
 936        if (max_credit < vif->remaining_credit)
 937                max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
 938
 939        vif->remaining_credit = min(max_credit, max_burst);
 940}
 941
 942static void tx_credit_callback(unsigned long data)
 943{
 944        struct xenvif *vif = (struct xenvif *)data;
 945        tx_add_credit(vif);
 946        xen_netbk_check_rx_xenvif(vif);
 947}
 948
 949static void netbk_tx_err(struct xenvif *vif,
 950                         struct xen_netif_tx_request *txp, RING_IDX end)
 951{
 952        RING_IDX cons = vif->tx.req_cons;
 953
 954        do {
 955                make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
 956                if (cons == end)
 957                        break;
 958                txp = RING_GET_REQUEST(&vif->tx, cons++);
 959        } while (1);
 960        vif->tx.req_cons = cons;
 961        xen_netbk_check_rx_xenvif(vif);
 962        xenvif_put(vif);
 963}
 964
 965static void netbk_fatal_tx_err(struct xenvif *vif)
 966{
 967        netdev_err(vif->dev, "fatal error; disabling device\n");
 968        xenvif_carrier_off(vif);
 969        xenvif_put(vif);
 970}
 971
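    /*
     * Count (and copy out) the slots that make up one packet, following the
     * XEN_NETTXF_more_data chain from @first into @txp.  Returns the number
     * of extra slots consumed, or a negative error.  Guests exceeding
     * fatal_skb_slots are treated as malicious; packets using more than
     * XEN_NETBK_LEGACY_SLOTS_MAX slots are merely dropped.
     */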
 972static int netbk_count_requests(struct xenvif *vif,
 973                                struct xen_netif_tx_request *first,
 974                                struct xen_netif_tx_request *txp,
 975                                int work_to_do)
 976{
 977        RING_IDX cons = vif->tx.req_cons;
 978        int slots = 0;
 979        int drop_err = 0;
 980        int more_data;
 981
 982        if (!(first->flags & XEN_NETTXF_more_data))
 983                return 0;
 984
 985        do {
 986                struct xen_netif_tx_request dropped_tx = { 0 };
 987
 988                if (slots >= work_to_do) {
 989                        netdev_err(vif->dev,
 990                                   "Asked for %d slots but exceeds this limit\n",
 991                                   work_to_do);
 992                        netbk_fatal_tx_err(vif);
 993                        return -ENODATA;
 994                }
 995
 996                /* This guest is really using too many slots and is
 997                 * considered malicious.
 998                 */
 999                if (unlikely(slots >= fatal_skb_slots)) {
1000                        netdev_err(vif->dev,
1001                                   "Malicious frontend using %d slots, threshold %u\n",
1002                                   slots, fatal_skb_slots);
1003                        netbk_fatal_tx_err(vif);
1004                        return -E2BIG;
1005                }
1006
1007                /* The Xen network protocol had an implicit dependency on
1008                 * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to
1009                 * the historical MAX_SKB_FRAGS value 18 to honor the
1010                 * same behavior as before. Any packet using more than
1011                 * 18 slots but fewer than fatal_skb_slots slots is
1012                 * dropped.
1013                 */
1014                if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) {
1015                        if (net_ratelimit())
1016                                netdev_dbg(vif->dev,
1017                                           "Too many slots (%d) exceeding limit (%d), dropping packet\n",
1018                                           slots, XEN_NETBK_LEGACY_SLOTS_MAX);
1019                        drop_err = -E2BIG;
1020                }
1021
1022                if (drop_err)
1023                        txp = &dropped_tx;
1024
1025                memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
1026                       sizeof(*txp));
1027
1028                /* If the guest submitted a frame >= 64 KiB then
1029                 * first->size overflowed and following slots will
1030                 * appear to be larger than the frame.
1031                 *
1032                 * This cannot be a fatal error as there are buggy
1033                 * frontends that do this.
1034                 *
1035                 * Consume all slots and drop the packet.
1036                 */
1037                if (!drop_err && txp->size > first->size) {
1038                        if (net_ratelimit())
1039                                netdev_dbg(vif->dev,
1040                                           "Invalid tx request, slot size %u > remaining size %u\n",
1041                                           txp->size, first->size);
1042                        drop_err = -EIO;
1043                }
1044
1045                first->size -= txp->size;
1046                slots++;
1047
1048                if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
1049                        netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
1050                                 txp->offset, txp->size);
1051                        netbk_fatal_tx_err(vif);
1052                        return -EINVAL;
1053                }
1054
1055                more_data = txp->flags & XEN_NETTXF_more_data;
1056
1057                if (!drop_err)
1058                        txp++;
1059
1060        } while (more_data);
1061
1062        if (drop_err) {
1063                netbk_tx_err(vif, first, cons + slots);
1064                return drop_err;
1065        }
1066
1067        return slots;
1068}
1069
1070static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
1071                                         u16 pending_idx)
1072{
1073        struct page *page;
1074        page = alloc_page(GFP_KERNEL|__GFP_COLD);
1075        if (!page)
1076                return NULL;
1077        set_page_ext(page, netbk, pending_idx);
1078        netbk->mmap_pages[pending_idx] = page;
1079        return page;
1080}
1081
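    /*
     * Build the grant copy operations for the frag slots of @skb, coalescing
     * consecutive small tx requests into shared backend pages.  Returns a
     * pointer just past the last copy op used, or NULL if a page allocation
     * failed (after releasing the already-consumed slots with an error).
     */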
1082static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
1083                                                  struct xenvif *vif,
1084                                                  struct sk_buff *skb,
1085                                                  struct xen_netif_tx_request *txp,
1086                                                  struct gnttab_copy *gop)
1087{
1088        struct skb_shared_info *shinfo = skb_shinfo(skb);
1089        skb_frag_t *frags = shinfo->frags;
1090        u16 pending_idx = *((u16 *)skb->data);
1091        u16 head_idx = 0;
1092        int slot, start;
1093        struct page *page;
1094        pending_ring_idx_t index, start_idx = 0;
1095        uint16_t dst_offset;
1096        unsigned int nr_slots;
1097        struct pending_tx_info *first = NULL;
1098
1099        /* At this point shinfo->nr_frags is in fact the number of
1100         * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
1101         */
1102        nr_slots = shinfo->nr_frags;
1103
1104        /* Skip first skb fragment if it is on same page as header fragment. */
1105        start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
1106
1107        /* Coalesce tx requests, at this point the packet passed in
1108         * should be <= 64K. Any packets larger than 64K have been
1109         * handled in netbk_count_requests().
1110         */
1111        for (shinfo->nr_frags = slot = start; slot < nr_slots;
1112             shinfo->nr_frags++) {
1113                struct pending_tx_info *pending_tx_info =
1114                        netbk->pending_tx_info;
1115
1116                page = alloc_page(GFP_KERNEL|__GFP_COLD);
1117                if (!page)
1118                        goto err;
1119
1120                dst_offset = 0;
1121                first = NULL;
1122                while (dst_offset < PAGE_SIZE && slot < nr_slots) {
1123                        gop->flags = GNTCOPY_source_gref;
1124
1125                        gop->source.u.ref = txp->gref;
1126                        gop->source.domid = vif->domid;
1127                        gop->source.offset = txp->offset;
1128
1129                        gop->dest.domid = DOMID_SELF;
1130
1131                        gop->dest.offset = dst_offset;
1132                        gop->dest.u.gmfn = virt_to_mfn(page_address(page));
1133
1134                        if (dst_offset + txp->size > PAGE_SIZE) {
1135                                /* This page can only merge a portion
1136                                 * of the tx request. Do not increment any
1137                                 * pointer / counter here. The txp
1138                                 * will be dealt with in future
1139                                 * rounds, eventually hitting the
1140                                 * `else` branch.
1141                                 */
1142                                gop->len = PAGE_SIZE - dst_offset;
1143                                txp->offset += gop->len;
1144                                txp->size -= gop->len;
1145                                dst_offset += gop->len; /* quit loop */
1146                        } else {
1147                                /* This tx request can be merged in the page */
1148                                gop->len = txp->size;
1149                                dst_offset += gop->len;
1150
1151                                index = pending_index(netbk->pending_cons++);
1152
1153                                pending_idx = netbk->pending_ring[index];
1154
1155                                memcpy(&pending_tx_info[pending_idx].req, txp,
1156                                       sizeof(*txp));
1157                                xenvif_get(vif);
1158
1159                                pending_tx_info[pending_idx].vif = vif;
1160
1161                                /* Poison these fields; the corresponding
1162                                 * fields for the head tx req will be set
1163                                 * to correct values after the loop.
1164                                 */
1165                                netbk->mmap_pages[pending_idx] = (void *)(~0UL);
1166                                pending_tx_info[pending_idx].head =
1167                                        INVALID_PENDING_RING_IDX;
1168
1169                                if (!first) {
1170                                        first = &pending_tx_info[pending_idx];
1171                                        start_idx = index;
1172                                        head_idx = pending_idx;
1173                                }
1174
1175                                txp++;
1176                                slot++;
1177                        }
1178
1179                        gop++;
1180                }
1181
1182                first->req.offset = 0;
1183                first->req.size = dst_offset;
1184                first->head = start_idx;
1185                set_page_ext(page, netbk, head_idx);
1186                netbk->mmap_pages[head_idx] = page;
1187                frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
1188        }
1189
1190        BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);
1191
1192        return gop;
1193err:
1194        /* Unwind, freeing all pages and sending error responses. */
1195        while (shinfo->nr_frags-- > start) {
1196                xen_netbk_idx_release(netbk,
1197                                frag_get_pending_idx(&frags[shinfo->nr_frags]),
1198                                XEN_NETIF_RSP_ERROR);
1199        }
1200        /* The head too, if necessary. */
1201        if (start)
1202                xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);
1203
1204        return NULL;
1205}
1206
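    /*
     * Check the results of the grant copies for one skb.  A failed slot gets
     * an error response; once any slot has failed, the header and the other
     * slots of the packet are released as well.  Returns 0 on success or the
     * first non-zero copy status.
     */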
1207static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
1208                                  struct sk_buff *skb,
1209                                  struct gnttab_copy **gopp)
1210{
1211        struct gnttab_copy *gop = *gopp;
1212        u16 pending_idx = *((u16 *)skb->data);
1213        struct skb_shared_info *shinfo = skb_shinfo(skb);
1214        struct pending_tx_info *tx_info;
1215        int nr_frags = shinfo->nr_frags;
1216        int i, err, start;
1217        u16 peek; /* peek into next tx request */
1218
1219        /* Check status of header. */
1220        err = gop->status;
1221        if (unlikely(err))
1222                xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);
1223
1224        /* Skip first skb fragment if it is on same page as header fragment. */
1225        start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
1226
1227        for (i = start; i < nr_frags; i++) {
1228                int j, newerr;
1229                pending_ring_idx_t head;
1230
1231                pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
1232                tx_info = &netbk->pending_tx_info[pending_idx];
1233                head = tx_info->head;
1234
1235                /* Check error status: if okay then remember grant handle. */
1236                do {
1237                        newerr = (++gop)->status;
1238                        if (newerr)
1239                                break;
1240                        peek = netbk->pending_ring[pending_index(++head)];
1241                } while (!pending_tx_is_head(netbk, peek));
1242
1243                if (likely(!newerr)) {
1244                        /* Had a previous error? Invalidate this fragment. */
1245                        if (unlikely(err))
1246                                xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
1247                        continue;
1248                }
1249
1250                /* Error on this fragment: respond to client with an error. */
1251                xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);
1252
1253                /* Not the first error? Preceding frags already invalidated. */
1254                if (err)
1255                        continue;
1256
1257                /* First error: invalidate header and preceding fragments. */
1258                pending_idx = *((u16 *)skb->data);
1259                xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
1260                for (j = start; j < i; j++) {
1261                        pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
1262                        xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
1263                }
1264
1265                /* Remember the error: invalidate all subsequent fragments. */
1266                err = newerr;
1267        }
1268
1269        *gopp = gop + 1;
1270        return err;
1271}
1272
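    /*
     * Replace the pending_idx placeholders in the skb's frags with the real
     * backend pages and the offset/size recorded in the coalesced tx
     * requests, taking an extra page reference for the lifetime of the skb.
     */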
1273static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb)
1274{
1275        struct skb_shared_info *shinfo = skb_shinfo(skb);
1276        int nr_frags = shinfo->nr_frags;
1277        int i;
1278
1279        for (i = 0; i < nr_frags; i++) {
1280                skb_frag_t *frag = shinfo->frags + i;
1281                struct xen_netif_tx_request *txp;
1282                struct page *page;
1283                u16 pending_idx;
1284
1285                pending_idx = frag_get_pending_idx(frag);
1286
1287                txp = &netbk->pending_tx_info[pending_idx].req;
1288                page = virt_to_page(idx_to_kaddr(netbk, pending_idx));
1289                __skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
1290                skb->len += txp->size;
1291                skb->data_len += txp->size;
1292                skb->truesize += txp->size;
1293
1294                /* Take an extra reference to offset xen_netbk_idx_release */
1295                get_page(netbk->mmap_pages[pending_idx]);
1296                xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
1297        }
1298}
1299
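    /*
     * Consume the chain of extra-info slots that follows a tx request,
     * storing them by type in @extras.  Returns the remaining work_to_do,
     * or a negative error on a malformed chain (fatal for the vif).
     */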
1300static int xen_netbk_get_extras(struct xenvif *vif,
1301                                struct xen_netif_extra_info *extras,
1302                                int work_to_do)
1303{
1304        struct xen_netif_extra_info extra;
1305        RING_IDX cons = vif->tx.req_cons;
1306
1307        do {
1308                if (unlikely(work_to_do-- <= 0)) {
1309                        netdev_err(vif->dev, "Missing extra info\n");
1310                        netbk_fatal_tx_err(vif);
1311                        return -EBADR;
1312                }
1313
1314                memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
1315                       sizeof(extra));
1316                if (unlikely(!extra.type ||
1317                             extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1318                        vif->tx.req_cons = ++cons;
1319                        netdev_err(vif->dev,
1320                                   "Invalid extra type: %d\n", extra.type);
1321                        netbk_fatal_tx_err(vif);
1322                        return -EINVAL;
1323                }
1324
1325                memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
1326                vif->tx.req_cons = ++cons;
1327        } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
1328
1329        return work_to_do;
1330}
1331
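    /*
     * Apply a GSO extra-info slot to the skb, validating its type and size;
     * only TCPv4 segmentation offload is accepted.
     */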
1332static int netbk_set_skb_gso(struct xenvif *vif,
1333                             struct sk_buff *skb,
1334                             struct xen_netif_extra_info *gso)
1335{
1336        if (!gso->u.gso.size) {
1337                netdev_err(vif->dev, "GSO size must not be zero.\n");
1338                netbk_fatal_tx_err(vif);
1339                return -EINVAL;
1340        }
1341
1342        /* Currently only TCPv4 segmentation offload is supported. */
1343        if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
1344                netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
1345                netbk_fatal_tx_err(vif);
1346                return -EINVAL;
1347        }
1348
1349        skb_shinfo(skb)->gso_size = gso->u.gso.size;
1350        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1351
1352        /* Header must be checked, and gso_segs computed. */
1353        skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1354        skb_shinfo(skb)->gso_segs = 0;
1355
1356        return 0;
1357}
1358
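    /*
     * Set up the partial checksum offsets (and, for buggy GSO frontends,
     * recompute the pseudo-header checksum) for IPv4 TCP/UDP packets.
     * Anything else that claims CHECKSUM_PARTIAL is rejected.
     */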
1359static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
1360{
1361        struct iphdr *iph;
1362        int err = -EPROTO;
1363        int recalculate_partial_csum = 0;
1364
1365        /*
1366         * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
1367         * peers can fail to set NETRXF_csum_blank when sending a GSO
1368         * frame. In this case force the SKB to CHECKSUM_PARTIAL and
1369         * recalculate the partial checksum.
1370         */
1371        if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
1372                vif->rx_gso_checksum_fixup++;
1373                skb->ip_summed = CHECKSUM_PARTIAL;
1374                recalculate_partial_csum = 1;
1375        }
1376
1377        /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
1378        if (skb->ip_summed != CHECKSUM_PARTIAL)
1379                return 0;
1380
1381        if (skb->protocol != htons(ETH_P_IP))
1382                goto out;
1383
1384        iph = (void *)skb->data;
1385        switch (iph->protocol) {
1386        case IPPROTO_TCP:
1387                if (!skb_partial_csum_set(skb, 4 * iph->ihl,
1388                                          offsetof(struct tcphdr, check)))
1389                        goto out;
1390
1391                if (recalculate_partial_csum) {
1392                        struct tcphdr *tcph = tcp_hdr(skb);
1393                        tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
1394                                                         skb->len - iph->ihl*4,
1395                                                         IPPROTO_TCP, 0);
1396                }
1397                break;
1398        case IPPROTO_UDP:
1399                if (!skb_partial_csum_set(skb, 4 * iph->ihl,
1400                                          offsetof(struct udphdr, check)))
1401                        goto out;
1402
1403                if (recalculate_partial_csum) {
1404                        struct udphdr *udph = udp_hdr(skb);
1405                        udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
1406                                                         skb->len - iph->ihl*4,
1407                                                         IPPROTO_UDP, 0);
1408                }
1409                break;
1410        default:
1411                if (net_ratelimit())
1412                        netdev_err(vif->dev,
1413                                   "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
1414                                   iph->protocol);
1415                goto out;
1416        }
1417
1418        err = 0;
1419
1420out:
1421        return err;
1422}
1423
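    /*
     * Return true if transmitting @size bytes now would exceed the vif's
     * remaining credit.  Credit is replenished once per credit window; if
     * the packet still does not fit, the credit timer is armed to retry
     * when the next window opens.
     */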
1424static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
1425{
1426        u64 now = get_jiffies_64();
1427        u64 next_credit = vif->credit_window_start +
1428                msecs_to_jiffies(vif->credit_usec / 1000);
1429
1430        /* Timer could already be pending in rare cases. */
1431        if (timer_pending(&vif->credit_timeout))
1432                return true;
1433
1434        /* Passed the point where we can replenish credit? */
1435        if (time_after_eq64(now, next_credit)) {
1436                vif->credit_window_start = now;
1437                tx_add_credit(vif);
1438        }
1439
1440        /* Still too big to send right now? Set a callback. */
1441        if (size > vif->remaining_credit) {
1442                vif->credit_timeout.data     =
1443                        (unsigned long)vif;
1444                vif->credit_timeout.function =
1445                        tx_credit_callback;
1446                mod_timer(&vif->credit_timeout,
1447                          next_credit);
1448                vif->credit_window_start = next_credit;
1449
1450                return true;
1451        }
1452
1453        return false;
1454}
1455
1456static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
1457{
1458        struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
1459        struct sk_buff *skb;
1460        int ret;
1461
1462        while ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX
1463                < MAX_PENDING_REQS) &&
1464                !list_empty(&netbk->net_schedule_list)) {
1465                struct xenvif *vif;
1466                struct xen_netif_tx_request txreq;
1467                struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
1468                struct page *page;
1469                struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
1470                u16 pending_idx;
1471                RING_IDX idx;
1472                int work_to_do;
1473                unsigned int data_len;
1474                pending_ring_idx_t index;
1475
1476                /* Get a netif from the list with work to do. */
1477                vif = poll_net_schedule_list(netbk);
1478                /* This can sometimes happen because the test of
1479                 * list_empty(net_schedule_list) at the top of the
1480                 * loop is unlocked.  Just go back and have another
1481                 * look.
1482                 */
1483                if (!vif)
1484                        continue;
1485
1486                if (vif->tx.sring->req_prod - vif->tx.req_cons >
1487                    XEN_NETIF_TX_RING_SIZE) {
1488                        netdev_err(vif->dev,
1489                                   "Impossible number of requests. "
1490                                   "req_prod %d, req_cons %d, size %ld\n",
1491                                   vif->tx.sring->req_prod, vif->tx.req_cons,
1492                                   XEN_NETIF_TX_RING_SIZE);
1493                        netbk_fatal_tx_err(vif);
1494                        continue;
1495                }
1496
1497                RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do);
1498                if (!work_to_do) {
1499                        xenvif_put(vif);
1500                        continue;
1501                }
1502
1503                idx = vif->tx.req_cons;
1504                rmb(); /* Ensure that we see the request before we copy it. */
1505                memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));
1506
1507                /* Credit-based scheduling. */
1508                if (txreq.size > vif->remaining_credit &&
1509                    tx_credit_exceeded(vif, txreq.size)) {
1510                        xenvif_put(vif);
1511                        continue;
1512                }
1513
1514                vif->remaining_credit -= txreq.size;
1515
1516                work_to_do--;
1517                vif->tx.req_cons = ++idx;
1518
1519                memset(extras, 0, sizeof(extras));
1520                if (txreq.flags & XEN_NETTXF_extra_info) {
1521                        work_to_do = xen_netbk_get_extras(vif, extras,
1522                                                          work_to_do);
1523                        idx = vif->tx.req_cons;
1524                        if (unlikely(work_to_do < 0))
1525                                continue;
1526                }
1527
1528                ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
1529                if (unlikely(ret < 0))
1530                        continue;
1531
1532                idx += ret;
1533
1534                if (unlikely(txreq.size < ETH_HLEN)) {
1535                        netdev_dbg(vif->dev,
1536                                   "Bad packet size: %d\n", txreq.size);
1537                        netbk_tx_err(vif, &txreq, idx);
1538                        continue;
1539                }
1540
1541                /* No crossing a page as the payload mustn't fragment. */
1542                if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
1543                        netdev_err(vif->dev,
1544                                   "txreq.offset: %x, size: %u, end: %lu\n",
1545                                   txreq.offset, txreq.size,
1546                                   (txreq.offset&~PAGE_MASK) + txreq.size);
1547                        netbk_fatal_tx_err(vif);
1548                        continue;
1549                }
1550
1551                index = pending_index(netbk->pending_cons);
1552                pending_idx = netbk->pending_ring[index];
1553
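                    /*
                     * Only the protocol headers (at most PKT_PROT_LEN
                     * bytes) of the first slot end up in the skb's
                     * linear area; whatever is left of that slot is
                     * attached as frag[0] further down.  A packet that
                     * already uses the maximum number of slots has its
                     * first slot copied linearly in full so that no
                     * extra frag is required.
                     */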
1554                data_len = (txreq.size > PKT_PROT_LEN &&
1555                            ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
1556                        PKT_PROT_LEN : txreq.size;
1557
1558                skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
1559                                GFP_ATOMIC | __GFP_NOWARN);
1560                if (unlikely(skb == NULL)) {
1561                        netdev_dbg(vif->dev,
1562                                   "Can't allocate a skb in xen_netbk_tx_build_gops.\n");
1563                        netbk_tx_err(vif, &txreq, idx);
1564                        break;
1565                }
1566
1567                /* Packets passed to netif_rx() must have some headroom. */
1568                skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
1569
1570                if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1571                        struct xen_netif_extra_info *gso;
1572                        gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1573
1574                        if (netbk_set_skb_gso(vif, skb, gso)) {
1575                                /* Failure in netbk_set_skb_gso is fatal. */
1576                                kfree_skb(skb);
1577                                continue;
1578                        }
1579                }
1580
1581                /* XXX could copy straight to head */
1582                page = xen_netbk_alloc_page(netbk, pending_idx);
1583                if (!page) {
1584                        kfree_skb(skb);
1585                        netbk_tx_err(vif, &txreq, idx);
1586                        continue;
1587                }
1588
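                    /*
                     * Grant-copy the head of the packet: the source is
                     * the frontend page named by grant reference
                     * txreq.gref in domain vif->domid, the destination
                     * is the backend page just allocated, named by its
                     * frame number in DOMID_SELF.  GNTCOPY_source_gref
                     * tells the hypervisor that only the source side
                     * is given as a grant reference.
                     */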
1589                gop->source.u.ref = txreq.gref;
1590                gop->source.domid = vif->domid;
1591                gop->source.offset = txreq.offset;
1592
1593                gop->dest.u.gmfn = virt_to_mfn(page_address(page));
1594                gop->dest.domid = DOMID_SELF;
1595                gop->dest.offset = txreq.offset;
1596
1597                gop->len = txreq.size;
1598                gop->flags = GNTCOPY_source_gref;
1599
1600                gop++;
1601
1602                memcpy(&netbk->pending_tx_info[pending_idx].req,
1603                       &txreq, sizeof(txreq));
1604                netbk->pending_tx_info[pending_idx].vif = vif;
1605                netbk->pending_tx_info[pending_idx].head = index;
1606                *((u16 *)skb->data) = pending_idx;
1607
1608                __skb_put(skb, data_len);
1609
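                    /*
                     * 'ret' is the number of slots the packet occupies
                     * beyond the first one.  If data_len did not cover
                     * the whole first slot, its remainder becomes one
                     * extra frag and frag[0] points back at this
                     * pending slot; the rest of the frags are filled
                     * in from txfrags by xen_netbk_get_requests()
                     * below.
                     */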
1610                skb_shinfo(skb)->nr_frags = ret;
1611                if (data_len < txreq.size) {
1612                        skb_shinfo(skb)->nr_frags++;
1613                        frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
1614                                             pending_idx);
1615                } else {
1616                        frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
1617                                             INVALID_PENDING_IDX);
1618                }
1619
1620                netbk->pending_cons++;
1621
1622                request_gop = xen_netbk_get_requests(netbk, vif,
1623                                                     skb, txfrags, gop);
1624                if (request_gop == NULL) {
1625                        kfree_skb(skb);
1626                        netbk_tx_err(vif, &txreq, idx);
1627                        continue;
1628                }
1629                gop = request_gop;
1630
1631                __skb_queue_tail(&netbk->tx_queue, skb);
1632
1633                vif->tx.req_cons = idx;
1634                xen_netbk_check_rx_xenvif(vif);
1635
1636                if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))
1637                        break;
1638        }
1639
1640        return gop - netbk->tx_copy_ops;
1641}
1642
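    /*
     * Second half of the guest-TX path, run after the grant copies
     * built above have been carried out.  For each queued skb: check
     * the per-operation status, copy the protocol headers into the
     * linear area, turn any slot remainder into frag state, propagate
     * the frontend's checksum flags, attach the copied pages as frags
     * and hand the packet to the network stack.  From the host's
     * point of view guest transmit is receive traffic, hence the rx_
     * statistics below.
     */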
1643static void xen_netbk_tx_submit(struct xen_netbk *netbk)
1644{
1645        struct gnttab_copy *gop = netbk->tx_copy_ops;
1646        struct sk_buff *skb;
1647
1648        while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {
1649                struct xen_netif_tx_request *txp;
1650                struct xenvif *vif;
1651                u16 pending_idx;
1652                unsigned data_len;
1653
1654                pending_idx = *((u16 *)skb->data);
1655                vif = netbk->pending_tx_info[pending_idx].vif;
1656                txp = &netbk->pending_tx_info[pending_idx].req;
1657
1658                /* Check the remap error code. */
1659                if (unlikely(xen_netbk_tx_check_gop(netbk, skb, &gop))) {
1660                        netdev_dbg(vif->dev, "netback grant failed.\n");
1661                        skb_shinfo(skb)->nr_frags = 0;
1662                        kfree_skb(skb);
1663                        continue;
1664                }
1665
1666                data_len = skb->len;
1667                memcpy(skb->data,
1668                       (void *)(idx_to_kaddr(netbk, pending_idx)|txp->offset),
1669                       data_len);
1670                if (data_len < txp->size) {
1671                        /* Append the packet payload as a fragment. */
1672                        txp->offset += data_len;
1673                        txp->size -= data_len;
1674                } else {
1675                        /* Schedule a response immediately. */
1676                        xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
1677                }
1678
1679                if (txp->flags & XEN_NETTXF_csum_blank)
1680                        skb->ip_summed = CHECKSUM_PARTIAL;
1681                else if (txp->flags & XEN_NETTXF_data_validated)
1682                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1683
1684                xen_netbk_fill_frags(netbk, skb);
1685
1686                /*
1687                 * If the initial fragment was < PKT_PROT_LEN then
1688                 * pull through some bytes from the other fragments to
1689                 * increase the linear region to PKT_PROT_LEN bytes.
1690                 */
1691                if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
1692                        int target = min_t(int, skb->len, PKT_PROT_LEN);
1693                        __pskb_pull_tail(skb, target - skb_headlen(skb));
1694                }
1695
1696                skb->dev      = vif->dev;
1697                skb->protocol = eth_type_trans(skb, skb->dev);
1698                skb_reset_network_header(skb);
1699
1700                if (checksum_setup(vif, skb)) {
1701                        netdev_dbg(vif->dev,
1702                                   "Can't set up checksum in xen_netbk_tx_submit\n");
1703                        kfree_skb(skb);
1704                        continue;
1705                }
1706
1707                skb_probe_transport_header(skb, 0);
1708
1709                vif->dev->stats.rx_bytes += skb->len;
1710                vif->dev->stats.rx_packets++;
1711
1712                xenvif_receive_skb(vif, skb);
1713        }
1714}
1715
1716/* Called after netfront has transmitted */
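    /*
     * Two-phase design: first build a batch of grant-copy operations
     * covering every packet that can currently be accepted, then
     * issue the whole batch with one gnttab_batch_copy() call, and
     * only afterwards walk the copied packets and feed them to the
     * stack.  Batching keeps the hypercall overhead per packet low.
     */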
1717static void xen_netbk_tx_action(struct xen_netbk *netbk)
1718{
1719        unsigned nr_gops;
1720
1721        nr_gops = xen_netbk_tx_build_gops(netbk);
1722
1723        if (nr_gops == 0)
1724                return;
1725
1726        gnttab_batch_copy(netbk->tx_copy_ops, nr_gops);
1727
1728        xen_netbk_tx_submit(netbk);
1729}
1730
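    /*
     * Complete a pending slot once its data has been copied (or the
     * copy has failed).  Several consecutive frontend requests may
     * have been coalesced into this one slot, so the run of entries
     * starting at 'head' in the pending ring is walked: each original
     * request gets a TX response carrying 'status' and its entry is
     * returned to the free part of the ring by advancing
     * pending_prod.  The walk stops at the next head entry, after
     * which the page backing pending_idx is released.
     */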
1731static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
1732                                  u8 status)
1733{
1734        struct xenvif *vif;
1735        struct pending_tx_info *pending_tx_info;
1736        pending_ring_idx_t head;
1737        u16 peek; /* peek into next tx request */
1738
1739        BUG_ON(netbk->mmap_pages[pending_idx] == (void *)(~0UL));
1740
1741        /* Already complete? */
1742        if (netbk->mmap_pages[pending_idx] == NULL)
1743                return;
1744
1745        pending_tx_info = &netbk->pending_tx_info[pending_idx];
1746
1747        vif = pending_tx_info->vif;
1748        head = pending_tx_info->head;
1749
1750        BUG_ON(!pending_tx_is_head(netbk, head));
1751        BUG_ON(netbk->pending_ring[pending_index(head)] != pending_idx);
1752
1753        do {
1754                pending_ring_idx_t index;
1755                pending_ring_idx_t idx = pending_index(head);
1756                u16 info_idx = netbk->pending_ring[idx];
1757
1758                pending_tx_info = &netbk->pending_tx_info[info_idx];
1759                make_tx_response(vif, &pending_tx_info->req, status);
1760
1761                /* Setting any number other than
1762                 * INVALID_PENDING_RING_IDX indicates this slot is
1763                 * starting a new packet / ending a previous packet.
1764                 */
1765                pending_tx_info->head = 0;
1766
1767                index = pending_index(netbk->pending_prod++);
1768                netbk->pending_ring[index] = netbk->pending_ring[info_idx];
1769
1770                xenvif_put(vif);
1771
1772                peek = netbk->pending_ring[pending_index(++head)];
1773
1774        } while (!pending_tx_is_head(netbk, peek));
1775
1776        netbk->mmap_pages[pending_idx]->mapping = 0;
1777        put_page(netbk->mmap_pages[pending_idx]);
1778        netbk->mmap_pages[pending_idx] = NULL;
1779}
1780
1781
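    /*
     * Queue a response on the vif's TX ring.  A request that carried
     * an extra-info segment consumed two ring slots, so a dummy
     * XEN_NETIF_RSP_NULL response is written into the second slot to
     * keep producers and consumers in step.  The frontend is notified
     * over the event channel only if it asked to be.
     */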
1782static void make_tx_response(struct xenvif *vif,
1783                             struct xen_netif_tx_request *txp,
1784                             s8       st)
1785{
1786        RING_IDX i = vif->tx.rsp_prod_pvt;
1787        struct xen_netif_tx_response *resp;
1788        int notify;
1789
1790        resp = RING_GET_RESPONSE(&vif->tx, i);
1791        resp->id     = txp->id;
1792        resp->status = st;
1793
1794        if (txp->flags & XEN_NETTXF_extra_info)
1795                RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
1796
1797        vif->tx.rsp_prod_pvt = ++i;
1798        RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
1799        if (notify)
1800                notify_remote_via_irq(vif->irq);
1801}
1802
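    /*
     * Queue a response on the vif's RX ring.  The status field
     * doubles as the byte count on success and as the (negative)
     * error code when st < 0; pushing the responses and notifying the
     * frontend is left to the caller.
     */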
1803static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
1804                                             u16      id,
1805                                             s8       st,
1806                                             u16      offset,
1807                                             u16      size,
1808                                             u16      flags)
1809{
1810        RING_IDX i = vif->rx.rsp_prod_pvt;
1811        struct xen_netif_rx_response *resp;
1812
1813        resp = RING_GET_RESPONSE(&vif->rx, i);
1814        resp->offset     = offset;
1815        resp->flags      = flags;
1816        resp->id         = id;
1817        resp->status     = (s16)size;
1818        if (st < 0)
1819                resp->status = (s16)st;
1820
1821        vif->rx.rsp_prod_pvt = ++i;
1822
1823        return resp;
1824}
1825
1826static inline int rx_work_todo(struct xen_netbk *netbk)
1827{
1828        return !skb_queue_empty(&netbk->rx_queue);
1829}
1830
1831static inline int tx_work_todo(struct xen_netbk *netbk)
1832{
1833
1834        if ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX
1835             < MAX_PENDING_REQS) &&
1836             !list_empty(&netbk->net_schedule_list))
1837                return 1;
1838
1839        return 0;
1840}
1841
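    /*
     * Main loop of one netback group: sleep until there is RX or TX
     * work to do (or the thread is told to stop), then service the
     * guest-receive path before the guest-transmit path.  One such
     * thread exists per group and is bound to its CPU in
     * netback_init().
     */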
1842static int xen_netbk_kthread(void *data)
1843{
1844        struct xen_netbk *netbk = data;
1845        while (!kthread_should_stop()) {
1846                wait_event_interruptible(netbk->wq,
1847                                rx_work_todo(netbk) ||
1848                                tx_work_todo(netbk) ||
1849                                kthread_should_stop());
1850                cond_resched();
1851
1852                if (kthread_should_stop())
1853                        break;
1854
1855                if (rx_work_todo(netbk))
1856                        xen_netbk_rx_action(netbk);
1857
1858                if (tx_work_todo(netbk))
1859                        xen_netbk_tx_action(netbk);
1860        }
1861
1862        return 0;
1863}
1864
1865void xen_netbk_unmap_frontend_rings(struct xenvif *vif)
1866{
1867        if (vif->tx.sring)
1868                xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
1869                                        vif->tx.sring);
1870        if (vif->rx.sring)
1871                xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
1872                                        vif->rx.sring);
1873}
1874
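    /*
     * Map the two shared ring pages granted by the frontend,
     * identified by tx_ring_ref and rx_ring_ref, into the backend's
     * address space and initialise the backend halves of the rings.
     * If either mapping fails, anything already mapped is torn down
     * again via xen_netbk_unmap_frontend_rings().
     */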
1875int xen_netbk_map_frontend_rings(struct xenvif *vif,
1876                                 grant_ref_t tx_ring_ref,
1877                                 grant_ref_t rx_ring_ref)
1878{
1879        void *addr;
1880        struct xen_netif_tx_sring *txs;
1881        struct xen_netif_rx_sring *rxs;
1882
1883        int err = -ENOMEM;
1884
1885        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
1886                                     tx_ring_ref, &addr);
1887        if (err)
1888                goto err;
1889
1890        txs = (struct xen_netif_tx_sring *)addr;
1891        BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
1892
1893        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
1894                                     rx_ring_ref, &addr);
1895        if (err)
1896                goto err;
1897
1898        rxs = (struct xen_netif_rx_sring *)addr;
1899        BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
1900
1901        vif->rx_req_cons_peek = 0;
1902
1903        return 0;
1904
1905err:
1906        xen_netbk_unmap_frontend_rings(vif);
1907        return err;
1908}
1909
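    /*
     * Module initialisation: refuse to load outside a Xen domain,
     * clamp fatal_skb_slots to at least XEN_NETBK_LEGACY_SLOTS_MAX,
     * then create one xen_netbk group per online CPU.  Each group
     * gets its own timer, pending ring and kernel thread, bound to
     * the corresponding CPU, before the xenbus backend is registered.
     * On failure all groups created so far are unwound.
     */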
1910static int __init netback_init(void)
1911{
1912        int i;
1913        int rc = 0;
1914        int group;
1915
1916        if (!xen_domain())
1917                return -ENODEV;
1918
1919        if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
1920                pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
1921                        fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
1922                fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
1923        }
1924
1925        xen_netbk_group_nr = num_online_cpus();
1926        xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
1927        if (!xen_netbk)
1928                return -ENOMEM;
1929
1930        for (group = 0; group < xen_netbk_group_nr; group++) {
1931                struct xen_netbk *netbk = &xen_netbk[group];
1932                skb_queue_head_init(&netbk->rx_queue);
1933                skb_queue_head_init(&netbk->tx_queue);
1934
1935                init_timer(&netbk->net_timer);
1936                netbk->net_timer.data = (unsigned long)netbk;
1937                netbk->net_timer.function = xen_netbk_alarm;
1938
1939                netbk->pending_cons = 0;
1940                netbk->pending_prod = MAX_PENDING_REQS;
1941                for (i = 0; i < MAX_PENDING_REQS; i++)
1942                        netbk->pending_ring[i] = i;
1943
1944                init_waitqueue_head(&netbk->wq);
1945                netbk->task = kthread_create(xen_netbk_kthread,
1946                                             (void *)netbk,
1947                                             "netback/%u", group);
1948
1949                if (IS_ERR(netbk->task)) {
1950                        pr_alert("kthread_create() failed in netback\n");
1951                        del_timer(&netbk->net_timer);
1952                        rc = PTR_ERR(netbk->task);
1953                        goto failed_init;
1954                }
1955
1956                kthread_bind(netbk->task, group);
1957
1958                INIT_LIST_HEAD(&netbk->net_schedule_list);
1959
1960                spin_lock_init(&netbk->net_schedule_list_lock);
1961
1962                atomic_set(&netbk->netfront_count, 0);
1963
1964                wake_up_process(netbk->task);
1965        }
1966
1967        rc = xenvif_xenbus_init();
1968        if (rc)
1969                goto failed_init;
1970
1971        return 0;
1972
1973failed_init:
1974        while (--group >= 0) {
1975                struct xen_netbk *netbk = &xen_netbk[group];
1976                for (i = 0; i < MAX_PENDING_REQS; i++) {
1977                        if (netbk->mmap_pages[i])
1978                                __free_page(netbk->mmap_pages[i]);
1979                }
1980                del_timer(&netbk->net_timer);
1981                kthread_stop(netbk->task);
1982        }
1983        vfree(xen_netbk);
1984        return rc;
1985
1986}
1987
1988module_init(netback_init);
1989
1990MODULE_LICENSE("Dual BSD/GPL");
1991MODULE_ALIAS("xen-backend:vif");
1992