linux/drivers/net/ethernet/tile/tilegx.c
   1/*
   2 * Copyright 2012 Tilera Corporation. All Rights Reserved.
   3 *
   4 *   This program is free software; you can redistribute it and/or
   5 *   modify it under the terms of the GNU General Public License
   6 *   as published by the Free Software Foundation, version 2.
   7 *
   8 *   This program is distributed in the hope that it will be useful, but
   9 *   WITHOUT ANY WARRANTY; without even the implied warranty of
  10 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  11 *   NON INFRINGEMENT.  See the GNU General Public License for
  12 *   more details.
  13 */
  14
  15#include <linux/module.h>
  16#include <linux/init.h>
  17#include <linux/moduleparam.h>
  18#include <linux/sched.h>
  19#include <linux/kernel.h>      /* printk() */
  20#include <linux/slab.h>        /* kmalloc() */
  21#include <linux/errno.h>       /* error codes */
  22#include <linux/types.h>       /* size_t */
  23#include <linux/interrupt.h>
  24#include <linux/in.h>
  25#include <linux/irq.h>
  26#include <linux/netdevice.h>   /* struct device, and other headers */
  27#include <linux/etherdevice.h> /* eth_type_trans */
  28#include <linux/skbuff.h>
  29#include <linux/ioctl.h>
  30#include <linux/cdev.h>
  31#include <linux/hugetlb.h>
  32#include <linux/in6.h>
  33#include <linux/timer.h>
  34#include <linux/hrtimer.h>
  35#include <linux/ktime.h>
  36#include <linux/io.h>
  37#include <linux/ctype.h>
  38#include <linux/ip.h>
  39#include <linux/tcp.h>
  40
  41#include <asm/checksum.h>
  42#include <asm/homecache.h>
  43#include <gxio/mpipe.h>
  44#include <arch/sim.h>
  45
  46/* Default transmit lockup timeout period, in jiffies. */
  47#define TILE_NET_TIMEOUT (5 * HZ)
  48
  49/* The maximum number of distinct channels (idesc.channel is 5 bits). */
  50#define TILE_NET_CHANNELS 32
  51
  52/* Maximum number of idescs to handle per "poll". */
  53#define TILE_NET_BATCH 128
  54
  55/* Maximum number of packets to handle per "poll". */
  56#define TILE_NET_WEIGHT 64
  57
  58/* Number of entries in each iqueue. */
  59#define IQUEUE_ENTRIES 512
  60
  61/* Number of entries in each equeue. */
  62#define EQUEUE_ENTRIES 2048
  63
   64/* Total header bytes per equeue slot.  Must be big enough for 2 bytes
   65 * of NET_IP_ALIGN padding, 14 bytes of L2 (Ethernet) header, plus the
   66 * IP header and up to 60 bytes of TCP header, rounded up to cache lines.
   67 */
  68#define HEADER_BYTES 128
  69
  70/* Maximum completions per cpu per device (must be a power of two).
  71 * ISSUE: What is the right number here?  If this is too small, then
  72 * egress might block waiting for free space in a completions array.
  73 * ISSUE: At the least, allocate these only for initialized echannels.
  74 */
  75#define TILE_NET_MAX_COMPS 64
  76
  77#define MAX_FRAGS (MAX_SKB_FRAGS + 1)
  78
  79/* Size of completions data to allocate.
  80 * ISSUE: Probably more than needed since we don't use all the channels.
  81 */
  82#define COMPS_SIZE (TILE_NET_CHANNELS * sizeof(struct tile_net_comps))
  83
  84/* Size of NotifRing data to allocate. */
  85#define NOTIF_RING_SIZE (IQUEUE_ENTRIES * sizeof(gxio_mpipe_idesc_t))
  86
  87/* Timeout to wake the per-device TX timer after we stop the queue.
  88 * We don't want the timeout too short (adds overhead, and might end
  89 * up causing stop/wake/stop/wake cycles) or too long (affects performance).
   90 * For the 10 Gb NIC, 30 usec is roughly 25 full-size (1500-byte) packets.
  91 */
  92#define TX_TIMER_DELAY_USEC 30
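     /* Back-of-the-envelope arithmetic for the value above: at 10 Gb/s a
      * full-size frame (1500-byte MTU plus roughly 38 bytes of Ethernet
      * framing) occupies the wire for about 1.2 usec, so 30 usec is on
      * the order of 25 full-size packet times.
      */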
  93
  94/* Timeout to wake the per-cpu egress timer to free completions. */
  95#define EGRESS_TIMER_DELAY_USEC 1000
  96
  97MODULE_AUTHOR("Tilera Corporation");
  98MODULE_LICENSE("GPL");
  99
 100/* A "packet fragment" (a chunk of memory). */
 101struct frag {
 102        void *buf;
 103        size_t length;
 104};
 105
 106/* A single completion. */
 107struct tile_net_comp {
 108        /* The "complete_count" when the completion will be complete. */
 109        s64 when;
 110        /* The buffer to be freed when the completion is complete. */
 111        struct sk_buff *skb;
 112};
 113
 114/* The completions for a given cpu and echannel. */
 115struct tile_net_comps {
 116        /* The completions. */
 117        struct tile_net_comp comp_queue[TILE_NET_MAX_COMPS];
 118        /* The number of completions used. */
 119        unsigned long comp_next;
 120        /* The number of completions freed. */
 121        unsigned long comp_last;
 122};
 123
 124/* The transmit wake timer for a given cpu and echannel. */
 125struct tile_net_tx_wake {
 126        int tx_queue_idx;
 127        struct hrtimer timer;
 128        struct net_device *dev;
 129};
 130
 131/* Info for a specific cpu. */
 132struct tile_net_info {
 133        /* The NAPI struct. */
 134        struct napi_struct napi;
 135        /* Packet queue. */
 136        gxio_mpipe_iqueue_t iqueue;
 137        /* Our cpu. */
 138        int my_cpu;
 139        /* True if iqueue is valid. */
 140        bool has_iqueue;
 141        /* NAPI flags. */
 142        bool napi_added;
 143        bool napi_enabled;
 144        /* Number of small sk_buffs which must still be provided. */
 145        unsigned int num_needed_small_buffers;
 146        /* Number of large sk_buffs which must still be provided. */
 147        unsigned int num_needed_large_buffers;
 148        /* A timer for handling egress completions. */
 149        struct hrtimer egress_timer;
 150        /* True if "egress_timer" is scheduled. */
 151        bool egress_timer_scheduled;
 152        /* Comps for each egress channel. */
 153        struct tile_net_comps *comps_for_echannel[TILE_NET_CHANNELS];
 154        /* Transmit wake timer for each egress channel. */
 155        struct tile_net_tx_wake tx_wake[TILE_NET_CHANNELS];
 156};
 157
 158/* Info for egress on a particular egress channel. */
 159struct tile_net_egress {
 160        /* The "equeue". */
 161        gxio_mpipe_equeue_t *equeue;
 162        /* The headers for TSO. */
 163        unsigned char *headers;
 164};
 165
 166/* Info for a specific device. */
 167struct tile_net_priv {
 168        /* Our network device. */
 169        struct net_device *dev;
 170        /* The primary link. */
 171        gxio_mpipe_link_t link;
 172        /* The primary channel, if open, else -1. */
 173        int channel;
 174        /* The "loopify" egress link, if needed. */
 175        gxio_mpipe_link_t loopify_link;
 176        /* The "loopify" egress channel, if open, else -1. */
 177        int loopify_channel;
 178        /* The egress channel (channel or loopify_channel). */
 179        int echannel;
 180        /* Total stats. */
 181        struct net_device_stats stats;
 182};
 183
 184/* Egress info, indexed by "priv->echannel" (lazily created as needed). */
 185static struct tile_net_egress egress_for_echannel[TILE_NET_CHANNELS];
 186
 187/* Devices currently associated with each channel.
 188 * NOTE: The array entry can become NULL after ifconfig down, but
 189 * we do not free the underlying net_device structures, so it is
 190 * safe to use a pointer after reading it from this array.
 191 */
 192static struct net_device *tile_net_devs_for_channel[TILE_NET_CHANNELS];
 193
 194/* A mutex for "tile_net_devs_for_channel". */
 195static DEFINE_MUTEX(tile_net_devs_for_channel_mutex);
 196
 197/* The per-cpu info. */
 198static DEFINE_PER_CPU(struct tile_net_info, per_cpu_info);
 199
 200/* The "context" for all devices. */
 201static gxio_mpipe_context_t context;
 202
 203/* Buffer sizes and mpipe enum codes for buffer stacks.
 204 * See arch/tile/include/gxio/mpipe.h for the set of possible values.
 205 */
 206#define BUFFER_SIZE_SMALL_ENUM GXIO_MPIPE_BUFFER_SIZE_128
 207#define BUFFER_SIZE_SMALL 128
 208#define BUFFER_SIZE_LARGE_ENUM GXIO_MPIPE_BUFFER_SIZE_1664
 209#define BUFFER_SIZE_LARGE 1664
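     /* Note: 1664 bytes comfortably covers a worst-case normal frame of
      * NET_IP_ALIGN (2) + 14 bytes of Ethernet header + 1500 bytes of MTU
      * payload = 1516 bytes, so ordinary packets can land in a single
      * "large" buffer.
      */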
 210
 211/* The small/large "buffer stacks". */
 212static int small_buffer_stack = -1;
 213static int large_buffer_stack = -1;
 214
 215/* Amount of memory allocated for each buffer stack. */
 216static size_t buffer_stack_size;
 217
 218/* The actual memory allocated for the buffer stacks. */
 219static void *small_buffer_stack_va;
 220static void *large_buffer_stack_va;
 221
 222/* The buckets. */
 223static int first_bucket = -1;
 224static int num_buckets = 1;
 225
 226/* The ingress irq. */
 227static int ingress_irq = -1;
 228
 229/* Text value of tile_net.cpus if passed as a module parameter. */
 230static char *network_cpus_string;
 231
 232/* The actual cpus in "network_cpus". */
 233static struct cpumask network_cpus_map;
 234
 235/* If "loopify=LINK" was specified, this is "LINK". */
 236static char *loopify_link_name;
 237
 238/* If "tile_net.custom" was specified, this is non-NULL. */
 239static char *custom_str;
 240
  241/* The "tile_net.cpus" argument specifies the cpus that are dedicated
  242 * to handle ingress packets.
  243 *
  244 * The parameter should be in the form "tile_net.cpus=m-n[,x-y]", where
  245 * m, n, x, y are integers selecting the cpus to use for network
  246 * ingress; cpus that do not exist on this system are silently dropped.
  247 */
 248static bool network_cpus_init(void)
 249{
 250        char buf[1024];
 251        int rc;
 252
 253        if (network_cpus_string == NULL)
 254                return false;
 255
 256        rc = cpulist_parse_crop(network_cpus_string, &network_cpus_map);
 257        if (rc != 0) {
 258                pr_warn("tile_net.cpus=%s: malformed cpu list\n",
 259                        network_cpus_string);
 260                return false;
 261        }
 262
  263        /* Limit the request to cpus that actually exist. */
 264        cpumask_and(&network_cpus_map, &network_cpus_map, cpu_possible_mask);
 265
 266        if (cpumask_empty(&network_cpus_map)) {
 267                pr_warn("Ignoring empty tile_net.cpus='%s'.\n",
 268                        network_cpus_string);
 269                return false;
 270        }
 271
 272        cpulist_scnprintf(buf, sizeof(buf), &network_cpus_map);
 273        pr_info("Linux network CPUs: %s\n", buf);
 274        return true;
 275}
 276
 277module_param_named(cpus, network_cpus_string, charp, 0444);
 278MODULE_PARM_DESC(cpus, "cpulist of cores that handle network interrupts");
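     /* For example (illustrative values), booting with
      * "tile_net.cpus=1-3,5" asks cpus 1, 2, 3 and 5 to handle network
      * ingress.
      */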
 279
 280/* The "tile_net.loopify=LINK" argument causes the named device to
 281 * actually use "loop0" for ingress, and "loop1" for egress.  This
 282 * allows an app to sit between the actual link and linux, passing
 283 * (some) packets along to linux, and forwarding (some) packets sent
 284 * out by linux.
 285 */
 286module_param_named(loopify, loopify_link_name, charp, 0444);
 287MODULE_PARM_DESC(loopify, "name the device to use loop0/1 for ingress/egress");
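     /* For example (illustrative link name), "tile_net.loopify=xgbe0"
      * makes the xgbe0 interface ingress from "loop0" and egress to
      * "loop1", leaving the physical xgbe0 link to be driven by a
      * user-space application.
      */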
 288
 289/* The "tile_net.custom" argument causes us to ignore the "conventional"
 290 * classifier metadata, in particular, the "l2_offset".
 291 */
 292module_param_named(custom, custom_str, charp, 0444);
 293MODULE_PARM_DESC(custom, "indicates a (heavily) customized classifier");
 294
 295/* Atomically update a statistics field.
 296 * Note that on TILE-Gx, this operation is fire-and-forget on the
 297 * issuing core (single-cycle dispatch) and takes only a few cycles
 298 * longer than a regular store when the request reaches the home cache.
 299 * No expensive bus management overhead is required.
 300 */
 301static void tile_net_stats_add(unsigned long value, unsigned long *field)
 302{
 303        BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(unsigned long));
 304        atomic_long_add(value, (atomic_long_t *)field);
 305}
 306
 307/* Allocate and push a buffer. */
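     /* Buffer layout produced below (sketch): the VA pushed to mPIPE is
      * skb->data, which is 128-byte aligned; the pointer-sized word just
      * below it holds a back-pointer that mpipe_buf_to_skb() reads back:
      *
      *   ... | struct sk_buff *skb | packet data (128-byte aligned) ...
      *                             ^-- skb->data, pushed to the stack
      */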
 308static bool tile_net_provide_buffer(bool small)
 309{
 310        int stack = small ? small_buffer_stack : large_buffer_stack;
 311        const unsigned long buffer_alignment = 128;
 312        struct sk_buff *skb;
 313        int len;
 314
 315        len = sizeof(struct sk_buff **) + buffer_alignment;
 316        len += (small ? BUFFER_SIZE_SMALL : BUFFER_SIZE_LARGE);
 317        skb = dev_alloc_skb(len);
 318        if (skb == NULL)
 319                return false;
 320
 321        /* Make room for a back-pointer to 'skb' and guarantee alignment. */
 322        skb_reserve(skb, sizeof(struct sk_buff **));
 323        skb_reserve(skb, -(long)skb->data & (buffer_alignment - 1));
 324
 325        /* Save a back-pointer to 'skb'. */
 326        *(struct sk_buff **)(skb->data - sizeof(struct sk_buff **)) = skb;
 327
 328        /* Make sure "skb" and the back-pointer have been flushed. */
 329        wmb();
 330
 331        gxio_mpipe_push_buffer(&context, stack,
 332                               (void *)va_to_tile_io_addr(skb->data));
 333
 334        return true;
 335}
 336
 337/* Convert a raw mpipe buffer to its matching skb pointer. */
 338static struct sk_buff *mpipe_buf_to_skb(void *va)
 339{
 340        /* Acquire the associated "skb". */
 341        struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
 342        struct sk_buff *skb = *skb_ptr;
 343
 344        /* Paranoia. */
 345        if (skb->data != va) {
 346                /* Panic here since there's a reasonable chance
 347                 * that corrupt buffers means generic memory
 348                 * corruption, with unpredictable system effects.
 349                 */
 350                panic("Corrupt linux buffer! va=%p, skb=%p, skb->data=%p",
 351                      va, skb, skb->data);
 352        }
 353
 354        return skb;
 355}
 356
 357static void tile_net_pop_all_buffers(int stack)
 358{
 359        for (;;) {
 360                tile_io_addr_t addr =
 361                        (tile_io_addr_t)gxio_mpipe_pop_buffer(&context, stack);
 362                if (addr == 0)
 363                        break;
 364                dev_kfree_skb_irq(mpipe_buf_to_skb(tile_io_addr_to_va(addr)));
 365        }
 366}
 367
 368/* Provide linux buffers to mPIPE. */
 369static void tile_net_provide_needed_buffers(void)
 370{
 371        struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
 372
 373        while (info->num_needed_small_buffers != 0) {
 374                if (!tile_net_provide_buffer(true))
 375                        goto oops;
 376                info->num_needed_small_buffers--;
 377        }
 378
 379        while (info->num_needed_large_buffers != 0) {
 380                if (!tile_net_provide_buffer(false))
 381                        goto oops;
 382                info->num_needed_large_buffers--;
 383        }
 384
 385        return;
 386
 387oops:
 388        /* Add a description to the page allocation failure dump. */
 389        pr_notice("Tile %d still needs some buffers\n", info->my_cpu);
 390}
 391
 392static inline bool filter_packet(struct net_device *dev, void *buf)
 393{
 394        /* Filter packets received before we're up. */
 395        if (dev == NULL || !(dev->flags & IFF_UP))
 396                return true;
 397
 398        /* Filter out packets that aren't for us. */
 399        if (!(dev->flags & IFF_PROMISC) &&
 400            !is_multicast_ether_addr(buf) &&
 401            compare_ether_addr(dev->dev_addr, buf) != 0)
 402                return true;
 403
 404        return false;
 405}
 406
 407static void tile_net_receive_skb(struct net_device *dev, struct sk_buff *skb,
 408                                 gxio_mpipe_idesc_t *idesc, unsigned long len)
 409{
 410        struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
 411        struct tile_net_priv *priv = netdev_priv(dev);
 412
 413        /* Encode the actual packet length. */
 414        skb_put(skb, len);
 415
 416        skb->protocol = eth_type_trans(skb, dev);
 417
 418        /* Acknowledge "good" hardware checksums. */
 419        if (idesc->cs && idesc->csum_seed_val == 0xFFFF)
 420                skb->ip_summed = CHECKSUM_UNNECESSARY;
 421
 422        netif_receive_skb(skb);
 423
 424        /* Update stats. */
 425        tile_net_stats_add(1, &priv->stats.rx_packets);
 426        tile_net_stats_add(len, &priv->stats.rx_bytes);
 427
 428        /* Need a new buffer. */
 429        if (idesc->size == BUFFER_SIZE_SMALL_ENUM)
 430                info->num_needed_small_buffers++;
 431        else
 432                info->num_needed_large_buffers++;
 433}
 434
 435/* Handle a packet.  Return true if "processed", false if "filtered". */
 436static bool tile_net_handle_packet(gxio_mpipe_idesc_t *idesc)
 437{
 438        struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
 439        struct net_device *dev = tile_net_devs_for_channel[idesc->channel];
 440        uint8_t l2_offset;
 441        void *va;
 442        void *buf;
 443        unsigned long len;
 444        bool filter;
 445
 446        /* Drop packets for which no buffer was available.
 447         * NOTE: This happens under heavy load.
 448         */
 449        if (idesc->be) {
 450                struct tile_net_priv *priv = netdev_priv(dev);
 451                tile_net_stats_add(1, &priv->stats.rx_dropped);
 452                gxio_mpipe_iqueue_consume(&info->iqueue, idesc);
 453                if (net_ratelimit())
 454                        pr_info("Dropping packet (insufficient buffers).\n");
 455                return false;
 456        }
 457
 458        /* Get the "l2_offset", if allowed. */
 459        l2_offset = custom_str ? 0 : gxio_mpipe_idesc_get_l2_offset(idesc);
 460
 461        /* Get the raw buffer VA (includes "headroom"). */
 462        va = tile_io_addr_to_va((unsigned long)(long)idesc->va);
 463
 464        /* Get the actual packet start/length. */
 465        buf = va + l2_offset;
 466        len = idesc->l2_size - l2_offset;
 467
 468        /* Point "va" at the raw buffer. */
 469        va -= NET_IP_ALIGN;
 470
 471        filter = filter_packet(dev, buf);
 472        if (filter) {
 473                gxio_mpipe_iqueue_drop(&info->iqueue, idesc);
 474        } else {
 475                struct sk_buff *skb = mpipe_buf_to_skb(va);
 476
 477                /* Skip headroom, and any custom header. */
 478                skb_reserve(skb, NET_IP_ALIGN + l2_offset);
 479
 480                tile_net_receive_skb(dev, skb, idesc, len);
 481        }
 482
 483        gxio_mpipe_iqueue_consume(&info->iqueue, idesc);
 484        return !filter;
 485}
 486
 487/* Handle some packets for the current CPU.
 488 *
 489 * This function handles up to TILE_NET_BATCH idescs per call.
 490 *
 491 * ISSUE: Since we do not provide new buffers until this function is
 492 * complete, we must initially provide enough buffers for each network
 493 * cpu to fill its iqueue and also its batched idescs.
 494 *
 495 * ISSUE: The "rotting packet" race condition occurs if a packet
 496 * arrives after the queue appears to be empty, and before the
 497 * hypervisor interrupt is re-enabled.
 498 */
 499static int tile_net_poll(struct napi_struct *napi, int budget)
 500{
 501        struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
 502        unsigned int work = 0;
 503        gxio_mpipe_idesc_t *idesc;
 504        int i, n;
 505
 506        /* Process packets. */
 507        while ((n = gxio_mpipe_iqueue_try_peek(&info->iqueue, &idesc)) > 0) {
 508                for (i = 0; i < n; i++) {
 509                        if (i == TILE_NET_BATCH)
 510                                goto done;
 511                        if (tile_net_handle_packet(idesc + i)) {
 512                                if (++work >= budget)
 513                                        goto done;
 514                        }
 515                }
 516        }
 517
 518        /* There are no packets left. */
 519        napi_complete(&info->napi);
 520
 521        /* Re-enable hypervisor interrupts. */
 522        gxio_mpipe_enable_notif_ring_interrupt(&context, info->iqueue.ring);
 523
 524        /* HACK: Avoid the "rotting packet" problem. */
 525        if (gxio_mpipe_iqueue_try_peek(&info->iqueue, &idesc) > 0)
 526                napi_schedule(&info->napi);
 527
 528        /* ISSUE: Handle completions? */
 529
 530done:
 531        tile_net_provide_needed_buffers();
 532
 533        return work;
 534}
 535
 536/* Handle an ingress interrupt on the current cpu. */
 537static irqreturn_t tile_net_handle_ingress_irq(int irq, void *unused)
 538{
 539        struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
 540        napi_schedule(&info->napi);
 541        return IRQ_HANDLED;
 542}
 543
 544/* Free some completions.  This must be called with interrupts blocked. */
 545static int tile_net_free_comps(gxio_mpipe_equeue_t *equeue,
 546                                struct tile_net_comps *comps,
 547                                int limit, bool force_update)
 548{
 549        int n = 0;
 550        while (comps->comp_last < comps->comp_next) {
 551                unsigned int cid = comps->comp_last % TILE_NET_MAX_COMPS;
 552                struct tile_net_comp *comp = &comps->comp_queue[cid];
 553                if (!gxio_mpipe_equeue_is_complete(equeue, comp->when,
 554                                                   force_update || n == 0))
 555                        break;
 556                dev_kfree_skb_irq(comp->skb);
 557                comps->comp_last++;
 558                if (++n == limit)
 559                        break;
 560        }
 561        return n;
 562}
 563
 564/* Add a completion.  This must be called with interrupts blocked.
 565 * tile_net_equeue_try_reserve() will have ensured a free completion entry.
 566 */
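     /* (The indices comp_next and comp_last only ever increase and are
      * reduced modulo TILE_NET_MAX_COMPS when indexing comp_queue[]; the
      * reserve path keeps their difference below TILE_NET_MAX_COMPS, so a
      * new completion never overwrites one that is still pending.)
      */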
 567static void add_comp(gxio_mpipe_equeue_t *equeue,
 568                     struct tile_net_comps *comps,
 569                     uint64_t when, struct sk_buff *skb)
 570{
 571        int cid = comps->comp_next % TILE_NET_MAX_COMPS;
 572        comps->comp_queue[cid].when = when;
 573        comps->comp_queue[cid].skb = skb;
 574        comps->comp_next++;
 575}
 576
 577static void tile_net_schedule_tx_wake_timer(struct net_device *dev,
 578                                            int tx_queue_idx)
 579{
 580        struct tile_net_info *info = &per_cpu(per_cpu_info, tx_queue_idx);
 581        struct tile_net_priv *priv = netdev_priv(dev);
 582        struct tile_net_tx_wake *tx_wake = &info->tx_wake[priv->echannel];
 583
 584        hrtimer_start(&tx_wake->timer,
 585                      ktime_set(0, TX_TIMER_DELAY_USEC * 1000UL),
 586                      HRTIMER_MODE_REL_PINNED);
 587}
 588
 589static enum hrtimer_restart tile_net_handle_tx_wake_timer(struct hrtimer *t)
 590{
 591        struct tile_net_tx_wake *tx_wake =
 592                container_of(t, struct tile_net_tx_wake, timer);
 593        netif_wake_subqueue(tx_wake->dev, tx_wake->tx_queue_idx);
 594        return HRTIMER_NORESTART;
 595}
 596
 597/* Make sure the egress timer is scheduled. */
 598static void tile_net_schedule_egress_timer(void)
 599{
 600        struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
 601
 602        if (!info->egress_timer_scheduled) {
 603                hrtimer_start(&info->egress_timer,
 604                              ktime_set(0, EGRESS_TIMER_DELAY_USEC * 1000UL),
 605                              HRTIMER_MODE_REL_PINNED);
 606                info->egress_timer_scheduled = true;
 607        }
 608}
 609
 610/* The "function" for "info->egress_timer".
 611 *
 612 * This timer will reschedule itself as long as there are any pending
 613 * completions expected for this tile.
 614 */
 615static enum hrtimer_restart tile_net_handle_egress_timer(struct hrtimer *t)
 616{
 617        struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
 618        unsigned long irqflags;
 619        bool pending = false;
 620        int i;
 621
 622        local_irq_save(irqflags);
 623
 624        /* The timer is no longer scheduled. */
 625        info->egress_timer_scheduled = false;
 626
 627        /* Free all possible comps for this tile. */
 628        for (i = 0; i < TILE_NET_CHANNELS; i++) {
 629                struct tile_net_egress *egress = &egress_for_echannel[i];
 630                struct tile_net_comps *comps = info->comps_for_echannel[i];
 631                if (comps->comp_last >= comps->comp_next)
 632                        continue;
 633                tile_net_free_comps(egress->equeue, comps, -1, true);
 634                pending = pending || (comps->comp_last < comps->comp_next);
 635        }
 636
 637        /* Reschedule timer if needed. */
 638        if (pending)
 639                tile_net_schedule_egress_timer();
 640
 641        local_irq_restore(irqflags);
 642
 643        return HRTIMER_NORESTART;
 644}
 645
 646/* Helper function for "tile_net_update()".
 647 * "dev" (i.e. arg) is the device being brought up or down,
 648 * or NULL if all devices are now down.
 649 */
 650static void tile_net_update_cpu(void *arg)
 651{
 652        struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
 653        struct net_device *dev = arg;
 654
 655        if (!info->has_iqueue)
 656                return;
 657
 658        if (dev != NULL) {
 659                if (!info->napi_added) {
 660                        netif_napi_add(dev, &info->napi,
 661                                       tile_net_poll, TILE_NET_WEIGHT);
 662                        info->napi_added = true;
 663                }
 664                if (!info->napi_enabled) {
 665                        napi_enable(&info->napi);
 666                        info->napi_enabled = true;
 667                }
 668                enable_percpu_irq(ingress_irq, 0);
 669        } else {
 670                disable_percpu_irq(ingress_irq);
 671                if (info->napi_enabled) {
 672                        napi_disable(&info->napi);
 673                        info->napi_enabled = false;
 674                }
 675                /* FIXME: Drain the iqueue. */
 676        }
 677}
 678
 679/* Helper function for tile_net_open() and tile_net_stop().
 680 * Always called under tile_net_devs_for_channel_mutex.
 681 */
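     /* Each call rebuilds the classifier rule set from scratch: a single
      * rule steers every currently-open channel into the bucket range
      * [first_bucket, first_bucket + num_buckets), and committing an
      * empty rule set (no channels) effectively stops ingress delivery.
      */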
 682static int tile_net_update(struct net_device *dev)
 683{
 684        static gxio_mpipe_rules_t rules;  /* too big to fit on the stack */
 685        bool saw_channel = false;
 686        int channel;
 687        int rc;
 688        int cpu;
 689
 690        gxio_mpipe_rules_init(&rules, &context);
 691
 692        for (channel = 0; channel < TILE_NET_CHANNELS; channel++) {
 693                if (tile_net_devs_for_channel[channel] == NULL)
 694                        continue;
 695                if (!saw_channel) {
 696                        saw_channel = true;
 697                        gxio_mpipe_rules_begin(&rules, first_bucket,
 698                                               num_buckets, NULL);
 699                        gxio_mpipe_rules_set_headroom(&rules, NET_IP_ALIGN);
 700                }
 701                gxio_mpipe_rules_add_channel(&rules, channel);
 702        }
 703
 704        /* NOTE: This can fail if there is no classifier.
 705         * ISSUE: Can anything else cause it to fail?
 706         */
 707        rc = gxio_mpipe_rules_commit(&rules);
 708        if (rc != 0) {
 709                netdev_warn(dev, "gxio_mpipe_rules_commit failed: %d\n", rc);
 710                return -EIO;
 711        }
 712
 713        /* Update all cpus, sequentially (to protect "netif_napi_add()"). */
 714        for_each_online_cpu(cpu)
 715                smp_call_function_single(cpu, tile_net_update_cpu,
 716                                         (saw_channel ? dev : NULL), 1);
 717
 718        /* HACK: Allow packets to flow in the simulator. */
 719        if (saw_channel)
 720                sim_enable_mpipe_links(0, -1);
 721
 722        return 0;
 723}
 724
 725/* Allocate and initialize mpipe buffer stacks, and register them in
 726 * the mPIPE TLBs, for both small and large packet sizes.
 727 * This routine supports tile_net_init_mpipe(), below.
 728 */
 729static int init_buffer_stacks(struct net_device *dev, int num_buffers)
 730{
 731        pte_t hash_pte = pte_set_home((pte_t) { 0 }, PAGE_HOME_HASH);
 732        int rc;
 733
 734        /* Compute stack bytes; we round up to 64KB and then use
  735         * alloc_pages_exact() so we get the required 64KB alignment too.
 736         */
 737        buffer_stack_size =
 738                ALIGN(gxio_mpipe_calc_buffer_stack_bytes(num_buffers),
 739                      64 * 1024);
 740
 741        /* Allocate two buffer stack indices. */
 742        rc = gxio_mpipe_alloc_buffer_stacks(&context, 2, 0, 0);
 743        if (rc < 0) {
 744                netdev_err(dev, "gxio_mpipe_alloc_buffer_stacks failed: %d\n",
 745                           rc);
 746                return rc;
 747        }
 748        small_buffer_stack = rc;
 749        large_buffer_stack = rc + 1;
 750
  751        /* Allocate the small buffer stack. */
 752        small_buffer_stack_va =
 753                alloc_pages_exact(buffer_stack_size, GFP_KERNEL);
 754        if (small_buffer_stack_va == NULL) {
 755                netdev_err(dev,
 756                           "Could not alloc %zd bytes for buffer stacks\n",
 757                           buffer_stack_size);
 758                return -ENOMEM;
 759        }
 760        rc = gxio_mpipe_init_buffer_stack(&context, small_buffer_stack,
 761                                          BUFFER_SIZE_SMALL_ENUM,
 762                                          small_buffer_stack_va,
 763                                          buffer_stack_size, 0);
 764        if (rc != 0) {
 765                netdev_err(dev, "gxio_mpipe_init_buffer_stack: %d\n", rc);
 766                return rc;
 767        }
 768        rc = gxio_mpipe_register_client_memory(&context, small_buffer_stack,
 769                                               hash_pte, 0);
 770        if (rc != 0) {
  771                netdev_err(dev,
  772                           "gxio_mpipe_register_client_memory failed: %d\n",
  773                           rc);
 774                return rc;
 775        }
 776
 777        /* Allocate the large buffer stack. */
 778        large_buffer_stack_va =
 779                alloc_pages_exact(buffer_stack_size, GFP_KERNEL);
 780        if (large_buffer_stack_va == NULL) {
 781                netdev_err(dev,
 782                           "Could not alloc %zd bytes for buffer stacks\n",
 783                           buffer_stack_size);
 784                return -ENOMEM;
 785        }
 786        rc = gxio_mpipe_init_buffer_stack(&context, large_buffer_stack,
 787                                          BUFFER_SIZE_LARGE_ENUM,
 788                                          large_buffer_stack_va,
 789                                          buffer_stack_size, 0);
 790        if (rc != 0) {
 791                netdev_err(dev, "gxio_mpipe_init_buffer_stack failed: %d\n",
 792                           rc);
 793                return rc;
 794        }
 795        rc = gxio_mpipe_register_client_memory(&context, large_buffer_stack,
 796                                               hash_pte, 0);
 797        if (rc != 0) {
  798                netdev_err(dev,
  799                           "gxio_mpipe_register_client_memory failed: %d\n",
  800                           rc);
 801                return rc;
 802        }
 803
 804        return 0;
 805}
 806
 807/* Allocate per-cpu resources (memory for completions and idescs).
 808 * This routine supports tile_net_init_mpipe(), below.
 809 */
 810static int alloc_percpu_mpipe_resources(struct net_device *dev,
 811                                        int cpu, int ring)
 812{
 813        struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
 814        int order, i, rc;
 815        struct page *page;
 816        void *addr;
 817
 818        /* Allocate the "comps". */
 819        order = get_order(COMPS_SIZE);
 820        page = homecache_alloc_pages(GFP_KERNEL, order, cpu);
 821        if (page == NULL) {
 822                netdev_err(dev, "Failed to alloc %zd bytes comps memory\n",
 823                           COMPS_SIZE);
 824                return -ENOMEM;
 825        }
 826        addr = pfn_to_kaddr(page_to_pfn(page));
 827        memset(addr, 0, COMPS_SIZE);
 828        for (i = 0; i < TILE_NET_CHANNELS; i++)
 829                info->comps_for_echannel[i] =
 830                        addr + i * sizeof(struct tile_net_comps);
 831
 832        /* If this is a network cpu, create an iqueue. */
 833        if (cpu_isset(cpu, network_cpus_map)) {
 834                order = get_order(NOTIF_RING_SIZE);
 835                page = homecache_alloc_pages(GFP_KERNEL, order, cpu);
 836                if (page == NULL) {
 837                        netdev_err(dev,
 838                                   "Failed to alloc %zd bytes iqueue memory\n",
 839                                   NOTIF_RING_SIZE);
 840                        return -ENOMEM;
 841                }
 842                addr = pfn_to_kaddr(page_to_pfn(page));
 843                rc = gxio_mpipe_iqueue_init(&info->iqueue, &context, ring++,
 844                                            addr, NOTIF_RING_SIZE, 0);
 845                if (rc < 0) {
 846                        netdev_err(dev,
 847                                   "gxio_mpipe_iqueue_init failed: %d\n", rc);
 848                        return rc;
 849                }
 850                info->has_iqueue = true;
 851        }
 852
 853        return ring;
 854}
 855
 856/* Initialize NotifGroup and buckets.
 857 * This routine supports tile_net_init_mpipe(), below.
 858 */
 859static int init_notif_group_and_buckets(struct net_device *dev,
 860                                        int ring, int network_cpus_count)
 861{
 862        int group, rc;
 863
 864        /* Allocate one NotifGroup. */
 865        rc = gxio_mpipe_alloc_notif_groups(&context, 1, 0, 0);
 866        if (rc < 0) {
 867                netdev_err(dev, "gxio_mpipe_alloc_notif_groups failed: %d\n",
 868                           rc);
 869                return rc;
 870        }
 871        group = rc;
 872
 873        /* Initialize global num_buckets value. */
 874        if (network_cpus_count > 4)
 875                num_buckets = 256;
 876        else if (network_cpus_count > 1)
 877                num_buckets = 16;
 878
 879        /* Allocate some buckets, and set global first_bucket value. */
 880        rc = gxio_mpipe_alloc_buckets(&context, num_buckets, 0, 0);
 881        if (rc < 0) {
 882                netdev_err(dev, "gxio_mpipe_alloc_buckets failed: %d\n", rc);
 883                return rc;
 884        }
 885        first_bucket = rc;
 886
 887        /* Init group and buckets. */
 888        rc = gxio_mpipe_init_notif_group_and_buckets(
 889                &context, group, ring, network_cpus_count,
 890                first_bucket, num_buckets,
 891                GXIO_MPIPE_BUCKET_STICKY_FLOW_LOCALITY);
 892        if (rc != 0) {
 893                netdev_err(
 894                        dev,
 895                        "gxio_mpipe_init_notif_group_and_buckets failed: %d\n",
 896                        rc);
 897                return rc;
 898        }
 899
 900        return 0;
 901}
 902
 903/* Create an irq and register it, then activate the irq and request
 904 * interrupts on all cores.  Note that "ingress_irq" being initialized
 905 * is how we know not to call tile_net_init_mpipe() again.
 906 * This routine supports tile_net_init_mpipe(), below.
 907 */
 908static int tile_net_setup_interrupts(struct net_device *dev)
 909{
 910        int cpu, rc;
 911
 912        rc = create_irq();
 913        if (rc < 0) {
 914                netdev_err(dev, "create_irq failed: %d\n", rc);
 915                return rc;
 916        }
 917        ingress_irq = rc;
 918        tile_irq_activate(ingress_irq, TILE_IRQ_PERCPU);
 919        rc = request_irq(ingress_irq, tile_net_handle_ingress_irq,
 920                         0, "tile_net", NULL);
 921        if (rc != 0) {
 922                netdev_err(dev, "request_irq failed: %d\n", rc);
 923                destroy_irq(ingress_irq);
 924                ingress_irq = -1;
 925                return rc;
 926        }
 927
 928        for_each_online_cpu(cpu) {
 929                struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
 930                if (info->has_iqueue) {
 931                        gxio_mpipe_request_notif_ring_interrupt(
 932                                &context, cpu_x(cpu), cpu_y(cpu),
 933                                KERNEL_PL, ingress_irq, info->iqueue.ring);
 934                }
 935        }
 936
 937        return 0;
 938}
 939
 940/* Undo any state set up partially by a failed call to tile_net_init_mpipe. */
 941static void tile_net_init_mpipe_fail(void)
 942{
 943        int cpu;
 944
 945        /* Do cleanups that require the mpipe context first. */
 946        if (small_buffer_stack >= 0)
 947                tile_net_pop_all_buffers(small_buffer_stack);
 948        if (large_buffer_stack >= 0)
 949                tile_net_pop_all_buffers(large_buffer_stack);
 950
 951        /* Destroy mpipe context so the hardware no longer owns any memory. */
 952        gxio_mpipe_destroy(&context);
 953
 954        for_each_online_cpu(cpu) {
 955                struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
 956                free_pages((unsigned long)(info->comps_for_echannel[0]),
 957                           get_order(COMPS_SIZE));
 958                info->comps_for_echannel[0] = NULL;
 959                free_pages((unsigned long)(info->iqueue.idescs),
 960                           get_order(NOTIF_RING_SIZE));
 961                info->iqueue.idescs = NULL;
 962        }
 963
 964        if (small_buffer_stack_va)
 965                free_pages_exact(small_buffer_stack_va, buffer_stack_size);
 966        if (large_buffer_stack_va)
 967                free_pages_exact(large_buffer_stack_va, buffer_stack_size);
 968
 969        small_buffer_stack_va = NULL;
 970        large_buffer_stack_va = NULL;
 971        large_buffer_stack = -1;
 972        small_buffer_stack = -1;
 973        first_bucket = -1;
 974}
 975
 976/* The first time any tilegx network device is opened, we initialize
 977 * the global mpipe state.  If this step fails, we fail to open the
 978 * device, but if it succeeds, we never need to do it again, and since
 979 * tile_net can't be unloaded, we never undo it.
 980 *
 981 * Note that some resources in this path (buffer stack indices,
 982 * bindings from init_buffer_stack, etc.) are hypervisor resources
 983 * that are freed implicitly by gxio_mpipe_destroy().
 984 */
 985static int tile_net_init_mpipe(struct net_device *dev)
 986{
 987        int i, num_buffers, rc;
 988        int cpu;
 989        int first_ring, ring;
 990        int network_cpus_count = cpus_weight(network_cpus_map);
 991
 992        if (!hash_default) {
 993                netdev_err(dev, "Networking requires hash_default!\n");
 994                return -EIO;
 995        }
 996
 997        rc = gxio_mpipe_init(&context, 0);
 998        if (rc != 0) {
 999                netdev_err(dev, "gxio_mpipe_init failed: %d\n", rc);
1000                return -EIO;
1001        }
1002
1003        /* Set up the buffer stacks. */
1004        num_buffers =
1005                network_cpus_count * (IQUEUE_ENTRIES + TILE_NET_BATCH);
1006        rc = init_buffer_stacks(dev, num_buffers);
1007        if (rc != 0)
1008                goto fail;
1009
1010        /* Provide initial buffers. */
1011        rc = -ENOMEM;
1012        for (i = 0; i < num_buffers; i++) {
1013                if (!tile_net_provide_buffer(true)) {
1014                        netdev_err(dev, "Cannot allocate initial sk_bufs!\n");
1015                        goto fail;
1016                }
1017        }
1018        for (i = 0; i < num_buffers; i++) {
1019                if (!tile_net_provide_buffer(false)) {
1020                        netdev_err(dev, "Cannot allocate initial sk_bufs!\n");
1021                        goto fail;
1022                }
1023        }
1024
1025        /* Allocate one NotifRing for each network cpu. */
1026        rc = gxio_mpipe_alloc_notif_rings(&context, network_cpus_count, 0, 0);
1027        if (rc < 0) {
1028                netdev_err(dev, "gxio_mpipe_alloc_notif_rings failed %d\n",
1029                           rc);
1030                goto fail;
1031        }
1032
1033        /* Init NotifRings per-cpu. */
1034        first_ring = rc;
1035        ring = first_ring;
1036        for_each_online_cpu(cpu) {
1037                rc = alloc_percpu_mpipe_resources(dev, cpu, ring);
1038                if (rc < 0)
1039                        goto fail;
1040                ring = rc;
1041        }
1042
1043        /* Initialize NotifGroup and buckets. */
1044        rc = init_notif_group_and_buckets(dev, first_ring, network_cpus_count);
1045        if (rc != 0)
1046                goto fail;
1047
1048        /* Create and enable interrupts. */
1049        rc = tile_net_setup_interrupts(dev);
1050        if (rc != 0)
1051                goto fail;
1052
1053        return 0;
1054
1055fail:
1056        tile_net_init_mpipe_fail();
1057        return rc;
1058}
1059
1060/* Create persistent egress info for a given egress channel.
1061 * Note that this may be shared between, say, "gbe0" and "xgbe0".
1062 * ISSUE: Defer header allocation until TSO is actually needed?
1063 */
1064static int tile_net_init_egress(struct net_device *dev, int echannel)
1065{
1066        struct page *headers_page, *edescs_page, *equeue_page;
1067        gxio_mpipe_edesc_t *edescs;
1068        gxio_mpipe_equeue_t *equeue;
1069        unsigned char *headers;
1070        int headers_order, edescs_order, equeue_order;
1071        size_t edescs_size;
1072        int edma;
1073        int rc = -ENOMEM;
1074
1075        /* Only initialize once. */
1076        if (egress_for_echannel[echannel].equeue != NULL)
1077                return 0;
1078
1079        /* Allocate memory for the "headers". */
1080        headers_order = get_order(EQUEUE_ENTRIES * HEADER_BYTES);
1081        headers_page = alloc_pages(GFP_KERNEL, headers_order);
1082        if (headers_page == NULL) {
1083                netdev_warn(dev,
1084                            "Could not alloc %zd bytes for TSO headers.\n",
1085                            PAGE_SIZE << headers_order);
1086                goto fail;
1087        }
1088        headers = pfn_to_kaddr(page_to_pfn(headers_page));
1089
1090        /* Allocate memory for the "edescs". */
1091        edescs_size = EQUEUE_ENTRIES * sizeof(*edescs);
1092        edescs_order = get_order(edescs_size);
1093        edescs_page = alloc_pages(GFP_KERNEL, edescs_order);
1094        if (edescs_page == NULL) {
1095                netdev_warn(dev,
1096                            "Could not alloc %zd bytes for eDMA ring.\n",
1097                            edescs_size);
1098                goto fail_headers;
1099        }
1100        edescs = pfn_to_kaddr(page_to_pfn(edescs_page));
1101
1102        /* Allocate memory for the "equeue". */
1103        equeue_order = get_order(sizeof(*equeue));
1104        equeue_page = alloc_pages(GFP_KERNEL, equeue_order);
1105        if (equeue_page == NULL) {
1106                netdev_warn(dev,
1107                            "Could not alloc %zd bytes for equeue info.\n",
1108                            PAGE_SIZE << equeue_order);
1109                goto fail_edescs;
1110        }
1111        equeue = pfn_to_kaddr(page_to_pfn(equeue_page));
1112
1113        /* Allocate an edma ring.  Note that in practice this can't
1114         * fail, which is good, because we will leak an edma ring if so.
1115         */
1116        rc = gxio_mpipe_alloc_edma_rings(&context, 1, 0, 0);
1117        if (rc < 0) {
1118                netdev_warn(dev, "gxio_mpipe_alloc_edma_rings failed: %d\n",
1119                            rc);
1120                goto fail_equeue;
1121        }
1122        edma = rc;
1123
1124        /* Initialize the equeue. */
1125        rc = gxio_mpipe_equeue_init(equeue, &context, edma, echannel,
1126                                    edescs, edescs_size, 0);
1127        if (rc != 0) {
1128                netdev_err(dev, "gxio_mpipe_equeue_init failed: %d\n", rc);
1129                goto fail_equeue;
1130        }
1131
1132        /* Done. */
1133        egress_for_echannel[echannel].equeue = equeue;
1134        egress_for_echannel[echannel].headers = headers;
1135        return 0;
1136
1137fail_equeue:
1138        __free_pages(equeue_page, equeue_order);
1139
1140fail_edescs:
1141        __free_pages(edescs_page, edescs_order);
1142
1143fail_headers:
1144        __free_pages(headers_page, headers_order);
1145
1146fail:
1147        return rc;
1148}
1149
1150/* Return channel number for a newly-opened link. */
1151static int tile_net_link_open(struct net_device *dev, gxio_mpipe_link_t *link,
1152                              const char *link_name)
1153{
1154        int rc = gxio_mpipe_link_open(link, &context, link_name, 0);
1155        if (rc < 0) {
1156                netdev_err(dev, "Failed to open '%s'\n", link_name);
1157                return rc;
1158        }
1159        rc = gxio_mpipe_link_channel(link);
1160        if (rc < 0 || rc >= TILE_NET_CHANNELS) {
1161                netdev_err(dev, "gxio_mpipe_link_channel bad value: %d\n", rc);
1162                gxio_mpipe_link_close(link);
1163                return -EINVAL;
1164        }
1165        return rc;
1166}
1167
1168/* Help the kernel activate the given network interface. */
1169static int tile_net_open(struct net_device *dev)
1170{
1171        struct tile_net_priv *priv = netdev_priv(dev);
1172        int cpu, rc;
1173
1174        mutex_lock(&tile_net_devs_for_channel_mutex);
1175
1176        /* Do one-time initialization the first time any device is opened. */
1177        if (ingress_irq < 0) {
1178                rc = tile_net_init_mpipe(dev);
1179                if (rc != 0)
1180                        goto fail;
1181        }
1182
1183        /* Determine if this is the "loopify" device. */
1184        if (unlikely((loopify_link_name != NULL) &&
1185                     !strcmp(dev->name, loopify_link_name))) {
1186                rc = tile_net_link_open(dev, &priv->link, "loop0");
1187                if (rc < 0)
1188                        goto fail;
1189                priv->channel = rc;
1190                rc = tile_net_link_open(dev, &priv->loopify_link, "loop1");
1191                if (rc < 0)
1192                        goto fail;
1193                priv->loopify_channel = rc;
1194                priv->echannel = rc;
1195        } else {
1196                rc = tile_net_link_open(dev, &priv->link, dev->name);
1197                if (rc < 0)
1198                        goto fail;
1199                priv->channel = rc;
1200                priv->echannel = rc;
1201        }
1202
1203        /* Initialize egress info (if needed).  Once ever, per echannel. */
1204        rc = tile_net_init_egress(dev, priv->echannel);
1205        if (rc != 0)
1206                goto fail;
1207
1208        tile_net_devs_for_channel[priv->channel] = dev;
1209
1210        rc = tile_net_update(dev);
1211        if (rc != 0)
1212                goto fail;
1213
1214        mutex_unlock(&tile_net_devs_for_channel_mutex);
1215
1216        /* Initialize the transmit wake timer for this device for each cpu. */
1217        for_each_online_cpu(cpu) {
1218                struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
1219                struct tile_net_tx_wake *tx_wake =
1220                        &info->tx_wake[priv->echannel];
1221
1222                hrtimer_init(&tx_wake->timer, CLOCK_MONOTONIC,
1223                             HRTIMER_MODE_REL);
1224                tx_wake->tx_queue_idx = cpu;
1225                tx_wake->timer.function = tile_net_handle_tx_wake_timer;
1226                tx_wake->dev = dev;
1227        }
1228
1229        for_each_online_cpu(cpu)
1230                netif_start_subqueue(dev, cpu);
1231        netif_carrier_on(dev);
1232        return 0;
1233
1234fail:
1235        if (priv->loopify_channel >= 0) {
1236                if (gxio_mpipe_link_close(&priv->loopify_link) != 0)
1237                        netdev_warn(dev, "Failed to close loopify link!\n");
1238                priv->loopify_channel = -1;
1239        }
 1240        if (priv->channel >= 0) {
 1241                if (gxio_mpipe_link_close(&priv->link) != 0)
 1242                        netdev_warn(dev, "Failed to close link!\n");
 1243                tile_net_devs_for_channel[priv->channel] = NULL;
 1244                priv->channel = -1;
 1245        }
 1246        priv->echannel = -1;
1247        mutex_unlock(&tile_net_devs_for_channel_mutex);
1248
1249        /* Don't return raw gxio error codes to generic Linux. */
1250        return (rc > -512) ? rc : -EIO;
1251}
1252
1253/* Help the kernel deactivate the given network interface. */
1254static int tile_net_stop(struct net_device *dev)
1255{
1256        struct tile_net_priv *priv = netdev_priv(dev);
1257        int cpu;
1258
1259        for_each_online_cpu(cpu) {
1260                struct tile_net_info *info = &per_cpu(per_cpu_info, cpu);
1261                struct tile_net_tx_wake *tx_wake =
1262                        &info->tx_wake[priv->echannel];
1263
1264                hrtimer_cancel(&tx_wake->timer);
1265                netif_stop_subqueue(dev, cpu);
1266        }
1267
1268        mutex_lock(&tile_net_devs_for_channel_mutex);
1269        tile_net_devs_for_channel[priv->channel] = NULL;
1270        (void)tile_net_update(dev);
1271        if (priv->loopify_channel >= 0) {
1272                if (gxio_mpipe_link_close(&priv->loopify_link) != 0)
1273                        netdev_warn(dev, "Failed to close loopify link!\n");
1274                priv->loopify_channel = -1;
1275        }
1276        if (priv->channel >= 0) {
1277                if (gxio_mpipe_link_close(&priv->link) != 0)
1278                        netdev_warn(dev, "Failed to close link!\n");
1279                priv->channel = -1;
1280        }
1281        priv->echannel = -1;
1282        mutex_unlock(&tile_net_devs_for_channel_mutex);
1283
1284        return 0;
1285}
1286
1287/* Determine the VA for a fragment. */
1288static inline void *tile_net_frag_buf(skb_frag_t *f)
1289{
1290        unsigned long pfn = page_to_pfn(skb_frag_page(f));
1291        return pfn_to_kaddr(pfn) + f->page_offset;
1292}
1293
1294/* Acquire a completion entry and an egress slot, or if we can't,
1295 * stop the queue and schedule the tx_wake timer.
1296 */
1297static s64 tile_net_equeue_try_reserve(struct net_device *dev,
1298                                       int tx_queue_idx,
1299                                       struct tile_net_comps *comps,
1300                                       gxio_mpipe_equeue_t *equeue,
1301                                       int num_edescs)
1302{
1303        /* Try to acquire a completion entry. */
1304        if (comps->comp_next - comps->comp_last < TILE_NET_MAX_COMPS - 1 ||
1305            tile_net_free_comps(equeue, comps, 32, false) != 0) {
1306
1307                /* Try to acquire an egress slot. */
1308                s64 slot = gxio_mpipe_equeue_try_reserve(equeue, num_edescs);
1309                if (slot >= 0)
1310                        return slot;
1311
1312                /* Freeing some completions gives the equeue time to drain. */
1313                tile_net_free_comps(equeue, comps, TILE_NET_MAX_COMPS, false);
1314
1315                slot = gxio_mpipe_equeue_try_reserve(equeue, num_edescs);
1316                if (slot >= 0)
1317                        return slot;
1318        }
1319
1320        /* Still nothing; give up and stop the queue for a short while. */
1321        netif_stop_subqueue(dev, tx_queue_idx);
1322        tile_net_schedule_tx_wake_timer(dev, tx_queue_idx);
1323        return -1;
1324}
1325
1326/* Determine how many edesc's are needed for TSO.
1327 *
1328 * Sometimes, if "sendfile()" requires copying, we will be called with
1329 * "data" containing the header and payload, with "frags" being empty.
1330 * Sometimes, for example when using NFS over TCP, a single segment can
1331 * span 3 fragments.  This requires special care.
1332 */
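     /* Worked example (assumed sizes): with a gso_size of 1400 and a
      * 4200-byte payload split as 1000 linear bytes plus one 3200-byte
      * fragment, segment 1 needs a header edesc plus two payload edescs
      * (1000 linear bytes, then 400 from the fragment), while segments 2
      * and 3 each need a header edesc plus one payload edesc (1400 bytes
      * from the fragment), for 7 edescs in total.
      */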
1333static int tso_count_edescs(struct sk_buff *skb)
1334{
1335        struct skb_shared_info *sh = skb_shinfo(skb);
1336        unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
1337        unsigned int data_len = skb->len - sh_len;
1338        unsigned int p_len = sh->gso_size;
1339        long f_id = -1;    /* id of the current fragment */
1340        long f_size = skb_headlen(skb) - sh_len;  /* current fragment size */
1341        long f_used = 0;  /* bytes used from the current fragment */
1342        long n;            /* size of the current piece of payload */
1343        int num_edescs = 0;
1344        int segment;
1345
1346        for (segment = 0; segment < sh->gso_segs; segment++) {
1347
1348                unsigned int p_used = 0;
1349
1350                /* One edesc for header and for each piece of the payload. */
1351                for (num_edescs++; p_used < p_len; num_edescs++) {
1352
1353                        /* Advance as needed. */
1354                        while (f_used >= f_size) {
1355                                f_id++;
1356                                f_size = skb_frag_size(&sh->frags[f_id]);
1357                                f_used = 0;
1358                        }
1359
1360                        /* Use bytes from the current fragment. */
1361                        n = p_len - p_used;
1362                        if (n > f_size - f_used)
1363                                n = f_size - f_used;
1364                        f_used += n;
1365                        p_used += n;
1366                }
1367
1368                /* The last segment may be less than gso_size. */
1369                data_len -= p_len;
1370                if (data_len < p_len)
1371                        p_len = data_len;
1372        }
1373
1374        return num_edescs;
1375}
1376
1377/* Prepare modified copies of the skbuff headers.
1378 * FIXME: add support for IPv6.
1379 */
1380static void tso_headers_prepare(struct sk_buff *skb, unsigned char *headers,
1381                                s64 slot)
1382{
1383        struct skb_shared_info *sh = skb_shinfo(skb);
1384        struct iphdr *ih;
1385        struct tcphdr *th;
1386        unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
1387        unsigned int data_len = skb->len - sh_len;
1388        unsigned char *data = skb->data;
1389        unsigned int ih_off, th_off, p_len;
1390        unsigned int isum_seed, tsum_seed, id, seq;
1391        long f_id = -1;    /* id of the current fragment */
1392        long f_size = skb_headlen(skb) - sh_len;  /* current fragment size */
1393        long f_used = 0;  /* bytes used from the current fragment */
1394        long n;            /* size of the current piece of payload */
1395        int segment;
1396
1397        /* Locate original headers and compute various lengths. */
1398        ih = ip_hdr(skb);
1399        th = tcp_hdr(skb);
1400        ih_off = skb_network_offset(skb);
1401        th_off = skb_transport_offset(skb);
1402        p_len = sh->gso_size;
1403
1404        /* Set up seed values for IP and TCP csum and initialize id and seq. */
1405        isum_seed = ((0xFFFF - ih->check) +
1406                     (0xFFFF - ih->tot_len) +
1407                     (0xFFFF - ih->id));
1408        tsum_seed = th->check + (0xFFFF ^ htons(skb->len));
1409        id = ntohs(ih->id);
1410        seq = ntohl(th->seq);
1411
1412        /* Prepare all the headers. */
1413        for (segment = 0; segment < sh->gso_segs; segment++) {
1414                unsigned char *buf;
1415                unsigned int p_used = 0;
1416
1417                /* Copy to the header memory for this segment. */
1418                buf = headers + (slot % EQUEUE_ENTRIES) * HEADER_BYTES +
1419                        NET_IP_ALIGN;
1420                memcpy(buf, data, sh_len);
1421
1422                /* Update copied ip header. */
1423                ih = (struct iphdr *)(buf + ih_off);
1424                ih->tot_len = htons(sh_len + p_len - ih_off);
1425                ih->id = htons(id);
1426                ih->check = csum_long(isum_seed + ih->tot_len +
1427                                      ih->id) ^ 0xffff;
1428
1429                /* Update copied tcp header. */
1430                th = (struct tcphdr *)(buf + th_off);
1431                th->seq = htonl(seq);
1432                th->check = csum_long(tsum_seed + htons(sh_len + p_len));
1433                if (segment != sh->gso_segs - 1) {
1434                        th->fin = 0;
1435                        th->psh = 0;
1436                }
1437
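                    /* tso_egress() writes the actual edescs; here "slot" only
                     * advances in lockstep so that the per-slot header buffers
                     * above line up with the slots used there.
                     */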
1438                /* Skip past the header. */
1439                slot++;
1440
1441                /* Skip past the payload. */
1442                while (p_used < p_len) {
1443
1444                        /* Advance as needed. */
1445                        while (f_used >= f_size) {
1446                                f_id++;
1447                                f_size = skb_frag_size(&sh->frags[f_id]);
1448                                f_used = 0;
1449                        }
1450
1451                        /* Use bytes from the current fragment. */
1452                        n = p_len - p_used;
1453                        if (n > f_size - f_used)
1454                                n = f_size - f_used;
1455                        f_used += n;
1456                        p_used += n;
1457
1458                        slot++;
1459                }
1460
1461                id++;
1462                seq += p_len;
1463
1464                /* The last segment may be less than gso_size. */
1465                data_len -= p_len;
1466                if (data_len < p_len)
1467                        p_len = data_len;
1468        }
1469
1470        /* Flush the headers so they are ready for hardware DMA. */
1471        wmb();
1472}
1473
1474/* Pass all the data to mpipe for egress. */
1475static void tso_egress(struct net_device *dev, gxio_mpipe_equeue_t *equeue,
1476                       struct sk_buff *skb, unsigned char *headers, s64 slot)
1477{
1478        struct tile_net_priv *priv = netdev_priv(dev);
1479        struct skb_shared_info *sh = skb_shinfo(skb);
1480        unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
1481        unsigned int data_len = skb->len - sh_len;
1482        unsigned int p_len = sh->gso_size;
1483        gxio_mpipe_edesc_t edesc_head = { { 0 } };
1484        gxio_mpipe_edesc_t edesc_body = { { 0 } };
1485        long f_id = -1;    /* id of the current fragment */
1486        long f_size = skb_headlen(skb) - sh_len;  /* current fragment size */
1487        long f_used = 0;  /* bytes used from the current fragment */
1488        void *f_data = skb->data + sh_len;
1489        long n;            /* size of the current piece of payload */
1490        unsigned long tx_packets = 0, tx_bytes = 0;
1491        unsigned int csum_start;
1492        int segment;
1493
1494        /* Prepare to egress the headers: set up header edesc. */
1495        csum_start = skb_checksum_start_offset(skb);
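            /* Have the hardware finish each segment's TCP checksum, starting
             * at csum_start and storing the result at csum_dest (the TCP
             * check field in the copied header).
             */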
1496        edesc_head.csum = 1;
1497        edesc_head.csum_start = csum_start;
1498        edesc_head.csum_dest = csum_start + skb->csum_offset;
1499        edesc_head.xfer_size = sh_len;
1500
1501        /* This is only used to specify the TLB. */
1502        edesc_head.stack_idx = large_buffer_stack;
1503        edesc_body.stack_idx = large_buffer_stack;
1504
1505        /* Egress all the edescs. */
1506        for (segment = 0; segment < sh->gso_segs; segment++) {
1507                unsigned char *buf;
1508                unsigned int p_used = 0;
1509
1510                /* Egress the header. */
1511                buf = headers + (slot % EQUEUE_ENTRIES) * HEADER_BYTES +
1512                        NET_IP_ALIGN;
1513                edesc_head.va = va_to_tile_io_addr(buf);
1514                gxio_mpipe_equeue_put_at(equeue, edesc_head, slot);
1515                slot++;
1516
1517                /* Egress the payload. */
1518                while (p_used < p_len) {
1519                        void *va;
1520
1521                        /* Advance as needed. */
1522                        while (f_used >= f_size) {
1523                                f_id++;
1524                                f_size = skb_frag_size(&sh->frags[f_id]);
1525                                f_data = tile_net_frag_buf(&sh->frags[f_id]);
1526                                f_used = 0;
1527                        }
1528
1529                        va = f_data + f_used;
1530
1531                        /* Use bytes from the current fragment. */
1532                        n = p_len - p_used;
1533                        if (n > f_size - f_used)
1534                                n = f_size - f_used;
1535                        f_used += n;
1536                        p_used += n;
1537
1538                        /* Egress a piece of the payload. */
1539                        edesc_body.va = va_to_tile_io_addr(va);
1540                        edesc_body.xfer_size = n;
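                            /* "bound" marks the last edesc of this segment. */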
1541                        edesc_body.bound = !(p_used < p_len);
1542                        gxio_mpipe_equeue_put_at(equeue, edesc_body, slot);
1543                        slot++;
1544                }
1545
1546                tx_packets++;
1547                tx_bytes += sh_len + p_len;
1548
1549                /* The last segment may be less than gso_size. */
1550                data_len -= p_len;
1551                if (data_len < p_len)
1552                        p_len = data_len;
1553        }
1554
1555        /* Update stats. */
1556        tile_net_stats_add(tx_packets, &priv->stats.tx_packets);
1557        tile_net_stats_add(tx_bytes, &priv->stats.tx_bytes);
1558}
1559
1560/* Do "TSO" handling for egress.
1561 *
1562 * Normally drivers set NETIF_F_TSO only to support hardware TSO;
1563 * otherwise the stack uses scatter-gather to implement GSO in software.
1564 * In our testing, enabling GSO support (via NETIF_F_SG) drops network
1565 * performance to around 7.5 Gbps on the 10G interfaces, although it
1566 * also drops cpu utilization to under 8%.  Implementing "TSO" in the
1567 * driver brings performance back up to line rate, while dropping cpu
1568 * usage even further, to less than 4%.  In practice, profiling of GSO
1569 * shows that skb_segment() is what causes the performance overhead;
1570 * in the driver we benefit from using preallocated memory to duplicate
1571 * the TCP/IP headers.
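     *
     * Each segment is egressed as one edesc pointing at its prepared
     * header copy, followed by one or more edescs pointing into the
     * original skb head/frags for the payload, with the last payload
     * edesc marked "bound".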
1572 */
1573static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev)
1574{
1575        struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
1576        struct tile_net_priv *priv = netdev_priv(dev);
1577        int channel = priv->echannel;
1578        struct tile_net_egress *egress = &egress_for_echannel[channel];
1579        struct tile_net_comps *comps = info->comps_for_echannel[channel];
1580        gxio_mpipe_equeue_t *equeue = egress->equeue;
1581        unsigned long irqflags;
1582        int num_edescs;
1583        s64 slot;
1584
1585        /* Determine how many mpipe edesc's are needed. */
1586        num_edescs = tso_count_edescs(skb);
1587
1588        local_irq_save(irqflags);
1589
1590        /* Try to acquire a completion entry and an egress slot. */
1591        slot = tile_net_equeue_try_reserve(dev, skb->queue_mapping, comps,
1592                                           equeue, num_edescs);
1593        if (slot < 0) {
1594                local_irq_restore(irqflags);
1595                return NETDEV_TX_BUSY;
1596        }
1597
1598        /* Set up copies of header data properly. */
1599        tso_headers_prepare(skb, egress->headers, slot);
1600
1601        /* Actually pass the data to the network hardware. */
1602        tso_egress(dev, equeue, skb, egress->headers, slot);
1603
1604        /* Add a completion record. */
1605        add_comp(equeue, comps, slot + num_edescs - 1, skb);
1606
1607        local_irq_restore(irqflags);
1608
1609        /* Make sure the egress timer is scheduled. */
1610        tile_net_schedule_egress_timer();
1611
1612        return NETDEV_TX_OK;
1613}
1614
1615/* Analyze the body and frags for a transmit request. */
1616static unsigned int tile_net_tx_frags(struct frag *frags,
1617                                       struct sk_buff *skb,
1618                                       void *b_data, unsigned int b_len)
1619{
1620        unsigned int i, n = 0;
1621
1622        struct skb_shared_info *sh = skb_shinfo(skb);
1623
1624        if (b_len != 0) {
1625                frags[n].buf = b_data;
1626                frags[n++].length = b_len;
1627        }
1628
1629        for (i = 0; i < sh->nr_frags; i++) {
1630                skb_frag_t *f = &sh->frags[i];
1631                frags[n].buf = tile_net_frag_buf(f);
1632                frags[n++].length = skb_frag_size(f);
1633        }
1634
1635        return n;
1636}
1637
1638/* Help the kernel transmit a packet. */
1639static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
1640{
1641        struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
1642        struct tile_net_priv *priv = netdev_priv(dev);
1643        struct tile_net_egress *egress = &egress_for_echannel[priv->echannel];
1644        gxio_mpipe_equeue_t *equeue = egress->equeue;
1645        struct tile_net_comps *comps =
1646                info->comps_for_echannel[priv->echannel];
1647        unsigned int len = skb->len;
1648        unsigned char *data = skb->data;
1649        unsigned int num_edescs;
1650        struct frag frags[MAX_FRAGS];
1651        gxio_mpipe_edesc_t edescs[MAX_FRAGS];
1652        unsigned long irqflags;
1653        gxio_mpipe_edesc_t edesc = { { 0 } };
1654        unsigned int i;
1655        s64 slot;
1656
1657        if (skb_is_gso(skb))
1658                return tile_net_tx_tso(skb, dev);
1659
1660        num_edescs = tile_net_tx_frags(frags, skb, data, skb_headlen(skb));
1661
1662        /* This is only used to specify the TLB. */
1663        edesc.stack_idx = large_buffer_stack;
1664
1665        /* Prepare the edescs. */
1666        for (i = 0; i < num_edescs; i++) {
1667                edesc.xfer_size = frags[i].length;
1668                edesc.va = va_to_tile_io_addr(frags[i].buf);
1669                edescs[i] = edesc;
1670        }
1671
1672        /* Mark the final edesc. */
1673        edescs[num_edescs - 1].bound = 1;
1674
1675        /* Add checksum info to the initial edesc, if needed. */
1676        if (skb->ip_summed == CHECKSUM_PARTIAL) {
1677                unsigned int csum_start = skb_checksum_start_offset(skb);
1678                edescs[0].csum = 1;
1679                edescs[0].csum_start = csum_start;
1680                edescs[0].csum_dest = csum_start + skb->csum_offset;
1681        }
1682
1683        local_irq_save(irqflags);
1684
1685        /* Try to acquire a completion entry and an egress slot. */
1686        slot = tile_net_equeue_try_reserve(dev, skb->queue_mapping, comps,
1687                                           equeue, num_edescs);
1688        if (slot < 0) {
1689                local_irq_restore(irqflags);
1690                return NETDEV_TX_BUSY;
1691        }
1692
1693        for (i = 0; i < num_edescs; i++)
1694                gxio_mpipe_equeue_put_at(equeue, edescs[i], slot++);
1695
1696        /* Add a completion record. */
1697        add_comp(equeue, comps, slot - 1, skb);
1698
1699        /* NOTE: Short packets count as ETH_ZLEN bytes (e.g. a 42-byte ARP counts as 60). */
1700        tile_net_stats_add(1, &priv->stats.tx_packets);
1701        tile_net_stats_add(max_t(unsigned int, len, ETH_ZLEN),
1702                           &priv->stats.tx_bytes);
1703
1704        local_irq_restore(irqflags);
1705
1706        /* Make sure the egress timer is scheduled. */
1707        tile_net_schedule_egress_timer();
1708
1709        return NETDEV_TX_OK;
1710}
1711
1712/* Return the tx subqueue id to use on this core (one subqueue per core). */
1713static u16 tile_net_select_queue(struct net_device *dev, struct sk_buff *skb)
1714{
1715        return smp_processor_id();
1716}
1717
1718/* Deal with a transmit timeout. */
1719static void tile_net_tx_timeout(struct net_device *dev)
1720{
1721        int cpu;
1722
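            /* Wake every per-cpu tx subqueue so any stopped queue can resume. */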
1723        for_each_online_cpu(cpu)
1724                netif_wake_subqueue(dev, cpu);
1725}
1726
1727/* Ioctl commands. */
1728static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
1729{
1730        return -EOPNOTSUPP;
1731}
1732
1733/* Get system network statistics for device. */
1734static struct net_device_stats *tile_net_get_stats(struct net_device *dev)
1735{
1736        struct tile_net_priv *priv = netdev_priv(dev);
1737        return &priv->stats;
1738}
1739
1740/* Change the MTU. */
1741static int tile_net_change_mtu(struct net_device *dev, int new_mtu)
1742{
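            /* 68 is the minimum MTU required for IPv4; 1500 is the standard
             * Ethernet MTU, since this driver does not handle jumbo frames.
             */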
1743        if ((new_mtu < 68) || (new_mtu > 1500))
1744                return -EINVAL;
1745        dev->mtu = new_mtu;
1746        return 0;
1747}
1748
1749/* Change the Ethernet address of the NIC.
1750 *
1751 * The hypervisor driver does not support changing the MAC address.
1752 * However, the hardware does not do anything with the MAC address, so
1753 * the address used on outgoing packets, and accepted on incoming
1754 * packets, is entirely up to us.
1755 *
1756 * Returns 0 on success, negative on failure.
1757 */
1758static int tile_net_set_mac_address(struct net_device *dev, void *p)
1759{
1760        struct sockaddr *addr = p;
1761
1762        if (!is_valid_ether_addr(addr->sa_data))
1763                return -EINVAL;
1764        memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1765        return 0;
1766}
1767
1768#ifdef CONFIG_NET_POLL_CONTROLLER
1769/* Polling 'interrupt' - used by things like netconsole to send skbs
1770 * without having to re-enable interrupts. It's not called while
1771 * the interrupt routine is executing.
1772 */
1773static void tile_net_netpoll(struct net_device *dev)
1774{
1775        disable_percpu_irq(ingress_irq);
1776        tile_net_handle_ingress_irq(ingress_irq, NULL);
1777        enable_percpu_irq(ingress_irq, 0);
1778}
1779#endif
1780
1781static const struct net_device_ops tile_net_ops = {
1782        .ndo_open = tile_net_open,
1783        .ndo_stop = tile_net_stop,
1784        .ndo_start_xmit = tile_net_tx,
1785        .ndo_select_queue = tile_net_select_queue,
1786        .ndo_do_ioctl = tile_net_ioctl,
1787        .ndo_get_stats = tile_net_get_stats,
1788        .ndo_change_mtu = tile_net_change_mtu,
1789        .ndo_tx_timeout = tile_net_tx_timeout,
1790        .ndo_set_mac_address = tile_net_set_mac_address,
1791#ifdef CONFIG_NET_POLL_CONTROLLER
1792        .ndo_poll_controller = tile_net_netpoll,
1793#endif
1794};
1795
1796/* The setup function.
1797 *
1798 * This uses ether_setup() to assign various fields in dev, including
1799 * setting IFF_BROADCAST and IFF_MULTICAST, then sets some extra fields.
1800 */
1801static void tile_net_setup(struct net_device *dev)
1802{
1803        ether_setup(dev);
1804        dev->netdev_ops = &tile_net_ops;
1805        dev->watchdog_timeo = TILE_NET_TIMEOUT;
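            /* NETIF_F_LLTX: the stack does not take its xmit lock; the
             * driver handles its own tx serialization.
             */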
1806        dev->features |= NETIF_F_LLTX;
1807        dev->features |= NETIF_F_HW_CSUM;
1808        dev->features |= NETIF_F_SG;
1809        dev->features |= NETIF_F_TSO;
1810        dev->mtu = 1500;
1811}
1812
1813/* Allocate the device structure, register the device, and obtain the
1814 * MAC address from the hypervisor.
1815 */
1816static void tile_net_dev_init(const char *name, const uint8_t *mac)
1817{
1818        int ret;
1819        int i;
1820        int nz_addr = 0;
1821        struct net_device *dev;
1822        struct tile_net_priv *priv;
1823
1824        /* HACK: Ignore "loop" links. */
1825        if (strncmp(name, "loop", 4) == 0)
1826                return;
1827
1828        /* Allocate the device structure.  Normally, "name" is a
1829         * template, instantiated by register_netdev(), but not for us.
1830         */
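            /* One tx queue per cpu (see tile_net_select_queue()), one rx queue. */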
1831        dev = alloc_netdev_mqs(sizeof(*priv), name, tile_net_setup,
1832                               NR_CPUS, 1);
1833        if (!dev) {
1834                pr_err("alloc_netdev_mqs(%s) failed\n", name);
1835                return;
1836        }
1837
1838        /* Initialize "priv". */
1839        priv = netdev_priv(dev);
1840        memset(priv, 0, sizeof(*priv));
1841        priv->dev = dev;
1842        priv->channel = -1;
1843        priv->loopify_channel = -1;
1844        priv->echannel = -1;
1845
1846        /* Get the MAC address and set it in the device struct; this must
1847         * be done before the device is opened.  If the MAC is all zeroes,
1848         * we use a random address, since we're probably on the simulator.
1849         */
1850        for (i = 0; i < 6; i++)
1851                nz_addr |= mac[i];
1852
1853        if (nz_addr) {
1854                memcpy(dev->dev_addr, mac, 6);
1855                dev->addr_len = 6;
1856        } else {
1857                eth_hw_addr_random(dev);
1858        }
1859
1860        /* Register the network device. */
1861        ret = register_netdev(dev);
1862        if (ret) {
1863                netdev_err(dev, "register_netdev failed %d\n", ret);
1864                free_netdev(dev);
1865                return;
1866        }
1867}
1868
1869/* Per-cpu module initialization. */
1870static void tile_net_init_module_percpu(void *unused)
1871{
1872        struct tile_net_info *info = &__get_cpu_var(per_cpu_info);
1873        int my_cpu = smp_processor_id();
1874
1875        info->has_iqueue = false;
1876
1877        info->my_cpu = my_cpu;
1878
1879        /* Initialize the egress timer. */
1880        hrtimer_init(&info->egress_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1881        info->egress_timer.function = tile_net_handle_egress_timer;
1882}
1883
1884/* Module initialization. */
1885static int __init tile_net_init_module(void)
1886{
1887        int i;
1888        char name[GXIO_MPIPE_LINK_NAME_LEN];
1889        uint8_t mac[6];
1890
1891        pr_info("Tilera Network Driver\n");
1892
1893        mutex_init(&tile_net_devs_for_channel_mutex);
1894
1895        /* Initialize each CPU. */
1896        on_each_cpu(tile_net_init_module_percpu, NULL, 1);
1897
1898        /* Find out what devices we have, and initialize them. */
1899        for (i = 0; gxio_mpipe_link_enumerate_mac(i, name, mac) >= 0; i++)
1900                tile_net_dev_init(name, mac);
1901
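            /* Without a valid network cpus configuration, fall back to all
             * online cpus.
             */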
1902        if (!network_cpus_init())
1903                network_cpus_map = *cpu_online_mask;
1904
1905        return 0;
1906}
1907
1908module_init(tile_net_init_module);
1909