linux/drivers/net/ethernet/sfc/efx.c
   1/****************************************************************************
   2 * Driver for Solarflare network controllers and boards
   3 * Copyright 2005-2006 Fen Systems Ltd.
   4 * Copyright 2005-2013 Solarflare Communications Inc.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License version 2 as published
   8 * by the Free Software Foundation, incorporated herein by reference.
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/pci.h>
  13#include <linux/netdevice.h>
  14#include <linux/etherdevice.h>
  15#include <linux/delay.h>
  16#include <linux/notifier.h>
  17#include <linux/ip.h>
  18#include <linux/tcp.h>
  19#include <linux/in.h>
  20#include <linux/ethtool.h>
  21#include <linux/topology.h>
  22#include <linux/gfp.h>
  23#include <linux/aer.h>
  24#include <linux/interrupt.h>
  25#include "net_driver.h"
  26#include "efx.h"
  27#include "nic.h"
  28#include "selftest.h"
  29#include "sriov.h"
  30
  31#include "mcdi.h"
  32#include "workarounds.h"
  33
  34/**************************************************************************
  35 *
  36 * Type name strings
  37 *
  38 **************************************************************************
  39 */
  40
  41/* Loopback mode names (see LOOPBACK_MODE()) */
  42const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
  43const char *const efx_loopback_mode_names[] = {
  44        [LOOPBACK_NONE]         = "NONE",
  45        [LOOPBACK_DATA]         = "DATAPATH",
  46        [LOOPBACK_GMAC]         = "GMAC",
  47        [LOOPBACK_XGMII]        = "XGMII",
  48        [LOOPBACK_XGXS]         = "XGXS",
  49        [LOOPBACK_XAUI]         = "XAUI",
  50        [LOOPBACK_GMII]         = "GMII",
  51        [LOOPBACK_SGMII]        = "SGMII",
  52        [LOOPBACK_XGBR]         = "XGBR",
  53        [LOOPBACK_XFI]          = "XFI",
  54        [LOOPBACK_XAUI_FAR]     = "XAUI_FAR",
  55        [LOOPBACK_GMII_FAR]     = "GMII_FAR",
  56        [LOOPBACK_SGMII_FAR]    = "SGMII_FAR",
  57        [LOOPBACK_XFI_FAR]      = "XFI_FAR",
  58        [LOOPBACK_GPHY]         = "GPHY",
  59        [LOOPBACK_PHYXS]        = "PHYXS",
  60        [LOOPBACK_PCS]          = "PCS",
  61        [LOOPBACK_PMAPMD]       = "PMA/PMD",
  62        [LOOPBACK_XPORT]        = "XPORT",
  63        [LOOPBACK_XGMII_WS]     = "XGMII_WS",
  64        [LOOPBACK_XAUI_WS]      = "XAUI_WS",
  65        [LOOPBACK_XAUI_WS_FAR]  = "XAUI_WS_FAR",
  66        [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
  67        [LOOPBACK_GMII_WS]      = "GMII_WS",
  68        [LOOPBACK_XFI_WS]       = "XFI_WS",
  69        [LOOPBACK_XFI_WS_FAR]   = "XFI_WS_FAR",
  70        [LOOPBACK_PHYXS_WS]     = "PHYXS_WS",
  71};
  72
  73const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
  74const char *const efx_reset_type_names[] = {
  75        [RESET_TYPE_INVISIBLE]          = "INVISIBLE",
  76        [RESET_TYPE_ALL]                = "ALL",
  77        [RESET_TYPE_RECOVER_OR_ALL]     = "RECOVER_OR_ALL",
  78        [RESET_TYPE_WORLD]              = "WORLD",
  79        [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
  80        [RESET_TYPE_DATAPATH]           = "DATAPATH",
  81        [RESET_TYPE_MC_BIST]            = "MC_BIST",
  82        [RESET_TYPE_DISABLE]            = "DISABLE",
  83        [RESET_TYPE_TX_WATCHDOG]        = "TX_WATCHDOG",
  84        [RESET_TYPE_INT_ERROR]          = "INT_ERROR",
  85        [RESET_TYPE_RX_RECOVERY]        = "RX_RECOVERY",
  86        [RESET_TYPE_DMA_ERROR]          = "DMA_ERROR",
  87        [RESET_TYPE_TX_SKIP]            = "TX_SKIP",
  88        [RESET_TYPE_MC_FAILURE]         = "MC_FAILURE",
  89        [RESET_TYPE_MCDI_TIMEOUT]       = "MCDI_TIMEOUT (FLR)",
  90};
  91
  92/* Reset workqueue. If any NIC has a hardware failure then a reset will be
  93 * queued onto this work queue. This is not a per-nic work queue, because
  94 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
  95 */
  96static struct workqueue_struct *reset_workqueue;
  97
  98/* How often and how many times to poll for a reset while waiting for a
  99 * BIST that another function started to complete.
 100 */
 101#define BIST_WAIT_DELAY_MS      100
 102#define BIST_WAIT_DELAY_COUNT   100
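/* Editorial note (not in the original source): taken together these two
 * values imply a maximum wait of BIST_WAIT_DELAY_COUNT * BIST_WAIT_DELAY_MS
 * = 100 * 100 ms, i.e. roughly 10 seconds, before the driver stops polling
 * for a BIST started by another function to complete.
 */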
 103
 104/**************************************************************************
 105 *
 106 * Configurable values
 107 *
 108 *************************************************************************/
 109
 110/*
 111 * Use separate channels for TX and RX events
 112 *
 113 * Set this to 1 to use separate channels for TX and RX. It allows us
 114 * to control interrupt affinity separately for TX and RX.
 115 *
 116 * This is only used in MSI-X interrupt mode
 117 */
 118bool efx_separate_tx_channels;
 119module_param(efx_separate_tx_channels, bool, 0444);
 120MODULE_PARM_DESC(efx_separate_tx_channels,
 121                 "Use separate channels for TX and RX");
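/* Illustrative usage (editorial addition, not part of the driver source):
 * the parameter is registered with mode 0444, so it is read-only through
 * sysfs and must be set when the module is loaded, e.g.
 *
 *   modprobe sfc efx_separate_tx_channels=1
 *
 * which makes each event channel handle either TX or RX completions, but
 * not both, when MSI-X is in use.
 */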
 122
 123/* This is the weight assigned to each of the (per-channel) virtual
 124 * NAPI devices.
 125 */
 126static int napi_weight = 64;
 127
 128/* This is the time (in jiffies) between invocations of the hardware
 129 * monitor.
 130 * On Falcon-based NICs, this will:
 131 * - Check the on-board hardware monitor;
 132 * - Poll the link state and reconfigure the hardware as necessary.
 133 * On Siena-based NICs for power systems with EEH support, this will give EEH a
 134 * chance to start.
 135 */
 136static unsigned int efx_monitor_interval = 1 * HZ;
 137
 138/* Initial interrupt moderation settings.  They can be modified after
 139 * module load with ethtool.
 140 *
 141 * The default for RX should strike a balance between increasing the
 142 * round-trip latency and reducing overhead.
 143 */
 144static unsigned int rx_irq_mod_usec = 60;
 145
 146/* Initial interrupt moderation settings.  They can be modified after
 147 * module load with ethtool.
 148 *
 149 * This default is chosen to ensure that a 10G link does not go idle
 150 * while a TX queue is stopped after it has become full.  A queue is
 151 * restarted when it drops below half full.  The time this takes (assuming
 152 * worst case 3 descriptors per packet and 1024 descriptors) is
 153 *   512 / 3 * 1.2 = 205 usec.
 154 */
 155static unsigned int tx_irq_mod_usec = 150;
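/* Editorial note (assumption, not in the original source): the 1.2 usec
 * factor above is consistent with the wire time of a full-sized ~1500 byte
 * frame at 10Gb/s, so the figure is the time to transmit the ~170
 * worst-case packets (512 descriptors / 3 descriptors per packet) drained
 * from the half-full ring before the queue is woken again.
 */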
 156
 157/* This is the first interrupt mode to try out of:
 158 * 0 => MSI-X
 159 * 1 => MSI
 160 * 2 => legacy
 161 */
 162static unsigned int interrupt_mode;
 163
 164/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
 165 * i.e. the number of CPUs among which we may distribute simultaneous
 166 * interrupt handling.
 167 *
 168 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
 169 * The default (0) means to assign an interrupt to each core.
 170 */
 171static unsigned int rss_cpus;
 172module_param(rss_cpus, uint, 0444);
 173MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
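/* Illustrative usage (editorial addition, not part of the driver source):
 * rss_cpus is likewise mode 0444, so it can only be changed at load time,
 * e.g.
 *
 *   modprobe sfc rss_cpus=4
 *
 * which caps RSS at four receive channels rather than one per CPU core.
 */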
 174
 175static bool phy_flash_cfg;
 176module_param(phy_flash_cfg, bool, 0644);
 177MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");
 178
 179static unsigned irq_adapt_low_thresh = 8000;
 180module_param(irq_adapt_low_thresh, uint, 0644);
 181MODULE_PARM_DESC(irq_adapt_low_thresh,
 182                 "Threshold score for reducing IRQ moderation");
 183
 184static unsigned irq_adapt_high_thresh = 16000;
 185module_param(irq_adapt_high_thresh, uint, 0644);
 186MODULE_PARM_DESC(irq_adapt_high_thresh,
 187                 "Threshold score for increasing IRQ moderation");
 188
 189static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
 190                         NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
 191                         NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
 192                         NETIF_MSG_TX_ERR | NETIF_MSG_HW);
 193module_param(debug, uint, 0);
 194MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
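/* Illustrative usage (editorial addition, not part of the driver source):
 * the value is a bitmap of NETIF_MSG_* flags from <linux/netdevice.h>; the
 * default above works out to 0x20f7.  It can be overridden at load time,
 * e.g.
 *
 *   modprobe sfc debug=0x00f7
 *
 * (everything above except NETIF_MSG_HW), and for drivers such as this one
 * that implement get/set_msglevel it can also be changed per interface at
 * runtime with "ethtool -s <ifname> msglvl <value>".
 */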
 195
 196/**************************************************************************
 197 *
 198 * Utility functions and prototypes
 199 *
 200 *************************************************************************/
 201
 202static int efx_soft_enable_interrupts(struct efx_nic *efx);
 203static void efx_soft_disable_interrupts(struct efx_nic *efx);
 204static void efx_remove_channel(struct efx_channel *channel);
 205static void efx_remove_channels(struct efx_nic *efx);
 206static const struct efx_channel_type efx_default_channel_type;
 207static void efx_remove_port(struct efx_nic *efx);
 208static void efx_init_napi_channel(struct efx_channel *channel);
 209static void efx_fini_napi(struct efx_nic *efx);
 210static void efx_fini_napi_channel(struct efx_channel *channel);
 211static void efx_fini_struct(struct efx_nic *efx);
 212static void efx_start_all(struct efx_nic *efx);
 213static void efx_stop_all(struct efx_nic *efx);
 214
 215#define EFX_ASSERT_RESET_SERIALISED(efx)                \
 216        do {                                            \
 217                if ((efx->state == STATE_READY) ||      \
 218                    (efx->state == STATE_RECOVERY) ||   \
 219                    (efx->state == STATE_DISABLED))     \
 220                        ASSERT_RTNL();                  \
 221        } while (0)
 222
 223static int efx_check_disabled(struct efx_nic *efx)
 224{
 225        if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) {
 226                netif_err(efx, drv, efx->net_dev,
 227                          "device is disabled due to earlier errors\n");
 228                return -EIO;
 229        }
 230        return 0;
 231}
 232
 233/**************************************************************************
 234 *
 235 * Event queue processing
 236 *
 237 *************************************************************************/
 238
 239/* Process channel's event queue
 240 *
 241 * This function is responsible for processing the event queue of a
 242 * single channel.  The caller must guarantee that this function will
 243 * never be concurrently called more than once on the same channel,
 244 * though different channels may be being processed concurrently.
 245 */
 246static int efx_process_channel(struct efx_channel *channel, int budget)
 247{
 248        struct efx_tx_queue *tx_queue;
 249        int spent;
 250
 251        if (unlikely(!channel->enabled))
 252                return 0;
 253
 254        efx_for_each_channel_tx_queue(tx_queue, channel) {
 255                tx_queue->pkts_compl = 0;
 256                tx_queue->bytes_compl = 0;
 257        }
 258
 259        spent = efx_nic_process_eventq(channel, budget);
 260        if (spent && efx_channel_has_rx_queue(channel)) {
 261                struct efx_rx_queue *rx_queue =
 262                        efx_channel_get_rx_queue(channel);
 263
 264                efx_rx_flush_packet(channel);
 265                efx_fast_push_rx_descriptors(rx_queue, true);
 266        }
 267
 268        /* Update BQL */
 269        efx_for_each_channel_tx_queue(tx_queue, channel) {
 270                if (tx_queue->bytes_compl) {
 271                        netdev_tx_completed_queue(tx_queue->core_txq,
 272                                tx_queue->pkts_compl, tx_queue->bytes_compl);
 273                }
 274        }
 275
 276        return spent;
 277}
 278
 279/* NAPI poll handler
 280 *
 281 * NAPI guarantees serialisation of polls of the same device, which
 282 * provides the guarantee required by efx_process_channel().
 283 */
 284static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
 285{
 286        int step = efx->irq_mod_step_us;
 287
 288        if (channel->irq_mod_score < irq_adapt_low_thresh) {
 289                if (channel->irq_moderation_us > step) {
 290                        channel->irq_moderation_us -= step;
 291                        efx->type->push_irq_moderation(channel);
 292                }
 293        } else if (channel->irq_mod_score > irq_adapt_high_thresh) {
 294                if (channel->irq_moderation_us <
 295                    efx->irq_rx_moderation_us) {
 296                        channel->irq_moderation_us += step;
 297                        efx->type->push_irq_moderation(channel);
 298                }
 299        }
 300
 301        channel->irq_count = 0;
 302        channel->irq_mod_score = 0;
 303}
 304
 305static int efx_poll(struct napi_struct *napi, int budget)
 306{
 307        struct efx_channel *channel =
 308                container_of(napi, struct efx_channel, napi_str);
 309        struct efx_nic *efx = channel->efx;
 310        int spent;
 311
 312        if (!efx_channel_lock_napi(channel))
 313                return budget;
 314
 315        netif_vdbg(efx, intr, efx->net_dev,
 316                   "channel %d NAPI poll executing on CPU %d\n",
 317                   channel->channel, raw_smp_processor_id());
 318
 319        spent = efx_process_channel(channel, budget);
 320
 321        if (spent < budget) {
 322                if (efx_channel_has_rx_queue(channel) &&
 323                    efx->irq_rx_adaptive &&
 324                    unlikely(++channel->irq_count == 1000)) {
 325                        efx_update_irq_mod(efx, channel);
 326                }
 327
 328                efx_filter_rfs_expire(channel);
 329
 330                /* There is no race here; although napi_disable() will
 331                 * only wait for napi_complete(), this isn't a problem
 332                 * since efx_nic_eventq_read_ack() will have no effect if
 333                 * interrupts have already been disabled.
 334                 */
 335                napi_complete(napi);
 336                efx_nic_eventq_read_ack(channel);
 337        }
 338
 339        efx_channel_unlock_napi(channel);
 340        return spent;
 341}
 342
 343/* Create event queue
 344 * Event queue memory allocations are done only once.  If the channel
 345 * is reset, the memory buffer will be reused; this guards against
 346 * errors during channel reset and also simplifies interrupt handling.
 347 */
 348static int efx_probe_eventq(struct efx_channel *channel)
 349{
 350        struct efx_nic *efx = channel->efx;
 351        unsigned long entries;
 352
 353        netif_dbg(efx, probe, efx->net_dev,
 354                  "chan %d create event queue\n", channel->channel);
 355
 356        /* Build an event queue with room for one event per tx and rx buffer,
 357         * plus some extra for link state events and MCDI completions. */
 358        entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
 359        EFX_BUG_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
 360        channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;
 361
 362        return efx_nic_probe_eventq(channel);
 363}
 364
 365/* Prepare channel's event queue */
 366static int efx_init_eventq(struct efx_channel *channel)
 367{
 368        struct efx_nic *efx = channel->efx;
 369        int rc;
 370
 371        EFX_WARN_ON_PARANOID(channel->eventq_init);
 372
 373        netif_dbg(efx, drv, efx->net_dev,
 374                  "chan %d init event queue\n", channel->channel);
 375
 376        rc = efx_nic_init_eventq(channel);
 377        if (rc == 0) {
 378                efx->type->push_irq_moderation(channel);
 379                channel->eventq_read_ptr = 0;
 380                channel->eventq_init = true;
 381        }
 382        return rc;
 383}
 384
 385/* Enable event queue processing and NAPI */
 386void efx_start_eventq(struct efx_channel *channel)
 387{
 388        netif_dbg(channel->efx, ifup, channel->efx->net_dev,
 389                  "chan %d start event queue\n", channel->channel);
 390
 391        /* Make sure the NAPI handler sees the enabled flag set */
 392        channel->enabled = true;
 393        smp_wmb();
 394
 395        efx_channel_enable(channel);
 396        napi_enable(&channel->napi_str);
 397        efx_nic_eventq_read_ack(channel);
 398}
 399
 400/* Disable event queue processing and NAPI */
 401void efx_stop_eventq(struct efx_channel *channel)
 402{
 403        if (!channel->enabled)
 404                return;
 405
 406        napi_disable(&channel->napi_str);
 407        while (!efx_channel_disable(channel))
 408                usleep_range(1000, 20000);
 409        channel->enabled = false;
 410}
 411
 412static void efx_fini_eventq(struct efx_channel *channel)
 413{
 414        if (!channel->eventq_init)
 415                return;
 416
 417        netif_dbg(channel->efx, drv, channel->efx->net_dev,
 418                  "chan %d fini event queue\n", channel->channel);
 419
 420        efx_nic_fini_eventq(channel);
 421        channel->eventq_init = false;
 422}
 423
 424static void efx_remove_eventq(struct efx_channel *channel)
 425{
 426        netif_dbg(channel->efx, drv, channel->efx->net_dev,
 427                  "chan %d remove event queue\n", channel->channel);
 428
 429        efx_nic_remove_eventq(channel);
 430}
 431
 432/**************************************************************************
 433 *
 434 * Channel handling
 435 *
 436 *************************************************************************/
 437
 438/* Allocate and initialise a channel structure. */
 439static struct efx_channel *
 440efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
 441{
 442        struct efx_channel *channel;
 443        struct efx_rx_queue *rx_queue;
 444        struct efx_tx_queue *tx_queue;
 445        int j;
 446
 447        channel = kzalloc(sizeof(*channel), GFP_KERNEL);
 448        if (!channel)
 449                return NULL;
 450
 451        channel->efx = efx;
 452        channel->channel = i;
 453        channel->type = &efx_default_channel_type;
 454
 455        for (j = 0; j < EFX_TXQ_TYPES; j++) {
 456                tx_queue = &channel->tx_queue[j];
 457                tx_queue->efx = efx;
 458                tx_queue->queue = i * EFX_TXQ_TYPES + j;
 459                tx_queue->channel = channel;
 460        }
 461
 462        rx_queue = &channel->rx_queue;
 463        rx_queue->efx = efx;
 464        setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
 465                    (unsigned long)rx_queue);
 466
 467        return channel;
 468}
 469
 470/* Allocate and initialise a channel structure, copying parameters
 471 * (but not resources) from an old channel structure.
 472 */
 473static struct efx_channel *
 474efx_copy_channel(const struct efx_channel *old_channel)
 475{
 476        struct efx_channel *channel;
 477        struct efx_rx_queue *rx_queue;
 478        struct efx_tx_queue *tx_queue;
 479        int j;
 480
 481        channel = kmalloc(sizeof(*channel), GFP_KERNEL);
 482        if (!channel)
 483                return NULL;
 484
 485        *channel = *old_channel;
 486
 487        channel->napi_dev = NULL;
 488        INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
 489        channel->napi_str.napi_id = 0;
 490        channel->napi_str.state = 0;
 491        memset(&channel->eventq, 0, sizeof(channel->eventq));
 492
 493        for (j = 0; j < EFX_TXQ_TYPES; j++) {
 494                tx_queue = &channel->tx_queue[j];
 495                if (tx_queue->channel)
 496                        tx_queue->channel = channel;
 497                tx_queue->buffer = NULL;
 498                memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
 499        }
 500
 501        rx_queue = &channel->rx_queue;
 502        rx_queue->buffer = NULL;
 503        memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
 504        setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
 505                    (unsigned long)rx_queue);
 506
 507        return channel;
 508}
 509
 510static int efx_probe_channel(struct efx_channel *channel)
 511{
 512        struct efx_tx_queue *tx_queue;
 513        struct efx_rx_queue *rx_queue;
 514        int rc;
 515
 516        netif_dbg(channel->efx, probe, channel->efx->net_dev,
 517                  "creating channel %d\n", channel->channel);
 518
 519        rc = channel->type->pre_probe(channel);
 520        if (rc)
 521                goto fail;
 522
 523        rc = efx_probe_eventq(channel);
 524        if (rc)
 525                goto fail;
 526
 527        efx_for_each_channel_tx_queue(tx_queue, channel) {
 528                rc = efx_probe_tx_queue(tx_queue);
 529                if (rc)
 530                        goto fail;
 531        }
 532
 533        efx_for_each_channel_rx_queue(rx_queue, channel) {
 534                rc = efx_probe_rx_queue(rx_queue);
 535                if (rc)
 536                        goto fail;
 537        }
 538
 539        return 0;
 540
 541fail:
 542        efx_remove_channel(channel);
 543        return rc;
 544}
 545
 546static void
 547efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
 548{
 549        struct efx_nic *efx = channel->efx;
 550        const char *type;
 551        int number;
 552
 553        number = channel->channel;
 554        if (efx->tx_channel_offset == 0) {
 555                type = "";
 556        } else if (channel->channel < efx->tx_channel_offset) {
 557                type = "-rx";
 558        } else {
 559                type = "-tx";
 560                number -= efx->tx_channel_offset;
 561        }
 562        snprintf(buf, len, "%s%s-%d", efx->name, type, number);
 563}
 564
 565static void efx_set_channel_names(struct efx_nic *efx)
 566{
 567        struct efx_channel *channel;
 568
 569        efx_for_each_channel(channel, efx)
 570                channel->type->get_name(channel,
 571                                        efx->msi_context[channel->channel].name,
 572                                        sizeof(efx->msi_context[0].name));
 573}
 574
 575static int efx_probe_channels(struct efx_nic *efx)
 576{
 577        struct efx_channel *channel;
 578        int rc;
 579
 580        /* Restart special buffer allocation */
 581        efx->next_buffer_table = 0;
 582
 583        /* Probe channels in reverse, so that any 'extra' channels
 584         * use the start of the buffer table. This allows the traffic
 585         * channels to be resized without moving them or wasting the
 586         * entries before them.
 587         */
 588        efx_for_each_channel_rev(channel, efx) {
 589                rc = efx_probe_channel(channel);
 590                if (rc) {
 591                        netif_err(efx, probe, efx->net_dev,
 592                                  "failed to create channel %d\n",
 593                                  channel->channel);
 594                        goto fail;
 595                }
 596        }
 597        efx_set_channel_names(efx);
 598
 599        return 0;
 600
 601fail:
 602        efx_remove_channels(efx);
 603        return rc;
 604}
 605
 606/* Channels are shut down and reinitialised whilst the NIC is running
 607 * to propagate configuration changes (mtu, checksum offload), or
 608 * to clear hardware error conditions
 609 */
 610static void efx_start_datapath(struct efx_nic *efx)
 611{
 612        netdev_features_t old_features = efx->net_dev->features;
 613        bool old_rx_scatter = efx->rx_scatter;
 614        struct efx_tx_queue *tx_queue;
 615        struct efx_rx_queue *rx_queue;
 616        struct efx_channel *channel;
 617        size_t rx_buf_len;
 618
 619        /* Calculate the rx buffer allocation parameters required to
 620         * support the current MTU, including padding for header
 621         * alignment and overruns.
 622         */
 623        efx->rx_dma_len = (efx->rx_prefix_size +
 624                           EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
 625                           efx->type->rx_buffer_padding);
 626        rx_buf_len = (sizeof(struct efx_rx_page_state) +
 627                      efx->rx_ip_align + efx->rx_dma_len);
 628        if (rx_buf_len <= PAGE_SIZE) {
 629                efx->rx_scatter = efx->type->always_rx_scatter;
 630                efx->rx_buffer_order = 0;
 631        } else if (efx->type->can_rx_scatter) {
 632                BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
 633                BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
 634                             2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE,
 635                                       EFX_RX_BUF_ALIGNMENT) >
 636                             PAGE_SIZE);
 637                efx->rx_scatter = true;
 638                efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
 639                efx->rx_buffer_order = 0;
 640        } else {
 641                efx->rx_scatter = false;
 642                efx->rx_buffer_order = get_order(rx_buf_len);
 643        }
 644
 645        efx_rx_config_page_split(efx);
 646        if (efx->rx_buffer_order)
 647                netif_dbg(efx, drv, efx->net_dev,
 648                          "RX buf len=%u; page order=%u batch=%u\n",
 649                          efx->rx_dma_len, efx->rx_buffer_order,
 650                          efx->rx_pages_per_batch);
 651        else
 652                netif_dbg(efx, drv, efx->net_dev,
 653                          "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
 654                          efx->rx_dma_len, efx->rx_page_buf_step,
 655                          efx->rx_bufs_per_page, efx->rx_pages_per_batch);
 656
 657        /* Restore previously fixed features in hw_features and remove
 658         * features which are fixed now
 659         */
 660        efx->net_dev->hw_features |= efx->net_dev->features;
 661        efx->net_dev->hw_features &= ~efx->fixed_features;
 662        efx->net_dev->features |= efx->fixed_features;
 663        if (efx->net_dev->features != old_features)
 664                netdev_features_change(efx->net_dev);
 665
 666        /* RX filters may also have scatter-enabled flags */
 667        if (efx->rx_scatter != old_rx_scatter)
 668                efx->type->filter_update_rx_scatter(efx);
 669
 670        /* We must keep at least one descriptor in a TX ring empty.
 671         * We could avoid this when the queue size does not exactly
 672         * match the hardware ring size, but it's not that important.
 673         * Therefore we stop the queue when one more skb might fill
 674         * the ring completely.  We wake it when half way back to
 675         * empty.
 676         */
 677        efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
 678        efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
 679
 680        /* Initialise the channels */
 681        efx_for_each_channel(channel, efx) {
 682                efx_for_each_channel_tx_queue(tx_queue, channel) {
 683                        efx_init_tx_queue(tx_queue);
 684                        atomic_inc(&efx->active_queues);
 685                }
 686
 687                efx_for_each_channel_rx_queue(rx_queue, channel) {
 688                        efx_init_rx_queue(rx_queue);
 689                        atomic_inc(&efx->active_queues);
 690                        efx_stop_eventq(channel);
 691                        efx_fast_push_rx_descriptors(rx_queue, false);
 692                        efx_start_eventq(channel);
 693                }
 694
 695                WARN_ON(channel->rx_pkt_n_frags);
 696        }
 697
 698        efx_ptp_start_datapath(efx);
 699
 700        if (netif_device_present(efx->net_dev))
 701                netif_tx_wake_all_queues(efx->net_dev);
 702}
 703
 704static void efx_stop_datapath(struct efx_nic *efx)
 705{
 706        struct efx_channel *channel;
 707        struct efx_tx_queue *tx_queue;
 708        struct efx_rx_queue *rx_queue;
 709        int rc;
 710
 711        EFX_ASSERT_RESET_SERIALISED(efx);
 712        BUG_ON(efx->port_enabled);
 713
 714        efx_ptp_stop_datapath(efx);
 715
 716        /* Stop RX refill */
 717        efx_for_each_channel(channel, efx) {
 718                efx_for_each_channel_rx_queue(rx_queue, channel)
 719                        rx_queue->refill_enabled = false;
 720        }
 721
 722        efx_for_each_channel(channel, efx) {
 723                /* RX packet processing is pipelined, so wait for the
 724                 * NAPI handler to complete.  At least event queue 0
 725                 * might be kept active by non-data events, so don't
 726                 * use napi_synchronize() but actually disable NAPI
 727                 * temporarily.
 728                 */
 729                if (efx_channel_has_rx_queue(channel)) {
 730                        efx_stop_eventq(channel);
 731                        efx_start_eventq(channel);
 732                }
 733        }
 734
 735        rc = efx->type->fini_dmaq(efx);
 736        if (rc && EFX_WORKAROUND_7803(efx)) {
 737                /* Schedule a reset to recover from the flush failure. The
 738                 * descriptor caches reference memory we're about to free,
 739                 * but falcon_reconfigure_mac_wrapper() won't reconnect
 740                 * the MACs because of the pending reset.
 741                 */
 742                netif_err(efx, drv, efx->net_dev,
 743                          "Resetting to recover from flush failure\n");
 744                efx_schedule_reset(efx, RESET_TYPE_ALL);
 745        } else if (rc) {
 746                netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
 747        } else {
 748                netif_dbg(efx, drv, efx->net_dev,
 749                          "successfully flushed all queues\n");
 750        }
 751
 752        efx_for_each_channel(channel, efx) {
 753                efx_for_each_channel_rx_queue(rx_queue, channel)
 754                        efx_fini_rx_queue(rx_queue);
 755                efx_for_each_possible_channel_tx_queue(tx_queue, channel)
 756                        efx_fini_tx_queue(tx_queue);
 757        }
 758}
 759
 760static void efx_remove_channel(struct efx_channel *channel)
 761{
 762        struct efx_tx_queue *tx_queue;
 763        struct efx_rx_queue *rx_queue;
 764
 765        netif_dbg(channel->efx, drv, channel->efx->net_dev,
 766                  "destroy chan %d\n", channel->channel);
 767
 768        efx_for_each_channel_rx_queue(rx_queue, channel)
 769                efx_remove_rx_queue(rx_queue);
 770        efx_for_each_possible_channel_tx_queue(tx_queue, channel)
 771                efx_remove_tx_queue(tx_queue);
 772        efx_remove_eventq(channel);
 773        channel->type->post_remove(channel);
 774}
 775
 776static void efx_remove_channels(struct efx_nic *efx)
 777{
 778        struct efx_channel *channel;
 779
 780        efx_for_each_channel(channel, efx)
 781                efx_remove_channel(channel);
 782}
 783
 784int
 785efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
 786{
 787        struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
 788        u32 old_rxq_entries, old_txq_entries;
 789        unsigned i, next_buffer_table = 0;
 790        int rc, rc2;
 791
 792        rc = efx_check_disabled(efx);
 793        if (rc)
 794                return rc;
 795
 796        /* Not all channels should be reallocated. We must avoid
 797         * reallocating their buffer table entries.
 798         */
 799        efx_for_each_channel(channel, efx) {
 800                struct efx_rx_queue *rx_queue;
 801                struct efx_tx_queue *tx_queue;
 802
 803                if (channel->type->copy)
 804                        continue;
 805                next_buffer_table = max(next_buffer_table,
 806                                        channel->eventq.index +
 807                                        channel->eventq.entries);
 808                efx_for_each_channel_rx_queue(rx_queue, channel)
 809                        next_buffer_table = max(next_buffer_table,
 810                                                rx_queue->rxd.index +
 811                                                rx_queue->rxd.entries);
 812                efx_for_each_channel_tx_queue(tx_queue, channel)
 813                        next_buffer_table = max(next_buffer_table,
 814                                                tx_queue->txd.index +
 815                                                tx_queue->txd.entries);
 816        }
 817
 818        efx_device_detach_sync(efx);
 819        efx_stop_all(efx);
 820        efx_soft_disable_interrupts(efx);
 821
 822        /* Clone channels (where possible) */
 823        memset(other_channel, 0, sizeof(other_channel));
 824        for (i = 0; i < efx->n_channels; i++) {
 825                channel = efx->channel[i];
 826                if (channel->type->copy)
 827                        channel = channel->type->copy(channel);
 828                if (!channel) {
 829                        rc = -ENOMEM;
 830                        goto out;
 831                }
 832                other_channel[i] = channel;
 833        }
 834
 835        /* Swap entry counts and channel pointers */
 836        old_rxq_entries = efx->rxq_entries;
 837        old_txq_entries = efx->txq_entries;
 838        efx->rxq_entries = rxq_entries;
 839        efx->txq_entries = txq_entries;
 840        for (i = 0; i < efx->n_channels; i++) {
 841                channel = efx->channel[i];
 842                efx->channel[i] = other_channel[i];
 843                other_channel[i] = channel;
 844        }
 845
 846        /* Restart buffer table allocation */
 847        efx->next_buffer_table = next_buffer_table;
 848
 849        for (i = 0; i < efx->n_channels; i++) {
 850                channel = efx->channel[i];
 851                if (!channel->type->copy)
 852                        continue;
 853                rc = efx_probe_channel(channel);
 854                if (rc)
 855                        goto rollback;
 856                efx_init_napi_channel(efx->channel[i]);
 857        }
 858
 859out:
 860        /* Destroy unused channel structures */
 861        for (i = 0; i < efx->n_channels; i++) {
 862                channel = other_channel[i];
 863                if (channel && channel->type->copy) {
 864                        efx_fini_napi_channel(channel);
 865                        efx_remove_channel(channel);
 866                        kfree(channel);
 867                }
 868        }
 869
 870        rc2 = efx_soft_enable_interrupts(efx);
 871        if (rc2) {
 872                rc = rc ? rc : rc2;
 873                netif_err(efx, drv, efx->net_dev,
 874                          "unable to restart interrupts on channel reallocation\n");
 875                efx_schedule_reset(efx, RESET_TYPE_DISABLE);
 876        } else {
 877                efx_start_all(efx);
 878                netif_device_attach(efx->net_dev);
 879        }
 880        return rc;
 881
 882rollback:
 883        /* Swap back */
 884        efx->rxq_entries = old_rxq_entries;
 885        efx->txq_entries = old_txq_entries;
 886        for (i = 0; i < efx->n_channels; i++) {
 887                channel = efx->channel[i];
 888                efx->channel[i] = other_channel[i];
 889                other_channel[i] = channel;
 890        }
 891        goto out;
 892}
 893
 894void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
 895{
 896        mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100));
 897}
 898
 899static const struct efx_channel_type efx_default_channel_type = {
 900        .pre_probe              = efx_channel_dummy_op_int,
 901        .post_remove            = efx_channel_dummy_op_void,
 902        .get_name               = efx_get_channel_name,
 903        .copy                   = efx_copy_channel,
 904        .keep_eventq            = false,
 905};
 906
 907int efx_channel_dummy_op_int(struct efx_channel *channel)
 908{
 909        return 0;
 910}
 911
 912void efx_channel_dummy_op_void(struct efx_channel *channel)
 913{
 914}
 915
 916/**************************************************************************
 917 *
 918 * Port handling
 919 *
 920 **************************************************************************/
 921
 922/* This ensures that the kernel is kept informed (via
 923 * netif_carrier_on/off) of the link status, and also keeps the
 924 * stopped/started state of the port's TX queue in step with the link.
 925 */
 926void efx_link_status_changed(struct efx_nic *efx)
 927{
 928        struct efx_link_state *link_state = &efx->link_state;
 929
 930        /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
 931         * that no events are triggered between unregister_netdev() and the
 932         * driver unloading. A more general condition is that NETDEV_CHANGE
 933         * can only be generated between NETDEV_UP and NETDEV_DOWN */
 934        if (!netif_running(efx->net_dev))
 935                return;
 936
 937        if (link_state->up != netif_carrier_ok(efx->net_dev)) {
 938                efx->n_link_state_changes++;
 939
 940                if (link_state->up)
 941                        netif_carrier_on(efx->net_dev);
 942                else
 943                        netif_carrier_off(efx->net_dev);
 944        }
 945
 946        /* Status message for kernel log */
 947        if (link_state->up)
 948                netif_info(efx, link, efx->net_dev,
 949                           "link up at %uMbps %s-duplex (MTU %d)\n",
 950                           link_state->speed, link_state->fd ? "full" : "half",
 951                           efx->net_dev->mtu);
 952        else
 953                netif_info(efx, link, efx->net_dev, "link down\n");
 954}
 955
 956void efx_link_set_advertising(struct efx_nic *efx, u32 advertising)
 957{
 958        efx->link_advertising = advertising;
 959        if (advertising) {
 960                if (advertising & ADVERTISED_Pause)
 961                        efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX);
 962                else
 963                        efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
 964                if (advertising & ADVERTISED_Asym_Pause)
 965                        efx->wanted_fc ^= EFX_FC_TX;
 966        }
 967}
 968
 969void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
 970{
 971        efx->wanted_fc = wanted_fc;
 972        if (efx->link_advertising) {
 973                if (wanted_fc & EFX_FC_RX)
 974                        efx->link_advertising |= (ADVERTISED_Pause |
 975                                                  ADVERTISED_Asym_Pause);
 976                else
 977                        efx->link_advertising &= ~(ADVERTISED_Pause |
 978                                                   ADVERTISED_Asym_Pause);
 979                if (wanted_fc & EFX_FC_TX)
 980                        efx->link_advertising ^= ADVERTISED_Asym_Pause;
 981        }
 982}
 983
 984static void efx_fini_port(struct efx_nic *efx);
 985
 986/* We assume that efx->type->reconfigure_mac will always try to sync RX
 987 * filters and therefore needs to read-lock the filter table against freeing
 988 */
 989void efx_mac_reconfigure(struct efx_nic *efx)
 990{
 991        down_read(&efx->filter_sem);
 992        efx->type->reconfigure_mac(efx);
 993        up_read(&efx->filter_sem);
 994}
 995
 996/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
 997 * the MAC appropriately. All other PHY configuration changes are pushed
 998 * through phy_op->set_settings(), and pushed asynchronously to the MAC
 999 * through efx_monitor().
1000 *
1001 * Callers must hold the mac_lock
1002 */
1003int __efx_reconfigure_port(struct efx_nic *efx)
1004{
1005        enum efx_phy_mode phy_mode;
1006        int rc;
1007
1008        WARN_ON(!mutex_is_locked(&efx->mac_lock));
1009
1010        /* Disable PHY transmit in mac level loopbacks */
1011        phy_mode = efx->phy_mode;
1012        if (LOOPBACK_INTERNAL(efx))
1013                efx->phy_mode |= PHY_MODE_TX_DISABLED;
1014        else
1015                efx->phy_mode &= ~PHY_MODE_TX_DISABLED;
1016
1017        rc = efx->type->reconfigure_port(efx);
1018
1019        if (rc)
1020                efx->phy_mode = phy_mode;
1021
1022        return rc;
1023}
1024
1025/* Reinitialise the MAC to pick up new PHY settings, even if the port is
1026 * disabled. */
1027int efx_reconfigure_port(struct efx_nic *efx)
1028{
1029        int rc;
1030
1031        EFX_ASSERT_RESET_SERIALISED(efx);
1032
1033        mutex_lock(&efx->mac_lock);
1034        rc = __efx_reconfigure_port(efx);
1035        mutex_unlock(&efx->mac_lock);
1036
1037        return rc;
1038}
1039
1040/* Asynchronous work item for changing MAC promiscuity and multicast
1041 * hash.  Avoid a drain/rx_ingress enable by reconfiguring the current
1042 * MAC directly. */
1043static void efx_mac_work(struct work_struct *data)
1044{
1045        struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);
1046
1047        mutex_lock(&efx->mac_lock);
1048        if (efx->port_enabled)
1049                efx_mac_reconfigure(efx);
1050        mutex_unlock(&efx->mac_lock);
1051}
1052
1053static int efx_probe_port(struct efx_nic *efx)
1054{
1055        int rc;
1056
1057        netif_dbg(efx, probe, efx->net_dev, "create port\n");
1058
1059        if (phy_flash_cfg)
1060                efx->phy_mode = PHY_MODE_SPECIAL;
1061
1062        /* Connect up MAC/PHY operations table */
1063        rc = efx->type->probe_port(efx);
1064        if (rc)
1065                return rc;
1066
1067        /* Initialise MAC address to permanent address */
1068        ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr);
1069
1070        return 0;
1071}
1072
1073static int efx_init_port(struct efx_nic *efx)
1074{
1075        int rc;
1076
1077        netif_dbg(efx, drv, efx->net_dev, "init port\n");
1078
1079        mutex_lock(&efx->mac_lock);
1080
1081        rc = efx->phy_op->init(efx);
1082        if (rc)
1083                goto fail1;
1084
1085        efx->port_initialized = true;
1086
1087        /* Reconfigure the MAC before creating dma queues (required for
1088         * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */
1089        efx_mac_reconfigure(efx);
1090
1091        /* Ensure the PHY advertises the correct flow control settings */
1092        rc = efx->phy_op->reconfigure(efx);
1093        if (rc && rc != -EPERM)
1094                goto fail2;
1095
1096        mutex_unlock(&efx->mac_lock);
1097        return 0;
1098
1099fail2:
1100        efx->phy_op->fini(efx);
1101fail1:
1102        mutex_unlock(&efx->mac_lock);
1103        return rc;
1104}
1105
1106static void efx_start_port(struct efx_nic *efx)
1107{
1108        netif_dbg(efx, ifup, efx->net_dev, "start port\n");
1109        BUG_ON(efx->port_enabled);
1110
1111        mutex_lock(&efx->mac_lock);
1112        efx->port_enabled = true;
1113
1114        /* Ensure MAC ingress/egress is enabled */
1115        efx_mac_reconfigure(efx);
1116
1117        mutex_unlock(&efx->mac_lock);
1118}
1119
1120/* Cancel work for MAC reconfiguration, periodic hardware monitoring
1121 * and the async self-test, wait for them to finish and prevent them
1122 * being scheduled again.  This doesn't cover online resets, which
1123 * should only be cancelled when removing the device.
1124 */
1125static void efx_stop_port(struct efx_nic *efx)
1126{
1127        netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");
1128
1129        EFX_ASSERT_RESET_SERIALISED(efx);
1130
1131        mutex_lock(&efx->mac_lock);
1132        efx->port_enabled = false;
1133        mutex_unlock(&efx->mac_lock);
1134
1135        /* Serialise against efx_set_multicast_list() */
1136        netif_addr_lock_bh(efx->net_dev);
1137        netif_addr_unlock_bh(efx->net_dev);
1138
1139        cancel_delayed_work_sync(&efx->monitor_work);
1140        efx_selftest_async_cancel(efx);
1141        cancel_work_sync(&efx->mac_work);
1142}
1143
1144static void efx_fini_port(struct efx_nic *efx)
1145{
1146        netif_dbg(efx, drv, efx->net_dev, "shut down port\n");
1147
1148        if (!efx->port_initialized)
1149                return;
1150
1151        efx->phy_op->fini(efx);
1152        efx->port_initialized = false;
1153
1154        efx->link_state.up = false;
1155        efx_link_status_changed(efx);
1156}
1157
1158static void efx_remove_port(struct efx_nic *efx)
1159{
1160        netif_dbg(efx, drv, efx->net_dev, "destroying port\n");
1161
1162        efx->type->remove_port(efx);
1163}
1164
1165/**************************************************************************
1166 *
1167 * NIC handling
1168 *
1169 **************************************************************************/
1170
1171static LIST_HEAD(efx_primary_list);
1172static LIST_HEAD(efx_unassociated_list);
1173
1174static bool efx_same_controller(struct efx_nic *left, struct efx_nic *right)
1175{
1176        return left->type == right->type &&
1177                left->vpd_sn && right->vpd_sn &&
1178                !strcmp(left->vpd_sn, right->vpd_sn);
1179}
1180
1181static void efx_associate(struct efx_nic *efx)
1182{
1183        struct efx_nic *other, *next;
1184
1185        if (efx->primary == efx) {
1186                /* Adding primary function; look for secondaries */
1187
1188                netif_dbg(efx, probe, efx->net_dev, "adding to primary list\n");
1189                list_add_tail(&efx->node, &efx_primary_list);
1190
1191                list_for_each_entry_safe(other, next, &efx_unassociated_list,
1192                                         node) {
1193                        if (efx_same_controller(efx, other)) {
1194                                list_del(&other->node);
1195                                netif_dbg(other, probe, other->net_dev,
1196                                          "moving to secondary list of %s %s\n",
1197                                          pci_name(efx->pci_dev),
1198                                          efx->net_dev->name);
1199                                list_add_tail(&other->node,
1200                                              &efx->secondary_list);
1201                                other->primary = efx;
1202                        }
1203                }
1204        } else {
1205                /* Adding secondary function; look for primary */
1206
1207                list_for_each_entry(other, &efx_primary_list, node) {
1208                        if (efx_same_controller(efx, other)) {
1209                                netif_dbg(efx, probe, efx->net_dev,
1210                                          "adding to secondary list of %s %s\n",
1211                                          pci_name(other->pci_dev),
1212                                          other->net_dev->name);
1213                                list_add_tail(&efx->node,
1214                                              &other->secondary_list);
1215                                efx->primary = other;
1216                                return;
1217                        }
1218                }
1219
1220                netif_dbg(efx, probe, efx->net_dev,
1221                          "adding to unassociated list\n");
1222                list_add_tail(&efx->node, &efx_unassociated_list);
1223        }
1224}
1225
1226static void efx_dissociate(struct efx_nic *efx)
1227{
1228        struct efx_nic *other, *next;
1229
1230        list_del(&efx->node);
1231        efx->primary = NULL;
1232
1233        list_for_each_entry_safe(other, next, &efx->secondary_list, node) {
1234                list_del(&other->node);
1235                netif_dbg(other, probe, other->net_dev,
1236                          "moving to unassociated list\n");
1237                list_add_tail(&other->node, &efx_unassociated_list);
1238                other->primary = NULL;
1239        }
1240}
1241
1242/* This configures the PCI device to enable I/O and DMA. */
1243static int efx_init_io(struct efx_nic *efx)
1244{
1245        struct pci_dev *pci_dev = efx->pci_dev;
1246        dma_addr_t dma_mask = efx->type->max_dma_mask;
1247        unsigned int mem_map_size = efx->type->mem_map_size(efx);
1248        int rc, bar;
1249
1250        netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
1251
1252        bar = efx->type->mem_bar;
1253
1254        rc = pci_enable_device(pci_dev);
1255        if (rc) {
1256                netif_err(efx, probe, efx->net_dev,
1257                          "failed to enable PCI device\n");
1258                goto fail1;
1259        }
1260
1261        pci_set_master(pci_dev);
1262
1263        /* Set the PCI DMA mask.  Try all possibilities from our
1264         * genuine mask down to 32 bits, because some architectures
1265         * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
 1266         * masks even though they reject 46 bit masks.
1267         */
1268        while (dma_mask > 0x7fffffffUL) {
1269                rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
1270                if (rc == 0)
1271                        break;
1272                dma_mask >>= 1;
1273        }
1274        if (rc) {
1275                netif_err(efx, probe, efx->net_dev,
1276                          "could not find a suitable DMA mask\n");
1277                goto fail2;
1278        }
1279        netif_dbg(efx, probe, efx->net_dev,
1280                  "using DMA mask %llx\n", (unsigned long long) dma_mask);
1281
1282        efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
1283        rc = pci_request_region(pci_dev, bar, "sfc");
1284        if (rc) {
1285                netif_err(efx, probe, efx->net_dev,
1286                          "request for memory BAR failed\n");
1287                rc = -EIO;
1288                goto fail3;
1289        }
1290        efx->membase = ioremap_nocache(efx->membase_phys, mem_map_size);
1291        if (!efx->membase) {
1292                netif_err(efx, probe, efx->net_dev,
1293                          "could not map memory BAR at %llx+%x\n",
1294                          (unsigned long long)efx->membase_phys, mem_map_size);
1295                rc = -ENOMEM;
1296                goto fail4;
1297        }
1298        netif_dbg(efx, probe, efx->net_dev,
1299                  "memory BAR at %llx+%x (virtual %p)\n",
1300                  (unsigned long long)efx->membase_phys, mem_map_size,
1301                  efx->membase);
1302
1303        return 0;
1304
1305 fail4:
1306        pci_release_region(efx->pci_dev, bar);
1307 fail3:
1308        efx->membase_phys = 0;
1309 fail2:
1310        pci_disable_device(efx->pci_dev);
1311 fail1:
1312        return rc;
1313}
1314
1315static void efx_fini_io(struct efx_nic *efx)
1316{
1317        int bar;
1318
1319        netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");
1320
1321        if (efx->membase) {
1322                iounmap(efx->membase);
1323                efx->membase = NULL;
1324        }
1325
1326        if (efx->membase_phys) {
1327                bar = efx->type->mem_bar;
1328                pci_release_region(efx->pci_dev, bar);
1329                efx->membase_phys = 0;
1330        }
1331
1332        /* Don't disable bus-mastering if VFs are assigned */
1333        if (!pci_vfs_assigned(efx->pci_dev))
1334                pci_disable_device(efx->pci_dev);
1335}
1336
1337void efx_set_default_rx_indir_table(struct efx_nic *efx)
1338{
1339        size_t i;
1340
1341        for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
1342                efx->rx_indir_table[i] =
1343                        ethtool_rxfh_indir_default(i, efx->rss_spread);
1344}
1345
1346static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
1347{
1348        cpumask_var_t thread_mask;
1349        unsigned int count;
1350        int cpu;
1351
1352        if (rss_cpus) {
1353                count = rss_cpus;
1354        } else {
1355                if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
1356                        netif_warn(efx, probe, efx->net_dev,
1357                                   "RSS disabled due to allocation failure\n");
1358                        return 1;
1359                }
1360
1361                count = 0;
1362                for_each_online_cpu(cpu) {
1363                        if (!cpumask_test_cpu(cpu, thread_mask)) {
1364                                ++count;
1365                                cpumask_or(thread_mask, thread_mask,
1366                                           topology_sibling_cpumask(cpu));
1367                        }
1368                }
1369
1370                free_cpumask_var(thread_mask);
1371        }
1372
1373        /* If RSS is requested for the PF *and* VFs then we can't write RSS
1374         * table entries that are inaccessible to VFs
1375         */
1376#ifdef CONFIG_SFC_SRIOV
1377        if (efx->type->sriov_wanted) {
1378                if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
1379                    count > efx_vf_size(efx)) {
1380                        netif_warn(efx, probe, efx->net_dev,
1381                                   "Reducing number of RSS channels from %u to %u for "
1382                                   "VF support. Increase vf-msix-limit to use more "
1383                                   "channels on the PF.\n",
1384                                   count, efx_vf_size(efx));
1385                        count = efx_vf_size(efx);
1386                }
1387        }
1388#endif
1389
1390        return count;
1391}
1392
1393/* Probe the number and type of interrupts we are able to obtain, and
1394 * the resulting numbers of channels and RX queues.
1395 */
1396static int efx_probe_interrupts(struct efx_nic *efx)
1397{
1398        unsigned int extra_channels = 0;
1399        unsigned int i, j;
1400        int rc;
1401
1402        for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
1403                if (efx->extra_channel_type[i])
1404                        ++extra_channels;
1405
1406        if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
1407                struct msix_entry xentries[EFX_MAX_CHANNELS];
1408                unsigned int n_channels;
1409
1410                n_channels = efx_wanted_parallelism(efx);
1411                if (efx_separate_tx_channels)
1412                        n_channels *= 2;
1413                n_channels += extra_channels;
1414                n_channels = min(n_channels, efx->max_channels);
1415
1416                for (i = 0; i < n_channels; i++)
1417                        xentries[i].entry = i;
1418                rc = pci_enable_msix_range(efx->pci_dev,
1419                                           xentries, 1, n_channels);
1420                if (rc < 0) {
1421                        /* Fall back to single channel MSI */
1422                        efx->interrupt_mode = EFX_INT_MODE_MSI;
1423                        netif_err(efx, drv, efx->net_dev,
1424                                  "could not enable MSI-X\n");
1425                } else if (rc < n_channels) {
1426                        netif_err(efx, drv, efx->net_dev,
1427                                  "WARNING: Insufficient MSI-X vectors"
1428                                  " available (%d < %u).\n", rc, n_channels);
1429                        netif_err(efx, drv, efx->net_dev,
1430                                  "WARNING: Performance may be reduced.\n");
1431                        n_channels = rc;
1432                }
1433
1434                if (rc > 0) {
1435                        efx->n_channels = n_channels;
1436                        if (n_channels > extra_channels)
1437                                n_channels -= extra_channels;
1438                        if (efx_separate_tx_channels) {
1439                                efx->n_tx_channels = min(max(n_channels / 2,
1440                                                             1U),
1441                                                         efx->max_tx_channels);
1442                                efx->n_rx_channels = max(n_channels -
1443                                                         efx->n_tx_channels,
1444                                                         1U);
1445                        } else {
1446                                efx->n_tx_channels = min(n_channels,
1447                                                         efx->max_tx_channels);
1448                                efx->n_rx_channels = n_channels;
1449                        }
1450                        for (i = 0; i < efx->n_channels; i++)
1451                                efx_get_channel(efx, i)->irq =
1452                                        xentries[i].vector;
1453                }
1454        }
1455
1456        /* Try single interrupt MSI */
1457        if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
1458                efx->n_channels = 1;
1459                efx->n_rx_channels = 1;
1460                efx->n_tx_channels = 1;
1461                rc = pci_enable_msi(efx->pci_dev);
1462                if (rc == 0) {
1463                        efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
1464                } else {
1465                        netif_err(efx, drv, efx->net_dev,
1466                                  "could not enable MSI\n");
1467                        efx->interrupt_mode = EFX_INT_MODE_LEGACY;
1468                }
1469        }
1470
1471        /* Assume legacy interrupts */
1472        if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
1473                efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
1474                efx->n_rx_channels = 1;
1475                efx->n_tx_channels = 1;
1476                efx->legacy_irq = efx->pci_dev->irq;
1477        }
1478
1479        /* Assign extra channels if possible */
1480        j = efx->n_channels;
1481        for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
1482                if (!efx->extra_channel_type[i])
1483                        continue;
1484                if (efx->interrupt_mode != EFX_INT_MODE_MSIX ||
1485                    efx->n_channels <= extra_channels) {
1486                        efx->extra_channel_type[i]->handle_no_channel(efx);
1487                } else {
1488                        --j;
1489                        efx_get_channel(efx, j)->type =
1490                                efx->extra_channel_type[i];
1491                }
1492        }
1493
1494        /* RSS might be usable on VFs even if it is disabled on the PF */
1495#ifdef CONFIG_SFC_SRIOV
1496        if (efx->type->sriov_wanted) {
1497                efx->rss_spread = ((efx->n_rx_channels > 1 ||
1498                                    !efx->type->sriov_wanted(efx)) ?
1499                                   efx->n_rx_channels : efx_vf_size(efx));
1500                return 0;
1501        }
1502#endif
1503        efx->rss_spread = efx->n_rx_channels;
1504
1505        return 0;
1506}
1507
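/* Soft-enable event processing: (re)initialise the event queue of any
 * channel that does not keep it across a soft disable, start event
 * handling on every channel and switch MCDI completions to event mode.
 * On failure, stop and tear down the channels already started.
 */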
1508static int efx_soft_enable_interrupts(struct efx_nic *efx)
1509{
1510        struct efx_channel *channel, *end_channel;
1511        int rc;
1512
1513        BUG_ON(efx->state == STATE_DISABLED);
1514
1515        efx->irq_soft_enabled = true;
1516        smp_wmb();
1517
1518        efx_for_each_channel(channel, efx) {
1519                if (!channel->type->keep_eventq) {
1520                        rc = efx_init_eventq(channel);
1521                        if (rc)
1522                                goto fail;
1523                }
1524                efx_start_eventq(channel);
1525        }
1526
1527        efx_mcdi_mode_event(efx);
1528
1529        return 0;
1530fail:
1531        end_channel = channel;
1532        efx_for_each_channel(channel, efx) {
1533                if (channel == end_channel)
1534                        break;
1535                efx_stop_eventq(channel);
1536                if (!channel->type->keep_eventq)
1537                        efx_fini_eventq(channel);
1538        }
1539
1540        return rc;
1541}
1542
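/* Soft-disable event processing: switch MCDI back to polled mode, clear
 * irq_soft_enabled and wait for any running interrupt handlers to finish,
 * then stop the event queues (tearing down those that are not kept) and
 * flush the asynchronous MCDI request queue.
 */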
1543static void efx_soft_disable_interrupts(struct efx_nic *efx)
1544{
1545        struct efx_channel *channel;
1546
1547        if (efx->state == STATE_DISABLED)
1548                return;
1549
1550        efx_mcdi_mode_poll(efx);
1551
1552        efx->irq_soft_enabled = false;
1553        smp_wmb();
1554
1555        if (efx->legacy_irq)
1556                synchronize_irq(efx->legacy_irq);
1557
1558        efx_for_each_channel(channel, efx) {
1559                if (channel->irq)
1560                        synchronize_irq(channel->irq);
1561
1562                efx_stop_eventq(channel);
1563                if (!channel->type->keep_eventq)
1564                        efx_fini_eventq(channel);
1565        }
1566
1567        /* Flush the asynchronous MCDI request queue */
1568        efx_mcdi_flush_async(efx);
1569}
1570
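/* Fully enable interrupts: re-enable a legacy IRQ that EEH may have
 * disabled, enable the master interrupt at the NIC, initialise the event
 * queues that persist across soft disables and then soft-enable event
 * processing.  Unwinds on failure.
 */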
1571static int efx_enable_interrupts(struct efx_nic *efx)
1572{
1573        struct efx_channel *channel, *end_channel;
1574        int rc;
1575
1576        BUG_ON(efx->state == STATE_DISABLED);
1577
1578        if (efx->eeh_disabled_legacy_irq) {
1579                enable_irq(efx->legacy_irq);
1580                efx->eeh_disabled_legacy_irq = false;
1581        }
1582
1583        efx->type->irq_enable_master(efx);
1584
1585        efx_for_each_channel(channel, efx) {
1586                if (channel->type->keep_eventq) {
1587                        rc = efx_init_eventq(channel);
1588                        if (rc)
1589                                goto fail;
1590                }
1591        }
1592
1593        rc = efx_soft_enable_interrupts(efx);
1594        if (rc)
1595                goto fail;
1596
1597        return 0;
1598
1599fail:
1600        end_channel = channel;
1601        efx_for_each_channel(channel, efx) {
1602                if (channel == end_channel)
1603                        break;
1604                if (channel->type->keep_eventq)
1605                        efx_fini_eventq(channel);
1606        }
1607
1608        efx->type->irq_disable_non_ev(efx);
1609
1610        return rc;
1611}
1612
1613static void efx_disable_interrupts(struct efx_nic *efx)
1614{
1615        struct efx_channel *channel;
1616
1617        efx_soft_disable_interrupts(efx);
1618
1619        efx_for_each_channel(channel, efx) {
1620                if (channel->type->keep_eventq)
1621                        efx_fini_eventq(channel);
1622        }
1623
1624        efx->type->irq_disable_non_ev(efx);
1625}
1626
1627static void efx_remove_interrupts(struct efx_nic *efx)
1628{
1629        struct efx_channel *channel;
1630
1631        /* Remove MSI/MSI-X interrupts */
1632        efx_for_each_channel(channel, efx)
1633                channel->irq = 0;
1634        pci_disable_msi(efx->pci_dev);
1635        pci_disable_msix(efx->pci_dev);
1636
1637        /* Remove legacy interrupt */
1638        efx->legacy_irq = 0;
1639}
1640
1641static void efx_set_channels(struct efx_nic *efx)
1642{
1643        struct efx_channel *channel;
1644        struct efx_tx_queue *tx_queue;
1645
1646        efx->tx_channel_offset =
1647                efx_separate_tx_channels ?
1648                efx->n_channels - efx->n_tx_channels : 0;
1649
1650        /* We need to mark which channels really have RX and TX
1651         * queues, and adjust the TX queue numbers if we have separate
1652         * RX-only and TX-only channels.
1653         */
1654        efx_for_each_channel(channel, efx) {
1655                if (channel->channel < efx->n_rx_channels)
1656                        channel->rx_queue.core_index = channel->channel;
1657                else
1658                        channel->rx_queue.core_index = -1;
1659
1660                efx_for_each_channel_tx_queue(tx_queue, channel)
1661                        tx_queue->queue -= (efx->tx_channel_offset *
1662                                            EFX_TXQ_TYPES);
1663        }
1664}
1665
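/* Probe the NIC itself: run the hardware-type specific probe, size the
 * channels and queues (retrying while dimension_resources() returns
 * -EAGAIN with a reduced max_channels), fill the RSS hash key (when using
 * more than one channel) and the default indirection table, publish the
 * real queue counts and apply the default interrupt moderation settings.
 */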
1666static int efx_probe_nic(struct efx_nic *efx)
1667{
1668        int rc;
1669
1670        netif_dbg(efx, probe, efx->net_dev, "creating NIC\n");
1671
1672        /* Carry out hardware-type specific initialisation */
1673        rc = efx->type->probe(efx);
1674        if (rc)
1675                return rc;
1676
1677        do {
1678                if (!efx->max_channels || !efx->max_tx_channels) {
1679                        netif_err(efx, drv, efx->net_dev,
1680                                  "Insufficient resources to allocate"
1681                                  " any channels\n");
1682                        rc = -ENOSPC;
1683                        goto fail1;
1684                }
1685
1686                /* Determine the number of channels and queues by trying
1687                 * to hook in MSI-X interrupts.
1688                 */
1689                rc = efx_probe_interrupts(efx);
1690                if (rc)
1691                        goto fail1;
1692
1693                efx_set_channels(efx);
1694
1695                /* dimension_resources can fail with EAGAIN */
1696                rc = efx->type->dimension_resources(efx);
1697                if (rc != 0 && rc != -EAGAIN)
1698                        goto fail2;
1699
1700                if (rc == -EAGAIN)
1701                        /* try again with new max_channels */
1702                        efx_remove_interrupts(efx);
1703
1704        } while (rc == -EAGAIN);
1705
1706        if (efx->n_channels > 1)
1707                netdev_rss_key_fill(&efx->rx_hash_key,
1708                                    sizeof(efx->rx_hash_key));
1709        efx_set_default_rx_indir_table(efx);
1710
1711        netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
1712        netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
1713
1714        /* Initialise the interrupt moderation settings */
1715        efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000);
1716        efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
1717                                true);
1718
1719        return 0;
1720
1721fail2:
1722        efx_remove_interrupts(efx);
1723fail1:
1724        efx->type->remove(efx);
1725        return rc;
1726}
1727
1728static void efx_remove_nic(struct efx_nic *efx)
1729{
1730        netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n");
1731
1732        efx_remove_interrupts(efx);
1733        efx->type->remove(efx);
1734}
1735
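/* Probe the hardware filter table.  With accelerated RFS, also allocate
 * each channel's rps_flow_id table, initialised to RPS_FLOW_ID_INVALID;
 * if any allocation fails the filter table is removed again and -ENOMEM
 * returned.
 */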
1736static int efx_probe_filters(struct efx_nic *efx)
1737{
1738        int rc;
1739
1740        spin_lock_init(&efx->filter_lock);
1741        init_rwsem(&efx->filter_sem);
1742        mutex_lock(&efx->mac_lock);
1743        down_write(&efx->filter_sem);
1744        rc = efx->type->filter_table_probe(efx);
1745        if (rc)
1746                goto out_unlock;
1747
1748#ifdef CONFIG_RFS_ACCEL
1749        if (efx->type->offload_features & NETIF_F_NTUPLE) {
1750                struct efx_channel *channel;
1751                int i, success = 1;
1752
1753                efx_for_each_channel(channel, efx) {
1754                        channel->rps_flow_id =
1755                                kcalloc(efx->type->max_rx_ip_filters,
1756                                        sizeof(*channel->rps_flow_id),
1757                                        GFP_KERNEL);
1758                        if (!channel->rps_flow_id)
1759                                success = 0;
1760                        else
1761                                for (i = 0;
1762                                     i < efx->type->max_rx_ip_filters;
1763                                     ++i)
1764                                        channel->rps_flow_id[i] =
1765                                                RPS_FLOW_ID_INVALID;
1766                }
1767
1768                if (!success) {
1769                        efx_for_each_channel(channel, efx)
1770                                kfree(channel->rps_flow_id);
1771                        efx->type->filter_table_remove(efx);
1772                        rc = -ENOMEM;
1773                        goto out_unlock;
1774                }
1775
1776                efx->rps_expire_index = efx->rps_expire_channel = 0;
1777        }
1778#endif
1779out_unlock:
1780        up_write(&efx->filter_sem);
1781        mutex_unlock(&efx->mac_lock);
1782        return rc;
1783}
1784
1785static void efx_remove_filters(struct efx_nic *efx)
1786{
1787#ifdef CONFIG_RFS_ACCEL
1788        struct efx_channel *channel;
1789
1790        efx_for_each_channel(channel, efx)
1791                kfree(channel->rps_flow_id);
1792#endif
1793        down_write(&efx->filter_sem);
1794        efx->type->filter_table_remove(efx);
1795        up_write(&efx->filter_sem);
1796}
1797
1798static void efx_restore_filters(struct efx_nic *efx)
1799{
1800        down_read(&efx->filter_sem);
1801        efx->type->filter_table_restore(efx);
1802        up_read(&efx->filter_sem);
1803}
1804
1805/**************************************************************************
1806 *
1807 * NIC startup/shutdown
1808 *
1809 *************************************************************************/
1810
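/* Probe everything in dependency order: NIC, port, default DMA queue
 * sizes, vswitching (SR-IOV, non-fatal on failure), filter tables and
 * finally the channels.  Unwinds in reverse order on failure.
 */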
1811static int efx_probe_all(struct efx_nic *efx)
1812{
1813        int rc;
1814
1815        rc = efx_probe_nic(efx);
1816        if (rc) {
1817                netif_err(efx, probe, efx->net_dev, "failed to create NIC\n");
1818                goto fail1;
1819        }
1820
1821        rc = efx_probe_port(efx);
1822        if (rc) {
1823                netif_err(efx, probe, efx->net_dev, "failed to create port\n");
1824                goto fail2;
1825        }
1826
1827        BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT);
1828        if (WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) {
1829                rc = -EINVAL;
1830                goto fail3;
1831        }
1832        efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE;
1833
1834#ifdef CONFIG_SFC_SRIOV
1835        rc = efx->type->vswitching_probe(efx);
1836        if (rc) /* not fatal; the PF will still work fine */
1837                netif_warn(efx, probe, efx->net_dev,
1838                           "failed to setup vswitching rc=%d;"
1839                           " VFs may not function\n", rc);
1840#endif
1841
1842        rc = efx_probe_filters(efx);
1843        if (rc) {
1844                netif_err(efx, probe, efx->net_dev,
1845                          "failed to create filter tables\n");
1846                goto fail4;
1847        }
1848
1849        rc = efx_probe_channels(efx);
1850        if (rc)
1851                goto fail5;
1852
1853        return 0;
1854
1855 fail5:
1856        efx_remove_filters(efx);
1857 fail4:
1858#ifdef CONFIG_SFC_SRIOV
1859        efx->type->vswitching_remove(efx);
1860#endif
1861 fail3:
1862        efx_remove_port(efx);
1863 fail2:
1864        efx_remove_nic(efx);
1865 fail1:
1866        return rc;
1867}
1868
1869/* If the interface is supposed to be running but is not, start
1870 * the hardware and software data path, regular activity for the port
1871 * (MAC statistics, link polling, etc.) and schedule the port to be
1872 * reconfigured.  Interrupts must already be enabled.  This function
1873 * is safe to call multiple times, so long as the NIC is not disabled.
1874 * Requires the RTNL lock.
1875 */
1876static void efx_start_all(struct efx_nic *efx)
1877{
1878        EFX_ASSERT_RESET_SERIALISED(efx);
1879        BUG_ON(efx->state == STATE_DISABLED);
1880
1881        /* Check that it is appropriate to restart the interface. All
1882         * of these flags are safe to read under just the rtnl lock */
1883        if (efx->port_enabled || !netif_running(efx->net_dev) ||
1884            efx->reset_pending)
1885                return;
1886
1887        efx_start_port(efx);
1888        efx_start_datapath(efx);
1889
1890        /* Start the hardware monitor if there is one */
1891        if (efx->type->monitor != NULL)
1892                queue_delayed_work(efx->workqueue, &efx->monitor_work,
1893                                   efx_monitor_interval);
1894
1895        /* If link state detection is normally event-driven, we have
1896         * to poll now because we could have missed a change
1897         */
1898        if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) {
1899                mutex_lock(&efx->mac_lock);
1900                if (efx->phy_op->poll(efx))
1901                        efx_link_status_changed(efx);
1902                mutex_unlock(&efx->mac_lock);
1903        }
1904
1905        efx->type->start_stats(efx);
1906        efx->type->pull_stats(efx);
1907        spin_lock_bh(&efx->stats_lock);
1908        efx->type->update_stats(efx, NULL, NULL);
1909        spin_unlock_bh(&efx->stats_lock);
1910}
1911
1912/* Quiesce the hardware and software data path, and regular activity
1913 * for the port without bringing the link down.  Safe to call multiple
1914 * times with the NIC in almost any state, but interrupts should be
1915 * enabled.  Requires the RTNL lock.
1916 */
1917static void efx_stop_all(struct efx_nic *efx)
1918{
1919        EFX_ASSERT_RESET_SERIALISED(efx);
1920
1921        /* port_enabled can be read safely under the rtnl lock */
1922        if (!efx->port_enabled)
1923                return;
1924
1925        /* update stats before we go down so we can accurately count
1926         * rx_nodesc_drops
1927         */
1928        efx->type->pull_stats(efx);
1929        spin_lock_bh(&efx->stats_lock);
1930        efx->type->update_stats(efx, NULL, NULL);
1931        spin_unlock_bh(&efx->stats_lock);
1932        efx->type->stop_stats(efx);
1933        efx_stop_port(efx);
1934
1935        /* Stop the kernel transmit interface.  This is only valid if
1936         * the device is stopped or detached; otherwise the watchdog
1937         * may fire immediately.
1938         */
1939        WARN_ON(netif_running(efx->net_dev) &&
1940                netif_device_present(efx->net_dev));
1941        netif_tx_disable(efx->net_dev);
1942
1943        efx_stop_datapath(efx);
1944}
1945
1946static void efx_remove_all(struct efx_nic *efx)
1947{
1948        efx_remove_channels(efx);
1949        efx_remove_filters(efx);
1950#ifdef CONFIG_SFC_SRIOV
1951        efx->type->vswitching_remove(efx);
1952#endif
1953        efx_remove_port(efx);
1954        efx_remove_nic(efx);
1955}
1956
1957/**************************************************************************
1958 *
1959 * Interrupt moderation
1960 *
1961 **************************************************************************/
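/* Convert an interrupt moderation period from microseconds to NIC timer
 * ticks; a non-zero period is never rounded down to zero ticks.
 */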
1962unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs)
1963{
1964        if (usecs == 0)
1965                return 0;
1966        if (usecs * 1000 < efx->timer_quantum_ns)
1967                return 1; /* never round down to 0 */
1968        return usecs * 1000 / efx->timer_quantum_ns;
1969}
1970
1971unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks)
1972{
1973        /* We must round up when converting ticks to microseconds
1974         * because we round down when converting the other way.
1975         */
1976        return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000);
1977}
1978
1979/* Set interrupt moderation parameters */
1980int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
1981                            unsigned int rx_usecs, bool rx_adaptive,
1982                            bool rx_may_override_tx)
1983{
1984        struct efx_channel *channel;
1985        unsigned int timer_max_us;
1986
1987        EFX_ASSERT_RESET_SERIALISED(efx);
1988
1989        timer_max_us = efx->timer_max_ns / 1000;
1990
1991        if (tx_usecs > timer_max_us || rx_usecs > timer_max_us)
1992                return -EINVAL;
1993
1994        if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 &&
1995            !rx_may_override_tx) {
1996                netif_err(efx, drv, efx->net_dev, "Channels are shared. "
1997                          "RX and TX IRQ moderation must be equal\n");
1998                return -EINVAL;
1999        }
2000
2001        efx->irq_rx_adaptive = rx_adaptive;
2002        efx->irq_rx_moderation_us = rx_usecs;
2003        efx_for_each_channel(channel, efx) {
2004                if (efx_channel_has_rx_queue(channel))
2005                        channel->irq_moderation_us = rx_usecs;
2006                else if (efx_channel_has_tx_queues(channel))
2007                        channel->irq_moderation_us = tx_usecs;
2008        }
2009
2010        return 0;
2011}
2012
2013void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
2014                            unsigned int *rx_usecs, bool *rx_adaptive)
2015{
2016        *rx_adaptive = efx->irq_rx_adaptive;
2017        *rx_usecs = efx->irq_rx_moderation_us;
2018
2019        /* If channels are shared between RX and TX, so is IRQ
2020         * moderation.  Otherwise, IRQ moderation is the same for all
2021         * TX channels and is not adaptive.
2022         */
2023        if (efx->tx_channel_offset == 0) {
2024                *tx_usecs = *rx_usecs;
2025        } else {
2026                struct efx_channel *tx_channel;
2027
2028                tx_channel = efx->channel[efx->tx_channel_offset];
2029                *tx_usecs = tx_channel->irq_moderation_us;
2030        }
2031}
2032
2033/**************************************************************************
2034 *
2035 * Hardware monitor
2036 *
2037 **************************************************************************/
2038
2039/* Run periodically off the general workqueue */
2040static void efx_monitor(struct work_struct *data)
2041{
2042        struct efx_nic *efx = container_of(data, struct efx_nic,
2043                                           monitor_work.work);
2044
2045        netif_vdbg(efx, timer, efx->net_dev,
2046                   "hardware monitor executing on CPU %d\n",
2047                   raw_smp_processor_id());
2048        BUG_ON(efx->type->monitor == NULL);
2049
2050        /* If the mac_lock is already held then a port reconfiguration is
2051         * probably already in progress, which will do most of the work of
2052         * monitor() anyway. */
2053        if (mutex_trylock(&efx->mac_lock)) {
2054                if (efx->port_enabled)
2055                        efx->type->monitor(efx);
2056                mutex_unlock(&efx->mac_lock);
2057        }
2058
2059        queue_delayed_work(efx->workqueue, &efx->monitor_work,
2060                           efx_monitor_interval);
2061}
2062
2063/**************************************************************************
2064 *
2065 * ioctls
2066 *
2067 *************************************************************************/
2068
2069/* Net device ioctl
2070 * Context: process, rtnl_lock() held.
2071 */
2072static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
2073{
2074        struct efx_nic *efx = netdev_priv(net_dev);
2075        struct mii_ioctl_data *data = if_mii(ifr);
2076
2077        if (cmd == SIOCSHWTSTAMP)
2078                return efx_ptp_set_ts_config(efx, ifr);
2079        if (cmd == SIOCGHWTSTAMP)
2080                return efx_ptp_get_ts_config(efx, ifr);
2081
2082        /* Convert phy_id from older PRTAD/DEVAD format */
2083        if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) &&
2084            (data->phy_id & 0xfc00) == 0x0400)
2085                data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400;
2086
2087        return mdio_mii_ioctl(&efx->mdio, data, cmd);
2088}
2089
2090/**************************************************************************
2091 *
2092 * NAPI interface
2093 *
2094 **************************************************************************/
2095
2096static void efx_init_napi_channel(struct efx_channel *channel)
2097{
2098        struct efx_nic *efx = channel->efx;
2099
2100        channel->napi_dev = efx->net_dev;
2101        netif_napi_add(channel->napi_dev, &channel->napi_str,
2102                       efx_poll, napi_weight);
2103        efx_channel_busy_poll_init(channel);
2104}
2105
2106static void efx_init_napi(struct efx_nic *efx)
2107{
2108        struct efx_channel *channel;
2109
2110        efx_for_each_channel(channel, efx)
2111                efx_init_napi_channel(channel);
2112}
2113
2114static void efx_fini_napi_channel(struct efx_channel *channel)
2115{
2116        if (channel->napi_dev) {
2117                netif_napi_del(&channel->napi_str);
2118                napi_hash_del(&channel->napi_str);
2119        }
2120        channel->napi_dev = NULL;
2121}
2122
2123static void efx_fini_napi(struct efx_nic *efx)
2124{
2125        struct efx_channel *channel;
2126
2127        efx_for_each_channel(channel, efx)
2128                efx_fini_napi_channel(channel);
2129}
2130
2131/**************************************************************************
2132 *
2133 * Kernel netpoll interface
2134 *
2135 *************************************************************************/
2136
2137#ifdef CONFIG_NET_POLL_CONTROLLER
2138
2139/* Although in the common case interrupts will be disabled, this is not
2140 * guaranteed. However, all our work happens inside the NAPI callback,
2141 * so no locking is required.
2142 */
2143static void efx_netpoll(struct net_device *net_dev)
2144{
2145        struct efx_nic *efx = netdev_priv(net_dev);
2146        struct efx_channel *channel;
2147
2148        efx_for_each_channel(channel, efx)
2149                efx_schedule_channel(channel);
2150}
2151
2152#endif
2153
2154#ifdef CONFIG_NET_RX_BUSY_POLL
2155static int efx_busy_poll(struct napi_struct *napi)
2156{
2157        struct efx_channel *channel =
2158                container_of(napi, struct efx_channel, napi_str);
2159        struct efx_nic *efx = channel->efx;
2160        int budget = 4;
2161        int old_rx_packets, rx_packets;
2162
2163        if (!netif_running(efx->net_dev))
2164                return LL_FLUSH_FAILED;
2165
2166        if (!efx_channel_try_lock_poll(channel))
2167                return LL_FLUSH_BUSY;
2168
2169        old_rx_packets = channel->rx_queue.rx_packets;
2170        efx_process_channel(channel, budget);
2171
2172        rx_packets = channel->rx_queue.rx_packets - old_rx_packets;
2173
2174        /* There is no race condition with NAPI here.
2175         * NAPI will automatically be rescheduled if it yielded during busy
2176         * polling, because it was not able to take the lock and thus returned
2177         * the full budget.
2178         */
2179        efx_channel_unlock_poll(channel);
2180
2181        return rx_packets;
2182}
2183#endif
2184
2185/**************************************************************************
2186 *
2187 * Kernel net device interface
2188 *
2189 *************************************************************************/
2190
2191/* Context: process, rtnl_lock() held. */
2192int efx_net_open(struct net_device *net_dev)
2193{
2194        struct efx_nic *efx = netdev_priv(net_dev);
2195        int rc;
2196
2197        netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n",
2198                  raw_smp_processor_id());
2199
2200        rc = efx_check_disabled(efx);
2201        if (rc)
2202                return rc;
2203        if (efx->phy_mode & PHY_MODE_SPECIAL)
2204                return -EBUSY;
2205        if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL))
2206                return -EIO;
2207
2208        /* Notify the kernel of the link state polled during driver load,
2209         * before the monitor starts running */
2210        efx_link_status_changed(efx);
2211
2212        efx_start_all(efx);
2213        efx_selftest_async_start(efx);
2214        return 0;
2215}
2216
2217/* Context: process, rtnl_lock() held.
2218 * Note that the kernel will ignore our return code; this method
2219 * should really be void.
2220 */
2221int efx_net_stop(struct net_device *net_dev)
2222{
2223        struct efx_nic *efx = netdev_priv(net_dev);
2224
2225        netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
2226                  raw_smp_processor_id());
2227
2228        /* Stop the device and flush all the channels */
2229        efx_stop_all(efx);
2230
2231        return 0;
2232}
2233
2234/* Context: process, dev_base_lock or RTNL held, non-blocking. */
2235static struct rtnl_link_stats64 *efx_net_stats(struct net_device *net_dev,
2236                                               struct rtnl_link_stats64 *stats)
2237{
2238        struct efx_nic *efx = netdev_priv(net_dev);
2239
2240        spin_lock_bh(&efx->stats_lock);
2241        efx->type->update_stats(efx, NULL, stats);
2242        spin_unlock_bh(&efx->stats_lock);
2243
2244        return stats;
2245}
2246
2247/* Context: netif_tx_lock held, BHs disabled. */
2248static void efx_watchdog(struct net_device *net_dev)
2249{
2250        struct efx_nic *efx = netdev_priv(net_dev);
2251
2252        netif_err(efx, tx_err, efx->net_dev,
2253                  "TX stuck with port_enabled=%d: resetting channels\n",
2254                  efx->port_enabled);
2255
2256        efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
2257}
2258
2259
2260/* Context: process, rtnl_lock() held. */
2261static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
2262{
2263        struct efx_nic *efx = netdev_priv(net_dev);
2264        int rc;
2265
2266        rc = efx_check_disabled(efx);
2267        if (rc)
2268                return rc;
2269        if (new_mtu > EFX_MAX_MTU) {
2270                netif_err(efx, drv, efx->net_dev,
2271                          "Requested MTU of %d too big (max: %d)\n",
2272                          new_mtu, EFX_MAX_MTU);
2273                return -EINVAL;
2274        }
2275        if (new_mtu < EFX_MIN_MTU) {
2276                netif_err(efx, drv, efx->net_dev,
2277                          "Requested MTU of %d too small (min: %d)\n",
2278                          new_mtu, EFX_MIN_MTU);
2279                return -EINVAL;
2280        }
2281
2282        netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
2283
2284        efx_device_detach_sync(efx);
2285        efx_stop_all(efx);
2286
2287        mutex_lock(&efx->mac_lock);
2288        net_dev->mtu = new_mtu;
2289        efx_mac_reconfigure(efx);
2290        mutex_unlock(&efx->mac_lock);
2291
2292        efx_start_all(efx);
2293        netif_device_attach(efx->net_dev);
2294        return 0;
2295}
2296
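/* Context: process, rtnl_lock() held. */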
2297static int efx_set_mac_address(struct net_device *net_dev, void *data)
2298{
2299        struct efx_nic *efx = netdev_priv(net_dev);
2300        struct sockaddr *addr = data;
2301        u8 *new_addr = addr->sa_data;
2302        u8 old_addr[6];
2303        int rc;
2304
2305        if (!is_valid_ether_addr(new_addr)) {
2306                netif_err(efx, drv, efx->net_dev,
2307                          "invalid ethernet MAC address requested: %pM\n",
2308                          new_addr);
2309                return -EADDRNOTAVAIL;
2310        }
2311
2312        /* save old address */
2313        ether_addr_copy(old_addr, net_dev->dev_addr);
2314        ether_addr_copy(net_dev->dev_addr, new_addr);
2315        if (efx->type->set_mac_address) {
2316                rc = efx->type->set_mac_address(efx);
2317                if (rc) {
2318                        ether_addr_copy(net_dev->dev_addr, old_addr);
2319                        return rc;
2320                }
2321        }
2322
2323        /* Reconfigure the MAC */
2324        mutex_lock(&efx->mac_lock);
2325        efx_mac_reconfigure(efx);
2326        mutex_unlock(&efx->mac_lock);
2327
2328        return 0;
2329}
2330
2331/* Context: netif_addr_lock held, BHs disabled. */
2332static void efx_set_rx_mode(struct net_device *net_dev)
2333{
2334        struct efx_nic *efx = netdev_priv(net_dev);
2335
2336        if (efx->port_enabled)
2337                queue_work(efx->workqueue, &efx->mac_work);
2338        /* Otherwise efx_start_port() will do this */
2339}
2340
2341static int efx_set_features(struct net_device *net_dev, netdev_features_t data)
2342{
2343        struct efx_nic *efx = netdev_priv(net_dev);
2344        int rc;
2345
2346        /* If disabling RX n-tuple filtering, clear existing filters */
2347        if (net_dev->features & ~data & NETIF_F_NTUPLE) {
2348                rc = efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL);
2349                if (rc)
2350                        return rc;
2351        }
2352
2353        /* If Rx VLAN filter is changed, update filters via mac_reconfigure */
2354        if ((net_dev->features ^ data) & NETIF_F_HW_VLAN_CTAG_FILTER) {
2355                /* efx_set_rx_mode() will schedule MAC work to update filters
2356                 * when the new features are finally set in net_dev.
2357                 */
2358                efx_set_rx_mode(net_dev);
2359        }
2360
2361        return 0;
2362}
2363
2364static int efx_vlan_rx_add_vid(struct net_device *net_dev, __be16 proto, u16 vid)
2365{
2366        struct efx_nic *efx = netdev_priv(net_dev);
2367
2368        if (efx->type->vlan_rx_add_vid)
2369                return efx->type->vlan_rx_add_vid(efx, proto, vid);
2370        else
2371                return -EOPNOTSUPP;
2372}
2373
2374static int efx_vlan_rx_kill_vid(struct net_device *net_dev, __be16 proto, u16 vid)
2375{
2376        struct efx_nic *efx = netdev_priv(net_dev);
2377
2378        if (efx->type->vlan_rx_kill_vid)
2379                return efx->type->vlan_rx_kill_vid(efx, proto, vid);
2380        else
2381                return -EOPNOTSUPP;
2382}
2383
2384static const struct net_device_ops efx_netdev_ops = {
2385        .ndo_open               = efx_net_open,
2386        .ndo_stop               = efx_net_stop,
2387        .ndo_get_stats64        = efx_net_stats,
2388        .ndo_tx_timeout         = efx_watchdog,
2389        .ndo_start_xmit         = efx_hard_start_xmit,
2390        .ndo_validate_addr      = eth_validate_addr,
2391        .ndo_do_ioctl           = efx_ioctl,
2392        .ndo_change_mtu         = efx_change_mtu,
2393        .ndo_set_mac_address    = efx_set_mac_address,
2394        .ndo_set_rx_mode        = efx_set_rx_mode,
2395        .ndo_set_features       = efx_set_features,
2396        .ndo_vlan_rx_add_vid    = efx_vlan_rx_add_vid,
2397        .ndo_vlan_rx_kill_vid   = efx_vlan_rx_kill_vid,
2398#ifdef CONFIG_SFC_SRIOV
2399        .ndo_set_vf_mac         = efx_sriov_set_vf_mac,
2400        .ndo_set_vf_vlan        = efx_sriov_set_vf_vlan,
2401        .ndo_set_vf_spoofchk    = efx_sriov_set_vf_spoofchk,
2402        .ndo_get_vf_config      = efx_sriov_get_vf_config,
2403        .ndo_set_vf_link_state  = efx_sriov_set_vf_link_state,
2404        .ndo_get_phys_port_id   = efx_sriov_get_phys_port_id,
2405#endif
2406#ifdef CONFIG_NET_POLL_CONTROLLER
2407        .ndo_poll_controller = efx_netpoll,
2408#endif
2409        .ndo_setup_tc           = efx_setup_tc,
2410#ifdef CONFIG_NET_RX_BUSY_POLL
2411        .ndo_busy_poll          = efx_busy_poll,
2412#endif
2413#ifdef CONFIG_RFS_ACCEL
2414        .ndo_rx_flow_steer      = efx_filter_rfs,
2415#endif
2416};
2417
2418static void efx_update_name(struct efx_nic *efx)
2419{
2420        strcpy(efx->name, efx->net_dev->name);
2421        efx_mtd_rename(efx);
2422        efx_set_channel_names(efx);
2423}
2424
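/* netdevice notifier: when one of our interfaces is renamed, propagate
 * the new name to efx->name, the MTD partitions and the channel names.
 */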
2425static int efx_netdev_event(struct notifier_block *this,
2426                            unsigned long event, void *ptr)
2427{
2428        struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);
2429
2430        if ((net_dev->netdev_ops == &efx_netdev_ops) &&
2431            event == NETDEV_CHANGENAME)
2432                efx_update_name(netdev_priv(net_dev));
2433
2434        return NOTIFY_DONE;
2435}
2436
2437static struct notifier_block efx_netdev_notifier = {
2438        .notifier_call = efx_netdev_event,
2439};
2440
2441static ssize_t
2442show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
2443{
2444        struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
2445        return sprintf(buf, "%d\n", efx->phy_type);
2446}
2447static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);
2448
2449#ifdef CONFIG_SFC_MCDI_LOGGING
2450static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr,
2451                             char *buf)
2452{
2453        struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
2454        struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
2455
2456        return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled);
2457}
2458static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr,
2459                            const char *buf, size_t count)
2460{
2461        struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
2462        struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
2463        bool enable = count > 0 && *buf != '0';
2464
2465        mcdi->logging_enabled = enable;
2466        return count;
2467}
2468static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log);
2469#endif
2470
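/* Register the net device: fill in the remaining net_device fields, mark
 * the driver READY (aborting if a reset was already requested), allocate
 * the interface name, register with the networking core and create the
 * sysfs attributes (phy_type and, if enabled, mcdi_logging).
 */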
2471static int efx_register_netdev(struct efx_nic *efx)
2472{
2473        struct net_device *net_dev = efx->net_dev;
2474        struct efx_channel *channel;
2475        int rc;
2476
2477        net_dev->watchdog_timeo = 5 * HZ;
2478        net_dev->irq = efx->pci_dev->irq;
2479        net_dev->netdev_ops = &efx_netdev_ops;
2480        if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
2481                net_dev->priv_flags |= IFF_UNICAST_FLT;
2482        net_dev->ethtool_ops = &efx_ethtool_ops;
2483        net_dev->gso_max_segs = EFX_TSO_MAX_SEGS;
2484
2485        rtnl_lock();
2486
2487        /* Enable resets to be scheduled and check whether any were
2488         * already requested.  If so, the NIC is probably hosed so we
2489         * abort.
2490         */
2491        efx->state = STATE_READY;
2492        smp_mb(); /* ensure we change state before checking reset_pending */
2493        if (efx->reset_pending) {
2494                netif_err(efx, probe, efx->net_dev,
2495                          "aborting probe due to scheduled reset\n");
2496                rc = -EIO;
2497                goto fail_locked;
2498        }
2499
2500        rc = dev_alloc_name(net_dev, net_dev->name);
2501        if (rc < 0)
2502                goto fail_locked;
2503        efx_update_name(efx);
2504
2505        /* Always start with carrier off; PHY events will detect the link */
2506        netif_carrier_off(net_dev);
2507
2508        rc = register_netdevice(net_dev);
2509        if (rc)
2510                goto fail_locked;
2511
2512        efx_for_each_channel(channel, efx) {
2513                struct efx_tx_queue *tx_queue;
2514                efx_for_each_channel_tx_queue(tx_queue, channel)
2515                        efx_init_tx_queue_core_txq(tx_queue);
2516        }
2517
2518        efx_associate(efx);
2519
2520        rtnl_unlock();
2521
2522        rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
2523        if (rc) {
2524                netif_err(efx, drv, efx->net_dev,
2525                          "failed to init net dev attributes\n");
2526                goto fail_registered;
2527        }
2528#ifdef CONFIG_SFC_MCDI_LOGGING
2529        rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
2530        if (rc) {
2531                netif_err(efx, drv, efx->net_dev,
2532                          "failed to init net dev attributes\n");
2533                goto fail_attr_mcdi_logging;
2534        }
2535#endif
2536
2537        return 0;
2538
2539#ifdef CONFIG_SFC_MCDI_LOGGING
2540fail_attr_mcdi_logging:
2541        device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
2542#endif
2543fail_registered:
2544        rtnl_lock();
2545        efx_dissociate(efx);
2546        unregister_netdevice(net_dev);
2547fail_locked:
2548        efx->state = STATE_UNINIT;
2549        rtnl_unlock();
2550        netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
2551        return rc;
2552}
2553
2554static void efx_unregister_netdev(struct efx_nic *efx)
2555{
2556        if (!efx->net_dev)
2557                return;
2558
2559        BUG_ON(netdev_priv(efx->net_dev) != efx);
2560
2561        if (efx_dev_registered(efx)) {
2562                strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
2563#ifdef CONFIG_SFC_MCDI_LOGGING
2564                device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
2565#endif
2566                device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
2567                unregister_netdev(efx->net_dev);
2568        }
2569}
2570
2571/**************************************************************************
2572 *
2573 * Device reset and suspend
2574 *
2575 **************************************************************************/
2576
2577/* Tears down the entire software state and most of the hardware state
2578 * before reset.  */
2579void efx_reset_down(struct efx_nic *efx, enum reset_type method)
2580{
2581        EFX_ASSERT_RESET_SERIALISED(efx);
2582
2583        if (method == RESET_TYPE_MCDI_TIMEOUT)
2584                efx->type->prepare_flr(efx);
2585
2586        efx_stop_all(efx);
2587        efx_disable_interrupts(efx);
2588
2589        mutex_lock(&efx->mac_lock);
2590        if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
2591            method != RESET_TYPE_DATAPATH)
2592                efx->phy_op->fini(efx);
2593        efx->type->fini(efx);
2594}
2595
2596/* This function will always ensure that the locks acquired in
2597 * efx_reset_down() are released. A failure return code indicates
2598 * that we were unable to reinitialise the hardware, and the
2599 * driver should be disabled. If ok is false, then the rx and tx
2600 * engines are not restarted, pending a RESET_DISABLE. */
2601int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
2602{
2603        int rc;
2604
2605        EFX_ASSERT_RESET_SERIALISED(efx);
2606
2607        if (method == RESET_TYPE_MCDI_TIMEOUT)
2608                efx->type->finish_flr(efx);
2609
2610        /* Ensure that SRAM is initialised even if we're disabling the device */
2611        rc = efx->type->init(efx);
2612        if (rc) {
2613                netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
2614                goto fail;
2615        }
2616
2617        if (!ok)
2618                goto fail;
2619
2620        if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
2621            method != RESET_TYPE_DATAPATH) {
2622                rc = efx->phy_op->init(efx);
2623                if (rc)
2624                        goto fail;
2625                rc = efx->phy_op->reconfigure(efx);
2626                if (rc && rc != -EPERM)
2627                        netif_err(efx, drv, efx->net_dev,
2628                                  "could not restore PHY settings\n");
2629        }
2630
2631        rc = efx_enable_interrupts(efx);
2632        if (rc)
2633                goto fail;
2634
2635#ifdef CONFIG_SFC_SRIOV
2636        rc = efx->type->vswitching_restore(efx);
2637        if (rc) /* not fatal; the PF will still work fine */
2638                netif_warn(efx, probe, efx->net_dev,
2639                           "failed to restore vswitching rc=%d;"
2640                           " VFs may not function\n", rc);
2641#endif
2642
2643        down_read(&efx->filter_sem);
2644        efx_restore_filters(efx);
2645        up_read(&efx->filter_sem);
2646        if (efx->type->sriov_reset)
2647                efx->type->sriov_reset(efx);
2648
2649        mutex_unlock(&efx->mac_lock);
2650
2651        efx_start_all(efx);
2652
2653        return 0;
2654
2655fail:
2656        efx->port_initialized = false;
2657
2658        mutex_unlock(&efx->mac_lock);
2659
2660        return rc;
2661}
2662
2663/* Reset the NIC using the specified method.  Note that the reset may
2664 * fail, in which case the card will be left in an unusable state.
2665 *
2666 * Caller must hold the rtnl_lock.
2667 */
2668int efx_reset(struct efx_nic *efx, enum reset_type method)
2669{
2670        int rc, rc2;
2671        bool disabled;
2672
2673        netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
2674                   RESET_TYPE(method));
2675
2676        efx_device_detach_sync(efx);
2677        efx_reset_down(efx, method);
2678
2679        rc = efx->type->reset(efx, method);
2680        if (rc) {
2681                netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
2682                goto out;
2683        }
2684
2685        /* Clear flags for the scopes we covered.  We assume the NIC and
2686         * driver are now quiescent so that there is no race here.
2687         */
2688        if (method < RESET_TYPE_MAX_METHOD)
2689                efx->reset_pending &= -(1 << (method + 1));
2690        else /* it doesn't fit into the well-ordered scope hierarchy */
2691                __clear_bit(method, &efx->reset_pending);
2692
2693        /* Reinitialise bus-mastering, which may have been turned off before
2694         * the reset was scheduled. This is still appropriate, even in the
2695         * RESET_TYPE_DISABLE case, since this driver generally assumes the
2696         * hardware can respond to requests. */
2697        pci_set_master(efx->pci_dev);
2698
2699out:
2700        /* Leave device stopped if necessary */
2701        disabled = rc ||
2702                method == RESET_TYPE_DISABLE ||
2703                method == RESET_TYPE_RECOVER_OR_DISABLE;
2704        rc2 = efx_reset_up(efx, method, !disabled);
2705        if (rc2) {
2706                disabled = true;
2707                if (!rc)
2708                        rc = rc2;
2709        }
2710
2711        if (disabled) {
2712                dev_close(efx->net_dev);
2713                netif_err(efx, drv, efx->net_dev, "has been disabled\n");
2714                efx->state = STATE_DISABLED;
2715        } else {
2716                netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
2717                netif_device_attach(efx->net_dev);
2718        }
2719        return rc;
2720}
2721
2722/* Try recovery mechanisms.
2723 * For now only EEH is supported.
2724 * Returns 0 if the recovery mechanisms are unsuccessful.
2725 * Returns a non-zero value otherwise.
2726 */
2727int efx_try_recovery(struct efx_nic *efx)
2728{
2729#ifdef CONFIG_EEH
2730        /* A PCI error can occur and not be seen by EEH because nothing
2731         * happens on the PCI bus. In this case the driver may fail and
2732         * schedule a 'recover or reset', leading to this recovery handler.
2733         * Manually call the eeh failure check function.
2734         */
2735        struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev);
2736        if (eeh_dev_check_failure(eehdev)) {
2737                /* The EEH mechanisms will handle the error and reset the
2738                 * device if necessary.
2739                 */
2740                return 1;
2741        }
2742#endif
2743        return 0;
2744}
2745
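/* Wait for the MC to report a reboot after BIST, polling
 * efx_mcdi_poll_reboot() up to BIST_WAIT_DELAY_COUNT times.  The BIST
 * flag is cleared either way so that a reset can still be attempted.
 */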
2746static void efx_wait_for_bist_end(struct efx_nic *efx)
2747{
2748        int i;
2749
2750        for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) {
2751                if (efx_mcdi_poll_reboot(efx))
2752                        goto out;
2753                msleep(BIST_WAIT_DELAY_MS);
2754        }
2755
2756        netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n");
2757out:
2758        /* Either way unset the BIST flag. If we found no reboot we probably
2759         * won't recover, but we should try.
2760         */
2761        efx->mc_bist_for_other_fn = false;
2762}
2763
2764/* The worker thread exists so that code that cannot sleep can
2765 * schedule a reset for later.
2766 */
2767static void efx_reset_work(struct work_struct *data)
2768{
2769        struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
2770        unsigned long pending;
2771        enum reset_type method;
2772
2773        pending = ACCESS_ONCE(efx->reset_pending);
2774        method = fls(pending) - 1;
2775
2776        if (method == RESET_TYPE_MC_BIST)
2777                efx_wait_for_bist_end(efx);
2778
2779        if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
2780             method == RESET_TYPE_RECOVER_OR_ALL) &&
2781            efx_try_recovery(efx))
2782                return;
2783
2784        if (!pending)
2785                return;
2786
2787        rtnl_lock();
2788
2789        /* We checked the state in efx_schedule_reset() but it may
2790         * have changed by now.  Now that we have the RTNL lock,
2791         * it cannot change again.
2792         */
2793        if (efx->state == STATE_READY)
2794                (void)efx_reset(efx, method);
2795
2796        rtnl_unlock();
2797}
2798
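/* Schedule a reset to be run later from process context.  The reset
 * reason is mapped onto a reset method, recorded in reset_pending and,
 * if the device is READY, the reset work item is queued; otherwise the
 * pending flags are left set as a cue to abort probing or reschedule.
 */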
2799void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
2800{
2801        enum reset_type method;
2802
2803        if (efx->state == STATE_RECOVERY) {
2804                netif_dbg(efx, drv, efx->net_dev,
2805                          "recovering: skip scheduling %s reset\n",
2806                          RESET_TYPE(type));
2807                return;
2808        }
2809
2810        switch (type) {
2811        case RESET_TYPE_INVISIBLE:
2812        case RESET_TYPE_ALL:
2813        case RESET_TYPE_RECOVER_OR_ALL:
2814        case RESET_TYPE_WORLD:
2815        case RESET_TYPE_DISABLE:
2816        case RESET_TYPE_RECOVER_OR_DISABLE:
2817        case RESET_TYPE_DATAPATH:
2818        case RESET_TYPE_MC_BIST:
2819        case RESET_TYPE_MCDI_TIMEOUT:
2820                method = type;
2821                netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
2822                          RESET_TYPE(method));
2823                break;
2824        default:
2825                method = efx->type->map_reset_reason(type);
2826                netif_dbg(efx, drv, efx->net_dev,
2827                          "scheduling %s reset for %s\n",
2828                          RESET_TYPE(method), RESET_TYPE(type));
2829                break;
2830        }
2831
2832        set_bit(method, &efx->reset_pending);
2833        smp_mb(); /* ensure we change reset_pending before checking state */
2834
2835        /* If we're not READY then just leave the flags set as the cue
2836         * to abort probing or reschedule the reset later.
2837         */
2838        if (ACCESS_ONCE(efx->state) != STATE_READY)
2839                return;
2840
2841        /* efx_process_channel() will no longer read events once a
2842         * reset is scheduled. So switch back to polled MCDI completions. */
2843        efx_mcdi_mode_poll(efx);
2844
2845        queue_work(reset_workqueue, &efx->reset_work);
2846}
2847
2848/**************************************************************************
2849 *
2850 * List of NICs we support
2851 *
2852 **************************************************************************/
2853
2854/* PCI device ID table */
2855static const struct pci_device_id efx_pci_table[] = {
2856        {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
2857                    PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0),
2858         .driver_data = (unsigned long) &falcon_a1_nic_type},
2859        {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
2860                    PCI_DEVICE_ID_SOLARFLARE_SFC4000B),
2861         .driver_data = (unsigned long) &falcon_b0_nic_type},
2862        {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803),  /* SFC9020 */
2863         .driver_data = (unsigned long) &siena_a0_nic_type},
2864        {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813),  /* SFL9021 */
2865         .driver_data = (unsigned long) &siena_a0_nic_type},
2866        {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903),  /* SFC9120 PF */
2867         .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
2868        {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1903),  /* SFC9120 VF */
2869         .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
2870        {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923),  /* SFC9140 PF */
2871         .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
2872        {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923),  /* SFC9140 VF */
2873         .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
2874        {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03),  /* SFC9220 PF */
2875         .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
2876        {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03),  /* SFC9220 VF */
2877         .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
2878        {0}                     /* end of list */
2879};
2880
2881/**************************************************************************
2882 *
2883 * Dummy PHY/MAC operations
2884 *
2885 * Can be used for some unimplemented operations
2886 * Needed so all function pointers are valid and do not have to be tested
2887 * before use
2888 *
2889 **************************************************************************/
2890int efx_port_dummy_op_int(struct efx_nic *efx)
2891{
2892        return 0;
2893}
2894void efx_port_dummy_op_void(struct efx_nic *efx) {}
2895
2896static bool efx_port_dummy_op_poll(struct efx_nic *efx)
2897{
2898        return false;
2899}
2900
2901static const struct efx_phy_operations efx_dummy_phy_operations = {
2902        .init            = efx_port_dummy_op_int,
2903        .reconfigure     = efx_port_dummy_op_int,
2904        .poll            = efx_port_dummy_op_poll,
2905        .fini            = efx_port_dummy_op_void,
2906};
2907
2908/**************************************************************************
2909 *
2910 * Data housekeeping
2911 *
2912 **************************************************************************/
2913
2914/* This zeroes out and then fills in the invariants in a struct
2915 * efx_nic (including all sub-structures).
2916 */
2917static int efx_init_struct(struct efx_nic *efx,
2918                           struct pci_dev *pci_dev, struct net_device *net_dev)
2919{
2920        int i;
2921
2922        /* Initialise common structures */
2923        INIT_LIST_HEAD(&efx->node);
2924        INIT_LIST_HEAD(&efx->secondary_list);
2925        spin_lock_init(&efx->biu_lock);
2926#ifdef CONFIG_SFC_MTD
2927        INIT_LIST_HEAD(&efx->mtd_list);
2928#endif
2929        INIT_WORK(&efx->reset_work, efx_reset_work);
2930        INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
2931        INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work);
2932        efx->pci_dev = pci_dev;
2933        efx->msg_enable = debug;
2934        efx->state = STATE_UNINIT;
2935        strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
2936
2937        efx->net_dev = net_dev;
2938        efx->rx_prefix_size = efx->type->rx_prefix_size;
2939        efx->rx_ip_align =
2940                NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0;
2941        efx->rx_packet_hash_offset =
2942                efx->type->rx_hash_offset - efx->type->rx_prefix_size;
2943        efx->rx_packet_ts_offset =
2944                efx->type->rx_ts_offset - efx->type->rx_prefix_size;
2945        spin_lock_init(&efx->stats_lock);
2946        mutex_init(&efx->mac_lock);
2947        efx->phy_op = &efx_dummy_phy_operations;
2948        efx->mdio.dev = net_dev;
2949        INIT_WORK(&efx->mac_work, efx_mac_work);
2950        init_waitqueue_head(&efx->flush_wq);
2951
2952        for (i = 0; i < EFX_MAX_CHANNELS; i++) {
2953                efx->channel[i] = efx_alloc_channel(efx, i, NULL);
2954                if (!efx->channel[i])
2955                        goto fail;
2956                efx->msi_context[i].efx = efx;
2957                efx->msi_context[i].index = i;
2958        }
2959
2960        /* Higher numbered interrupt modes are less capable! */
2961        efx->interrupt_mode = max(efx->type->max_interrupt_mode,
2962                                  interrupt_mode);
2963
2964        /* Would be good to use the net_dev name, but we're too early */
2965        snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
2966                 pci_name(pci_dev));
2967        efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
2968        if (!efx->workqueue)
2969                goto fail;
2970
2971        return 0;
2972
2973fail:
2974        efx_fini_struct(efx);
2975        return -ENOMEM;
2976}
2977
2978static void efx_fini_struct(struct efx_nic *efx)
2979{
2980        int i;
2981
2982        for (i = 0; i < EFX_MAX_CHANNELS; i++)
2983                kfree(efx->channel[i]);
2984
2985        kfree(efx->vpd_sn);
2986
2987        if (efx->workqueue) {
2988                destroy_workqueue(efx->workqueue);
2989                efx->workqueue = NULL;
2990        }
2991}
2992
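/* Fill in the driver-maintained (software) statistics: the per-channel
 * rx_nodesc_trunc counts summed over all channels, and the number of RX
 * packets dropped because no skb could be allocated.
 */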
2993void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
2994{
2995        u64 n_rx_nodesc_trunc = 0;
2996        struct efx_channel *channel;
2997
2998        efx_for_each_channel(channel, efx)
2999                n_rx_nodesc_trunc += channel->n_rx_nodesc_trunc;
3000        stats[GENERIC_STAT_rx_nodesc_trunc] = n_rx_nodesc_trunc;
3001        stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
3002}
3003
3004/**************************************************************************
3005 *
3006 * PCI interface
3007 *
3008 **************************************************************************/
3009
3010/* Main body of final NIC shutdown code
3011 * This is called only at module unload (or hotplug removal).
3012 */
3013static void efx_pci_remove_main(struct efx_nic *efx)
3014{
3015        /* Flush reset_work. It can no longer be scheduled since we
3016         * are not READY.
3017         */
3018        BUG_ON(efx->state == STATE_READY);
3019        cancel_work_sync(&efx->reset_work);
3020
3021        efx_disable_interrupts(efx);
3022        efx_nic_fini_interrupt(efx);
3023        efx_fini_port(efx);
3024        efx->type->fini(efx);
3025        efx_fini_napi(efx);
3026        efx_remove_all(efx);
3027}
3028
3029/* Final NIC shutdown
3030 * This is called only at module unload (or hotplug removal).  A PF can call
3031 * this on its VFs to ensure they are unbound first.
3032 */
3033static void efx_pci_remove(struct pci_dev *pci_dev)
3034{
3035        struct efx_nic *efx;
3036
3037        efx = pci_get_drvdata(pci_dev);
3038        if (!efx)
3039                return;
3040
3041        /* Mark the NIC as being shut down, then stop the interface */
3042        rtnl_lock();
3043        efx_dissociate(efx);
3044        dev_close(efx->net_dev);
3045        efx_disable_interrupts(efx);
3046        efx->state = STATE_UNINIT;
3047        rtnl_unlock();
3048
3049        if (efx->type->sriov_fini)
3050                efx->type->sriov_fini(efx);
3051
3052        efx_unregister_netdev(efx);
3053
3054        efx_mtd_remove(efx);
3055
3056        efx_pci_remove_main(efx);
3057
3058        efx_fini_io(efx);
3059        netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");
3060
3061        efx_fini_struct(efx);
3062        free_netdev(efx->net_dev);
3063
3064        pci_disable_pcie_error_reporting(pci_dev);
3065}
3066
3067/* NIC VPD information
3068 * Called during probe to display the part number of the
3069 * installed NIC.  VPD is potentially very large but this should
3070 * always appear within the first 512 bytes.
3071 */
3072#define SFC_VPD_LEN 512
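    /* The read-only VPD section is a large resource (PCI_VPD_LRDT_RO_DATA)
     * with a 3-byte tag/length header, containing keyword fields such as
     * "PN" (part number) and "SN" (serial number); each field carries a
     * 2-character keyword and a length byte followed by the data.  The
     * code below walks that layout with the generic pci_vpd_* helpers.
     */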
3073static void efx_probe_vpd_strings(struct efx_nic *efx)
3074{
3075        struct pci_dev *dev = efx->pci_dev;
3076        char vpd_data[SFC_VPD_LEN];
3077        ssize_t vpd_size;
3078        int ro_start, ro_size, i, j;
3079
3080        /* Get the vpd data from the device */
3081        vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data);
3082        if (vpd_size <= 0) {
3083                netif_err(efx, drv, efx->net_dev, "Unable to read VPD\n");
3084                return;
3085        }
3086
3087        /* Get the Read only section */
3088        ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA);
3089        if (ro_start < 0) {
3090                netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n");
3091                return;
3092        }
3093
3094        ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]);
3095        j = ro_size;
3096        i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
3097        if (i + j > vpd_size)
3098                j = vpd_size - i;
3099
3100        /* Get the Part number */
3101        i = pci_vpd_find_info_keyword(vpd_data, i, j, "PN");
3102        if (i < 0) {
3103                netif_err(efx, drv, efx->net_dev, "Part number not found\n");
3104                return;
3105        }
3106
3107        j = pci_vpd_info_field_size(&vpd_data[i]);
3108        i += PCI_VPD_INFO_FLD_HDR_SIZE;
3109        if (i + j > vpd_size) {
3110                netif_err(efx, drv, efx->net_dev, "Incomplete part number\n");
3111                return;
3112        }
3113
3114        netif_info(efx, drv, efx->net_dev,
3115                   "Part Number : %.*s\n", j, &vpd_data[i]);
3116
3117        i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
3118        j = ro_size;
3119        i = pci_vpd_find_info_keyword(vpd_data, i, j, "SN");
3120        if (i < 0) {
3121                netif_err(efx, drv, efx->net_dev, "Serial number not found\n");
3122                return;
3123        }
3124
3125        j = pci_vpd_info_field_size(&vpd_data[i]);
3126        i += PCI_VPD_INFO_FLD_HDR_SIZE;
3127        if (i + j > vpd_size) {
3128                netif_err(efx, drv, efx->net_dev, "Incomplete serial number\n");
3129                return;
3130        }
3131
3132        efx->vpd_sn = kmalloc(j + 1, GFP_KERNEL);
3133        if (!efx->vpd_sn)
3134                return;
3135
3136        snprintf(efx->vpd_sn, j + 1, "%s", &vpd_data[i]);
3137}
3138
3139
3140/* Main body of NIC initialisation
3141 * This is called at module load (or hotplug insertion, theoretically).
3142 */
3143static int efx_pci_probe_main(struct efx_nic *efx)
3144{
3145        int rc;
3146
3147        /* Do start-of-day initialisation */
3148        rc = efx_probe_all(efx);
3149        if (rc)
3150                goto fail1;
3151
3152        efx_init_napi(efx);
3153
3154        rc = efx->type->init(efx);
3155        if (rc) {
3156                netif_err(efx, probe, efx->net_dev,
3157                          "failed to initialise NIC\n");
3158                goto fail3;
3159        }
3160
3161        rc = efx_init_port(efx);
3162        if (rc) {
3163                netif_err(efx, probe, efx->net_dev,
3164                          "failed to initialise port\n");
3165                goto fail4;
3166        }
3167
3168        rc = efx_nic_init_interrupt(efx);
3169        if (rc)
3170                goto fail5;
3171        rc = efx_enable_interrupts(efx);
3172        if (rc)
3173                goto fail6;
3174
3175        return 0;
3176
3177 fail6:
3178        efx_nic_fini_interrupt(efx);
3179 fail5:
3180        efx_fini_port(efx);
3181 fail4:
3182        efx->type->fini(efx);
3183 fail3:
3184        efx_fini_napi(efx);
3185        efx_remove_all(efx);
3186 fail1:
3187        return rc;
3188}
3189
3190/* NIC initialisation
3191 *
3192 * This is called at module load (or hotplug insertion,
3193 * theoretically).  It sets up PCI mappings, resets the NIC,
3194 * sets up and registers the network devices with the kernel and hooks
3195 * the interrupt service routine.  It does not prepare the device for
3196 * transmission; this is left to the first time one of the network
3197 * interfaces is brought up (i.e. efx_net_open).
3198 */
3199static int efx_pci_probe(struct pci_dev *pci_dev,
3200                         const struct pci_device_id *entry)
3201{
3202        struct net_device *net_dev;
3203        struct efx_nic *efx;
3204        int rc;
3205
3206        /* Allocate and initialise a struct net_device and struct efx_nic */
3207        net_dev = alloc_etherdev_mqs(sizeof(*efx), EFX_MAX_CORE_TX_QUEUES,
3208                                     EFX_MAX_RX_QUEUES);
3209        if (!net_dev)
3210                return -ENOMEM;
3211        efx = netdev_priv(net_dev);
3212        efx->type = (const struct efx_nic_type *) entry->driver_data;
3213        efx->fixed_features |= NETIF_F_HIGHDMA;
3214        net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
3215                              NETIF_F_TSO | NETIF_F_RXCSUM);
3216        if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
3217                net_dev->features |= NETIF_F_TSO6;
3218        /* Mask for features that also apply to VLAN devices */
3219        net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
3220                                   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
3221                                   NETIF_F_RXCSUM);
3222
3223        net_dev->hw_features = net_dev->features & ~efx->fixed_features;
3224
3225        /* Disable VLAN filtering by default.  It may be enforced if
3226         * the feature is fixed (i.e. VLAN filters are required to
3227         * receive VLAN tagged packets due to vPort restrictions).
3228         */
3229        net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
3230        net_dev->features |= efx->fixed_features;
3231
3232        pci_set_drvdata(pci_dev, efx);
3233        SET_NETDEV_DEV(net_dev, &pci_dev->dev);
3234        rc = efx_init_struct(efx, pci_dev, net_dev);
3235        if (rc)
3236                goto fail1;
3237
3238        netif_info(efx, probe, efx->net_dev,
3239                   "Solarflare NIC detected\n");
3240
3241        if (!efx->type->is_vf)
3242                efx_probe_vpd_strings(efx);
3243
3244        /* Set up basic I/O (BAR mappings etc) */
3245        rc = efx_init_io(efx);
3246        if (rc)
3247                goto fail2;
3248
3249        rc = efx_pci_probe_main(efx);
3250        if (rc)
3251                goto fail3;
3252
3253        rc = efx_register_netdev(efx);
3254        if (rc)
3255                goto fail4;
3256
3257        if (efx->type->sriov_init) {
3258                rc = efx->type->sriov_init(efx);
3259                if (rc)
3260                        netif_err(efx, probe, efx->net_dev,
3261                                  "SR-IOV can't be enabled rc %d\n", rc);
3262        }
3263
3264        netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");
3265
3266        /* Try to create MTDs, but allow this to fail */
3267        rtnl_lock();
3268        rc = efx_mtd_probe(efx);
3269        rtnl_unlock();
3270        if (rc && rc != -EPERM)
3271                netif_warn(efx, probe, efx->net_dev,
3272                           "failed to create MTDs (%d)\n", rc);
3273
3274        rc = pci_enable_pcie_error_reporting(pci_dev);
3275        if (rc && rc != -EINVAL)
3276                netif_notice(efx, probe, efx->net_dev,
3277                             "PCIE error reporting unavailable (%d).\n",
3278                             rc);
3279
3280        return 0;
3281
3282 fail4:
3283        efx_pci_remove_main(efx);
3284 fail3:
3285        efx_fini_io(efx);
3286 fail2:
3287        efx_fini_struct(efx);
3288 fail1:
3289        WARN_ON(rc > 0);
3290        netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc);
3291        free_netdev(net_dev);
3292        return rc;
3293}
3294
3295/* efx_pci_sriov_configure returns the actual number of Virtual Functions
3296 * enabled on success.  The PCI core calls it when the device's
     * sriov_numvfs sysfs attribute is written; num_vfs == 0 requests that
     * SR-IOV be disabled.
3297 */
3298#ifdef CONFIG_SFC_SRIOV
3299static int efx_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
3300{
3301        int rc;
3302        struct efx_nic *efx = pci_get_drvdata(dev);
3303
3304        if (efx->type->sriov_configure) {
3305                rc = efx->type->sriov_configure(efx, num_vfs);
3306                if (rc)
3307                        return rc;
3308                else
3309                        return num_vfs;
3310        } else
3311                return -EOPNOTSUPP;
3312}
3313#endif
3314
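    /* PM freeze callback: detach the net device, stop the datapath and
     * disable interrupts, leaving the hardware otherwise initialised.
     */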
3315static int efx_pm_freeze(struct device *dev)
3316{
3317        struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
3318
3319        rtnl_lock();
3320
3321        if (efx->state != STATE_DISABLED) {
3322                efx->state = STATE_UNINIT;
3323
3324                efx_device_detach_sync(efx);
3325
3326                efx_stop_all(efx);
3327                efx_disable_interrupts(efx);
3328        }
3329
3330        rtnl_unlock();
3331
3332        return 0;
3333}
3334
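    /* PM thaw callback: undo efx_pm_freeze() - re-enable interrupts,
     * reconfigure the PHY, restart the datapath and re-attach the device.
     */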
3335static int efx_pm_thaw(struct device *dev)
3336{
3337        int rc;
3338        struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
3339
3340        rtnl_lock();
3341
3342        if (efx->state != STATE_DISABLED) {
3343                rc = efx_enable_interrupts(efx);
3344                if (rc)
3345                        goto fail;
3346
3347                mutex_lock(&efx->mac_lock);
3348                efx->phy_op->reconfigure(efx);
3349                mutex_unlock(&efx->mac_lock);
3350
3351                efx_start_all(efx);
3352
3353                netif_device_attach(efx->net_dev);
3354
3355                efx->state = STATE_READY;
3356
3357                efx->type->resume_wol(efx);
3358        }
3359
3360        rtnl_unlock();
3361
3362        /* Reschedule any quenched resets scheduled during efx_pm_freeze() */
3363        queue_work(reset_workqueue, &efx->reset_work);
3364
3365        return 0;
3366
3367fail:
3368        rtnl_unlock();
3369
3370        return rc;
3371}
3372
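    /* PM poweroff callback: shut down the NIC, save PCI config space and
     * put the device into D3hot.
     */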
3373static int efx_pm_poweroff(struct device *dev)
3374{
3375        struct pci_dev *pci_dev = to_pci_dev(dev);
3376        struct efx_nic *efx = pci_get_drvdata(pci_dev);
3377
3378        efx->type->fini(efx);
3379
3380        efx->reset_pending = 0;
3381
3382        pci_save_state(pci_dev);
3383        return pci_set_power_state(pci_dev, PCI_D3hot);
3384}
3385
3386/* Used for both resume and restore */
3387static int efx_pm_resume(struct device *dev)
3388{
3389        struct pci_dev *pci_dev = to_pci_dev(dev);
3390        struct efx_nic *efx = pci_get_drvdata(pci_dev);
3391        int rc;
3392
3393        rc = pci_set_power_state(pci_dev, PCI_D0);
3394        if (rc)
3395                return rc;
3396        pci_restore_state(pci_dev);
3397        rc = pci_enable_device(pci_dev);
3398        if (rc)
3399                return rc;
3400        pci_set_master(efx->pci_dev);
3401        rc = efx->type->reset(efx, RESET_TYPE_ALL);
3402        if (rc)
3403                return rc;
3404        rc = efx->type->init(efx);
3405        if (rc)
3406                return rc;
3407        rc = efx_pm_thaw(dev);
3408        return rc;
3409}
3410
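    /* PM suspend callback: freeze the datapath, then power the device off;
     * resume again if powering off fails.
     */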
3411static int efx_pm_suspend(struct device *dev)
3412{
3413        int rc;
3414
3415        efx_pm_freeze(dev);
3416        rc = efx_pm_poweroff(dev);
3417        if (rc)
3418                efx_pm_resume(dev);
3419        return rc;
3420}
3421
3422static const struct dev_pm_ops efx_pm_ops = {
3423        .suspend        = efx_pm_suspend,
3424        .resume         = efx_pm_resume,
3425        .freeze         = efx_pm_freeze,
3426        .thaw           = efx_pm_thaw,
3427        .poweroff       = efx_pm_poweroff,
3428        .restore        = efx_pm_resume,
3429};
3430
3431/* A PCI error affecting this device was detected.
3432 * At this point MMIO and DMA may be disabled.
3433 * Stop the software path and request a slot reset.
3434 */
3435static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
3436                                              enum pci_channel_state state)
3437{
3438        pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
3439        struct efx_nic *efx = pci_get_drvdata(pdev);
3440
3441        if (state == pci_channel_io_perm_failure)
3442                return PCI_ERS_RESULT_DISCONNECT;
3443
3444        rtnl_lock();
3445
3446        if (efx->state != STATE_DISABLED) {
3447                efx->state = STATE_RECOVERY;
3448                efx->reset_pending = 0;
3449
3450                efx_device_detach_sync(efx);
3451
3452                efx_stop_all(efx);
3453                efx_disable_interrupts(efx);
3454
3455                status = PCI_ERS_RESULT_NEED_RESET;
3456        } else {
3457                /* If the interface is disabled we don't want to do anything
3458                 * with it.
3459                 */
3460                status = PCI_ERS_RESULT_RECOVERED;
3461        }
3462
3463        rtnl_unlock();
3464
3465        pci_disable_device(pdev);
3466
3467        return status;
3468}
3469
3470/* Fake a successful reset, which will be performed later in efx_io_resume. */
3471static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
3472{
3473        struct efx_nic *efx = pci_get_drvdata(pdev);
3474        pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
3475        int rc;
3476
3477        if (pci_enable_device(pdev)) {
3478                netif_err(efx, hw, efx->net_dev,
3479                          "Cannot re-enable PCI device after reset.\n");
3480                status = PCI_ERS_RESULT_DISCONNECT;
3481        }
3482
3483        rc = pci_cleanup_aer_uncorrect_error_status(pdev);
3484        if (rc) {
3485                netif_err(efx, hw, efx->net_dev,
3486                "pci_cleanup_aer_uncorrect_error_status failed (%d)\n", rc);
3487                /* Non-fatal error. Continue. */
3488        }
3489
3490        return status;
3491}
3492
3493/* Perform the actual reset and resume I/O operations. */
3494static void efx_io_resume(struct pci_dev *pdev)
3495{
3496        struct efx_nic *efx = pci_get_drvdata(pdev);
3497        int rc;
3498
3499        rtnl_lock();
3500
3501        if (efx->state == STATE_DISABLED)
3502                goto out;
3503
3504        rc = efx_reset(efx, RESET_TYPE_ALL);
3505        if (rc) {
3506                netif_err(efx, hw, efx->net_dev,
3507                          "efx_reset failed after PCI error (%d)\n", rc);
3508        } else {
3509                efx->state = STATE_READY;
3510                netif_dbg(efx, hw, efx->net_dev,
3511                          "Done resetting and resuming IO after PCI error.\n");
3512        }
3513
3514out:
3515        rtnl_unlock();
3516}
3517
3518/* For simplicity and reliability, we always require a slot reset and try to
3519 * reset the hardware when a pci error affecting the device is detected.
3520 * We leave both the link_reset and mmio_enabled callback unimplemented:
3521 * with our request for slot reset the mmio_enabled callback will never be
3522 * called, and the link_reset callback is not used by AER or EEH mechanisms.
3523 */
3524static const struct pci_error_handlers efx_err_handlers = {
3525        .error_detected = efx_io_error_detected,
3526        .slot_reset     = efx_io_slot_reset,
3527        .resume         = efx_io_resume,
3528};
3529
3530static struct pci_driver efx_pci_driver = {
3531        .name           = KBUILD_MODNAME,
3532        .id_table       = efx_pci_table,
3533        .probe          = efx_pci_probe,
3534        .remove         = efx_pci_remove,
3535        .driver.pm      = &efx_pm_ops,
3536        .err_handler    = &efx_err_handlers,
3537#ifdef CONFIG_SFC_SRIOV
3538        .sriov_configure = efx_pci_sriov_configure,
3539#endif
3540};
3541
3542/**************************************************************************
3543 *
3544 * Kernel module interface
3545 *
3546 *************************************************************************/
3547
3548module_param(interrupt_mode, uint, 0444);
3549MODULE_PARM_DESC(interrupt_mode,
3550                 "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");
3551
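    /* Module entry point: register the netdev notifier, set up SR-IOV
     * support (if configured) and the reset workqueue, then register the
     * PCI driver.
     */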
3552static int __init efx_init_module(void)
3553{
3554        int rc;
3555
3556        printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n");
3557
3558        rc = register_netdevice_notifier(&efx_netdev_notifier);
3559        if (rc)
3560                goto err_notifier;
3561
3562#ifdef CONFIG_SFC_SRIOV
3563        rc = efx_init_sriov();
3564        if (rc)
3565                goto err_sriov;
3566#endif
3567
3568        reset_workqueue = create_singlethread_workqueue("sfc_reset");
3569        if (!reset_workqueue) {
3570                rc = -ENOMEM;
3571                goto err_reset;
3572        }
3573
3574        rc = pci_register_driver(&efx_pci_driver);
3575        if (rc < 0)
3576                goto err_pci;
3577
3578        return 0;
3579
3580 err_pci:
3581        destroy_workqueue(reset_workqueue);
3582 err_reset:
3583#ifdef CONFIG_SFC_SRIOV
3584        efx_fini_sriov();
3585 err_sriov:
3586#endif
3587        unregister_netdevice_notifier(&efx_netdev_notifier);
3588 err_notifier:
3589        return rc;
3590}
3591
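    /* Module exit: tear everything down in the reverse order of
     * efx_init_module().
     */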
3592static void __exit efx_exit_module(void)
3593{
3594        printk(KERN_INFO "Solarflare NET driver unloading\n");
3595
3596        pci_unregister_driver(&efx_pci_driver);
3597        destroy_workqueue(reset_workqueue);
3598#ifdef CONFIG_SFC_SRIOV
3599        efx_fini_sriov();
3600#endif
3601        unregister_netdevice_notifier(&efx_netdev_notifier);
3602
3603}
3604
3605module_init(efx_init_module);
3606module_exit(efx_exit_module);
3607
3608MODULE_AUTHOR("Solarflare Communications and "
3609              "Michael Brown <mbrown@fensystems.co.uk>");
3610MODULE_DESCRIPTION("Solarflare network driver");
3611MODULE_LICENSE("GPL");
3612MODULE_DEVICE_TABLE(pci, efx_pci_table);
3613