/* linux/drivers/net/ethernet/sfc/efx.c */
   1// SPDX-License-Identifier: GPL-2.0-only
   2/****************************************************************************
   3 * Driver for Solarflare network controllers and boards
   4 * Copyright 2005-2006 Fen Systems Ltd.
   5 * Copyright 2005-2013 Solarflare Communications Inc.
   6 */
   7
   8#include <linux/module.h>
   9#include <linux/pci.h>
  10#include <linux/netdevice.h>
  11#include <linux/etherdevice.h>
  12#include <linux/delay.h>
  13#include <linux/notifier.h>
  14#include <linux/ip.h>
  15#include <linux/tcp.h>
  16#include <linux/in.h>
  17#include <linux/ethtool.h>
  18#include <linux/topology.h>
  19#include <linux/gfp.h>
  20#include <linux/aer.h>
  21#include <linux/interrupt.h>
  22#include "net_driver.h"
  23#include <net/gre.h>
  24#include <net/udp_tunnel.h>
  25#include "efx.h"
  26#include "efx_common.h"
  27#include "efx_channels.h"
  28#include "rx_common.h"
  29#include "tx_common.h"
  30#include "nic.h"
  31#include "io.h"
  32#include "selftest.h"
  33#include "sriov.h"
  34
  35#include "mcdi.h"
  36#include "mcdi_pcol.h"
  37#include "workarounds.h"
  38
  39/**************************************************************************
  40 *
  41 * Type name strings
  42 *
  43 **************************************************************************
  44 */
  45
/* Printable names for UDP tunnel types, indexed by the driver's
 * TUNNEL_ENCAP_UDP_PORT_ENTRY_* encoding (see efx_udp_tunnel_type_map()).
 */
static const char *const efx_udp_tunnel_type_names[] = {
	[TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan",
	[TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE] = "geneve",
};
  51
  52void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen)
  53{
  54        if (type < ARRAY_SIZE(efx_udp_tunnel_type_names) &&
  55            efx_udp_tunnel_type_names[type] != NULL)
  56                snprintf(buf, buflen, "%s", efx_udp_tunnel_type_names[type]);
  57        else
  58                snprintf(buf, buflen, "type %d", type);
  59}
  60
  61/**************************************************************************
  62 *
  63 * Configurable values
  64 *
  65 *************************************************************************/
  66
  67/*
  68 * Use separate channels for TX and RX events
  69 *
  70 * Set this to 1 to use separate channels for TX and RX. It allows us
  71 * to control interrupt affinity separately for TX and RX.
  72 *
  73 * This is only used in MSI-X interrupt mode
  74 */
  75bool efx_separate_tx_channels;
  76module_param(efx_separate_tx_channels, bool, 0444);
  77MODULE_PARM_DESC(efx_separate_tx_channels,
  78                 "Use separate channels for TX and RX");
  79
  80/* Initial interrupt moderation settings.  They can be modified after
  81 * module load with ethtool.
  82 *
  83 * The default for RX should strike a balance between increasing the
  84 * round-trip latency and reducing overhead.
  85 */
  86static unsigned int rx_irq_mod_usec = 60;
  87
  88/* Initial interrupt moderation settings.  They can be modified after
  89 * module load with ethtool.
  90 *
  91 * This default is chosen to ensure that a 10G link does not go idle
  92 * while a TX queue is stopped after it has become full.  A queue is
  93 * restarted when it drops below half full.  The time this takes (assuming
  94 * worst case 3 descriptors per packet and 1024 descriptors) is
  95 *   512 / 3 * 1.2 = 205 usec.
  96 */
  97static unsigned int tx_irq_mod_usec = 150;
  98
  99static bool phy_flash_cfg;
 100module_param(phy_flash_cfg, bool, 0644);
 101MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");
 102
 103static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
 104                         NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
 105                         NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
 106                         NETIF_MSG_TX_ERR | NETIF_MSG_HW);
 107module_param(debug, uint, 0);
 108MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
 109
 110/**************************************************************************
 111 *
 112 * Utility functions and prototypes
 113 *
 114 *************************************************************************/
 115
/* Forward declarations */
static const struct efx_channel_type efx_default_channel_type;
static void efx_remove_port(struct efx_nic *efx);
static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog);
static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp);
static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
			u32 flags);

/* Assert that the caller holds the RTNL lock whenever the device is in
 * a state in which resets must be serialised against other operations.
 */
#define EFX_ASSERT_RESET_SERIALISED(efx)		\
	do {						\
		if ((efx->state == STATE_READY) ||	\
		    (efx->state == STATE_RECOVERY) ||	\
		    (efx->state == STATE_DISABLED))	\
			ASSERT_RTNL();			\
	} while (0)
 130
 131/**************************************************************************
 132 *
 133 * Port handling
 134 *
 135 **************************************************************************/
 136
 137/* Equivalent to efx_link_set_advertising with all-zeroes, except does not
 138 * force the Autoneg bit on.
 139 */
 140void efx_link_clear_advertising(struct efx_nic *efx)
 141{
 142        bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
 143        efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
 144}
 145
/* Record the wanted flow-control mode and, if any link modes are being
 * advertised, translate it into the ethtool pause advertisement bits.
 */
void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
{
	efx->wanted_fc = wanted_fc;
	if (efx->link_advertising[0]) {
		/* First set/clear both pause bits based on RX, then XOR
		 * Asym_Pause based on TX.  The net encoding is:
		 *   RX+TX -> Pause; RX only -> Pause|Asym_Pause;
		 *   TX only -> Asym_Pause; neither -> no pause bits.
		 */
		if (wanted_fc & EFX_FC_RX)
			efx->link_advertising[0] |= (ADVERTISED_Pause |
						     ADVERTISED_Asym_Pause);
		else
			efx->link_advertising[0] &= ~(ADVERTISED_Pause |
						      ADVERTISED_Asym_Pause);
		if (wanted_fc & EFX_FC_TX)
			efx->link_advertising[0] ^= ADVERTISED_Asym_Pause;
	}
}
 160
 161static void efx_fini_port(struct efx_nic *efx);
 162
 163static int efx_probe_port(struct efx_nic *efx)
 164{
 165        int rc;
 166
 167        netif_dbg(efx, probe, efx->net_dev, "create port\n");
 168
 169        if (phy_flash_cfg)
 170                efx->phy_mode = PHY_MODE_SPECIAL;
 171
 172        /* Connect up MAC/PHY operations table */
 173        rc = efx->type->probe_port(efx);
 174        if (rc)
 175                return rc;
 176
 177        /* Initialise MAC address to permanent address */
 178        ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr);
 179
 180        return 0;
 181}
 182
 183static int efx_init_port(struct efx_nic *efx)
 184{
 185        int rc;
 186
 187        netif_dbg(efx, drv, efx->net_dev, "init port\n");
 188
 189        mutex_lock(&efx->mac_lock);
 190
 191        rc = efx->phy_op->init(efx);
 192        if (rc)
 193                goto fail1;
 194
 195        efx->port_initialized = true;
 196
 197        /* Reconfigure the MAC before creating dma queues (required for
 198         * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */
 199        efx_mac_reconfigure(efx);
 200
 201        /* Ensure the PHY advertises the correct flow control settings */
 202        rc = efx->phy_op->reconfigure(efx);
 203        if (rc && rc != -EPERM)
 204                goto fail2;
 205
 206        mutex_unlock(&efx->mac_lock);
 207        return 0;
 208
 209fail2:
 210        efx->phy_op->fini(efx);
 211fail1:
 212        mutex_unlock(&efx->mac_lock);
 213        return rc;
 214}
 215
 216static void efx_fini_port(struct efx_nic *efx)
 217{
 218        netif_dbg(efx, drv, efx->net_dev, "shut down port\n");
 219
 220        if (!efx->port_initialized)
 221                return;
 222
 223        efx->phy_op->fini(efx);
 224        efx->port_initialized = false;
 225
 226        efx->link_state.up = false;
 227        efx_link_status_changed(efx);
 228}
 229
/* Destroy the port; inverse of efx_probe_port() */
static void efx_remove_port(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "destroying port\n");

	efx->type->remove_port(efx);
}
 236
 237/**************************************************************************
 238 *
 239 * NIC handling
 240 *
 241 **************************************************************************/
 242
/* All probed primary functions (see efx_associate()) */
static LIST_HEAD(efx_primary_list);
/* Secondary functions probed before their matching primary */
static LIST_HEAD(efx_unassociated_list);
 245
 246static bool efx_same_controller(struct efx_nic *left, struct efx_nic *right)
 247{
 248        return left->type == right->type &&
 249                left->vpd_sn && right->vpd_sn &&
 250                !strcmp(left->vpd_sn, right->vpd_sn);
 251}
 252
 253static void efx_associate(struct efx_nic *efx)
 254{
 255        struct efx_nic *other, *next;
 256
 257        if (efx->primary == efx) {
 258                /* Adding primary function; look for secondaries */
 259
 260                netif_dbg(efx, probe, efx->net_dev, "adding to primary list\n");
 261                list_add_tail(&efx->node, &efx_primary_list);
 262
 263                list_for_each_entry_safe(other, next, &efx_unassociated_list,
 264                                         node) {
 265                        if (efx_same_controller(efx, other)) {
 266                                list_del(&other->node);
 267                                netif_dbg(other, probe, other->net_dev,
 268                                          "moving to secondary list of %s %s\n",
 269                                          pci_name(efx->pci_dev),
 270                                          efx->net_dev->name);
 271                                list_add_tail(&other->node,
 272                                              &efx->secondary_list);
 273                                other->primary = efx;
 274                        }
 275                }
 276        } else {
 277                /* Adding secondary function; look for primary */
 278
 279                list_for_each_entry(other, &efx_primary_list, node) {
 280                        if (efx_same_controller(efx, other)) {
 281                                netif_dbg(efx, probe, efx->net_dev,
 282                                          "adding to secondary list of %s %s\n",
 283                                          pci_name(other->pci_dev),
 284                                          other->net_dev->name);
 285                                list_add_tail(&efx->node,
 286                                              &other->secondary_list);
 287                                efx->primary = other;
 288                                return;
 289                        }
 290                }
 291
 292                netif_dbg(efx, probe, efx->net_dev,
 293                          "adding to unassociated list\n");
 294                list_add_tail(&efx->node, &efx_unassociated_list);
 295        }
 296}
 297
 298static void efx_dissociate(struct efx_nic *efx)
 299{
 300        struct efx_nic *other, *next;
 301
 302        list_del(&efx->node);
 303        efx->primary = NULL;
 304
 305        list_for_each_entry_safe(other, next, &efx->secondary_list, node) {
 306                list_del(&other->node);
 307                netif_dbg(other, probe, other->net_dev,
 308                          "moving to unassociated list\n");
 309                list_add_tail(&other->node, &efx_unassociated_list);
 310                other->primary = NULL;
 311        }
 312}
 313
/* Probe the NIC: hardware-type probe, then interrupt and channel setup.
 * dimension_resources() may reduce the channel limits and return
 * -EAGAIN, in which case interrupt allocation is retried under the new
 * limits until it succeeds or fails hard.
 */
static int efx_probe_nic(struct efx_nic *efx)
{
	int rc;

	netif_dbg(efx, probe, efx->net_dev, "creating NIC\n");

	/* Carry out hardware-type specific initialisation */
	rc = efx->type->probe(efx);
	if (rc)
		return rc;

	do {
		/* Retries may have shrunk the limits to nothing */
		if (!efx->max_channels || !efx->max_tx_channels) {
			netif_err(efx, drv, efx->net_dev,
				  "Insufficient resources to allocate"
				  " any channels\n");
			rc = -ENOSPC;
			goto fail1;
		}

		/* Determine the number of channels and queues by trying
		 * to hook in MSI-X interrupts.
		 */
		rc = efx_probe_interrupts(efx);
		if (rc)
			goto fail1;

		rc = efx_set_channels(efx);
		if (rc)
			goto fail1;

		/* dimension_resources can fail with EAGAIN */
		rc = efx->type->dimension_resources(efx);
		if (rc != 0 && rc != -EAGAIN)
			goto fail2;

		if (rc == -EAGAIN)
			/* try again with new max_channels */
			efx_remove_interrupts(efx);

	} while (rc == -EAGAIN);

	/* Only generate an RSS hash key when there is more than one
	 * channel to spread flows over.
	 */
	if (efx->n_channels > 1)
		netdev_rss_key_fill(efx->rss_context.rx_hash_key,
				    sizeof(efx->rss_context.rx_hash_key));
	efx_set_default_rx_indir_table(efx, &efx->rss_context);

	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
	netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);

	/* Initialise the interrupt moderation settings */
	efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000);
	efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
				true);

	return 0;

fail2:
	efx_remove_interrupts(efx);
fail1:
	efx->type->remove(efx);
	return rc;
}
 377
/* Destroy the NIC; inverse of efx_probe_nic() */
static void efx_remove_nic(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n");

	efx_remove_interrupts(efx);
	efx->type->remove(efx);
}
 385
 386/**************************************************************************
 387 *
 388 * NIC startup/shutdown
 389 *
 390 *************************************************************************/
 391
/* Probe all driver subsystems in order: NIC, port, DMA queue sizing,
 * vswitching (SR-IOV builds only), filters, channels.  On error,
 * everything already probed is torn down in reverse order.
 */
static int efx_probe_all(struct efx_nic *efx)
{
	int rc;

	rc = efx_probe_nic(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev, "failed to create NIC\n");
		goto fail1;
	}

	rc = efx_probe_port(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev, "failed to create port\n");
		goto fail2;
	}

	/* The default ring size must lie within the hardware limits */
	BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT);
	if (WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) {
		rc = -EINVAL;
		goto fail3;
	}
	efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE;

#ifdef CONFIG_SFC_SRIOV
	rc = efx->type->vswitching_probe(efx);
	if (rc) /* not fatal; the PF will still work fine */
		netif_warn(efx, probe, efx->net_dev,
			   "failed to setup vswitching rc=%d;"
			   " VFs may not function\n", rc);
#endif

	rc = efx_probe_filters(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to create filter tables\n");
		goto fail4;
	}

	rc = efx_probe_channels(efx);
	if (rc)
		goto fail5;

	return 0;

 fail5:
	efx_remove_filters(efx);
 fail4:
#ifdef CONFIG_SFC_SRIOV
	efx->type->vswitching_remove(efx);
#endif
 fail3:
	efx_remove_port(efx);
 fail2:
	efx_remove_nic(efx);
 fail1:
	return rc;
}
 449
/* Tear down everything probed by efx_probe_all(), in reverse order.
 * Any bound XDP program is released first, under the RTNL lock.
 */
static void efx_remove_all(struct efx_nic *efx)
{
	rtnl_lock();
	efx_xdp_setup_prog(efx, NULL);
	rtnl_unlock();

	efx_remove_channels(efx);
	efx_remove_filters(efx);
#ifdef CONFIG_SFC_SRIOV
	efx->type->vswitching_remove(efx);
#endif
	efx_remove_port(efx);
	efx_remove_nic(efx);
}
 464
 465/**************************************************************************
 466 *
 467 * Interrupt moderation
 468 *
 469 **************************************************************************/
 470unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs)
 471{
 472        if (usecs == 0)
 473                return 0;
 474        if (usecs * 1000 < efx->timer_quantum_ns)
 475                return 1; /* never round down to 0 */
 476        return usecs * 1000 / efx->timer_quantum_ns;
 477}
 478
/* Convert timer ticks back to microseconds; the inverse of
 * efx_usecs_to_ticks().
 */
unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks)
{
	/* We must round up when converting ticks to microseconds
	 * because we round down when converting the other way.
	 */
	return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000);
}
 486
 487/* Set interrupt moderation parameters */
 488int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
 489                            unsigned int rx_usecs, bool rx_adaptive,
 490                            bool rx_may_override_tx)
 491{
 492        struct efx_channel *channel;
 493        unsigned int timer_max_us;
 494
 495        EFX_ASSERT_RESET_SERIALISED(efx);
 496
 497        timer_max_us = efx->timer_max_ns / 1000;
 498
 499        if (tx_usecs > timer_max_us || rx_usecs > timer_max_us)
 500                return -EINVAL;
 501
 502        if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 &&
 503            !rx_may_override_tx) {
 504                netif_err(efx, drv, efx->net_dev, "Channels are shared. "
 505                          "RX and TX IRQ moderation must be equal\n");
 506                return -EINVAL;
 507        }
 508
 509        efx->irq_rx_adaptive = rx_adaptive;
 510        efx->irq_rx_moderation_us = rx_usecs;
 511        efx_for_each_channel(channel, efx) {
 512                if (efx_channel_has_rx_queue(channel))
 513                        channel->irq_moderation_us = rx_usecs;
 514                else if (efx_channel_has_tx_queues(channel))
 515                        channel->irq_moderation_us = tx_usecs;
 516                else if (efx_channel_is_xdp_tx(channel))
 517                        channel->irq_moderation_us = tx_usecs;
 518        }
 519
 520        return 0;
 521}
 522
 523void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
 524                            unsigned int *rx_usecs, bool *rx_adaptive)
 525{
 526        *rx_adaptive = efx->irq_rx_adaptive;
 527        *rx_usecs = efx->irq_rx_moderation_us;
 528
 529        /* If channels are shared between RX and TX, so is IRQ
 530         * moderation.  Otherwise, IRQ moderation is the same for all
 531         * TX channels and is not adaptive.
 532         */
 533        if (efx->tx_channel_offset == 0) {
 534                *tx_usecs = *rx_usecs;
 535        } else {
 536                struct efx_channel *tx_channel;
 537
 538                tx_channel = efx->channel[efx->tx_channel_offset];
 539                *tx_usecs = tx_channel->irq_moderation_us;
 540        }
 541}
 542
 543/**************************************************************************
 544 *
 545 * ioctls
 546 *
 547 *************************************************************************/
 548
 549/* Net device ioctl
 550 * Context: process, rtnl_lock() held.
 551 */
 552static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
 553{
 554        struct efx_nic *efx = netdev_priv(net_dev);
 555        struct mii_ioctl_data *data = if_mii(ifr);
 556
 557        if (cmd == SIOCSHWTSTAMP)
 558                return efx_ptp_set_ts_config(efx, ifr);
 559        if (cmd == SIOCGHWTSTAMP)
 560                return efx_ptp_get_ts_config(efx, ifr);
 561
 562        /* Convert phy_id from older PRTAD/DEVAD format */
 563        if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) &&
 564            (data->phy_id & 0xfc00) == 0x0400)
 565                data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400;
 566
 567        return mdio_mii_ioctl(&efx->mdio, data, cmd);
 568}
 569
 570/**************************************************************************
 571 *
 572 * Kernel net device interface
 573 *
 574 *************************************************************************/
 575
/* Context: process, rtnl_lock() held. */
int efx_net_open(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	int rc;

	netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n",
		  raw_smp_processor_id());

	/* Refuse to open while the device is disabled, while the PHY is
	 * in reflash mode, or if an MC reboot is detected and the
	 * recovery reset fails.
	 */
	rc = efx_check_disabled(efx);
	if (rc)
		return rc;
	if (efx->phy_mode & PHY_MODE_SPECIAL)
		return -EBUSY;
	if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL))
		return -EIO;

	/* Notify the kernel of the link state polled during driver load,
	 * before the monitor starts running */
	efx_link_status_changed(efx);

	efx_start_all(efx);
	/* If the device was disabled or a reset was requested while
	 * starting, detach so the stack stops transmitting.
	 */
	if (efx->state == STATE_DISABLED || efx->reset_pending)
		netif_device_detach(efx->net_dev);
	efx_selftest_async_start(efx);
	return 0;
}
 603
/* Context: process, rtnl_lock() held.
 * Note that the kernel will ignore our return code; this method
 * should really be a void.
 */
int efx_net_stop(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
		  raw_smp_processor_id());

	/* Stop the device and flush all the channels */
	efx_stop_all(efx);

	return 0;
}
 620
/* TX watchdog: the stack saw no completions on a TX queue within
 * watchdog_timeo, so schedule a reset to recover the datapath.
 * Context: netif_tx_lock held, BHs disabled.
 */
static void efx_watchdog(struct net_device *net_dev, unsigned int txqueue)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	netif_err(efx, tx_err, efx->net_dev,
		  "TX stuck with port_enabled=%d: resetting channels\n",
		  efx->port_enabled);

	efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
}
 632
 633static int efx_set_mac_address(struct net_device *net_dev, void *data)
 634{
 635        struct efx_nic *efx = netdev_priv(net_dev);
 636        struct sockaddr *addr = data;
 637        u8 *new_addr = addr->sa_data;
 638        u8 old_addr[6];
 639        int rc;
 640
 641        if (!is_valid_ether_addr(new_addr)) {
 642                netif_err(efx, drv, efx->net_dev,
 643                          "invalid ethernet MAC address requested: %pM\n",
 644                          new_addr);
 645                return -EADDRNOTAVAIL;
 646        }
 647
 648        /* save old address */
 649        ether_addr_copy(old_addr, net_dev->dev_addr);
 650        ether_addr_copy(net_dev->dev_addr, new_addr);
 651        if (efx->type->set_mac_address) {
 652                rc = efx->type->set_mac_address(efx);
 653                if (rc) {
 654                        ether_addr_copy(net_dev->dev_addr, old_addr);
 655                        return rc;
 656                }
 657        }
 658
 659        /* Reconfigure the MAC */
 660        mutex_lock(&efx->mac_lock);
 661        efx_mac_reconfigure(efx);
 662        mutex_unlock(&efx->mac_lock);
 663
 664        return 0;
 665}
 666
/* Context: netif_addr_lock held, BHs disabled. */
static void efx_set_rx_mode(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	/* Defer the filter update to process context via the MAC work item */
	if (efx->port_enabled)
		queue_work(efx->workqueue, &efx->mac_work);
	/* Otherwise efx_start_port() will do this */
}
 676
/* Handle a netdev feature change request.
 * Context: process, rtnl_lock() held.
 */
static int efx_set_features(struct net_device *net_dev, netdev_features_t data)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	int rc;

	/* If disabling RX n-tuple filtering, clear existing filters */
	if (net_dev->features & ~data & NETIF_F_NTUPLE) {
		rc = efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL);
		if (rc)
			return rc;
	}

	/* If Rx VLAN filter is changed, update filters via mac_reconfigure.
	 * If rx-fcs is changed, mac_reconfigure updates that too.
	 */
	if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER |
					  NETIF_F_RXFCS)) {
		/* efx_set_rx_mode() will schedule MAC work to update filters
		 * when the new features are finally set in net_dev.
		 */
		efx_set_rx_mode(net_dev);
	}

	return 0;
}
 702
 703static int efx_get_phys_port_id(struct net_device *net_dev,
 704                                struct netdev_phys_item_id *ppid)
 705{
 706        struct efx_nic *efx = netdev_priv(net_dev);
 707
 708        if (efx->type->get_phys_port_id)
 709                return efx->type->get_phys_port_id(efx, ppid);
 710        else
 711                return -EOPNOTSUPP;
 712}
 713
 714static int efx_get_phys_port_name(struct net_device *net_dev,
 715                                  char *name, size_t len)
 716{
 717        struct efx_nic *efx = netdev_priv(net_dev);
 718
 719        if (snprintf(name, len, "p%u", efx->port_num) >= len)
 720                return -EINVAL;
 721        return 0;
 722}
 723
 724static int efx_vlan_rx_add_vid(struct net_device *net_dev, __be16 proto, u16 vid)
 725{
 726        struct efx_nic *efx = netdev_priv(net_dev);
 727
 728        if (efx->type->vlan_rx_add_vid)
 729                return efx->type->vlan_rx_add_vid(efx, proto, vid);
 730        else
 731                return -EOPNOTSUPP;
 732}
 733
 734static int efx_vlan_rx_kill_vid(struct net_device *net_dev, __be16 proto, u16 vid)
 735{
 736        struct efx_nic *efx = netdev_priv(net_dev);
 737
 738        if (efx->type->vlan_rx_kill_vid)
 739                return efx->type->vlan_rx_kill_vid(efx, proto, vid);
 740        else
 741                return -EOPNOTSUPP;
 742}
 743
 744static int efx_udp_tunnel_type_map(enum udp_parsable_tunnel_type in)
 745{
 746        switch (in) {
 747        case UDP_TUNNEL_TYPE_VXLAN:
 748                return TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN;
 749        case UDP_TUNNEL_TYPE_GENEVE:
 750                return TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE;
 751        default:
 752                return -1;
 753        }
 754}
 755
 756static void efx_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti)
 757{
 758        struct efx_nic *efx = netdev_priv(dev);
 759        struct efx_udp_tunnel tnl;
 760        int efx_tunnel_type;
 761
 762        efx_tunnel_type = efx_udp_tunnel_type_map(ti->type);
 763        if (efx_tunnel_type < 0)
 764                return;
 765
 766        tnl.type = (u16)efx_tunnel_type;
 767        tnl.port = ti->port;
 768
 769        if (efx->type->udp_tnl_add_port)
 770                (void)efx->type->udp_tnl_add_port(efx, tnl);
 771}
 772
 773static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti)
 774{
 775        struct efx_nic *efx = netdev_priv(dev);
 776        struct efx_udp_tunnel tnl;
 777        int efx_tunnel_type;
 778
 779        efx_tunnel_type = efx_udp_tunnel_type_map(ti->type);
 780        if (efx_tunnel_type < 0)
 781                return;
 782
 783        tnl.type = (u16)efx_tunnel_type;
 784        tnl.port = ti->port;
 785
 786        if (efx->type->udp_tnl_del_port)
 787                (void)efx->type->udp_tnl_del_port(efx, tnl);
 788}
 789
/* Net device operations table shared by all efx devices.  Also used by
 * efx_netdev_event() to recognise devices belonging to this driver.
 */
static const struct net_device_ops efx_netdev_ops = {
	.ndo_open		= efx_net_open,
	.ndo_stop		= efx_net_stop,
	.ndo_get_stats64	= efx_net_stats,
	.ndo_tx_timeout		= efx_watchdog,
	.ndo_start_xmit		= efx_hard_start_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_do_ioctl		= efx_ioctl,
	.ndo_change_mtu		= efx_change_mtu,
	.ndo_set_mac_address	= efx_set_mac_address,
	.ndo_set_rx_mode	= efx_set_rx_mode,
	.ndo_set_features	= efx_set_features,
	.ndo_vlan_rx_add_vid	= efx_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= efx_vlan_rx_kill_vid,
#ifdef CONFIG_SFC_SRIOV
	.ndo_set_vf_mac		= efx_sriov_set_vf_mac,
	.ndo_set_vf_vlan	= efx_sriov_set_vf_vlan,
	.ndo_set_vf_spoofchk	= efx_sriov_set_vf_spoofchk,
	.ndo_get_vf_config	= efx_sriov_get_vf_config,
	.ndo_set_vf_link_state  = efx_sriov_set_vf_link_state,
#endif
	.ndo_get_phys_port_id   = efx_get_phys_port_id,
	.ndo_get_phys_port_name	= efx_get_phys_port_name,
	.ndo_setup_tc		= efx_setup_tc,
#ifdef CONFIG_RFS_ACCEL
	.ndo_rx_flow_steer	= efx_filter_rfs,
#endif
	.ndo_udp_tunnel_add	= efx_udp_tunnel_add,
	.ndo_udp_tunnel_del	= efx_udp_tunnel_del,
	.ndo_xdp_xmit		= efx_xdp_xmit,
	.ndo_bpf		= efx_xdp
};
 822
/* Install a new XDP program, or remove the current one if @prog is NULL.
 * Context: process, rtnl_lock() held (rtnl_dereference below).
 */
static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog)
{
	struct bpf_prog *old_prog;

	/* XDP cannot work if rxq_info registration failed earlier */
	if (efx->xdp_rxq_info_failed) {
		netif_err(efx, drv, efx->net_dev,
			  "Unable to bind XDP program due to previous failure of rxq_info\n");
		return -EINVAL;
	}

	/* MTUs above efx_xdp_max_mtu() cannot be used with XDP */
	if (prog && efx->net_dev->mtu > efx_xdp_max_mtu(efx)) {
		netif_err(efx, drv, efx->net_dev,
			  "Unable to configure XDP with MTU of %d (max: %d)\n",
			  efx->net_dev->mtu, efx_xdp_max_mtu(efx));
		return -EINVAL;
	}

	/* Publish the new program and drop the old one's reference */
	old_prog = rtnl_dereference(efx->xdp_prog);
	rcu_assign_pointer(efx->xdp_prog, prog);
	/* Release the reference that was originally passed by the caller. */
	if (old_prog)
		bpf_prog_put(old_prog);

	return 0;
}
 848
 849/* Context: process, rtnl_lock() held. */
 850static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 851{
 852        struct efx_nic *efx = netdev_priv(dev);
 853        struct bpf_prog *xdp_prog;
 854
 855        switch (xdp->command) {
 856        case XDP_SETUP_PROG:
 857                return efx_xdp_setup_prog(efx, xdp->prog);
 858        case XDP_QUERY_PROG:
 859                xdp_prog = rtnl_dereference(efx->xdp_prog);
 860                xdp->prog_id = xdp_prog ? xdp_prog->aux->id : 0;
 861                return 0;
 862        default:
 863                return -EINVAL;
 864        }
 865}
 866
 867static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
 868                        u32 flags)
 869{
 870        struct efx_nic *efx = netdev_priv(dev);
 871
 872        if (!netif_running(dev))
 873                return -EINVAL;
 874
 875        return efx_xdp_tx_buffers(efx, n, xdpfs, flags & XDP_XMIT_FLUSH);
 876}
 877
 878static void efx_update_name(struct efx_nic *efx)
 879{
 880        strcpy(efx->name, efx->net_dev->name);
 881        efx_mtd_rename(efx);
 882        efx_set_channel_names(efx);
 883}
 884
 885static int efx_netdev_event(struct notifier_block *this,
 886                            unsigned long event, void *ptr)
 887{
 888        struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);
 889
 890        if ((net_dev->netdev_ops == &efx_netdev_ops) &&
 891            event == NETDEV_CHANGENAME)
 892                efx_update_name(netdev_priv(net_dev));
 893
 894        return NOTIFY_DONE;
 895}
 896
/* Notifier block used to observe NETDEV_CHANGENAME events */
static struct notifier_block efx_netdev_notifier = {
	.notifier_call = efx_netdev_event,
};
 900
 901static ssize_t
 902show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
 903{
 904        struct efx_nic *efx = dev_get_drvdata(dev);
 905        return sprintf(buf, "%d\n", efx->phy_type);
 906}
 907static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);
 908
/* Register the net device with the kernel and create its sysfs
 * attributes.  On success the interface becomes visible to userspace
 * (carrier off until PHY events report link).  Returns 0 or a
 * negative errno; on failure the device is left unregistered and
 * efx->state is reset to STATE_UNINIT.
 */
static int efx_register_netdev(struct efx_nic *efx)
{
	struct net_device *net_dev = efx->net_dev;
	struct efx_channel *channel;
	int rc;

	net_dev->watchdog_timeo = 5 * HZ;
	net_dev->irq = efx->pci_dev->irq;
	net_dev->netdev_ops = &efx_netdev_ops;
	/* Only EF10 (Huntington) and later revisions advertise unicast
	 * filtering.
	 */
	if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
		net_dev->priv_flags |= IFF_UNICAST_FLT;
	net_dev->ethtool_ops = &efx_ethtool_ops;
	net_dev->gso_max_segs = EFX_TSO_MAX_SEGS;
	net_dev->min_mtu = EFX_MIN_MTU;
	net_dev->max_mtu = EFX_MAX_MTU;

	rtnl_lock();

	/* Enable resets to be scheduled and check whether any were
	 * already requested.  If so, the NIC is probably hosed so we
	 * abort.
	 */
	efx->state = STATE_READY;
	smp_mb(); /* ensure we change state before checking reset_pending */
	if (efx->reset_pending) {
		netif_err(efx, probe, efx->net_dev,
			  "aborting probe due to scheduled reset\n");
		rc = -EIO;
		goto fail_locked;
	}

	/* Resolve any "eth%d"-style template in the requested name */
	rc = dev_alloc_name(net_dev, net_dev->name);
	if (rc < 0)
		goto fail_locked;
	efx_update_name(efx);

	/* Always start with carrier off; PHY events will detect the link */
	netif_carrier_off(net_dev);

	rc = register_netdevice(net_dev);
	if (rc)
		goto fail_locked;

	/* Expose the core TX queues now that the netdev exists */
	efx_for_each_channel(channel, efx) {
		struct efx_tx_queue *tx_queue;
		efx_for_each_channel_tx_queue(tx_queue, channel)
			efx_init_tx_queue_core_txq(tx_queue);
	}

	efx_associate(efx);

	rtnl_unlock();

	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
	if (rc) {
		netif_err(efx, drv, efx->net_dev,
			  "failed to init net dev attributes\n");
		goto fail_registered;
	}

	efx_init_mcdi_logging(efx);

	return 0;

fail_registered:
	rtnl_lock();
	efx_dissociate(efx);
	unregister_netdevice(net_dev);
fail_locked:
	efx->state = STATE_UNINIT;
	rtnl_unlock();
	netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
	return rc;
}
 983
 984static void efx_unregister_netdev(struct efx_nic *efx)
 985{
 986        if (!efx->net_dev)
 987                return;
 988
 989        BUG_ON(netdev_priv(efx->net_dev) != efx);
 990
 991        if (efx_dev_registered(efx)) {
 992                strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
 993                efx_fini_mcdi_logging(efx);
 994                device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
 995                unregister_netdev(efx->net_dev);
 996        }
 997}
 998
 999/**************************************************************************
1000 *
1001 * List of NICs we support
1002 *
1003 **************************************************************************/
1004
/* PCI device ID table.  driver_data points at the efx_nic_type that
 * drives the given controller; 0x1xxx device IDs are the VF variants.
 */
static const struct pci_device_id efx_pci_table[] = {
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803),	/* SFC9020 */
	 .driver_data = (unsigned long) &siena_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813),	/* SFL9021 */
	 .driver_data = (unsigned long) &siena_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903),  /* SFC9120 PF */
	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1903),  /* SFC9120 VF */
	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923),  /* SFC9140 PF */
	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923),  /* SFC9140 VF */
	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03),  /* SFC9220 PF */
	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03),  /* SFC9220 VF */
	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0b03),  /* SFC9250 PF */
	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1b03),  /* SFC9250 VF */
	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
	{0}			/* end of list */
};
1029
1030/**************************************************************************
1031 *
1032 * Data housekeeping
1033 *
1034 **************************************************************************/
1035
1036void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
1037{
1038        u64 n_rx_nodesc_trunc = 0;
1039        struct efx_channel *channel;
1040
1041        efx_for_each_channel(channel, efx)
1042                n_rx_nodesc_trunc += channel->n_rx_nodesc_trunc;
1043        stats[GENERIC_STAT_rx_nodesc_trunc] = n_rx_nodesc_trunc;
1044        stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
1045}
1046
1047/**************************************************************************
1048 *
1049 * PCI interface
1050 *
1051 **************************************************************************/
1052
1053/* Main body of final NIC shutdown code
1054 * This is called only at module unload (or hotplug removal).
1055 */
static void efx_pci_remove_main(struct efx_nic *efx)
{
	/* Flush reset_work. It can no longer be scheduled since we
	 * are not READY.
	 */
	BUG_ON(efx->state == STATE_READY);
	efx_flush_reset_workqueue(efx);

	/* Tear down in the reverse order of efx_pci_probe_main() */
	efx_disable_interrupts(efx);
	efx_clear_interrupt_affinity(efx);
	efx_nic_fini_interrupt(efx);
	efx_fini_port(efx);
	efx->type->fini(efx);
	efx_fini_napi(efx);
	efx_remove_all(efx);
}
1072
1073/* Final NIC shutdown
1074 * This is called only at module unload (or hotplug removal).  A PF can call
1075 * this on its VFs to ensure they are unbound first.
1076 */
1077static void efx_pci_remove(struct pci_dev *pci_dev)
1078{
1079        struct efx_nic *efx;
1080
1081        efx = pci_get_drvdata(pci_dev);
1082        if (!efx)
1083                return;
1084
1085        /* Mark the NIC as fini, then stop the interface */
1086        rtnl_lock();
1087        efx_dissociate(efx);
1088        dev_close(efx->net_dev);
1089        efx_disable_interrupts(efx);
1090        efx->state = STATE_UNINIT;
1091        rtnl_unlock();
1092
1093        if (efx->type->sriov_fini)
1094                efx->type->sriov_fini(efx);
1095
1096        efx_unregister_netdev(efx);
1097
1098        efx_mtd_remove(efx);
1099
1100        efx_pci_remove_main(efx);
1101
1102        efx_fini_io(efx, efx->type->mem_bar(efx));
1103        netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");
1104
1105        efx_fini_struct(efx);
1106        free_netdev(efx->net_dev);
1107
1108        pci_disable_pcie_error_reporting(pci_dev);
1109};
1110
1111/* NIC VPD information
1112 * Called during probe to display the part number of the
1113 * installed NIC.  VPD is potentially very large but this should
1114 * always appear within the first 512 bytes.
1115 */
#define SFC_VPD_LEN 512
static void efx_probe_vpd_strings(struct efx_nic *efx)
{
	struct pci_dev *dev = efx->pci_dev;
	char vpd_data[SFC_VPD_LEN];
	ssize_t vpd_size;
	/* i/j are reused throughout as offset/remaining-length cursors
	 * into vpd_data.
	 */
	int ro_start, ro_size, i, j;

	/* Get the vpd data from the device */
	vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data);
	if (vpd_size <= 0) {
		netif_err(efx, drv, efx->net_dev, "Unable to read VPD\n");
		return;
	}

	/* Get the Read only section */
	ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA);
	if (ro_start < 0) {
		netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n");
		return;
	}

	/* Clamp the RO section length to what we actually read */
	ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]);
	j = ro_size;
	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
	if (i + j > vpd_size)
		j = vpd_size - i;

	/* Get the Part number */
	i = pci_vpd_find_info_keyword(vpd_data, i, j, "PN");
	if (i < 0) {
		netif_err(efx, drv, efx->net_dev, "Part number not found\n");
		return;
	}

	j = pci_vpd_info_field_size(&vpd_data[i]);
	i += PCI_VPD_INFO_FLD_HDR_SIZE;
	if (i + j > vpd_size) {
		netif_err(efx, drv, efx->net_dev, "Incomplete part number\n");
		return;
	}

	netif_info(efx, drv, efx->net_dev,
		   "Part Number : %.*s\n", j, &vpd_data[i]);

	/* Rewind to the start of the RO section to look for the serial
	 * number.  NOTE(review): unlike the first search, j is not
	 * re-clamped to vpd_size here; the i + j bounds checks below
	 * still protect the reads.
	 */
	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
	j = ro_size;
	i = pci_vpd_find_info_keyword(vpd_data, i, j, "SN");
	if (i < 0) {
		netif_err(efx, drv, efx->net_dev, "Serial number not found\n");
		return;
	}

	j = pci_vpd_info_field_size(&vpd_data[i]);
	i += PCI_VPD_INFO_FLD_HDR_SIZE;
	if (i + j > vpd_size) {
		netif_err(efx, drv, efx->net_dev, "Incomplete serial number\n");
		return;
	}

	/* Keep a NUL-terminated copy of the serial number; freed in
	 * efx_fini_struct() (presumably — allocation owner is efx).
	 */
	efx->vpd_sn = kmalloc(j + 1, GFP_KERNEL);
	if (!efx->vpd_sn)
		return;

	snprintf(efx->vpd_sn, j + 1, "%s", &vpd_data[i]);
}
1182
1183
1184/* Main body of NIC initialisation
1185 * This is called at module load (or hotplug insertion, theoretically).
1186 */
1187static int efx_pci_probe_main(struct efx_nic *efx)
1188{
1189        int rc;
1190
1191        /* Do start-of-day initialisation */
1192        rc = efx_probe_all(efx);
1193        if (rc)
1194                goto fail1;
1195
1196        efx_init_napi(efx);
1197
1198        down_write(&efx->filter_sem);
1199        rc = efx->type->init(efx);
1200        up_write(&efx->filter_sem);
1201        if (rc) {
1202                netif_err(efx, probe, efx->net_dev,
1203                          "failed to initialise NIC\n");
1204                goto fail3;
1205        }
1206
1207        rc = efx_init_port(efx);
1208        if (rc) {
1209                netif_err(efx, probe, efx->net_dev,
1210                          "failed to initialise port\n");
1211                goto fail4;
1212        }
1213
1214        rc = efx_nic_init_interrupt(efx);
1215        if (rc)
1216                goto fail5;
1217
1218        efx_set_interrupt_affinity(efx);
1219        rc = efx_enable_interrupts(efx);
1220        if (rc)
1221                goto fail6;
1222
1223        return 0;
1224
1225 fail6:
1226        efx_clear_interrupt_affinity(efx);
1227        efx_nic_fini_interrupt(efx);
1228 fail5:
1229        efx_fini_port(efx);
1230 fail4:
1231        efx->type->fini(efx);
1232 fail3:
1233        efx_fini_napi(efx);
1234        efx_remove_all(efx);
1235 fail1:
1236        return rc;
1237}
1238
/* Second stage of probe, run after BAR mapping: initialise the NIC,
 * optionally enable SR-IOV, compute netdev feature flags and register
 * the net device.  Returns 0 or a negative errno; on failure the NIC
 * initialisation done by efx_pci_probe_main() is undone.
 */
static int efx_pci_probe_post_io(struct efx_nic *efx)
{
	struct net_device *net_dev = efx->net_dev;
	int rc = efx_pci_probe_main(efx);

	if (rc)
		return rc;

	/* SR-IOV failure is logged but deliberately non-fatal */
	if (efx->type->sriov_init) {
		rc = efx->type->sriov_init(efx);
		if (rc)
			netif_err(efx, probe, efx->net_dev,
				  "SR-IOV can't be enabled rc %d\n", rc);
	}

	/* Determine netdevice features */
	net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
			      NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL);
	if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		net_dev->features |= NETIF_F_TSO6;
	/* Check whether device supports TSO */
	if (!efx->type->tso_versions || !efx->type->tso_versions(efx))
		net_dev->features &= ~NETIF_F_ALL_TSO;
	/* Mask for features that also apply to VLAN devices */
	net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
				   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
				   NETIF_F_RXCSUM);

	/* hw_features is computed while RXALL is still set, so the user
	 * can re-enable it via ethtool even though it defaults to off.
	 */
	net_dev->hw_features |= net_dev->features & ~efx->fixed_features;

	/* Disable receiving frames with bad FCS, by default. */
	net_dev->features &= ~NETIF_F_RXALL;

	/* Disable VLAN filtering by default.  It may be enforced if
	 * the feature is fixed (i.e. VLAN filters are required to
	 * receive VLAN tagged packets due to vPort restrictions).
	 */
	net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
	net_dev->features |= efx->fixed_features;

	rc = efx_register_netdev(efx);
	if (!rc)
		return 0;

	efx_pci_remove_main(efx);
	return rc;
}
1286
1287/* NIC initialisation
1288 *
1289 * This is called at module load (or hotplug insertion,
1290 * theoretically).  It sets up PCI mappings, resets the NIC,
1291 * sets up and registers the network devices with the kernel and hooks
1292 * the interrupt service routine.  It does not prepare the device for
1293 * transmission; this is left to the first time one of the network
1294 * interfaces is brought up (i.e. efx_net_open).
1295 */
static int efx_pci_probe(struct pci_dev *pci_dev,
			 const struct pci_device_id *entry)
{
	struct net_device *net_dev;
	struct efx_nic *efx;
	int rc;

	/* Allocate and initialise a struct net_device and struct efx_nic */
	net_dev = alloc_etherdev_mqs(sizeof(*efx), EFX_MAX_CORE_TX_QUEUES,
				     EFX_MAX_RX_QUEUES);
	if (!net_dev)
		return -ENOMEM;
	efx = netdev_priv(net_dev);
	/* entry->driver_data carries the efx_nic_type from efx_pci_table */
	efx->type = (const struct efx_nic_type *) entry->driver_data;
	efx->fixed_features |= NETIF_F_HIGHDMA;

	pci_set_drvdata(pci_dev, efx);
	SET_NETDEV_DEV(net_dev, &pci_dev->dev);
	rc = efx_init_struct(efx, pci_dev, net_dev);
	if (rc)
		goto fail1;

	netif_info(efx, probe, efx->net_dev,
		   "Solarflare NIC detected\n");

	/* VPD is read via the PF; VFs have no VPD of their own */
	if (!efx->type->is_vf)
		efx_probe_vpd_strings(efx);

	/* Set up basic I/O (BAR mappings etc) */
	rc = efx_init_io(efx, efx->type->mem_bar(efx), efx->type->max_dma_mask,
			 efx->type->mem_map_size(efx));
	if (rc)
		goto fail2;

	/* The post-I/O stage is retried up to twice, because a reset
	 * scheduled during a failed attempt may have been the cause.
	 */
	rc = efx_pci_probe_post_io(efx);
	if (rc) {
		/* On failure, retry once immediately.
		 * If we aborted probe due to a scheduled reset, dismiss it.
		 */
		efx->reset_pending = 0;
		rc = efx_pci_probe_post_io(efx);
		if (rc) {
			/* On another failure, retry once more
			 * after a 50-305ms delay.
			 */
			unsigned char r;

			/* Random jitter so multiple ports don't retry in
			 * lock-step.
			 */
			get_random_bytes(&r, 1);
			msleep((unsigned int)r + 50);
			efx->reset_pending = 0;
			rc = efx_pci_probe_post_io(efx);
		}
	}
	if (rc)
		goto fail3;

	netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");

	/* Try to create MTDs, but allow this to fail */
	rtnl_lock();
	rc = efx_mtd_probe(efx);
	rtnl_unlock();
	if (rc && rc != -EPERM)
		netif_warn(efx, probe, efx->net_dev,
			   "failed to create MTDs (%d)\n", rc);

	/* AER is best-effort; failure to enable it is ignored */
	(void)pci_enable_pcie_error_reporting(pci_dev);

	if (efx->type->udp_tnl_push_ports)
		efx->type->udp_tnl_push_ports(efx);

	return 0;

 fail3:
	efx_fini_io(efx, efx->type->mem_bar(efx));
 fail2:
	efx_fini_struct(efx);
 fail1:
	WARN_ON(rc > 0);
	netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc);
	free_netdev(net_dev);
	return rc;
}
1379
1380/* efx_pci_sriov_configure returns the actual number of Virtual Functions
1381 * enabled on success
1382 */
#ifdef CONFIG_SFC_SRIOV
/* PCI sriov_configure hook: enable/disable VFs via sysfs.  Returns
 * the number of VFs enabled on success, or a negative errno.
 */
static int efx_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
{
	struct efx_nic *efx = pci_get_drvdata(dev);
	int rc;

	if (!efx->type->sriov_configure)
		return -EOPNOTSUPP;

	rc = efx->type->sriov_configure(efx, num_vfs);
	return rc ? rc : num_vfs;
}
#endif
1399
/* PM freeze/suspend callback: quiesce the datapath and interrupts.
 * Leaves a STATE_DISABLED device alone.  Always returns 0.
 */
static int efx_pm_freeze(struct device *dev)
{
	struct efx_nic *efx = dev_get_drvdata(dev);

	rtnl_lock();

	if (efx->state != STATE_DISABLED) {
		/* Leaving STATE_READY also quenches scheduled resets;
		 * efx_pm_thaw() requeues them later.
		 */
		efx->state = STATE_UNINIT;

		efx_device_detach_sync(efx);

		efx_stop_all(efx);
		efx_disable_interrupts(efx);
	}

	rtnl_unlock();

	return 0;
}
1419
/* PM thaw callback: re-enable interrupts and restart the datapath
 * stopped by efx_pm_freeze().  Returns 0 or a negative errno.
 */
static int efx_pm_thaw(struct device *dev)
{
	int rc;
	struct efx_nic *efx = dev_get_drvdata(dev);

	rtnl_lock();

	if (efx->state != STATE_DISABLED) {
		rc = efx_enable_interrupts(efx);
		if (rc)
			goto fail;

		/* NOTE(review): reconfigure()'s return value is ignored
		 * here — a PHY reconfigure failure is treated as
		 * non-fatal on resume.
		 */
		mutex_lock(&efx->mac_lock);
		efx->phy_op->reconfigure(efx);
		mutex_unlock(&efx->mac_lock);

		efx_start_all(efx);

		efx_device_attach_if_not_resetting(efx);

		efx->state = STATE_READY;

		efx->type->resume_wol(efx);
	}

	rtnl_unlock();

	/* Reschedule any quenched resets scheduled during efx_pm_freeze() */
	efx_queue_reset_work(efx);

	return 0;

fail:
	rtnl_unlock();

	return rc;
}
1457
1458static int efx_pm_poweroff(struct device *dev)
1459{
1460        struct pci_dev *pci_dev = to_pci_dev(dev);
1461        struct efx_nic *efx = pci_get_drvdata(pci_dev);
1462
1463        efx->type->fini(efx);
1464
1465        efx->reset_pending = 0;
1466
1467        pci_save_state(pci_dev);
1468        return pci_set_power_state(pci_dev, PCI_D3hot);
1469}
1470
1471/* Used for both resume and restore */
static int efx_pm_resume(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct efx_nic *efx = pci_get_drvdata(pci_dev);
	int rc;

	/* Restore PCI power and config state saved in efx_pm_poweroff() */
	rc = pci_set_power_state(pci_dev, PCI_D0);
	if (rc)
		return rc;
	pci_restore_state(pci_dev);
	rc = pci_enable_device(pci_dev);
	if (rc)
		return rc;
	pci_set_master(efx->pci_dev);
	/* Full hardware reset and re-init before restarting the datapath */
	rc = efx->type->reset(efx, RESET_TYPE_ALL);
	if (rc)
		return rc;
	down_write(&efx->filter_sem);
	rc = efx->type->init(efx);
	up_write(&efx->filter_sem);
	if (rc)
		return rc;
	rc = efx_pm_thaw(dev);
	return rc;
}
1497
/* PM suspend callback: freeze the datapath then power the NIC off.
 * If poweroff fails, try to bring the device back up before
 * propagating the error.
 */
static int efx_pm_suspend(struct device *dev)
{
	int rc;

	efx_pm_freeze(dev);
	rc = efx_pm_poweroff(dev);
	if (rc)
		efx_pm_resume(dev);
	return rc;
}
1508
/* Power management operations; resume and restore share one handler */
static const struct dev_pm_ops efx_pm_ops = {
	.suspend	= efx_pm_suspend,
	.resume		= efx_pm_resume,
	.freeze		= efx_pm_freeze,
	.thaw		= efx_pm_thaw,
	.poweroff	= efx_pm_poweroff,
	.restore	= efx_pm_resume,
};
1517
1518/* A PCI error affecting this device was detected.
1519 * At this point MMIO and DMA may be disabled.
1520 * Stop the software path and request a slot reset.
1521 */
static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
					      enum pci_channel_state state)
{
	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
	struct efx_nic *efx = pci_get_drvdata(pdev);

	/* Permanent failure: give up on the device entirely */
	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	rtnl_lock();

	if (efx->state != STATE_DISABLED) {
		/* Quiesce everything and ask the PCI core for a slot
		 * reset; recovery completes in efx_io_resume().
		 */
		efx->state = STATE_RECOVERY;
		efx->reset_pending = 0;

		efx_device_detach_sync(efx);

		efx_stop_all(efx);
		efx_disable_interrupts(efx);

		status = PCI_ERS_RESULT_NEED_RESET;
	} else {
		/* If the interface is disabled we don't want to do anything
		 * with it.
		 */
		status = PCI_ERS_RESULT_RECOVERED;
	}

	rtnl_unlock();

	pci_disable_device(pdev);

	return status;
}
1556
1557/* Fake a successful reset, which will be performed later in efx_io_resume. */
1558static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
1559{
1560        struct efx_nic *efx = pci_get_drvdata(pdev);
1561        pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
1562
1563        if (pci_enable_device(pdev)) {
1564                netif_err(efx, hw, efx->net_dev,
1565                          "Cannot re-enable PCI device after reset.\n");
1566                status =  PCI_ERS_RESULT_DISCONNECT;
1567        }
1568
1569        return status;
1570}
1571
1572/* Perform the actual reset and resume I/O operations. */
static void efx_io_resume(struct pci_dev *pdev)
{
	struct efx_nic *efx = pci_get_drvdata(pdev);
	int rc;

	rtnl_lock();

	/* A disabled interface was left untouched by error_detected() */
	if (efx->state == STATE_DISABLED)
		goto out;

	rc = efx_reset(efx, RESET_TYPE_ALL);
	if (rc) {
		netif_err(efx, hw, efx->net_dev,
			  "efx_reset failed after PCI error (%d)\n", rc);
	} else {
		/* Leave STATE_RECOVERY (set in efx_io_error_detected()) */
		efx->state = STATE_READY;
		netif_dbg(efx, hw, efx->net_dev,
			  "Done resetting and resuming IO after PCI error.\n");
	}

out:
	rtnl_unlock();
}
1596
1597/* For simplicity and reliability, we always require a slot reset and try to
1598 * reset the hardware when a pci error affecting the device is detected.
1599 * We leave both the link_reset and mmio_enabled callback unimplemented:
1600 * with our request for slot reset the mmio_enabled callback will never be
1601 * called, and the link_reset callback is not used by AER or EEH mechanisms.
1602 */
/* PCI error (AER/EEH) recovery hooks; only these three are needed */
static const struct pci_error_handlers efx_err_handlers = {
	.error_detected = efx_io_error_detected,
	.slot_reset	= efx_io_slot_reset,
	.resume		= efx_io_resume,
};
1608
/* Top-level PCI driver registration for all supported sfc devices */
static struct pci_driver efx_pci_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= efx_pci_table,
	.probe		= efx_pci_probe,
	.remove		= efx_pci_remove,
	.driver.pm	= &efx_pm_ops,
	.err_handler	= &efx_err_handlers,
#ifdef CONFIG_SFC_SRIOV
	.sriov_configure = efx_pci_sriov_configure,
#endif
};
1620
1621/**************************************************************************
1622 *
1623 * Kernel module interface
1624 *
1625 *************************************************************************/
1626
/* Module entry point: register the netdev notifier, SR-IOV support,
 * the reset workqueue and finally the PCI driver.  Unwinds in reverse
 * order on failure; note the err_sriov label is inside the #ifdef so
 * the fall-through from err_reset stays correct either way.
 */
static int __init efx_init_module(void)
{
	int rc;

	printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n");

	rc = register_netdevice_notifier(&efx_netdev_notifier);
	if (rc)
		goto err_notifier;

#ifdef CONFIG_SFC_SRIOV
	rc = efx_init_sriov();
	if (rc)
		goto err_sriov;
#endif

	rc = efx_create_reset_workqueue();
	if (rc)
		goto err_reset;

	rc = pci_register_driver(&efx_pci_driver);
	if (rc < 0)
		goto err_pci;

	return 0;

 err_pci:
	efx_destroy_reset_workqueue();
 err_reset:
#ifdef CONFIG_SFC_SRIOV
	efx_fini_sriov();
 err_sriov:
#endif
	unregister_netdevice_notifier(&efx_netdev_notifier);
 err_notifier:
	return rc;
}
1664
1665static void __exit efx_exit_module(void)
1666{
1667        printk(KERN_INFO "Solarflare NET driver unloading\n");
1668
1669        pci_unregister_driver(&efx_pci_driver);
1670        efx_destroy_reset_workqueue();
1671#ifdef CONFIG_SFC_SRIOV
1672        efx_fini_sriov();
1673#endif
1674        unregister_netdevice_notifier(&efx_netdev_notifier);
1675
1676}
1677
module_init(efx_init_module);
module_exit(efx_exit_module);

/* Module metadata */
MODULE_AUTHOR("Solarflare Communications and "
	      "Michael Brown <mbrown@fensystems.co.uk>");
MODULE_DESCRIPTION("Solarflare network driver");
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(pci, efx_pci_table);
MODULE_VERSION(EFX_DRIVER_VERSION);
1687