linux/drivers/net/ethernet/intel/igb/igb_main.c
   1/* Intel(R) Gigabit Ethernet Linux driver
   2 * Copyright(c) 2007-2014 Intel Corporation.
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms and conditions of the GNU General Public License,
   6 * version 2, as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, see <http://www.gnu.org/licenses/>.
  15 *
  16 * The full GNU General Public License is included in this distribution in
  17 * the file called "COPYING".
  18 *
  19 * Contact Information:
  20 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  21 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
  22 */
  23
  24#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  25
  26#include <linux/module.h>
  27#include <linux/types.h>
  28#include <linux/init.h>
  29#include <linux/bitops.h>
  30#include <linux/vmalloc.h>
  31#include <linux/pagemap.h>
  32#include <linux/netdevice.h>
  33#include <linux/ipv6.h>
  34#include <linux/slab.h>
  35#include <net/checksum.h>
  36#include <net/ip6_checksum.h>
  37#include <linux/net_tstamp.h>
  38#include <linux/mii.h>
  39#include <linux/ethtool.h>
  40#include <linux/if.h>
  41#include <linux/if_vlan.h>
  42#include <linux/pci.h>
  43#include <linux/pci-aspm.h>
  44#include <linux/delay.h>
  45#include <linux/interrupt.h>
  46#include <linux/ip.h>
  47#include <linux/tcp.h>
  48#include <linux/sctp.h>
  49#include <linux/if_ether.h>
  50#include <linux/aer.h>
  51#include <linux/prefetch.h>
  52#include <linux/pm_runtime.h>
  53#include <linux/etherdevice.h>
  54#ifdef CONFIG_IGB_DCA
  55#include <linux/dca.h>
  56#endif
  57#include <linux/i2c.h>
  58#include "igb.h"
  59
  60#define MAJ 5
  61#define MIN 4
  62#define BUILD 0
  63#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
  64__stringify(BUILD) "-k"
  65char igb_driver_name[] = "igb";
  66char igb_driver_version[] = DRV_VERSION;
  67static const char igb_driver_string[] =
  68                                "Intel(R) Gigabit Ethernet Network Driver";
  69static const char igb_copyright[] =
  70                                "Copyright (c) 2007-2014 Intel Corporation.";
  71
  72static const struct e1000_info *igb_info_tbl[] = {
  73        [board_82575] = &e1000_82575_info,
  74};
  75
  76static const struct pci_device_id igb_pci_tbl[] = {
  77        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
  78        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) },
  79        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) },
  80        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 },
  81        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 },
  82        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
  83        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
  84        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
  85        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS), board_82575 },
  86        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS), board_82575 },
  87        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
  88        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
  89        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
  90        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
  91        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
  92        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
  93        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
  94        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
  95        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
  96        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
  97        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
  98        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
  99        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
 100        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
 101        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
 102        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
 103        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
 104        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
 105        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
 106        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
 107        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
 108        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
 109        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
 110        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
 111        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
 112        /* required last entry */
 113        {0, }
 114};
 115
 116MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
 117
 118static int igb_setup_all_tx_resources(struct igb_adapter *);
 119static int igb_setup_all_rx_resources(struct igb_adapter *);
 120static void igb_free_all_tx_resources(struct igb_adapter *);
 121static void igb_free_all_rx_resources(struct igb_adapter *);
 122static void igb_setup_mrqc(struct igb_adapter *);
 123static int igb_probe(struct pci_dev *, const struct pci_device_id *);
 124static void igb_remove(struct pci_dev *pdev);
 125static int igb_sw_init(struct igb_adapter *);
 126int igb_open(struct net_device *);
 127int igb_close(struct net_device *);
 128static void igb_configure(struct igb_adapter *);
 129static void igb_configure_tx(struct igb_adapter *);
 130static void igb_configure_rx(struct igb_adapter *);
 131static void igb_clean_all_tx_rings(struct igb_adapter *);
 132static void igb_clean_all_rx_rings(struct igb_adapter *);
 133static void igb_clean_tx_ring(struct igb_ring *);
 134static void igb_clean_rx_ring(struct igb_ring *);
 135static void igb_set_rx_mode(struct net_device *);
 136static void igb_update_phy_info(unsigned long);
 137static void igb_watchdog(unsigned long);
 138static void igb_watchdog_task(struct work_struct *);
 139static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
 140static void igb_get_stats64(struct net_device *dev,
 141                            struct rtnl_link_stats64 *stats);
 142static int igb_change_mtu(struct net_device *, int);
 143static int igb_set_mac(struct net_device *, void *);
 144static void igb_set_uta(struct igb_adapter *adapter, bool set);
 145static irqreturn_t igb_intr(int irq, void *);
 146static irqreturn_t igb_intr_msi(int irq, void *);
 147static irqreturn_t igb_msix_other(int irq, void *);
 148static irqreturn_t igb_msix_ring(int irq, void *);
 149#ifdef CONFIG_IGB_DCA
 150static void igb_update_dca(struct igb_q_vector *);
 151static void igb_setup_dca(struct igb_adapter *);
 152#endif /* CONFIG_IGB_DCA */
 153static int igb_poll(struct napi_struct *, int);
 154static bool igb_clean_tx_irq(struct igb_q_vector *, int);
 155static int igb_clean_rx_irq(struct igb_q_vector *, int);
 156static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
 157static void igb_tx_timeout(struct net_device *);
 158static void igb_reset_task(struct work_struct *);
 159static void igb_vlan_mode(struct net_device *netdev,
 160                          netdev_features_t features);
 161static int igb_vlan_rx_add_vid(struct net_device *, __be16, u16);
 162static int igb_vlan_rx_kill_vid(struct net_device *, __be16, u16);
 163static void igb_restore_vlan(struct igb_adapter *);
  164static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
 165static void igb_ping_all_vfs(struct igb_adapter *);
 166static void igb_msg_task(struct igb_adapter *);
 167static void igb_vmm_control(struct igb_adapter *);
 168static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
 169static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
 170static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
 171static int igb_ndo_set_vf_vlan(struct net_device *netdev,
 172                               int vf, u16 vlan, u8 qos, __be16 vlan_proto);
 173static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
 174static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
 175                                   bool setting);
 176static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
 177                                 struct ifla_vf_info *ivi);
 178static void igb_check_vf_rate_limit(struct igb_adapter *);
 179static void igb_nfc_filter_exit(struct igb_adapter *adapter);
 180static void igb_nfc_filter_restore(struct igb_adapter *adapter);
 181
 182#ifdef CONFIG_PCI_IOV
 183static int igb_vf_configure(struct igb_adapter *adapter, int vf);
 184static int igb_pci_enable_sriov(struct pci_dev *dev, int num_vfs);
 185static int igb_disable_sriov(struct pci_dev *dev);
 186static int igb_pci_disable_sriov(struct pci_dev *dev);
 187#endif
 188
 189#ifdef CONFIG_PM
 190#ifdef CONFIG_PM_SLEEP
 191static int igb_suspend(struct device *);
 192#endif
 193static int igb_resume(struct device *);
 194static int igb_runtime_suspend(struct device *dev);
 195static int igb_runtime_resume(struct device *dev);
 196static int igb_runtime_idle(struct device *dev);
 197static const struct dev_pm_ops igb_pm_ops = {
 198        SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
 199        SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
 200                        igb_runtime_idle)
 201};
 202#endif
 203static void igb_shutdown(struct pci_dev *);
 204static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs);
 205#ifdef CONFIG_IGB_DCA
 206static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
 207static struct notifier_block dca_notifier = {
 208        .notifier_call  = igb_notify_dca,
 209        .next           = NULL,
 210        .priority       = 0
 211};
 212#endif
 213#ifdef CONFIG_NET_POLL_CONTROLLER
 214/* for netdump / net console */
 215static void igb_netpoll(struct net_device *);
 216#endif
 217#ifdef CONFIG_PCI_IOV
 218static unsigned int max_vfs;
 219module_param(max_vfs, uint, 0);
 220MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate per physical function");
 221#endif /* CONFIG_PCI_IOV */
 222
 223static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
 224                     pci_channel_state_t);
 225static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
 226static void igb_io_resume(struct pci_dev *);
 227
 228static const struct pci_error_handlers igb_err_handler = {
 229        .error_detected = igb_io_error_detected,
 230        .slot_reset = igb_io_slot_reset,
 231        .resume = igb_io_resume,
 232};
 233
 234static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
 235
 236static struct pci_driver igb_driver = {
 237        .name     = igb_driver_name,
 238        .id_table = igb_pci_tbl,
 239        .probe    = igb_probe,
 240        .remove   = igb_remove,
 241#ifdef CONFIG_PM
 242        .driver.pm = &igb_pm_ops,
 243#endif
 244        .shutdown = igb_shutdown,
 245        .sriov_configure = igb_pci_sriov_configure,
 246        .err_handler = &igb_err_handler
 247};
 248
 249MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
 250MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
 251MODULE_LICENSE("GPL");
 252MODULE_VERSION(DRV_VERSION);
 253
 254#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
 255static int debug = -1;
 256module_param(debug, int, 0);
 257MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
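     /* A negative value (the -1 default) means "use the driver default":
      * the probe path (not shown in this excerpt) hands this value to
      * netif_msg_init(), which falls back to DEFAULT_MSG_ENABLE for
      * out-of-range values.
      */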
 258
 259struct igb_reg_info {
 260        u32 ofs;
 261        char *name;
 262};
 263
 264static const struct igb_reg_info igb_reg_info_tbl[] = {
 265
 266        /* General Registers */
 267        {E1000_CTRL, "CTRL"},
 268        {E1000_STATUS, "STATUS"},
 269        {E1000_CTRL_EXT, "CTRL_EXT"},
 270
 271        /* Interrupt Registers */
 272        {E1000_ICR, "ICR"},
 273
 274        /* RX Registers */
 275        {E1000_RCTL, "RCTL"},
 276        {E1000_RDLEN(0), "RDLEN"},
 277        {E1000_RDH(0), "RDH"},
 278        {E1000_RDT(0), "RDT"},
 279        {E1000_RXDCTL(0), "RXDCTL"},
 280        {E1000_RDBAL(0), "RDBAL"},
 281        {E1000_RDBAH(0), "RDBAH"},
 282
 283        /* TX Registers */
 284        {E1000_TCTL, "TCTL"},
 285        {E1000_TDBAL(0), "TDBAL"},
 286        {E1000_TDBAH(0), "TDBAH"},
 287        {E1000_TDLEN(0), "TDLEN"},
 288        {E1000_TDH(0), "TDH"},
 289        {E1000_TDT(0), "TDT"},
 290        {E1000_TXDCTL(0), "TXDCTL"},
 291        {E1000_TDFH, "TDFH"},
 292        {E1000_TDFT, "TDFT"},
 293        {E1000_TDFHS, "TDFHS"},
 294        {E1000_TDFPC, "TDFPC"},
 295
 296        /* List Terminator */
 297        {}
 298};
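     /* Only the queue-0 offset of each per-queue ring register is listed
      * above; igb_regdump() below expands those entries and prints the
      * values for queues 0-3.
      */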
 299
 300/* igb_regdump - register printout routine */
 301static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
 302{
 303        int n = 0;
 304        char rname[16];
 305        u32 regs[8];
 306
 307        switch (reginfo->ofs) {
 308        case E1000_RDLEN(0):
 309                for (n = 0; n < 4; n++)
 310                        regs[n] = rd32(E1000_RDLEN(n));
 311                break;
 312        case E1000_RDH(0):
 313                for (n = 0; n < 4; n++)
 314                        regs[n] = rd32(E1000_RDH(n));
 315                break;
 316        case E1000_RDT(0):
 317                for (n = 0; n < 4; n++)
 318                        regs[n] = rd32(E1000_RDT(n));
 319                break;
 320        case E1000_RXDCTL(0):
 321                for (n = 0; n < 4; n++)
 322                        regs[n] = rd32(E1000_RXDCTL(n));
 323                break;
 324        case E1000_RDBAL(0):
 325                for (n = 0; n < 4; n++)
 326                        regs[n] = rd32(E1000_RDBAL(n));
 327                break;
 328        case E1000_RDBAH(0):
 329                for (n = 0; n < 4; n++)
 330                        regs[n] = rd32(E1000_RDBAH(n));
 331                break;
 332        case E1000_TDBAL(0):
 333                for (n = 0; n < 4; n++)
  334                        regs[n] = rd32(E1000_TDBAL(n));
 335                break;
 336        case E1000_TDBAH(0):
 337                for (n = 0; n < 4; n++)
 338                        regs[n] = rd32(E1000_TDBAH(n));
 339                break;
 340        case E1000_TDLEN(0):
 341                for (n = 0; n < 4; n++)
 342                        regs[n] = rd32(E1000_TDLEN(n));
 343                break;
 344        case E1000_TDH(0):
 345                for (n = 0; n < 4; n++)
 346                        regs[n] = rd32(E1000_TDH(n));
 347                break;
 348        case E1000_TDT(0):
 349                for (n = 0; n < 4; n++)
 350                        regs[n] = rd32(E1000_TDT(n));
 351                break;
 352        case E1000_TXDCTL(0):
 353                for (n = 0; n < 4; n++)
 354                        regs[n] = rd32(E1000_TXDCTL(n));
 355                break;
 356        default:
 357                pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
 358                return;
 359        }
 360
 361        snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
 362        pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
 363                regs[2], regs[3]);
 364}
 365
 366/* igb_dump - Print registers, Tx-rings and Rx-rings */
 367static void igb_dump(struct igb_adapter *adapter)
 368{
 369        struct net_device *netdev = adapter->netdev;
 370        struct e1000_hw *hw = &adapter->hw;
 371        struct igb_reg_info *reginfo;
 372        struct igb_ring *tx_ring;
 373        union e1000_adv_tx_desc *tx_desc;
 374        struct my_u0 { u64 a; u64 b; } *u0;
 375        struct igb_ring *rx_ring;
 376        union e1000_adv_rx_desc *rx_desc;
 377        u32 staterr;
 378        u16 i, n;
 379
 380        if (!netif_msg_hw(adapter))
 381                return;
 382
 383        /* Print netdevice Info */
 384        if (netdev) {
 385                dev_info(&adapter->pdev->dev, "Net device Info\n");
 386                pr_info("Device Name     state            trans_start\n");
 387                pr_info("%-15s %016lX %016lX\n", netdev->name,
 388                        netdev->state, dev_trans_start(netdev));
 389        }
 390
 391        /* Print Registers */
 392        dev_info(&adapter->pdev->dev, "Register Dump\n");
 393        pr_info(" Register Name   Value\n");
 394        for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
 395             reginfo->name; reginfo++) {
 396                igb_regdump(hw, reginfo);
 397        }
 398
 399        /* Print TX Ring Summary */
 400        if (!netdev || !netif_running(netdev))
 401                goto exit;
 402
 403        dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
 404        pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
 405        for (n = 0; n < adapter->num_tx_queues; n++) {
 406                struct igb_tx_buffer *buffer_info;
 407                tx_ring = adapter->tx_ring[n];
 408                buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
 409                pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
 410                        n, tx_ring->next_to_use, tx_ring->next_to_clean,
 411                        (u64)dma_unmap_addr(buffer_info, dma),
 412                        dma_unmap_len(buffer_info, len),
 413                        buffer_info->next_to_watch,
 414                        (u64)buffer_info->time_stamp);
 415        }
 416
 417        /* Print TX Rings */
 418        if (!netif_msg_tx_done(adapter))
 419                goto rx_ring_summary;
 420
 421        dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
 422
 423        /* Transmit Descriptor Formats
 424         *
 425         * Advanced Transmit Descriptor
 426         *   +--------------------------------------------------------------+
 427         * 0 |         Buffer Address [63:0]                                |
 428         *   +--------------------------------------------------------------+
 429         * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
 430         *   +--------------------------------------------------------------+
 431         *   63      46 45    40 39 38 36 35 32 31   24             15       0
 432         */
 433
 434        for (n = 0; n < adapter->num_tx_queues; n++) {
 435                tx_ring = adapter->tx_ring[n];
 436                pr_info("------------------------------------\n");
 437                pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
 438                pr_info("------------------------------------\n");
 439                pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] [bi->dma       ] leng  ntw timestamp        bi->skb\n");
 440
 441                for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
 442                        const char *next_desc;
 443                        struct igb_tx_buffer *buffer_info;
 444                        tx_desc = IGB_TX_DESC(tx_ring, i);
 445                        buffer_info = &tx_ring->tx_buffer_info[i];
 446                        u0 = (struct my_u0 *)tx_desc;
 447                        if (i == tx_ring->next_to_use &&
 448                            i == tx_ring->next_to_clean)
 449                                next_desc = " NTC/U";
 450                        else if (i == tx_ring->next_to_use)
 451                                next_desc = " NTU";
 452                        else if (i == tx_ring->next_to_clean)
 453                                next_desc = " NTC";
 454                        else
 455                                next_desc = "";
 456
 457                        pr_info("T [0x%03X]    %016llX %016llX %016llX %04X  %p %016llX %p%s\n",
 458                                i, le64_to_cpu(u0->a),
 459                                le64_to_cpu(u0->b),
 460                                (u64)dma_unmap_addr(buffer_info, dma),
 461                                dma_unmap_len(buffer_info, len),
 462                                buffer_info->next_to_watch,
 463                                (u64)buffer_info->time_stamp,
 464                                buffer_info->skb, next_desc);
 465
 466                        if (netif_msg_pktdata(adapter) && buffer_info->skb)
 467                                print_hex_dump(KERN_INFO, "",
 468                                        DUMP_PREFIX_ADDRESS,
 469                                        16, 1, buffer_info->skb->data,
 470                                        dma_unmap_len(buffer_info, len),
 471                                        true);
 472                }
 473        }
 474
 475        /* Print RX Rings Summary */
 476rx_ring_summary:
 477        dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
 478        pr_info("Queue [NTU] [NTC]\n");
 479        for (n = 0; n < adapter->num_rx_queues; n++) {
 480                rx_ring = adapter->rx_ring[n];
 481                pr_info(" %5d %5X %5X\n",
 482                        n, rx_ring->next_to_use, rx_ring->next_to_clean);
 483        }
 484
 485        /* Print RX Rings */
 486        if (!netif_msg_rx_status(adapter))
 487                goto exit;
 488
 489        dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
 490
 491        /* Advanced Receive Descriptor (Read) Format
 492         *    63                                           1        0
 493         *    +-----------------------------------------------------+
 494         *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
 495         *    +----------------------------------------------+------+
 496         *  8 |       Header Buffer Address [63:1]           |  DD  |
 497         *    +-----------------------------------------------------+
 498         *
 499         *
 500         * Advanced Receive Descriptor (Write-Back) Format
 501         *
 502         *   63       48 47    32 31  30      21 20 17 16   4 3     0
 503         *   +------------------------------------------------------+
 504         * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
 505         *   | Checksum   Ident  |   |           |    | Type | Type |
 506         *   +------------------------------------------------------+
 507         * 8 | VLAN Tag | Length | Extended Error | Extended Status |
 508         *   +------------------------------------------------------+
 509         *   63       48 47    32 31            20 19               0
 510         */
 511
 512        for (n = 0; n < adapter->num_rx_queues; n++) {
 513                rx_ring = adapter->rx_ring[n];
 514                pr_info("------------------------------------\n");
 515                pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
 516                pr_info("------------------------------------\n");
 517                pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] [bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
 518                pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] ---------------- [bi->skb] <-- Adv Rx Write-Back format\n");
 519
 520                for (i = 0; i < rx_ring->count; i++) {
 521                        const char *next_desc;
 522                        struct igb_rx_buffer *buffer_info;
 523                        buffer_info = &rx_ring->rx_buffer_info[i];
 524                        rx_desc = IGB_RX_DESC(rx_ring, i);
 525                        u0 = (struct my_u0 *)rx_desc;
 526                        staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
 527
 528                        if (i == rx_ring->next_to_use)
 529                                next_desc = " NTU";
 530                        else if (i == rx_ring->next_to_clean)
 531                                next_desc = " NTC";
 532                        else
 533                                next_desc = "";
 534
 535                        if (staterr & E1000_RXD_STAT_DD) {
 536                                /* Descriptor Done */
 537                                pr_info("%s[0x%03X]     %016llX %016llX ---------------- %s\n",
 538                                        "RWB", i,
 539                                        le64_to_cpu(u0->a),
 540                                        le64_to_cpu(u0->b),
 541                                        next_desc);
 542                        } else {
 543                                pr_info("%s[0x%03X]     %016llX %016llX %016llX %s\n",
 544                                        "R  ", i,
 545                                        le64_to_cpu(u0->a),
 546                                        le64_to_cpu(u0->b),
 547                                        (u64)buffer_info->dma,
 548                                        next_desc);
 549
 550                                if (netif_msg_pktdata(adapter) &&
 551                                    buffer_info->dma && buffer_info->page) {
 552                                        print_hex_dump(KERN_INFO, "",
 553                                          DUMP_PREFIX_ADDRESS,
 554                                          16, 1,
 555                                          page_address(buffer_info->page) +
 556                                                      buffer_info->page_offset,
 557                                          IGB_RX_BUFSZ, true);
 558                                }
 559                        }
 560                }
 561        }
 562
 563exit:
 564        return;
 565}
 566
 567/**
 568 *  igb_get_i2c_data - Reads the I2C SDA data bit
  569 *  @data: pointer to hardware structure (the adapter, passed as void *
  570 *         by the i2c bit-banging algorithm layer)
 571 *
 572 *  Returns the I2C data bit value
 573 **/
 574static int igb_get_i2c_data(void *data)
 575{
 576        struct igb_adapter *adapter = (struct igb_adapter *)data;
 577        struct e1000_hw *hw = &adapter->hw;
 578        s32 i2cctl = rd32(E1000_I2CPARAMS);
 579
 580        return !!(i2cctl & E1000_I2C_DATA_IN);
 581}
 582
 583/**
 584 *  igb_set_i2c_data - Sets the I2C data bit
 585 *  @data: pointer to hardware structure
 586 *  @state: I2C data value (0 or 1) to set
 587 *
 588 *  Sets the I2C data bit
 589 **/
 590static void igb_set_i2c_data(void *data, int state)
 591{
 592        struct igb_adapter *adapter = (struct igb_adapter *)data;
 593        struct e1000_hw *hw = &adapter->hw;
 594        s32 i2cctl = rd32(E1000_I2CPARAMS);
 595
 596        if (state)
 597                i2cctl |= E1000_I2C_DATA_OUT;
 598        else
 599                i2cctl &= ~E1000_I2C_DATA_OUT;
 600
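             /* The *_OE_N bits appear to be active-low output enables:
              * clearing DATA_OE_N drives SDA with the DATA_OUT value, while
              * setting CLK_OE_N leaves SCL released here (igb_set_i2c_clk()
              * clears it whenever it drives the clock).
              */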
 601        i2cctl &= ~E1000_I2C_DATA_OE_N;
 602        i2cctl |= E1000_I2C_CLK_OE_N;
 603        wr32(E1000_I2CPARAMS, i2cctl);
 604        wrfl();
 605
 606}
 607
 608/**
 609 *  igb_set_i2c_clk - Sets the I2C SCL clock
 610 *  @data: pointer to hardware structure
 611 *  @state: state to set clock
 612 *
 613 *  Sets the I2C clock line to state
 614 **/
 615static void igb_set_i2c_clk(void *data, int state)
 616{
 617        struct igb_adapter *adapter = (struct igb_adapter *)data;
 618        struct e1000_hw *hw = &adapter->hw;
 619        s32 i2cctl = rd32(E1000_I2CPARAMS);
 620
 621        if (state) {
 622                i2cctl |= E1000_I2C_CLK_OUT;
 623                i2cctl &= ~E1000_I2C_CLK_OE_N;
 624        } else {
 625                i2cctl &= ~E1000_I2C_CLK_OUT;
 626                i2cctl &= ~E1000_I2C_CLK_OE_N;
 627        }
 628        wr32(E1000_I2CPARAMS, i2cctl);
 629        wrfl();
 630}
 631
 632/**
 633 *  igb_get_i2c_clk - Gets the I2C SCL clock state
 634 *  @data: pointer to hardware structure
 635 *
 636 *  Gets the I2C clock state
 637 **/
 638static int igb_get_i2c_clk(void *data)
 639{
 640        struct igb_adapter *adapter = (struct igb_adapter *)data;
 641        struct e1000_hw *hw = &adapter->hw;
 642        s32 i2cctl = rd32(E1000_I2CPARAMS);
 643
 644        return !!(i2cctl & E1000_I2C_CLK_IN);
 645}
 646
 647static const struct i2c_algo_bit_data igb_i2c_algo = {
 648        .setsda         = igb_set_i2c_data,
 649        .setscl         = igb_set_i2c_clk,
 650        .getsda         = igb_get_i2c_data,
 651        .getscl         = igb_get_i2c_clk,
 652        .udelay         = 5,
 653        .timeout        = 20,
 654};
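     /* With the i2c-algo-bit layer, .udelay is half the SCL period in
      * microseconds (so udelay = 5 gives roughly a 100 kHz bus clock) and
      * .timeout is expressed in jiffies.
      */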
 655
 656/**
 657 *  igb_get_hw_dev - return device
 658 *  @hw: pointer to hardware structure
 659 *
 660 *  used by hardware layer to print debugging information
 661 **/
 662struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
 663{
 664        struct igb_adapter *adapter = hw->back;
 665        return adapter->netdev;
 666}
 667
 668/**
 669 *  igb_init_module - Driver Registration Routine
 670 *
 671 *  igb_init_module is the first routine called when the driver is
 672 *  loaded. All it does is register with the PCI subsystem.
 673 **/
 674static int __init igb_init_module(void)
 675{
 676        int ret;
 677
 678        pr_info("%s - version %s\n",
 679               igb_driver_string, igb_driver_version);
 680        pr_info("%s\n", igb_copyright);
 681
 682#ifdef CONFIG_IGB_DCA
 683        dca_register_notify(&dca_notifier);
 684#endif
 685        ret = pci_register_driver(&igb_driver);
 686        return ret;
 687}
 688
 689module_init(igb_init_module);
 690
 691/**
 692 *  igb_exit_module - Driver Exit Cleanup Routine
 693 *
 694 *  igb_exit_module is called just before the driver is removed
 695 *  from memory.
 696 **/
 697static void __exit igb_exit_module(void)
 698{
 699#ifdef CONFIG_IGB_DCA
 700        dca_unregister_notify(&dca_notifier);
 701#endif
 702        pci_unregister_driver(&igb_driver);
 703}
 704
 705module_exit(igb_exit_module);
 706
 707#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
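     /* Q_IDX_82576() maps i = 0, 1, 2, 3, 4, 5, ... to 0, 8, 1, 9, 2, 10, ...
      * so that, combined with the vfs_allocated_count offset applied below,
      * the PF queues land in the slots not already claimed by the VF queue
      * pairs.
      */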
 708/**
 709 *  igb_cache_ring_register - Descriptor ring to register mapping
 710 *  @adapter: board private structure to initialize
 711 *
 712 *  Once we know the feature-set enabled for the device, we'll cache
 713 *  the register offset the descriptor ring is assigned to.
 714 **/
 715static void igb_cache_ring_register(struct igb_adapter *adapter)
 716{
 717        int i = 0, j = 0;
 718        u32 rbase_offset = adapter->vfs_allocated_count;
 719
 720        switch (adapter->hw.mac.type) {
 721        case e1000_82576:
 722                /* The queues are allocated for virtualization such that VF 0
 723                 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
 724                 * In order to avoid collision we start at the first free queue
 725                 * and continue consuming queues in the same sequence
 726                 */
 727                if (adapter->vfs_allocated_count) {
 728                        for (; i < adapter->rss_queues; i++)
 729                                adapter->rx_ring[i]->reg_idx = rbase_offset +
 730                                                               Q_IDX_82576(i);
 731                }
 732                /* Fall through */
 733        case e1000_82575:
 734        case e1000_82580:
 735        case e1000_i350:
 736        case e1000_i354:
 737        case e1000_i210:
 738        case e1000_i211:
 739                /* Fall through */
 740        default:
 741                for (; i < adapter->num_rx_queues; i++)
 742                        adapter->rx_ring[i]->reg_idx = rbase_offset + i;
 743                for (; j < adapter->num_tx_queues; j++)
 744                        adapter->tx_ring[j]->reg_idx = rbase_offset + j;
 745                break;
 746        }
 747}
 748
 749u32 igb_rd32(struct e1000_hw *hw, u32 reg)
 750{
 751        struct igb_adapter *igb = container_of(hw, struct igb_adapter, hw);
 752        u8 __iomem *hw_addr = ACCESS_ONCE(hw->hw_addr);
 753        u32 value = 0;
 754
 755        if (E1000_REMOVED(hw_addr))
 756                return ~value;
 757
 758        value = readl(&hw_addr[reg]);
 759
 760        /* reads should not return all F's */
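             /* On PCIe, a read from a surprise-removed device typically
              * completes with all ones.  CTRL (offset 0) should never read as
              * all ones on live hardware, so re-reading offset 0 distinguishes
              * a legitimate 0xFFFFFFFF register value from a lost adapter.
              */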
 761        if (!(~value) && (!reg || !(~readl(hw_addr)))) {
 762                struct net_device *netdev = igb->netdev;
 763                hw->hw_addr = NULL;
 764                netif_device_detach(netdev);
 765                netdev_err(netdev, "PCIe link lost, device now detached\n");
 766        }
 767
 768        return value;
 769}
 770
 771/**
 772 *  igb_write_ivar - configure ivar for given MSI-X vector
 773 *  @hw: pointer to the HW structure
 774 *  @msix_vector: vector number we are allocating to a given ring
 775 *  @index: row index of IVAR register to write within IVAR table
 776 *  @offset: column offset of in IVAR, should be multiple of 8
 777 *
 778 *  This function is intended to handle the writing of the IVAR register
 779 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
  780 *  each containing a cause allocation for an Rx and Tx ring, and a
 781 *  variable number of rows depending on the number of queues supported.
 782 **/
 783static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
 784                           int index, int offset)
 785{
 786        u32 ivar = array_rd32(E1000_IVAR0, index);
 787
 788        /* clear any bits that are currently set */
 789        ivar &= ~((u32)0xFF << offset);
 790
 791        /* write vector and valid bit */
 792        ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
 793
 794        array_wr32(E1000_IVAR0, index, ivar);
 795}
 796
 797#define IGB_N0_QUEUE -1
 798static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
 799{
 800        struct igb_adapter *adapter = q_vector->adapter;
 801        struct e1000_hw *hw = &adapter->hw;
 802        int rx_queue = IGB_N0_QUEUE;
 803        int tx_queue = IGB_N0_QUEUE;
 804        u32 msixbm = 0;
 805
 806        if (q_vector->rx.ring)
 807                rx_queue = q_vector->rx.ring->reg_idx;
 808        if (q_vector->tx.ring)
 809                tx_queue = q_vector->tx.ring->reg_idx;
 810
 811        switch (hw->mac.type) {
 812        case e1000_82575:
 813                /* The 82575 assigns vectors using a bitmask, which matches the
 814                 * bitmask for the EICR/EIMS/EIMC registers.  To assign one
 815                 * or more queues to a vector, we write the appropriate bits
 816                 * into the MSIXBM register for that vector.
 817                 */
 818                if (rx_queue > IGB_N0_QUEUE)
 819                        msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
 820                if (tx_queue > IGB_N0_QUEUE)
 821                        msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
 822                if (!(adapter->flags & IGB_FLAG_HAS_MSIX) && msix_vector == 0)
 823                        msixbm |= E1000_EIMS_OTHER;
 824                array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
 825                q_vector->eims_value = msixbm;
 826                break;
 827        case e1000_82576:
 828                /* 82576 uses a table that essentially consists of 2 columns
 829                 * with 8 rows.  The ordering is column-major so we use the
 830                 * lower 3 bits as the row index, and the 4th bit as the
 831                 * column offset.
 832                 */
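                     /* For example, Rx queue 10 uses IVAR row 10 & 0x7 = 2 at
                      * bit offset (10 & 0x8) << 1 = 16; the corresponding Tx
                      * queue uses the same row at offset 16 + 8 = 24.
                      */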
 833                if (rx_queue > IGB_N0_QUEUE)
 834                        igb_write_ivar(hw, msix_vector,
 835                                       rx_queue & 0x7,
 836                                       (rx_queue & 0x8) << 1);
 837                if (tx_queue > IGB_N0_QUEUE)
 838                        igb_write_ivar(hw, msix_vector,
 839                                       tx_queue & 0x7,
 840                                       ((tx_queue & 0x8) << 1) + 8);
 841                q_vector->eims_value = BIT(msix_vector);
 842                break;
 843        case e1000_82580:
 844        case e1000_i350:
 845        case e1000_i354:
 846        case e1000_i210:
 847        case e1000_i211:
 848                /* On 82580 and newer adapters the scheme is similar to 82576
 849                 * however instead of ordering column-major we have things
 850                 * ordered row-major.  So we traverse the table by using
 851                 * bit 0 as the column offset, and the remaining bits as the
 852                 * row index.
 853                 */
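                     /* For example, Rx queue 5 uses IVAR row 5 >> 1 = 2 at bit
                      * offset (5 & 1) << 4 = 16, and Tx queue 5 uses the same
                      * row at offset 16 + 8 = 24.
                      */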
 854                if (rx_queue > IGB_N0_QUEUE)
 855                        igb_write_ivar(hw, msix_vector,
 856                                       rx_queue >> 1,
 857                                       (rx_queue & 0x1) << 4);
 858                if (tx_queue > IGB_N0_QUEUE)
 859                        igb_write_ivar(hw, msix_vector,
 860                                       tx_queue >> 1,
 861                                       ((tx_queue & 0x1) << 4) + 8);
 862                q_vector->eims_value = BIT(msix_vector);
 863                break;
 864        default:
 865                BUG();
 866                break;
 867        }
 868
 869        /* add q_vector eims value to global eims_enable_mask */
 870        adapter->eims_enable_mask |= q_vector->eims_value;
 871
 872        /* configure q_vector to set itr on first interrupt */
 873        q_vector->set_itr = 1;
 874}
 875
 876/**
 877 *  igb_configure_msix - Configure MSI-X hardware
 878 *  @adapter: board private structure to initialize
 879 *
 880 *  igb_configure_msix sets up the hardware to properly
 881 *  generate MSI-X interrupts.
 882 **/
 883static void igb_configure_msix(struct igb_adapter *adapter)
 884{
 885        u32 tmp;
 886        int i, vector = 0;
 887        struct e1000_hw *hw = &adapter->hw;
 888
 889        adapter->eims_enable_mask = 0;
 890
 891        /* set vector for other causes, i.e. link changes */
 892        switch (hw->mac.type) {
 893        case e1000_82575:
 894                tmp = rd32(E1000_CTRL_EXT);
  895                /* enable MSI-X PBA support */
 896                tmp |= E1000_CTRL_EXT_PBA_CLR;
 897
 898                /* Auto-Mask interrupts upon ICR read. */
 899                tmp |= E1000_CTRL_EXT_EIAME;
 900                tmp |= E1000_CTRL_EXT_IRCA;
 901
 902                wr32(E1000_CTRL_EXT, tmp);
 903
 904                /* enable msix_other interrupt */
 905                array_wr32(E1000_MSIXBM(0), vector++, E1000_EIMS_OTHER);
 906                adapter->eims_other = E1000_EIMS_OTHER;
 907
 908                break;
 909
 910        case e1000_82576:
 911        case e1000_82580:
 912        case e1000_i350:
 913        case e1000_i354:
 914        case e1000_i210:
 915        case e1000_i211:
 916                /* Turn on MSI-X capability first, or our settings
 917                 * won't stick.  And it will take days to debug.
 918                 */
 919                wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
 920                     E1000_GPIE_PBA | E1000_GPIE_EIAME |
 921                     E1000_GPIE_NSICR);
 922
 923                /* enable msix_other interrupt */
 924                adapter->eims_other = BIT(vector);
 925                tmp = (vector++ | E1000_IVAR_VALID) << 8;
 926
 927                wr32(E1000_IVAR_MISC, tmp);
 928                break;
 929        default:
 930                /* do nothing, since nothing else supports MSI-X */
 931                break;
 932        } /* switch (hw->mac.type) */
 933
 934        adapter->eims_enable_mask |= adapter->eims_other;
 935
 936        for (i = 0; i < adapter->num_q_vectors; i++)
 937                igb_assign_vector(adapter->q_vector[i], vector++);
 938
 939        wrfl();
 940}
 941
 942/**
 943 *  igb_request_msix - Initialize MSI-X interrupts
 944 *  @adapter: board private structure to initialize
 945 *
 946 *  igb_request_msix allocates MSI-X vectors and requests interrupts from the
 947 *  kernel.
 948 **/
 949static int igb_request_msix(struct igb_adapter *adapter)
 950{
 951        struct net_device *netdev = adapter->netdev;
 952        int i, err = 0, vector = 0, free_vector = 0;
 953
 954        err = request_irq(adapter->msix_entries[vector].vector,
 955                          igb_msix_other, 0, netdev->name, adapter);
 956        if (err)
 957                goto err_out;
 958
 959        for (i = 0; i < adapter->num_q_vectors; i++) {
 960                struct igb_q_vector *q_vector = adapter->q_vector[i];
 961
 962                vector++;
 963
 964                q_vector->itr_register = adapter->io_addr + E1000_EITR(vector);
 965
 966                if (q_vector->rx.ring && q_vector->tx.ring)
 967                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
 968                                q_vector->rx.ring->queue_index);
 969                else if (q_vector->tx.ring)
 970                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
 971                                q_vector->tx.ring->queue_index);
 972                else if (q_vector->rx.ring)
 973                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
 974                                q_vector->rx.ring->queue_index);
 975                else
 976                        sprintf(q_vector->name, "%s-unused", netdev->name);
 977
 978                err = request_irq(adapter->msix_entries[vector].vector,
 979                                  igb_msix_ring, 0, q_vector->name,
 980                                  q_vector);
 981                if (err)
 982                        goto err_free;
 983        }
 984
 985        igb_configure_msix(adapter);
 986        return 0;
 987
 988err_free:
 989        /* free already assigned IRQs */
 990        free_irq(adapter->msix_entries[free_vector++].vector, adapter);
 991
 992        vector--;
 993        for (i = 0; i < vector; i++) {
 994                free_irq(adapter->msix_entries[free_vector++].vector,
 995                         adapter->q_vector[i]);
 996        }
 997err_out:
 998        return err;
 999}
1000
1001/**
1002 *  igb_free_q_vector - Free memory allocated for specific interrupt vector
1003 *  @adapter: board private structure to initialize
1004 *  @v_idx: Index of vector to be freed
1005 *
1006 *  This function frees the memory allocated to the q_vector.
1007 **/
1008static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx)
1009{
1010        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1011
1012        adapter->q_vector[v_idx] = NULL;
1013
1014        /* igb_get_stats64() might access the rings on this vector,
1015         * we must wait a grace period before freeing it.
1016         */
1017        if (q_vector)
1018                kfree_rcu(q_vector, rcu);
1019}
1020
1021/**
1022 *  igb_reset_q_vector - Reset config for interrupt vector
1023 *  @adapter: board private structure to initialize
1024 *  @v_idx: Index of vector to be reset
1025 *
1026 *  If NAPI is enabled it will delete any references to the
1027 *  NAPI struct. This is preparation for igb_free_q_vector.
1028 **/
1029static void igb_reset_q_vector(struct igb_adapter *adapter, int v_idx)
1030{
1031        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1032
1033        /* Coming from igb_set_interrupt_capability, the vectors are not yet
 1034         * allocated, so q_vector is NULL and we should stop here.
1035         */
1036        if (!q_vector)
1037                return;
1038
1039        if (q_vector->tx.ring)
1040                adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
1041
1042        if (q_vector->rx.ring)
1043                adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
1044
1045        netif_napi_del(&q_vector->napi);
1046
1047}
1048
1049static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
1050{
1051        int v_idx = adapter->num_q_vectors;
1052
1053        if (adapter->flags & IGB_FLAG_HAS_MSIX)
1054                pci_disable_msix(adapter->pdev);
1055        else if (adapter->flags & IGB_FLAG_HAS_MSI)
1056                pci_disable_msi(adapter->pdev);
1057
1058        while (v_idx--)
1059                igb_reset_q_vector(adapter, v_idx);
1060}
1061
1062/**
1063 *  igb_free_q_vectors - Free memory allocated for interrupt vectors
1064 *  @adapter: board private structure to initialize
1065 *
1066 *  This function frees the memory allocated to the q_vectors.  In addition if
1067 *  NAPI is enabled it will delete any references to the NAPI struct prior
1068 *  to freeing the q_vector.
1069 **/
1070static void igb_free_q_vectors(struct igb_adapter *adapter)
1071{
1072        int v_idx = adapter->num_q_vectors;
1073
1074        adapter->num_tx_queues = 0;
1075        adapter->num_rx_queues = 0;
1076        adapter->num_q_vectors = 0;
1077
1078        while (v_idx--) {
1079                igb_reset_q_vector(adapter, v_idx);
1080                igb_free_q_vector(adapter, v_idx);
1081        }
1082}
1083
1084/**
1085 *  igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1086 *  @adapter: board private structure to initialize
1087 *
1088 *  This function resets the device so that it has 0 Rx queues, Tx queues, and
1089 *  MSI-X interrupts allocated.
1090 */
1091static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1092{
1093        igb_free_q_vectors(adapter);
1094        igb_reset_interrupt_capability(adapter);
1095}
1096
1097/**
1098 *  igb_set_interrupt_capability - set MSI or MSI-X if supported
1099 *  @adapter: board private structure to initialize
1100 *  @msix: boolean value of MSIX capability
1101 *
1102 *  Attempt to configure interrupts using the best available
1103 *  capabilities of the hardware and kernel.
1104 **/
1105static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix)
1106{
1107        int err;
1108        int numvecs, i;
1109
1110        if (!msix)
1111                goto msi_only;
1112        adapter->flags |= IGB_FLAG_HAS_MSIX;
1113
1114        /* Number of supported queues. */
1115        adapter->num_rx_queues = adapter->rss_queues;
1116        if (adapter->vfs_allocated_count)
1117                adapter->num_tx_queues = 1;
1118        else
1119                adapter->num_tx_queues = adapter->rss_queues;
1120
1121        /* start with one vector for every Rx queue */
1122        numvecs = adapter->num_rx_queues;
1123
1124        /* if Tx handler is separate add 1 for every Tx queue */
1125        if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1126                numvecs += adapter->num_tx_queues;
1127
1128        /* store the number of vectors reserved for queues */
1129        adapter->num_q_vectors = numvecs;
1130
1131        /* add 1 vector for link status interrupts */
1132        numvecs++;
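             /* Example: with 4 RSS queues and queue pairing in effect this
              * requests 4 + 1 = 5 vectors (four shared Tx/Rx vectors plus the
              * "other" vector); with pairing disabled it would be
              * 4 Rx + 4 Tx + 1 = 9.
              */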
1133        for (i = 0; i < numvecs; i++)
1134                adapter->msix_entries[i].entry = i;
1135
1136        err = pci_enable_msix_range(adapter->pdev,
1137                                    adapter->msix_entries,
1138                                    numvecs,
1139                                    numvecs);
1140        if (err > 0)
1141                return;
1142
1143        igb_reset_interrupt_capability(adapter);
1144
1145        /* If we can't do MSI-X, try MSI */
1146msi_only:
1147        adapter->flags &= ~IGB_FLAG_HAS_MSIX;
1148#ifdef CONFIG_PCI_IOV
1149        /* disable SR-IOV for non MSI-X configurations */
1150        if (adapter->vf_data) {
1151                struct e1000_hw *hw = &adapter->hw;
1152                /* disable iov and allow time for transactions to clear */
1153                pci_disable_sriov(adapter->pdev);
1154                msleep(500);
1155
1156                kfree(adapter->vf_data);
1157                adapter->vf_data = NULL;
1158                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1159                wrfl();
1160                msleep(100);
1161                dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1162        }
1163#endif
1164        adapter->vfs_allocated_count = 0;
1165        adapter->rss_queues = 1;
1166        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1167        adapter->num_rx_queues = 1;
1168        adapter->num_tx_queues = 1;
1169        adapter->num_q_vectors = 1;
1170        if (!pci_enable_msi(adapter->pdev))
1171                adapter->flags |= IGB_FLAG_HAS_MSI;
1172}
1173
1174static void igb_add_ring(struct igb_ring *ring,
1175                         struct igb_ring_container *head)
1176{
1177        head->ring = ring;
1178        head->count++;
1179}
1180
1181/**
1182 *  igb_alloc_q_vector - Allocate memory for a single interrupt vector
1183 *  @adapter: board private structure to initialize
1184 *  @v_count: q_vectors allocated on adapter, used for ring interleaving
1185 *  @v_idx: index of vector in adapter struct
1186 *  @txr_count: total number of Tx rings to allocate
1187 *  @txr_idx: index of first Tx ring to allocate
1188 *  @rxr_count: total number of Rx rings to allocate
1189 *  @rxr_idx: index of first Rx ring to allocate
1190 *
1191 *  We allocate one q_vector.  If allocation fails we return -ENOMEM.
1192 **/
1193static int igb_alloc_q_vector(struct igb_adapter *adapter,
1194                              int v_count, int v_idx,
1195                              int txr_count, int txr_idx,
1196                              int rxr_count, int rxr_idx)
1197{
1198        struct igb_q_vector *q_vector;
1199        struct igb_ring *ring;
1200        int ring_count, size;
1201
1202        /* igb only supports 1 Tx and/or 1 Rx queue per vector */
1203        if (txr_count > 1 || rxr_count > 1)
1204                return -ENOMEM;
1205
1206        ring_count = txr_count + rxr_count;
1207        size = sizeof(struct igb_q_vector) +
1208               (sizeof(struct igb_ring) * ring_count);
1209
1210        /* allocate q_vector and rings */
1211        q_vector = adapter->q_vector[v_idx];
1212        if (!q_vector) {
1213                q_vector = kzalloc(size, GFP_KERNEL);
1214        } else if (size > ksize(q_vector)) {
1215                kfree_rcu(q_vector, rcu);
1216                q_vector = kzalloc(size, GFP_KERNEL);
1217        } else {
1218                memset(q_vector, 0, size);
1219        }
1220        if (!q_vector)
1221                return -ENOMEM;
1222
1223        /* initialize NAPI */
1224        netif_napi_add(adapter->netdev, &q_vector->napi,
1225                       igb_poll, 64);
1226
1227        /* tie q_vector and adapter together */
1228        adapter->q_vector[v_idx] = q_vector;
1229        q_vector->adapter = adapter;
1230
1231        /* initialize work limits */
1232        q_vector->tx.work_limit = adapter->tx_work_limit;
1233
1234        /* initialize ITR configuration */
1235        q_vector->itr_register = adapter->io_addr + E1000_EITR(0);
1236        q_vector->itr_val = IGB_START_ITR;
1237
1238        /* initialize pointer to rings */
1239        ring = q_vector->ring;
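             /* The rings live in the same allocation, immediately after the
              * q_vector itself (see the size calculation above); when both a
              * Tx and an Rx ring are present, the ring++ below advances to
              * the second embedded ring.
              */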
1240
 1241        /* initialize ITR */
1242        if (rxr_count) {
1243                /* rx or rx/tx vector */
1244                if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
1245                        q_vector->itr_val = adapter->rx_itr_setting;
1246        } else {
1247                /* tx only vector */
1248                if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
1249                        q_vector->itr_val = adapter->tx_itr_setting;
1250        }
1251
1252        if (txr_count) {
1253                /* assign generic ring traits */
1254                ring->dev = &adapter->pdev->dev;
1255                ring->netdev = adapter->netdev;
1256
1257                /* configure backlink on ring */
1258                ring->q_vector = q_vector;
1259
1260                /* update q_vector Tx values */
1261                igb_add_ring(ring, &q_vector->tx);
1262
1263                /* For 82575, context index must be unique per ring. */
1264                if (adapter->hw.mac.type == e1000_82575)
1265                        set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
1266
1267                /* apply Tx specific ring traits */
1268                ring->count = adapter->tx_ring_count;
1269                ring->queue_index = txr_idx;
1270
1271                u64_stats_init(&ring->tx_syncp);
1272                u64_stats_init(&ring->tx_syncp2);
1273
1274                /* assign ring to adapter */
1275                adapter->tx_ring[txr_idx] = ring;
1276
1277                /* push pointer to next ring */
1278                ring++;
1279        }
1280
1281        if (rxr_count) {
1282                /* assign generic ring traits */
1283                ring->dev = &adapter->pdev->dev;
1284                ring->netdev = adapter->netdev;
1285
1286                /* configure backlink on ring */
1287                ring->q_vector = q_vector;
1288
1289                /* update q_vector Rx values */
1290                igb_add_ring(ring, &q_vector->rx);
1291
1292                /* set flag indicating ring supports SCTP checksum offload */
1293                if (adapter->hw.mac.type >= e1000_82576)
1294                        set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
1295
1296                /* On i350, i354, i210, and i211, loopback VLAN packets
1297                 * have the tag byte-swapped.
1298                 */
1299                if (adapter->hw.mac.type >= e1000_i350)
1300                        set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
1301
1302                /* apply Rx specific ring traits */
1303                ring->count = adapter->rx_ring_count;
1304                ring->queue_index = rxr_idx;
1305
1306                u64_stats_init(&ring->rx_syncp);
1307
1308                /* assign ring to adapter */
1309                adapter->rx_ring[rxr_idx] = ring;
1310        }
1311
1312        return 0;
1313}
1314
1315
1316/**
1317 *  igb_alloc_q_vectors - Allocate memory for interrupt vectors
1318 *  @adapter: board private structure to initialize
1319 *
1320 *  We allocate one q_vector per queue interrupt.  If allocation fails we
1321 *  return -ENOMEM.
1322 **/
1323static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1324{
1325        int q_vectors = adapter->num_q_vectors;
1326        int rxr_remaining = adapter->num_rx_queues;
1327        int txr_remaining = adapter->num_tx_queues;
1328        int rxr_idx = 0, txr_idx = 0, v_idx = 0;
1329        int err;
1330
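             /* Example: with 8 Rx and 8 Tx rings on 8 vectors the first loop
              * is skipped (8 < 16) and each vector gets one Tx and one Rx
              * ring (rqpv = tqpv = 1); with 16 vectors the first loop hands
              * out Rx-only vectors and the second loop Tx-only ones.
              */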
1331        if (q_vectors >= (rxr_remaining + txr_remaining)) {
1332                for (; rxr_remaining; v_idx++) {
1333                        err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
1334                                                 0, 0, 1, rxr_idx);
1335
1336                        if (err)
1337                                goto err_out;
1338
1339                        /* update counts and index */
1340                        rxr_remaining--;
1341                        rxr_idx++;
1342                }
1343        }
1344
1345        for (; v_idx < q_vectors; v_idx++) {
1346                int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
1347                int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
1348
1349                err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
1350                                         tqpv, txr_idx, rqpv, rxr_idx);
1351
1352                if (err)
1353                        goto err_out;
1354
1355                /* update counts and index */
1356                rxr_remaining -= rqpv;
1357                txr_remaining -= tqpv;
1358                rxr_idx++;
1359                txr_idx++;
1360        }
1361
1362        return 0;
1363
1364err_out:
1365        adapter->num_tx_queues = 0;
1366        adapter->num_rx_queues = 0;
1367        adapter->num_q_vectors = 0;
1368
1369        while (v_idx--)
1370                igb_free_q_vector(adapter, v_idx);
1371
1372        return -ENOMEM;
1373}
1374
1375/**
1376 *  igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1377 *  @adapter: board private structure to initialize
1378 *  @msix: boolean value of MSIX capability
1379 *
1380 *  This function initializes the interrupts and allocates all of the queues.
1381 **/
1382static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix)
1383{
1384        struct pci_dev *pdev = adapter->pdev;
1385        int err;
1386
1387        igb_set_interrupt_capability(adapter, msix);
1388
1389        err = igb_alloc_q_vectors(adapter);
1390        if (err) {
1391                dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1392                goto err_alloc_q_vectors;
1393        }
1394
1395        igb_cache_ring_register(adapter);
1396
1397        return 0;
1398
1399err_alloc_q_vectors:
1400        igb_reset_interrupt_capability(adapter);
1401        return err;
1402}
1403
1404/**
1405 *  igb_request_irq - initialize interrupts
1406 *  @adapter: board private structure to initialize
1407 *
1408 *  Attempts to configure interrupts using the best available
1409 *  capabilities of the hardware and kernel.
1410 **/
1411static int igb_request_irq(struct igb_adapter *adapter)
1412{
1413        struct net_device *netdev = adapter->netdev;
1414        struct pci_dev *pdev = adapter->pdev;
1415        int err = 0;
1416
1417        if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1418                err = igb_request_msix(adapter);
1419                if (!err)
1420                        goto request_done;
1421                /* fall back to MSI */
1422                igb_free_all_tx_resources(adapter);
1423                igb_free_all_rx_resources(adapter);
1424
1425                igb_clear_interrupt_scheme(adapter);
1426                err = igb_init_interrupt_scheme(adapter, false);
1427                if (err)
1428                        goto request_done;
1429
1430                igb_setup_all_tx_resources(adapter);
1431                igb_setup_all_rx_resources(adapter);
1432                igb_configure(adapter);
1433        }
1434
1435        igb_assign_vector(adapter->q_vector[0], 0);
1436
1437        if (adapter->flags & IGB_FLAG_HAS_MSI) {
1438                err = request_irq(pdev->irq, igb_intr_msi, 0,
1439                                  netdev->name, adapter);
1440                if (!err)
1441                        goto request_done;
1442
1443                /* fall back to legacy interrupts */
1444                igb_reset_interrupt_capability(adapter);
1445                adapter->flags &= ~IGB_FLAG_HAS_MSI;
1446        }
1447
1448        err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1449                          netdev->name, adapter);
1450
1451        if (err)
1452                dev_err(&pdev->dev, "Error %d getting interrupt\n",
1453                        err);
1454
1455request_done:
1456        return err;
1457}
1458
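/**
 *  igb_free_irq - release the interrupt lines requested by igb_request_irq
 *  @adapter: board private structure
 *
 *  Frees the "other" MSI-X vector and one vector per q_vector when MSI-X
 *  is in use, otherwise frees the single MSI or legacy interrupt.
 **/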
1459static void igb_free_irq(struct igb_adapter *adapter)
1460{
1461        if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1462                int vector = 0, i;
1463
1464                free_irq(adapter->msix_entries[vector++].vector, adapter);
1465
1466                for (i = 0; i < adapter->num_q_vectors; i++)
1467                        free_irq(adapter->msix_entries[vector++].vector,
1468                                 adapter->q_vector[i]);
1469        } else {
1470                free_irq(adapter->pdev->irq, adapter);
1471        }
1472}
1473
1474/**
1475 *  igb_irq_disable - Mask off interrupt generation on the NIC
1476 *  @adapter: board private structure
1477 **/
1478static void igb_irq_disable(struct igb_adapter *adapter)
1479{
1480        struct e1000_hw *hw = &adapter->hw;
1481
1482        /* we need to be careful when disabling interrupts.  The VFs are also
1483         * mapped into these registers and so clearing the bits can cause
1484         * issues for the VF drivers, so we only clear the bits we set
1485         */
1486        if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1487                u32 regval = rd32(E1000_EIAM);
1488
1489                wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1490                wr32(E1000_EIMC, adapter->eims_enable_mask);
1491                regval = rd32(E1000_EIAC);
1492                wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1493        }
1494
1495        wr32(E1000_IAM, 0);
1496        wr32(E1000_IMC, ~0);
1497        wrfl();
1498        if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1499                int i;
1500
1501                for (i = 0; i < adapter->num_q_vectors; i++)
1502                        synchronize_irq(adapter->msix_entries[i].vector);
1503        } else {
1504                synchronize_irq(adapter->pdev->irq);
1505        }
1506}
1507
1508/**
1509 *  igb_irq_enable - Enable default interrupt generation settings
1510 *  @adapter: board private structure
1511 **/
1512static void igb_irq_enable(struct igb_adapter *adapter)
1513{
1514        struct e1000_hw *hw = &adapter->hw;
1515
1516        if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1517                u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1518                u32 regval = rd32(E1000_EIAC);
1519
1520                wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1521                regval = rd32(E1000_EIAM);
1522                wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1523                wr32(E1000_EIMS, adapter->eims_enable_mask);
1524                if (adapter->vfs_allocated_count) {
1525                        wr32(E1000_MBVFIMR, 0xFF);
1526                        ims |= E1000_IMS_VMMB;
1527                }
1528                wr32(E1000_IMS, ims);
1529        } else {
1530                wr32(E1000_IMS, IMS_ENABLE_MASK |
1531                                E1000_IMS_DRSTA);
1532                wr32(E1000_IAM, IMS_ENABLE_MASK |
1533                                E1000_IMS_DRSTA);
1534        }
1535}
1536
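/**
 *  igb_update_mng_vlan - update the manageability VLAN filter entry
 *  @adapter: board private structure
 *
 *  Adds the VLAN id carried in the manageability (DHCP) cookie to the VLAN
 *  filter table and removes the previously tracked id once it is no longer
 *  in use.
 **/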
1537static void igb_update_mng_vlan(struct igb_adapter *adapter)
1538{
1539        struct e1000_hw *hw = &adapter->hw;
1540        u16 pf_id = adapter->vfs_allocated_count;
1541        u16 vid = adapter->hw.mng_cookie.vlan_id;
1542        u16 old_vid = adapter->mng_vlan_id;
1543
1544        if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1545                /* add VID to filter table */
1546                igb_vfta_set(hw, vid, pf_id, true, true);
1547                adapter->mng_vlan_id = vid;
1548        } else {
1549                adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1550        }
1551
1552        if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1553            (vid != old_vid) &&
1554            !test_bit(old_vid, adapter->active_vlans)) {
1555                /* remove VID from filter table */
1556                igb_vfta_set(hw, vid, pf_id, false, true);
1557        }
1558}
1559
1560/**
1561 *  igb_release_hw_control - release control of the h/w to f/w
1562 *  @adapter: address of board private structure
1563 *
1564 *  igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1565 *  For ASF and Pass Through versions of f/w this means that the
1566 *  driver is no longer loaded.
1567 **/
1568static void igb_release_hw_control(struct igb_adapter *adapter)
1569{
1570        struct e1000_hw *hw = &adapter->hw;
1571        u32 ctrl_ext;
1572
1573        /* Let firmware take over control of h/w */
1574        ctrl_ext = rd32(E1000_CTRL_EXT);
1575        wr32(E1000_CTRL_EXT,
1576                        ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1577}
1578
1579/**
1580 *  igb_get_hw_control - get control of the h/w from f/w
1581 *  @adapter: address of board private structure
1582 *
1583 *  igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1584 *  For ASF and Pass Through versions of f/w this means that
1585 *  the driver is loaded.
1586 **/
1587static void igb_get_hw_control(struct igb_adapter *adapter)
1588{
1589        struct e1000_hw *hw = &adapter->hw;
1590        u32 ctrl_ext;
1591
1592        /* Let firmware know the driver has taken over */
1593        ctrl_ext = rd32(E1000_CTRL_EXT);
1594        wr32(E1000_CTRL_EXT,
1595                        ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1596}
1597
1598/**
1599 *  igb_configure - configure the hardware for RX and TX
1600 *  @adapter: private board structure
1601 **/
1602static void igb_configure(struct igb_adapter *adapter)
1603{
1604        struct net_device *netdev = adapter->netdev;
1605        int i;
1606
1607        igb_get_hw_control(adapter);
1608        igb_set_rx_mode(netdev);
1609
1610        igb_restore_vlan(adapter);
1611
1612        igb_setup_tctl(adapter);
1613        igb_setup_mrqc(adapter);
1614        igb_setup_rctl(adapter);
1615
1616        igb_nfc_filter_restore(adapter);
1617        igb_configure_tx(adapter);
1618        igb_configure_rx(adapter);
1619
1620        igb_rx_fifo_flush_82575(&adapter->hw);
1621
1622        /* call igb_desc_unused which always leaves
1623         * at least 1 descriptor unused to make sure
1624         * next_to_use != next_to_clean
1625         */
1626        for (i = 0; i < adapter->num_rx_queues; i++) {
1627                struct igb_ring *ring = adapter->rx_ring[i];
1628                igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1629        }
1630}
1631
1632/**
1633 *  igb_power_up_link - Power up the phy/serdes link
1634 *  @adapter: address of board private structure
1635 **/
1636void igb_power_up_link(struct igb_adapter *adapter)
1637{
1638        igb_reset_phy(&adapter->hw);
1639
1640        if (adapter->hw.phy.media_type == e1000_media_type_copper)
1641                igb_power_up_phy_copper(&adapter->hw);
1642        else
1643                igb_power_up_serdes_link_82575(&adapter->hw);
1644
1645        igb_setup_link(&adapter->hw);
1646}
1647
1648/**
1649 *  igb_power_down_link - Power down the phy/serdes link
1650 *  @adapter: address of board private structure
1651 */
1652static void igb_power_down_link(struct igb_adapter *adapter)
1653{
1654        if (adapter->hw.phy.media_type == e1000_media_type_copper)
1655                igb_power_down_phy_copper_82575(&adapter->hw);
1656        else
1657                igb_shutdown_serdes_link_82575(&adapter->hw);
1658}
1659
1660/**
1661 *  igb_check_swap_media - Detect and switch function for Media Auto Sense
1662 *  @adapter: address of the board private structure
1663 **/
1664static void igb_check_swap_media(struct igb_adapter *adapter)
1665{
1666        struct e1000_hw *hw = &adapter->hw;
1667        u32 ctrl_ext, connsw;
1668        bool swap_now = false;
1669
1670        ctrl_ext = rd32(E1000_CTRL_EXT);
1671        connsw = rd32(E1000_CONNSW);
1672
1673        /* need to perform a live swap if the current media is copper and
1674         * we have fiber/serdes media to switch to
1675         */
1676
1677        if ((hw->phy.media_type == e1000_media_type_copper) &&
1678            (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) {
1679                swap_now = true;
1680        } else if (!(connsw & E1000_CONNSW_SERDESD)) {
1681                /* copper signal takes time to appear */
1682                if (adapter->copper_tries < 4) {
1683                        adapter->copper_tries++;
1684                        connsw |= E1000_CONNSW_AUTOSENSE_CONF;
1685                        wr32(E1000_CONNSW, connsw);
1686                        return;
1687                } else {
1688                        adapter->copper_tries = 0;
1689                        if ((connsw & E1000_CONNSW_PHYSD) &&
1690                            (!(connsw & E1000_CONNSW_PHY_PDN))) {
1691                                swap_now = true;
1692                                connsw &= ~E1000_CONNSW_AUTOSENSE_CONF;
1693                                wr32(E1000_CONNSW, connsw);
1694                        }
1695                }
1696        }
1697
1698        if (!swap_now)
1699                return;
1700
1701        switch (hw->phy.media_type) {
1702        case e1000_media_type_copper:
1703                netdev_info(adapter->netdev,
1704                        "MAS: changing media to fiber/serdes\n");
1705                ctrl_ext |=
1706                        E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
1707                adapter->flags |= IGB_FLAG_MEDIA_RESET;
1708                adapter->copper_tries = 0;
1709                break;
1710        case e1000_media_type_internal_serdes:
1711        case e1000_media_type_fiber:
1712                netdev_info(adapter->netdev,
1713                        "MAS: changing media to copper\n");
1714                ctrl_ext &=
1715                        ~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
1716                adapter->flags |= IGB_FLAG_MEDIA_RESET;
1717                break;
1718        default:
1719                /* shouldn't get here during regular operation */
1720                netdev_err(adapter->netdev,
1721                        "MAS: Invalid media type found, returning\n");
1722                break;
1723        }
1724        wr32(E1000_CTRL_EXT, ctrl_ext);
1725}
1726
1727/**
1728 *  igb_up - Open the interface and prepare it to handle traffic
1729 *  @adapter: board private structure
1730 **/
1731int igb_up(struct igb_adapter *adapter)
1732{
1733        struct e1000_hw *hw = &adapter->hw;
1734        int i;
1735
1736        /* hardware has been reset, we need to reload some things */
1737        igb_configure(adapter);
1738
1739        clear_bit(__IGB_DOWN, &adapter->state);
1740
1741        for (i = 0; i < adapter->num_q_vectors; i++)
1742                napi_enable(&(adapter->q_vector[i]->napi));
1743
1744        if (adapter->flags & IGB_FLAG_HAS_MSIX)
1745                igb_configure_msix(adapter);
1746        else
1747                igb_assign_vector(adapter->q_vector[0], 0);
1748
1749        /* Clear any pending interrupts. */
1750        rd32(E1000_ICR);
1751        igb_irq_enable(adapter);
1752
1753        /* notify VFs that reset has been completed */
1754        if (adapter->vfs_allocated_count) {
1755                u32 reg_data = rd32(E1000_CTRL_EXT);
1756
1757                reg_data |= E1000_CTRL_EXT_PFRSTD;
1758                wr32(E1000_CTRL_EXT, reg_data);
1759        }
1760
1761        netif_tx_start_all_queues(adapter->netdev);
1762
1763        /* start the watchdog. */
1764        hw->mac.get_link_status = 1;
1765        schedule_work(&adapter->watchdog_task);
1766
1767        if ((adapter->flags & IGB_FLAG_EEE) &&
1768            (!hw->dev_spec._82575.eee_disable))
1769                adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T;
1770
1771        return 0;
1772}
1773
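/**
 *  igb_down - Close the interface and stop traffic
 *  @adapter: board private structure
 *
 *  Disables receives and transmits in hardware, stops NAPI polling and
 *  interrupts, stops the watchdog and PHY timers, records the final
 *  statistics, resets the hardware and cleans all Tx/Rx rings.
 **/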
1774void igb_down(struct igb_adapter *adapter)
1775{
1776        struct net_device *netdev = adapter->netdev;
1777        struct e1000_hw *hw = &adapter->hw;
1778        u32 tctl, rctl;
1779        int i;
1780
1781        /* signal that we're down so the interrupt handler does not
1782         * reschedule our watchdog timer
1783         */
1784        set_bit(__IGB_DOWN, &adapter->state);
1785
1786        /* disable receives in the hardware */
1787        rctl = rd32(E1000_RCTL);
1788        wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1789        /* flush and sleep below */
1790
1791        netif_carrier_off(netdev);
1792        netif_tx_stop_all_queues(netdev);
1793
1794        /* disable transmits in the hardware */
1795        tctl = rd32(E1000_TCTL);
1796        tctl &= ~E1000_TCTL_EN;
1797        wr32(E1000_TCTL, tctl);
1798        /* flush both disables and wait for them to finish */
1799        wrfl();
1800        usleep_range(10000, 11000);
1801
1802        igb_irq_disable(adapter);
1803
1804        adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
1805
1806        for (i = 0; i < adapter->num_q_vectors; i++) {
1807                if (adapter->q_vector[i]) {
1808                        napi_synchronize(&adapter->q_vector[i]->napi);
1809                        napi_disable(&adapter->q_vector[i]->napi);
1810                }
1811        }
1812
1813        del_timer_sync(&adapter->watchdog_timer);
1814        del_timer_sync(&adapter->phy_info_timer);
1815
1816        /* record the stats before reset */
1817        spin_lock(&adapter->stats64_lock);
1818        igb_update_stats(adapter, &adapter->stats64);
1819        spin_unlock(&adapter->stats64_lock);
1820
1821        adapter->link_speed = 0;
1822        adapter->link_duplex = 0;
1823
1824        if (!pci_channel_offline(adapter->pdev))
1825                igb_reset(adapter);
1826
1827        /* clear VLAN promisc flag so VFTA will be updated if necessary */
1828        adapter->flags &= ~IGB_FLAG_VLAN_PROMISC;
1829
1830        igb_clean_all_tx_rings(adapter);
1831        igb_clean_all_rx_rings(adapter);
1832#ifdef CONFIG_IGB_DCA
1833
1834        /* since we reset the hardware DCA settings were cleared */
1835        igb_setup_dca(adapter);
1836#endif
1837}
1838
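/**
 *  igb_reinit_locked - re-initialize the device while holding the reset bit
 *  @adapter: board private structure
 *
 *  Waits for any reset already in progress, then brings the interface down
 *  and back up again.
 **/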
1839void igb_reinit_locked(struct igb_adapter *adapter)
1840{
1841        WARN_ON(in_interrupt());
1842        while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1843                usleep_range(1000, 2000);
1844        igb_down(adapter);
1845        igb_up(adapter);
1846        clear_bit(__IGB_RESETTING, &adapter->state);
1847}
1848
1849/**
1850 *  igb_enable_mas - Media Autosense re-enable after swap
1851 *  @adapter: adapter struct
1852 **/
1853static void igb_enable_mas(struct igb_adapter *adapter)
1854{
1855        struct e1000_hw *hw = &adapter->hw;
1856        u32 connsw = rd32(E1000_CONNSW);
1857
1858        /* configure for SerDes media detect */
1859        if ((hw->phy.media_type == e1000_media_type_copper) &&
1860            (!(connsw & E1000_CONNSW_SERDESD))) {
1861                connsw |= E1000_CONNSW_ENRGSRC;
1862                connsw |= E1000_CONNSW_AUTOSENSE_EN;
1863                wr32(E1000_CONNSW, connsw);
1864                wrfl();
1865        }
1866}
1867
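/**
 *  igb_reset - bring the hardware into a known good state
 *  @adapter: board private structure
 *
 *  Repartitions the packet buffer if needed, recalculates the flow control
 *  watermarks, resets and re-initializes the hardware, and restores settings
 *  such as DMA coalescing, EEE, the manageability VLAN and PTP.
 **/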
1868void igb_reset(struct igb_adapter *adapter)
1869{
1870        struct pci_dev *pdev = adapter->pdev;
1871        struct e1000_hw *hw = &adapter->hw;
1872        struct e1000_mac_info *mac = &hw->mac;
1873        struct e1000_fc_info *fc = &hw->fc;
1874        u32 pba, hwm;
1875
1876        /* Repartition PBA for MTUs greater than 9k.
1877         * CTRL.RST is required for this to take effect.
1878         */
1879        switch (mac->type) {
1880        case e1000_i350:
1881        case e1000_i354:
1882        case e1000_82580:
1883                pba = rd32(E1000_RXPBS);
1884                pba = igb_rxpbs_adjust_82580(pba);
1885                break;
1886        case e1000_82576:
1887                pba = rd32(E1000_RXPBS);
1888                pba &= E1000_RXPBS_SIZE_MASK_82576;
1889                break;
1890        case e1000_82575:
1891        case e1000_i210:
1892        case e1000_i211:
1893        default:
1894                pba = E1000_PBA_34K;
1895                break;
1896        }
1897
1898        if (mac->type == e1000_82575) {
1899                u32 min_rx_space, min_tx_space, needed_tx_space;
1900
1901                /* write Rx PBA so that hardware can report correct Tx PBA */
1902                wr32(E1000_PBA, pba);
1903
1904                /* To maintain wire speed transmits, the Tx FIFO should be
1905                 * large enough to accommodate two full transmit packets,
1906                 * rounded up to the next 1KB and expressed in KB.  Likewise,
1907                 * the Rx FIFO should be large enough to accommodate at least
1908                 * one full receive packet and is similarly rounded up and
1909                 * expressed in KB.
1910                 */
1911                min_rx_space = DIV_ROUND_UP(MAX_JUMBO_FRAME_SIZE, 1024);
1912
1913                /* The Tx FIFO also stores 16 bytes of information about the Tx
1914                 * but don't include Ethernet FCS because hardware appends it.
1915                 * We only need to round down to the nearest 512 byte block
1916                 * count since the value we care about is 2 frames, not 1.
1917                 */
1918                min_tx_space = adapter->max_frame_size;
1919                min_tx_space += sizeof(union e1000_adv_tx_desc) - ETH_FCS_LEN;
1920                min_tx_space = DIV_ROUND_UP(min_tx_space, 512);
1921
1922                /* upper 16 bits has Tx packet buffer allocation size in KB */
1923                needed_tx_space = min_tx_space - (rd32(E1000_PBA) >> 16);
1924
1925                /* If current Tx allocation is less than the min Tx FIFO size,
1926                 * and the min Tx FIFO size is less than the current Rx FIFO
1927                 * allocation, take space away from current Rx allocation.
1928                 */
1929                if (needed_tx_space < pba) {
1930                        pba -= needed_tx_space;
1931
1932                        /* if short on Rx space, Rx wins and must trump Tx
1933                         * adjustment
1934                         */
1935                        if (pba < min_rx_space)
1936                                pba = min_rx_space;
1937                }
1938
1939                /* adjust PBA for jumbo frames */
1940                wr32(E1000_PBA, pba);
1941        }
1942
1943        /* flow control settings
1944         * The high water mark must be low enough to fit one full frame
1945         * after transmitting the pause frame.  As such we must have enough
1946         * space to allow for us to complete our current transmit and then
1947         * receive the frame that is in progress from the link partner.
1948         * Set it to:
1949         * - the full Rx FIFO size minus one full Tx plus one full Rx frame
1950         */
1951        hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);
1952
1953        fc->high_water = hwm & 0xFFFFFFF0;      /* 16-byte granularity */
1954        fc->low_water = fc->high_water - 16;
1955        fc->pause_time = 0xFFFF;
1956        fc->send_xon = 1;
1957        fc->current_mode = fc->requested_mode;
1958
1959        /* disable receive for all VFs and wait one second */
1960        if (adapter->vfs_allocated_count) {
1961                int i;
1962
1963                for (i = 0; i < adapter->vfs_allocated_count; i++)
1964                        adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1965
1966                /* ping all the active vfs to let them know we are going down */
1967                igb_ping_all_vfs(adapter);
1968
1969                /* disable transmits and receives */
1970                wr32(E1000_VFRE, 0);
1971                wr32(E1000_VFTE, 0);
1972        }
1973
1974        /* Allow time for pending master requests to run */
1975        hw->mac.ops.reset_hw(hw);
1976        wr32(E1000_WUC, 0);
1977
1978        if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
1979                /* need to resetup here after media swap */
1980                adapter->ei.get_invariants(hw);
1981                adapter->flags &= ~IGB_FLAG_MEDIA_RESET;
1982        }
1983        if ((mac->type == e1000_82575) &&
1984            (adapter->flags & IGB_FLAG_MAS_ENABLE)) {
1985                igb_enable_mas(adapter);
1986        }
1987        if (hw->mac.ops.init_hw(hw))
1988                dev_err(&pdev->dev, "Hardware Error\n");
1989
1990        /* Flow control settings reset on hardware reset, so guarantee flow
1991         * control is off when forcing speed.
1992         */
1993        if (!hw->mac.autoneg)
1994                igb_force_mac_fc(hw);
1995
1996        igb_init_dmac(adapter, pba);
1997#ifdef CONFIG_IGB_HWMON
1998        /* Re-initialize the thermal sensor on i350 devices. */
1999        if (!test_bit(__IGB_DOWN, &adapter->state)) {
2000                if (mac->type == e1000_i350 && hw->bus.func == 0) {
2001                        /* If present, re-initialize the external thermal sensor
2002                         * interface.
2003                         */
2004                        if (adapter->ets)
2005                                mac->ops.init_thermal_sensor_thresh(hw);
2006                }
2007        }
2008#endif
2009        /* Re-establish EEE setting */
2010        if (hw->phy.media_type == e1000_media_type_copper) {
2011                switch (mac->type) {
2012                case e1000_i350:
2013                case e1000_i210:
2014                case e1000_i211:
2015                        igb_set_eee_i350(hw, true, true);
2016                        break;
2017                case e1000_i354:
2018                        igb_set_eee_i354(hw, true, true);
2019                        break;
2020                default:
2021                        break;
2022                }
2023        }
2024        if (!netif_running(adapter->netdev))
2025                igb_power_down_link(adapter);
2026
2027        igb_update_mng_vlan(adapter);
2028
2029        /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
2030        wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
2031
2032        /* Re-enable PTP, where applicable. */
2033        if (adapter->ptp_flags & IGB_PTP_ENABLED)
2034                igb_ptp_reset(adapter);
2035
2036        igb_get_phy_info(hw);
2037}
2038
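/**
 *  igb_fix_features - make requested feature flags consistent
 *  @netdev: network interface device structure
 *  @features: the requested feature set
 *
 *  Rx and Tx VLAN acceleration cannot be enabled independently, so keep the
 *  Tx flag in the same state as the Rx flag.
 **/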
2039static netdev_features_t igb_fix_features(struct net_device *netdev,
2040        netdev_features_t features)
2041{
2042        /* Since there is no support for separate Rx/Tx VLAN accel
2043         * enable/disable, keep the Tx flag in the same state as Rx.
2044         */
2045        if (features & NETIF_F_HW_VLAN_CTAG_RX)
2046                features |= NETIF_F_HW_VLAN_CTAG_TX;
2047        else
2048                features &= ~NETIF_F_HW_VLAN_CTAG_TX;
2049
2050        return features;
2051}
2052
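/**
 *  igb_set_features - apply a new set of netdev feature flags
 *  @netdev: network interface device structure
 *  @features: the feature set to apply
 *
 *  Updates the VLAN offload mode, flushes the n-tuple filters when
 *  NETIF_F_NTUPLE is cleared, and reinitializes or resets the device when
 *  the RXALL or NTUPLE flags change.
 **/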
2053static int igb_set_features(struct net_device *netdev,
2054        netdev_features_t features)
2055{
2056        netdev_features_t changed = netdev->features ^ features;
2057        struct igb_adapter *adapter = netdev_priv(netdev);
2058
2059        if (changed & NETIF_F_HW_VLAN_CTAG_RX)
2060                igb_vlan_mode(netdev, features);
2061
2062        if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
2063                return 0;
2064
2065        if (!(features & NETIF_F_NTUPLE)) {
2066                struct hlist_node *node2;
2067                struct igb_nfc_filter *rule;
2068
2069                spin_lock(&adapter->nfc_lock);
2070                hlist_for_each_entry_safe(rule, node2,
2071                                          &adapter->nfc_filter_list, nfc_node) {
2072                        igb_erase_filter(adapter, rule);
2073                        hlist_del(&rule->nfc_node);
2074                        kfree(rule);
2075                }
2076                spin_unlock(&adapter->nfc_lock);
2077                adapter->nfc_filter_count = 0;
2078        }
2079
2080        netdev->features = features;
2081
2082        if (netif_running(netdev))
2083                igb_reinit_locked(adapter);
2084        else
2085                igb_reset(adapter);
2086
2087        return 0;
2088}
2089
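/**
 *  igb_ndo_fdb_add - add an entry to the hardware forwarding database
 *  @ndm: netlink message header
 *  @tb: netlink attributes
 *  @dev: network interface device structure
 *  @addr: MAC address to add
 *  @vid: VLAN id
 *  @flags: netlink flags
 *
 *  Verifies that a receive address register is still available for unicast
 *  addresses, then defers to the default fdb handling.
 **/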
2090static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
2091                           struct net_device *dev,
2092                           const unsigned char *addr, u16 vid,
2093                           u16 flags)
2094{
2095        /* guarantee we can provide a unique filter for the unicast address */
2096        if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
2097                struct igb_adapter *adapter = netdev_priv(dev);
2098                struct e1000_hw *hw = &adapter->hw;
2099                int vfn = adapter->vfs_allocated_count;
2100                int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2101
2102                if (netdev_uc_count(dev) >= rar_entries)
2103                        return -ENOMEM;
2104        }
2105
2106        return ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, flags);
2107}
2108
2109#define IGB_MAX_MAC_HDR_LEN     127
2110#define IGB_MAX_NETWORK_HDR_LEN 511
2111
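/**
 *  igb_features_check - disable offloads the hardware cannot apply to a frame
 *  @skb: buffer to be transmitted
 *  @dev: network interface device structure
 *  @features: offload features currently enabled
 *
 *  Strips checksum, TSO and VLAN insertion offloads from frames whose MAC or
 *  network headers are too long for a context descriptor, and strips TSO from
 *  encapsulated frames when the inner IP ID cannot be mangled.
 **/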
2112static netdev_features_t
2113igb_features_check(struct sk_buff *skb, struct net_device *dev,
2114                   netdev_features_t features)
2115{
2116        unsigned int network_hdr_len, mac_hdr_len;
2117
2118        /* Make certain the headers can be described by a context descriptor */
2119        mac_hdr_len = skb_network_header(skb) - skb->data;
2120        if (unlikely(mac_hdr_len > IGB_MAX_MAC_HDR_LEN))
2121                return features & ~(NETIF_F_HW_CSUM |
2122                                    NETIF_F_SCTP_CRC |
2123                                    NETIF_F_HW_VLAN_CTAG_TX |
2124                                    NETIF_F_TSO |
2125                                    NETIF_F_TSO6);
2126
2127        network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
2128        if (unlikely(network_hdr_len > IGB_MAX_NETWORK_HDR_LEN))
2129                return features & ~(NETIF_F_HW_CSUM |
2130                                    NETIF_F_SCTP_CRC |
2131                                    NETIF_F_TSO |
2132                                    NETIF_F_TSO6);
2133
2134        /* We can only support IPv4 TSO in tunnels if we can mangle the
2135         * inner IP ID field, so strip TSO if MANGLEID is not supported.
2136         */
2137        if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
2138                features &= ~NETIF_F_TSO;
2139
2140        return features;
2141}
2142
2143static const struct net_device_ops igb_netdev_ops = {
2144        .ndo_open               = igb_open,
2145        .ndo_stop               = igb_close,
2146        .ndo_start_xmit         = igb_xmit_frame,
2147        .ndo_get_stats64        = igb_get_stats64,
2148        .ndo_set_rx_mode        = igb_set_rx_mode,
2149        .ndo_set_mac_address    = igb_set_mac,
2150        .ndo_change_mtu         = igb_change_mtu,
2151        .ndo_do_ioctl           = igb_ioctl,
2152        .ndo_tx_timeout         = igb_tx_timeout,
2153        .ndo_validate_addr      = eth_validate_addr,
2154        .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
2155        .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
2156        .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
2157        .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
2158        .ndo_set_vf_rate        = igb_ndo_set_vf_bw,
2159        .ndo_set_vf_spoofchk    = igb_ndo_set_vf_spoofchk,
2160        .ndo_get_vf_config      = igb_ndo_get_vf_config,
2161#ifdef CONFIG_NET_POLL_CONTROLLER
2162        .ndo_poll_controller    = igb_netpoll,
2163#endif
2164        .ndo_fix_features       = igb_fix_features,
2165        .ndo_set_features       = igb_set_features,
2166        .ndo_fdb_add            = igb_ndo_fdb_add,
2167        .ndo_features_check     = igb_features_check,
2168};
2169
2170/**
2171 * igb_set_fw_version - Configure version string for ethtool
2172 * @adapter: adapter struct
2173 **/
2174void igb_set_fw_version(struct igb_adapter *adapter)
2175{
2176        struct e1000_hw *hw = &adapter->hw;
2177        struct e1000_fw_version fw;
2178
2179        igb_get_fw_version(hw, &fw);
2180
2181        switch (hw->mac.type) {
2182        case e1000_i210:
2183        case e1000_i211:
2184                if (!(igb_get_flash_presence_i210(hw))) {
2185                        snprintf(adapter->fw_version,
2186                                 sizeof(adapter->fw_version),
2187                                 "%2d.%2d-%d",
2188                                 fw.invm_major, fw.invm_minor,
2189                                 fw.invm_img_type);
2190                        break;
2191                }
2192                /* fall through */
2193        default:
2194                /* if option is rom valid, display its version too */
2195                if (fw.or_valid) {
2196                        snprintf(adapter->fw_version,
2197                                 sizeof(adapter->fw_version),
2198                                 "%d.%d, 0x%08x, %d.%d.%d",
2199                                 fw.eep_major, fw.eep_minor, fw.etrack_id,
2200                                 fw.or_major, fw.or_build, fw.or_patch);
2201                /* no option rom */
2202                } else if (fw.etrack_id != 0x0000) {
2203                        snprintf(adapter->fw_version,
2204                                 sizeof(adapter->fw_version),
2205                                 "%d.%d, 0x%08x",
2206                                 fw.eep_major, fw.eep_minor, fw.etrack_id);
2207                } else {
2208                        snprintf(adapter->fw_version,
2209                                 sizeof(adapter->fw_version),
2210                                 "%d.%d.%d",
2211                                 fw.eep_major, fw.eep_minor, fw.eep_build);
2212                }
2213                break;
2214        }
2215}
2216
2217/**
2218 * igb_init_mas - init Media Autosense feature if enabled in the NVM
2219 *
2220 * @adapter: adapter struct
2221 **/
2222static void igb_init_mas(struct igb_adapter *adapter)
2223{
2224        struct e1000_hw *hw = &adapter->hw;
2225        u16 eeprom_data;
2226
2227        hw->nvm.ops.read(hw, NVM_COMPAT, 1, &eeprom_data);
2228        switch (hw->bus.func) {
2229        case E1000_FUNC_0:
2230                if (eeprom_data & IGB_MAS_ENABLE_0) {
2231                        adapter->flags |= IGB_FLAG_MAS_ENABLE;
2232                        netdev_info(adapter->netdev,
2233                                "MAS: Enabling Media Autosense for port %d\n",
2234                                hw->bus.func);
2235                }
2236                break;
2237        case E1000_FUNC_1:
2238                if (eeprom_data & IGB_MAS_ENABLE_1) {
2239                        adapter->flags |= IGB_FLAG_MAS_ENABLE;
2240                        netdev_info(adapter->netdev,
2241                                "MAS: Enabling Media Autosense for port %d\n",
2242                                hw->bus.func);
2243                }
2244                break;
2245        case E1000_FUNC_2:
2246                if (eeprom_data & IGB_MAS_ENABLE_2) {
2247                        adapter->flags |= IGB_FLAG_MAS_ENABLE;
2248                        netdev_info(adapter->netdev,
2249                                "MAS: Enabling Media Autosense for port %d\n",
2250                                hw->bus.func);
2251                }
2252                break;
2253        case E1000_FUNC_3:
2254                if (eeprom_data & IGB_MAS_ENABLE_3) {
2255                        adapter->flags |= IGB_FLAG_MAS_ENABLE;
2256                        netdev_info(adapter->netdev,
2257                                "MAS: Enabling Media Autosense for port %d\n",
2258                                hw->bus.func);
2259                }
2260                break;
2261        default:
2262                /* Shouldn't get here */
2263                netdev_err(adapter->netdev,
2264                        "MAS: Invalid port configuration, returning\n");
2265                break;
2266        }
2267}
2268
2269/**
2270 *  igb_init_i2c - Init I2C interface
2271 *  @adapter: pointer to adapter structure
2272 **/
2273static s32 igb_init_i2c(struct igb_adapter *adapter)
2274{
2275        s32 status = 0;
2276
2277        /* I2C interface supported on i350 devices */
2278        if (adapter->hw.mac.type != e1000_i350)
2279                return 0;
2280
2281        /* Initialize the i2c bus which is controlled by the registers.
2282         * This bus will use the i2c_algo_bit structure that implements
2283         * the protocol through toggling of the 4 bits in the register.
2284         */
2285        adapter->i2c_adap.owner = THIS_MODULE;
2286        adapter->i2c_algo = igb_i2c_algo;
2287        adapter->i2c_algo.data = adapter;
2288        adapter->i2c_adap.algo_data = &adapter->i2c_algo;
2289        adapter->i2c_adap.dev.parent = &adapter->pdev->dev;
2290        strlcpy(adapter->i2c_adap.name, "igb BB",
2291                sizeof(adapter->i2c_adap.name));
2292        status = i2c_bit_add_bus(&adapter->i2c_adap);
2293        return status;
2294}
2295
2296/**
2297 *  igb_probe - Device Initialization Routine
2298 *  @pdev: PCI device information struct
2299 *  @ent: entry in igb_pci_tbl
2300 *
2301 *  Returns 0 on success, negative on failure
2302 *
2303 *  igb_probe initializes an adapter identified by a pci_dev structure.
2304 *  The OS initialization, configuring of the adapter private structure,
2305 *  and a hardware reset occur.
2306 **/
2307static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2308{
2309        struct net_device *netdev;
2310        struct igb_adapter *adapter;
2311        struct e1000_hw *hw;
2312        u16 eeprom_data = 0;
2313        s32 ret_val;
2314        static int global_quad_port_a; /* global quad port a indication */
2315        const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
2316        int err, pci_using_dac;
2317        u8 part_str[E1000_PBANUM_LENGTH];
2318
2319        /* Catch broken hardware that put the wrong VF device ID in
2320         * the PCIe SR-IOV capability.
2321         */
2322        if (pdev->is_virtfn) {
2323                WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
2324                        pci_name(pdev), pdev->vendor, pdev->device);
2325                return -EINVAL;
2326        }
2327
2328        err = pci_enable_device_mem(pdev);
2329        if (err)
2330                return err;
2331
2332        pci_using_dac = 0;
2333        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2334        if (!err) {
2335                pci_using_dac = 1;
2336        } else {
2337                err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
2338                if (err) {
2339                        dev_err(&pdev->dev,
2340                                "No usable DMA configuration, aborting\n");
2341                        goto err_dma;
2342                }
2343        }
2344
2345        err = pci_request_mem_regions(pdev, igb_driver_name);
2346        if (err)
2347                goto err_pci_reg;
2348
2349        pci_enable_pcie_error_reporting(pdev);
2350
2351        pci_set_master(pdev);
2352        pci_save_state(pdev);
2353
2354        err = -ENOMEM;
2355        netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
2356                                   IGB_MAX_TX_QUEUES);
2357        if (!netdev)
2358                goto err_alloc_etherdev;
2359
2360        SET_NETDEV_DEV(netdev, &pdev->dev);
2361
2362        pci_set_drvdata(pdev, netdev);
2363        adapter = netdev_priv(netdev);
2364        adapter->netdev = netdev;
2365        adapter->pdev = pdev;
2366        hw = &adapter->hw;
2367        hw->back = adapter;
2368        adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
2369
2370        err = -EIO;
2371        adapter->io_addr = pci_iomap(pdev, 0, 0);
2372        if (!adapter->io_addr)
2373                goto err_ioremap;
2374        /* hw->hw_addr can be altered, we'll use adapter->io_addr for unmap */
2375        hw->hw_addr = adapter->io_addr;
2376
2377        netdev->netdev_ops = &igb_netdev_ops;
2378        igb_set_ethtool_ops(netdev);
2379        netdev->watchdog_timeo = 5 * HZ;
2380
2381        strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
2382
2383        netdev->mem_start = pci_resource_start(pdev, 0);
2384        netdev->mem_end = pci_resource_end(pdev, 0);
2385
2386        /* PCI config space info */
2387        hw->vendor_id = pdev->vendor;
2388        hw->device_id = pdev->device;
2389        hw->revision_id = pdev->revision;
2390        hw->subsystem_vendor_id = pdev->subsystem_vendor;
2391        hw->subsystem_device_id = pdev->subsystem_device;
2392
2393        /* Copy the default MAC, PHY and NVM function pointers */
2394        memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
2395        memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
2396        memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
2397        /* Initialize skew-specific constants */
2398        err = ei->get_invariants(hw);
2399        if (err)
2400                goto err_sw_init;
2401
2402        /* setup the private structure */
2403        err = igb_sw_init(adapter);
2404        if (err)
2405                goto err_sw_init;
2406
2407        igb_get_bus_info_pcie(hw);
2408
2409        hw->phy.autoneg_wait_to_complete = false;
2410
2411        /* Copper options */
2412        if (hw->phy.media_type == e1000_media_type_copper) {
2413                hw->phy.mdix = AUTO_ALL_MODES;
2414                hw->phy.disable_polarity_correction = false;
2415                hw->phy.ms_type = e1000_ms_hw_default;
2416        }
2417
2418        if (igb_check_reset_block(hw))
2419                dev_info(&pdev->dev,
2420                        "PHY reset is blocked due to SOL/IDER session.\n");
2421
2422        /* features is initialized to 0 in allocation, it might have bits
2423         * set by igb_sw_init, so we should use an OR instead of an
2424         * assignment.
2425         */
2426        netdev->features |= NETIF_F_SG |
2427                            NETIF_F_TSO |
2428                            NETIF_F_TSO6 |
2429                            NETIF_F_RXHASH |
2430                            NETIF_F_RXCSUM |
2431                            NETIF_F_HW_CSUM;
2432
2433        if (hw->mac.type >= e1000_82576)
2434                netdev->features |= NETIF_F_SCTP_CRC;
2435
2436#define IGB_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
2437                                  NETIF_F_GSO_GRE_CSUM | \
2438                                  NETIF_F_GSO_IPXIP4 | \
2439                                  NETIF_F_GSO_IPXIP6 | \
2440                                  NETIF_F_GSO_UDP_TUNNEL | \
2441                                  NETIF_F_GSO_UDP_TUNNEL_CSUM)
2442
2443        netdev->gso_partial_features = IGB_GSO_PARTIAL_FEATURES;
2444        netdev->features |= NETIF_F_GSO_PARTIAL | IGB_GSO_PARTIAL_FEATURES;
2445
2446        /* copy netdev features into list of user selectable features */
2447        netdev->hw_features |= netdev->features |
2448                               NETIF_F_HW_VLAN_CTAG_RX |
2449                               NETIF_F_HW_VLAN_CTAG_TX |
2450                               NETIF_F_RXALL;
2451
2452        if (hw->mac.type >= e1000_i350)
2453                netdev->hw_features |= NETIF_F_NTUPLE;
2454
2455        if (pci_using_dac)
2456                netdev->features |= NETIF_F_HIGHDMA;
2457
2458        netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
2459        netdev->mpls_features |= NETIF_F_HW_CSUM;
2460        netdev->hw_enc_features |= netdev->vlan_features;
2461
2462        /* set this bit last since it cannot be part of vlan_features */
2463        netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
2464                            NETIF_F_HW_VLAN_CTAG_RX |
2465                            NETIF_F_HW_VLAN_CTAG_TX;
2466
2467        netdev->priv_flags |= IFF_SUPP_NOFCS;
2468
2469        netdev->priv_flags |= IFF_UNICAST_FLT;
2470
2471        /* MTU range: 68 - 9216 */
2472        netdev->min_mtu = ETH_MIN_MTU;
2473        netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
2474
2475        adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2476
2477        /* before reading the NVM, reset the controller to put the device in a
2478         * known good starting state
2479         */
2480        hw->mac.ops.reset_hw(hw);
2481
2482        /* make sure the NVM is good, i211/i210 parts can have special NVM
2483         * that doesn't contain a checksum
2484         */
2485        switch (hw->mac.type) {
2486        case e1000_i210:
2487        case e1000_i211:
2488                if (igb_get_flash_presence_i210(hw)) {
2489                        if (hw->nvm.ops.validate(hw) < 0) {
2490                                dev_err(&pdev->dev,
2491                                        "The NVM Checksum Is Not Valid\n");
2492                                err = -EIO;
2493                                goto err_eeprom;
2494                        }
2495                }
2496                break;
2497        default:
2498                if (hw->nvm.ops.validate(hw) < 0) {
2499                        dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2500                        err = -EIO;
2501                        goto err_eeprom;
2502                }
2503                break;
2504        }
2505
2506        if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
2507                /* copy the MAC address out of the NVM */
2508                if (hw->mac.ops.read_mac_addr(hw))
2509                        dev_err(&pdev->dev, "NVM Read Error\n");
2510        }
2511
2512        memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2513
2514        if (!is_valid_ether_addr(netdev->dev_addr)) {
2515                dev_err(&pdev->dev, "Invalid MAC Address\n");
2516                err = -EIO;
2517                goto err_eeprom;
2518        }
2519
2520        /* get firmware version for ethtool -i */
2521        igb_set_fw_version(adapter);
2522
2523        /* configure RXPBSIZE and TXPBSIZE */
2524        if (hw->mac.type == e1000_i210) {
2525                wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT);
2526                wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
2527        }
2528
2529        setup_timer(&adapter->watchdog_timer, igb_watchdog,
2530                    (unsigned long) adapter);
2531        setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2532                    (unsigned long) adapter);
2533
2534        INIT_WORK(&adapter->reset_task, igb_reset_task);
2535        INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2536
2537        /* Initialize link properties that are user-changeable */
2538        adapter->fc_autoneg = true;
2539        hw->mac.autoneg = true;
2540        hw->phy.autoneg_advertised = 0x2f;
2541
2542        hw->fc.requested_mode = e1000_fc_default;
2543        hw->fc.current_mode = e1000_fc_default;
2544
2545        igb_validate_mdi_setting(hw);
2546
2547        /* By default, support wake on port A */
2548        if (hw->bus.func == 0)
2549                adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
2550
2551        /* Check the NVM for wake support on non-port A ports */
2552        if (hw->mac.type >= e1000_82580)
2553                hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2554                                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2555                                 &eeprom_data);
2556        else if (hw->bus.func == 1)
2557                hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2558
2559        if (eeprom_data & IGB_EEPROM_APME)
2560                adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
2561
2562        /* now that we have the eeprom settings, apply the special cases where
2563         * the eeprom may be wrong or the board simply won't support wake on
2564         * lan on a particular port
2565         */
2566        switch (pdev->device) {
2567        case E1000_DEV_ID_82575GB_QUAD_COPPER:
2568                adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
2569                break;
2570        case E1000_DEV_ID_82575EB_FIBER_SERDES:
2571        case E1000_DEV_ID_82576_FIBER:
2572        case E1000_DEV_ID_82576_SERDES:
2573                /* Wake events only supported on port A for dual fiber
2574                 * regardless of eeprom setting
2575                 */
2576                if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2577                        adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
2578                break;
2579        case E1000_DEV_ID_82576_QUAD_COPPER:
2580        case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2581                /* if quad port adapter, disable WoL on all but port A */
2582                if (global_quad_port_a != 0)
2583                        adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
2584                else
2585                        adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2586                /* Reset for multiple quad port adapters */
2587                if (++global_quad_port_a == 4)
2588                        global_quad_port_a = 0;
2589                break;
2590        default:
2591                /* If the device can't wake, don't set software support */
2592                if (!device_can_wakeup(&adapter->pdev->dev))
2593                        adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
2594        }
2595
2596        /* initialize the wol settings based on the eeprom settings */
2597        if (adapter->flags & IGB_FLAG_WOL_SUPPORTED)
2598                adapter->wol |= E1000_WUFC_MAG;
2599
2600        /* Some vendors want WoL disabled by default, but still supported */
2601        if ((hw->mac.type == e1000_i350) &&
2602            (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) {
2603                adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
2604                adapter->wol = 0;
2605        }
2606
2607        /* Some vendors want the ability to use the EEPROM setting as
2608         * enable/disable only, and not for capability
2609         */
2610        if (((hw->mac.type == e1000_i350) ||
2611             (hw->mac.type == e1000_i354)) &&
2612            (pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)) {
2613                adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
2614                adapter->wol = 0;
2615        }
2616        if (hw->mac.type == e1000_i350) {
2617                if (((pdev->subsystem_device == 0x5001) ||
2618                     (pdev->subsystem_device == 0x5002)) &&
2619                                (hw->bus.func == 0)) {
2620                        adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
2621                        adapter->wol = 0;
2622                }
2623                if (pdev->subsystem_device == 0x1F52)
2624                        adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
2625        }
2626
2627        device_set_wakeup_enable(&adapter->pdev->dev,
2628                                 adapter->flags & IGB_FLAG_WOL_SUPPORTED);
2629
2630        /* reset the hardware with the new settings */
2631        igb_reset(adapter);
2632
2633        /* Init the I2C interface */
2634        err = igb_init_i2c(adapter);
2635        if (err) {
2636                dev_err(&pdev->dev, "failed to init i2c interface\n");
2637                goto err_eeprom;
2638        }
2639
2640        /* let the f/w know that the h/w is now under the control of the
2641         * driver.
2642         */
2643        igb_get_hw_control(adapter);
2644
2645        strcpy(netdev->name, "eth%d");
2646        err = register_netdev(netdev);
2647        if (err)
2648                goto err_register;
2649
2650        /* carrier off reporting is important to ethtool even BEFORE open */
2651        netif_carrier_off(netdev);
2652
2653#ifdef CONFIG_IGB_DCA
2654        if (dca_add_requester(&pdev->dev) == 0) {
2655                adapter->flags |= IGB_FLAG_DCA_ENABLED;
2656                dev_info(&pdev->dev, "DCA enabled\n");
2657                igb_setup_dca(adapter);
2658        }
2659
2660#endif
2661#ifdef CONFIG_IGB_HWMON
2662        /* Initialize the thermal sensor on i350 devices. */
2663        if (hw->mac.type == e1000_i350 && hw->bus.func == 0) {
2664                u16 ets_word;
2665
2666                /* Read the NVM to determine if this i350 device supports an
2667                 * external thermal sensor.
2668                 */
2669                hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_word);
2670                if (ets_word != 0x0000 && ets_word != 0xFFFF)
2671                        adapter->ets = true;
2672                else
2673                        adapter->ets = false;
2674                if (igb_sysfs_init(adapter))
2675                        dev_err(&pdev->dev,
2676                                "failed to allocate sysfs resources\n");
2677        } else {
2678                adapter->ets = false;
2679        }
2680#endif
2681        /* Check if Media Autosense is enabled */
2682        adapter->ei = *ei;
2683        if (hw->dev_spec._82575.mas_capable)
2684                igb_init_mas(adapter);
2685
2686        /* do hw tstamp init after resetting */
2687        igb_ptp_init(adapter);
2688
2689        dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2690        /* print bus type/speed/width info, not applicable to i354 */
2691        if (hw->mac.type != e1000_i354) {
2692                dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2693                         netdev->name,
2694                         ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2695                          (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2696                           "unknown"),
2697                         ((hw->bus.width == e1000_bus_width_pcie_x4) ?
2698                          "Width x4" :
2699                          (hw->bus.width == e1000_bus_width_pcie_x2) ?
2700                          "Width x2" :
2701                          (hw->bus.width == e1000_bus_width_pcie_x1) ?
2702                          "Width x1" : "unknown"), netdev->dev_addr);
2703        }
2704
2705        if ((hw->mac.type >= e1000_i210 ||
2706             igb_get_flash_presence_i210(hw))) {
2707                ret_val = igb_read_part_string(hw, part_str,
2708                                               E1000_PBANUM_LENGTH);
2709        } else {
2710                ret_val = -E1000_ERR_INVM_VALUE_NOT_FOUND;
2711        }
2712
2713        if (ret_val)
2714                strcpy(part_str, "Unknown");
2715        dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2716        dev_info(&pdev->dev,
2717                "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2718                (adapter->flags & IGB_FLAG_HAS_MSIX) ? "MSI-X" :
2719                (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2720                adapter->num_rx_queues, adapter->num_tx_queues);
2721        if (hw->phy.media_type == e1000_media_type_copper) {
2722                switch (hw->mac.type) {
2723                case e1000_i350:
2724                case e1000_i210:
2725                case e1000_i211:
2726                        /* Enable EEE for internal copper PHY devices */
2727                        err = igb_set_eee_i350(hw, true, true);
2728                        if ((!err) &&
2729                            (!hw->dev_spec._82575.eee_disable)) {
2730                                adapter->eee_advert =
2731                                        MDIO_EEE_100TX | MDIO_EEE_1000T;
2732                                adapter->flags |= IGB_FLAG_EEE;
2733                        }
2734                        break;
2735                case e1000_i354:
2736                        if ((rd32(E1000_CTRL_EXT) &
2737                            E1000_CTRL_EXT_LINK_MODE_SGMII)) {
2738                                err = igb_set_eee_i354(hw, true, true);
2739                                if ((!err) &&
2740                                        (!hw->dev_spec._82575.eee_disable)) {
2741                                        adapter->eee_advert =
2742                                           MDIO_EEE_100TX | MDIO_EEE_1000T;
2743                                        adapter->flags |= IGB_FLAG_EEE;
2744                                }
2745                        }
2746                        break;
2747                default:
2748                        break;
2749                }
2750        }
2751        pm_runtime_put_noidle(&pdev->dev);
2752        return 0;
2753
2754err_register:
2755        igb_release_hw_control(adapter);
2756        memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap));
2757err_eeprom:
2758        if (!igb_check_reset_block(hw))
2759                igb_reset_phy(hw);
2760
2761        if (hw->flash_address)
2762                iounmap(hw->flash_address);
2763err_sw_init:
2764        kfree(adapter->shadow_vfta);
2765        igb_clear_interrupt_scheme(adapter);
2766#ifdef CONFIG_PCI_IOV
2767        igb_disable_sriov(pdev);
2768#endif
2769        pci_iounmap(pdev, adapter->io_addr);
2770err_ioremap:
2771        free_netdev(netdev);
2772err_alloc_etherdev:
2773        pci_release_mem_regions(pdev);
2774err_pci_reg:
2775err_dma:
2776        pci_disable_device(pdev);
2777        return err;
2778}
2779
2780#ifdef CONFIG_PCI_IOV
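/**
 *  igb_disable_sriov - disable SR-IOV and free VF resources
 *  @pdev: PCI device information struct
 *
 *  Disables SR-IOV, unless the VFs are still assigned to guests, and frees
 *  the VF data storage.
 **/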
2781static int igb_disable_sriov(struct pci_dev *pdev)
2782{
2783        struct net_device *netdev = pci_get_drvdata(pdev);
2784        struct igb_adapter *adapter = netdev_priv(netdev);
2785        struct e1000_hw *hw = &adapter->hw;
2786
2787        /* reclaim resources allocated to VFs */
2788        if (adapter->vf_data) {
2789                /* disable iov and allow time for transactions to clear */
2790                if (pci_vfs_assigned(pdev)) {
2791                        dev_warn(&pdev->dev,
2792                                 "Cannot deallocate SR-IOV virtual functions while they are assigned - VFs will not be deallocated\n");
2793                        return -EPERM;
2794                } else {
2795                        pci_disable_sriov(pdev);
2796                        msleep(500);
2797                }
2798
2799                kfree(adapter->vf_data);
2800                adapter->vf_data = NULL;
2801                adapter->vfs_allocated_count = 0;
2802                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2803                wrfl();
2804                msleep(100);
2805                dev_info(&pdev->dev, "IOV Disabled\n");
2806
2807                /* Re-enable DMA Coalescing flag since IOV is turned off */
2808                adapter->flags |= IGB_FLAG_DMAC;
2809        }
2810
2811        return 0;
2812}
2813
2814static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs)
2815{
2816        struct net_device *netdev = pci_get_drvdata(pdev);
2817        struct igb_adapter *adapter = netdev_priv(netdev);
2818        int old_vfs = pci_num_vf(pdev);
2819        int err = 0;
2820        int i;
2821
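            /* SR-IOV needs MSI-X and the hardware supports at most 7 VFs per PF */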
2822        if (!(adapter->flags & IGB_FLAG_HAS_MSIX) || num_vfs > 7) {
2823                err = -EPERM;
2824                goto out;
2825        }
2826        if (!num_vfs)
2827                goto out;
2828
2829        if (old_vfs) {
2830                dev_info(&pdev->dev, "%d pre-allocated VFs found - override max_vfs setting of %d\n",
2831                         old_vfs, max_vfs);
2832                adapter->vfs_allocated_count = old_vfs;
2833        } else
2834                adapter->vfs_allocated_count = num_vfs;
2835
2836        adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2837                                sizeof(struct vf_data_storage), GFP_KERNEL);
2838
2839        /* if allocation failed then we do not support SR-IOV */
2840        if (!adapter->vf_data) {
2841                adapter->vfs_allocated_count = 0;
2842                dev_err(&pdev->dev,
2843                        "Unable to allocate memory for VF Data Storage\n");
2844                err = -ENOMEM;
2845                goto out;
2846        }
2847
2848        /* only call pci_enable_sriov() if no VFs are allocated already */
2849        if (!old_vfs) {
2850                err = pci_enable_sriov(pdev, adapter->vfs_allocated_count);
2851                if (err)
2852                        goto err_out;
2853        }
2854        dev_info(&pdev->dev, "%d VFs allocated\n",
2855                 adapter->vfs_allocated_count);
2856        for (i = 0; i < adapter->vfs_allocated_count; i++)
2857                igb_vf_configure(adapter, i);
2858
2859        /* DMA Coalescing is not supported in IOV mode. */
2860        adapter->flags &= ~IGB_FLAG_DMAC;
2861        goto out;
2862
2863err_out:
2864        kfree(adapter->vf_data);
2865        adapter->vf_data = NULL;
2866        adapter->vfs_allocated_count = 0;
2867out:
2868        return err;
2869}
2870
2871#endif
2872/**
2873 *  igb_remove_i2c - Clean up the I2C interface
2874 *  @adapter: pointer to adapter structure
2875 **/
2876static void igb_remove_i2c(struct igb_adapter *adapter)
2877{
2878        /* free the adapter bus structure */
2879        i2c_del_adapter(&adapter->i2c_adap);
2880}
2881
2882/**
2883 *  igb_remove - Device Removal Routine
2884 *  @pdev: PCI device information struct
2885 *
2886 *  igb_remove is called by the PCI subsystem to alert the driver
2887 *  that it should release a PCI device.  This could be caused by a
2888 *  Hot-Plug event, or because the driver is going to be removed from
2889 *  memory.
2890 **/
2891static void igb_remove(struct pci_dev *pdev)
2892{
2893        struct net_device *netdev = pci_get_drvdata(pdev);
2894        struct igb_adapter *adapter = netdev_priv(netdev);
2895        struct e1000_hw *hw = &adapter->hw;
2896
2897        pm_runtime_get_noresume(&pdev->dev);
2898#ifdef CONFIG_IGB_HWMON
2899        igb_sysfs_exit(adapter);
2900#endif
2901        igb_remove_i2c(adapter);
2902        igb_ptp_stop(adapter);
2903        /* The watchdog timer may be rescheduled, so explicitly
2904         * prevent it from being rescheduled.
2905         */
2906        set_bit(__IGB_DOWN, &adapter->state);
2907        del_timer_sync(&adapter->watchdog_timer);
2908        del_timer_sync(&adapter->phy_info_timer);
2909
2910        cancel_work_sync(&adapter->reset_task);
2911        cancel_work_sync(&adapter->watchdog_task);
2912
2913#ifdef CONFIG_IGB_DCA
2914        if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2915                dev_info(&pdev->dev, "DCA disabled\n");
2916                dca_remove_requester(&pdev->dev);
2917                adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2918                wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2919        }
2920#endif
2921
2922        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2923         * would have already happened in close and is redundant.
2924         */
2925        igb_release_hw_control(adapter);
2926
2927#ifdef CONFIG_PCI_IOV
2928        igb_disable_sriov(pdev);
2929#endif
2930
2931        unregister_netdev(netdev);
2932
2933        igb_clear_interrupt_scheme(adapter);
2934
2935        pci_iounmap(pdev, adapter->io_addr);
2936        if (hw->flash_address)
2937                iounmap(hw->flash_address);
2938        pci_release_mem_regions(pdev);
2939
2940        kfree(adapter->shadow_vfta);
2941        free_netdev(netdev);
2942
2943        pci_disable_pcie_error_reporting(pdev);
2944
2945        pci_disable_device(pdev);
2946}
2947
2948/**
2949 *  igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2950 *  @adapter: board private structure to initialize
2951 *
2952 *  This function initializes the vf specific data storage and then attempts to
2953 *  allocate the VFs.  The reason for ordering it this way is that it is much
2954 *  more expensive time-wise to disable SR-IOV than it is to allocate and free
2955 *  the memory for the VFs.
2956 **/
2957static void igb_probe_vfs(struct igb_adapter *adapter)
2958{
2959#ifdef CONFIG_PCI_IOV
2960        struct pci_dev *pdev = adapter->pdev;
2961        struct e1000_hw *hw = &adapter->hw;
2962
2963        /* Virtualization features not supported on i210 family. */
2964        if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
2965                return;
2966
2967        /* Of the below we really only want the effect of getting
2968         * IGB_FLAG_HAS_MSIX set (if available), without which
2969         * igb_enable_sriov() has no effect.
2970         */
2971        igb_set_interrupt_capability(adapter, true);
2972        igb_reset_interrupt_capability(adapter);
2973
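            /* cap the advertised VF count at the 7 VF hardware limit, then enable
             * VFs according to the max_vfs module parameter
             */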
2974        pci_sriov_set_totalvfs(pdev, 7);
2975        igb_enable_sriov(pdev, max_vfs);
2976
2977#endif /* CONFIG_PCI_IOV */
2978}
2979
2980static void igb_init_queue_configuration(struct igb_adapter *adapter)
2981{
2982        struct e1000_hw *hw = &adapter->hw;
2983        u32 max_rss_queues;
2984
2985        /* Determine the maximum number of RSS queues supported. */
2986        switch (hw->mac.type) {
2987        case e1000_i211:
2988                max_rss_queues = IGB_MAX_RX_QUEUES_I211;
2989                break;
2990        case e1000_82575:
2991        case e1000_i210:
2992                max_rss_queues = IGB_MAX_RX_QUEUES_82575;
2993                break;
2994        case e1000_i350:
2995                /* I350 cannot do RSS and SR-IOV at the same time */
2996                if (!!adapter->vfs_allocated_count) {
2997                        max_rss_queues = 1;
2998                        break;
2999                }
3000                /* fall through */
3001        case e1000_82576:
3002                if (!!adapter->vfs_allocated_count) {
3003                        max_rss_queues = 2;
3004                        break;
3005                }
3006                /* fall through */
3007        case e1000_82580:
3008        case e1000_i354:
3009        default:
3010                max_rss_queues = IGB_MAX_RX_QUEUES;
3011                break;
3012        }
3013
3014        adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
3015
3016        igb_set_flag_queue_pairs(adapter, max_rss_queues);
3017}
3018
3019void igb_set_flag_queue_pairs(struct igb_adapter *adapter,
3020                              const u32 max_rss_queues)
3021{
3022        struct e1000_hw *hw = &adapter->hw;
3023
3024        /* Determine if we need to pair queues. */
3025        switch (hw->mac.type) {
3026        case e1000_82575:
3027        case e1000_i211:
3028                /* Device supports enough interrupts without queue pairing. */
3029                break;
3030        case e1000_82576:
3031        case e1000_82580:
3032        case e1000_i350:
3033        case e1000_i354:
3034        case e1000_i210:
3035        default:
3036                /* If rss_queues > half of max_rss_queues, pair the queues in
3037                 * order to conserve interrupts due to limited supply.
3038                 */
3039                if (adapter->rss_queues > (max_rss_queues / 2))
3040                        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
3041                else
3042                        adapter->flags &= ~IGB_FLAG_QUEUE_PAIRS;
3043                break;
3044        }
3045}
3046
3047/**
3048 *  igb_sw_init - Initialize general software structures (struct igb_adapter)
3049 *  @adapter: board private structure to initialize
3050 *
3051 *  igb_sw_init initializes the Adapter private data structure.
3052 *  Fields are initialized based on PCI device information and
3053 *  OS network device settings (MTU size).
3054 **/
3055static int igb_sw_init(struct igb_adapter *adapter)
3056{
3057        struct e1000_hw *hw = &adapter->hw;
3058        struct net_device *netdev = adapter->netdev;
3059        struct pci_dev *pdev = adapter->pdev;
3060
3061        pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
3062
3063        /* set default ring sizes */
3064        adapter->tx_ring_count = IGB_DEFAULT_TXD;
3065        adapter->rx_ring_count = IGB_DEFAULT_RXD;
3066
3067        /* set default ITR values */
3068        adapter->rx_itr_setting = IGB_DEFAULT_ITR;
3069        adapter->tx_itr_setting = IGB_DEFAULT_ITR;
3070
3071        /* set default work limits */
3072        adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
3073
3074        adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
3075                                  VLAN_HLEN;
3076        adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
3077
3078        spin_lock_init(&adapter->nfc_lock);
3079        spin_lock_init(&adapter->stats64_lock);
3080#ifdef CONFIG_PCI_IOV
3081        switch (hw->mac.type) {
3082        case e1000_82576:
3083        case e1000_i350:
3084                if (max_vfs > 7) {
3085                        dev_warn(&pdev->dev,
3086                                 "Maximum of 7 VFs per PF, using max\n");
3087                        max_vfs = adapter->vfs_allocated_count = 7;
3088                } else
3089                        adapter->vfs_allocated_count = max_vfs;
3090                if (adapter->vfs_allocated_count)
3091                        dev_warn(&pdev->dev,
3092                                 "Enabling SR-IOV VFs using the module parameter is deprecated - please use the pci sysfs interface.\n");
3093                break;
3094        default:
3095                break;
3096        }
3097#endif /* CONFIG_PCI_IOV */
3098
3099        /* Assume MSI-X interrupts, will be checked during IRQ allocation */
3100        adapter->flags |= IGB_FLAG_HAS_MSIX;
3101
3102        igb_probe_vfs(adapter);
3103
3104        igb_init_queue_configuration(adapter);
3105
3106        /* Setup and initialize a copy of the hw vlan table array */
3107        adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
3108                                       GFP_ATOMIC);
3109
3110        /* This call may decrease the number of queues */
3111        if (igb_init_interrupt_scheme(adapter, true)) {
3112                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
3113                return -ENOMEM;
3114        }
3115
3116        /* Explicitly disable IRQ since the NIC can be in any state. */
3117        igb_irq_disable(adapter);
3118
3119        if (hw->mac.type >= e1000_i350)
3120                adapter->flags &= ~IGB_FLAG_DMAC;
3121
3122        set_bit(__IGB_DOWN, &adapter->state);
3123        return 0;
3124}
3125
3126/**
3127 *  igb_open - Called when a network interface is made active
3128 *  @netdev: network interface device structure
3129 *
3130 *  Returns 0 on success, negative value on failure
3131 *
3132 *  The open entry point is called when a network interface is made
3133 *  active by the system (IFF_UP).  At this point all resources needed
3134 *  for transmit and receive operations are allocated, the interrupt
3135 *  handler is registered with the OS, the watchdog timer is started,
3136 *  and the stack is notified that the interface is ready.
3137 **/
3138static int __igb_open(struct net_device *netdev, bool resuming)
3139{
3140        struct igb_adapter *adapter = netdev_priv(netdev);
3141        struct e1000_hw *hw = &adapter->hw;
3142        struct pci_dev *pdev = adapter->pdev;
3143        int err;
3144        int i;
3145
3146        /* disallow open during test */
3147        if (test_bit(__IGB_TESTING, &adapter->state)) {
3148                WARN_ON(resuming);
3149                return -EBUSY;
3150        }
3151
3152        if (!resuming)
3153                pm_runtime_get_sync(&pdev->dev);
3154
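            /* keep carrier off until link is actually detected */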
3155        netif_carrier_off(netdev);
3156
3157        /* allocate transmit descriptors */
3158        err = igb_setup_all_tx_resources(adapter);
3159        if (err)
3160                goto err_setup_tx;
3161
3162        /* allocate receive descriptors */
3163        err = igb_setup_all_rx_resources(adapter);
3164        if (err)
3165                goto err_setup_rx;
3166
3167        igb_power_up_link(adapter);
3168
3169        /* before we allocate an interrupt, we must be ready to handle it.
3170         * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
3171         * as soon as we call pci_request_irq, so we have to setup our
3172         * as soon as we call pci_request_irq, so we have to set up our
3173         */
3174        igb_configure(adapter);
3175
3176        err = igb_request_irq(adapter);
3177        if (err)
3178                goto err_req_irq;
3179
3180        /* Notify the stack of the actual queue counts. */
3181        err = netif_set_real_num_tx_queues(adapter->netdev,
3182                                           adapter->num_tx_queues);
3183        if (err)
3184                goto err_set_queues;
3185
3186        err = netif_set_real_num_rx_queues(adapter->netdev,
3187                                           adapter->num_rx_queues);
3188        if (err)
3189                goto err_set_queues;
3190
3191        /* From here on the code is the same as igb_up() */
3192        clear_bit(__IGB_DOWN, &adapter->state);
3193
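            /* enable NAPI polling on every queue vector */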
3194        for (i = 0; i < adapter->num_q_vectors; i++)
3195                napi_enable(&(adapter->q_vector[i]->napi));
3196
3197        /* Clear any pending interrupts. */
3198        rd32(E1000_ICR);
3199
3200        igb_irq_enable(adapter);
3201
3202        /* notify VFs that reset has been completed */
3203        if (adapter->vfs_allocated_count) {
3204                u32 reg_data = rd32(E1000_CTRL_EXT);
3205
3206                reg_data |= E1000_CTRL_EXT_PFRSTD;
3207                wr32(E1000_CTRL_EXT, reg_data);
3208        }
3209
3210        netif_tx_start_all_queues(netdev);
3211
3212        if (!resuming)
3213                pm_runtime_put(&pdev->dev);
3214
3215        /* start the watchdog. */
3216        hw->mac.get_link_status = 1;
3217        schedule_work(&adapter->watchdog_task);
3218
3219        return 0;
3220
3221err_set_queues:
3222        igb_free_irq(adapter);
3223err_req_irq:
3224        igb_release_hw_control(adapter);
3225        igb_power_down_link(adapter);
3226        igb_free_all_rx_resources(adapter);
3227err_setup_rx:
3228        igb_free_all_tx_resources(adapter);
3229err_setup_tx:
3230        igb_reset(adapter);
3231        if (!resuming)
3232                pm_runtime_put(&pdev->dev);
3233
3234        return err;
3235}
3236
3237int igb_open(struct net_device *netdev)
3238{
3239        return __igb_open(netdev, false);
3240}
3241
3242/**
3243 *  igb_close - Disables a network interface
3244 *  @netdev: network interface device structure
3245 *
3246 *  Returns 0, this is not allowed to fail
3247 *
3248 *  The close entry point is called when an interface is de-activated
3249 *  by the OS.  The hardware is still under the driver's control, but
3250 *  needs to be disabled.  A global MAC reset is issued to stop the
3251 *  hardware, and all transmit and receive resources are freed.
3252 **/
3253static int __igb_close(struct net_device *netdev, bool suspending)
3254{
3255        struct igb_adapter *adapter = netdev_priv(netdev);
3256        struct pci_dev *pdev = adapter->pdev;
3257
3258        WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
3259
3260        if (!suspending)
3261                pm_runtime_get_sync(&pdev->dev);
3262
3263        igb_down(adapter);
3264        igb_free_irq(adapter);
3265
3266        igb_nfc_filter_exit(adapter);
3267
3268        igb_free_all_tx_resources(adapter);
3269        igb_free_all_rx_resources(adapter);
3270
3271        if (!suspending)
3272                pm_runtime_put_sync(&pdev->dev);
3273        return 0;
3274}
3275
3276int igb_close(struct net_device *netdev)
3277{
3278        if (netif_device_present(netdev))
3279                return __igb_close(netdev, false);
3280        return 0;
3281}
3282
3283/**
3284 *  igb_setup_tx_resources - allocate Tx resources (Descriptors)
3285 *  @tx_ring: tx descriptor ring (for a specific queue) to setup
3286 *
3287 *  Return 0 on success, negative on failure
3288 **/
3289int igb_setup_tx_resources(struct igb_ring *tx_ring)
3290{
3291        struct device *dev = tx_ring->dev;
3292        int size;
3293
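            /* software state: one igb_tx_buffer per descriptor */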
3294        size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3295
3296        tx_ring->tx_buffer_info = vzalloc(size);
3297        if (!tx_ring->tx_buffer_info)
3298                goto err;
3299
3300        /* round up to nearest 4K */
3301        tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
3302        tx_ring->size = ALIGN(tx_ring->size, 4096);
3303
3304        tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
3305                                           &tx_ring->dma, GFP_KERNEL);
3306        if (!tx_ring->desc)
3307                goto err;
3308
3309        tx_ring->next_to_use = 0;
3310        tx_ring->next_to_clean = 0;
3311
3312        return 0;
3313
3314err:
3315        vfree(tx_ring->tx_buffer_info);
3316        tx_ring->tx_buffer_info = NULL;
3317        dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
3318        return -ENOMEM;
3319}
3320
3321/**
3322 *  igb_setup_all_tx_resources - wrapper to allocate Tx resources
3323 *                               (Descriptors) for all queues
3324 *  @adapter: board private structure
3325 *
3326 *  Return 0 on success, negative on failure
3327 **/
3328static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
3329{
3330        struct pci_dev *pdev = adapter->pdev;
3331        int i, err = 0;
3332
3333        for (i = 0; i < adapter->num_tx_queues; i++) {
3334                err = igb_setup_tx_resources(adapter->tx_ring[i]);
3335                if (err) {
3336                        dev_err(&pdev->dev,
3337                                "Allocation for Tx Queue %u failed\n", i);
3338                        for (i--; i >= 0; i--)
3339                                igb_free_tx_resources(adapter->tx_ring[i]);
3340                        break;
3341                }
3342        }
3343
3344        return err;
3345}
3346
3347/**
3348 *  igb_setup_tctl - configure the transmit control registers
3349 *  @adapter: Board private structure
3350 **/
3351void igb_setup_tctl(struct igb_adapter *adapter)
3352{
3353        struct e1000_hw *hw = &adapter->hw;
3354        u32 tctl;
3355
3356        /* disable queue 0 which is enabled by default on 82575 and 82576 */
3357        wr32(E1000_TXDCTL(0), 0);
3358
3359        /* Program the Transmit Control Register */
3360        tctl = rd32(E1000_TCTL);
3361        tctl &= ~E1000_TCTL_CT;
3362        tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
3363                (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
3364
3365        igb_config_collision_dist(hw);
3366
3367        /* Enable transmits */
3368        tctl |= E1000_TCTL_EN;
3369
3370        wr32(E1000_TCTL, tctl);
3371}
3372
3373/**
3374 *  igb_configure_tx_ring - Configure transmit ring after Reset
3375 *  @adapter: board private structure
3376 *  @ring: tx ring to configure
3377 *
3378 *  Configure a transmit ring after a reset.
3379 **/
3380void igb_configure_tx_ring(struct igb_adapter *adapter,
3381                           struct igb_ring *ring)
3382{
3383        struct e1000_hw *hw = &adapter->hw;
3384        u32 txdctl = 0;
3385        u64 tdba = ring->dma;
3386        int reg_idx = ring->reg_idx;
3387
3388        /* disable the queue */
3389        wr32(E1000_TXDCTL(reg_idx), 0);
3390        wrfl();
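            /* flush the write and give the hardware time to act on the disable */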
3391        mdelay(10);
3392
3393        wr32(E1000_TDLEN(reg_idx),
3394             ring->count * sizeof(union e1000_adv_tx_desc));
3395        wr32(E1000_TDBAL(reg_idx),
3396             tdba & 0x00000000ffffffffULL);
3397        wr32(E1000_TDBAH(reg_idx), tdba >> 32);
3398
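            /* record the tail register address for doorbell writes and zero head/tail */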
3399        ring->tail = adapter->io_addr + E1000_TDT(reg_idx);
3400        wr32(E1000_TDH(reg_idx), 0);
3401        writel(0, ring->tail);
3402
3403        txdctl |= IGB_TX_PTHRESH;
3404        txdctl |= IGB_TX_HTHRESH << 8;
3405        txdctl |= IGB_TX_WTHRESH << 16;
3406
3407        txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3408        wr32(E1000_TXDCTL(reg_idx), txdctl);
3409}
3410
3411/**
3412 *  igb_configure_tx - Configure transmit Unit after Reset
3413 *  @adapter: board private structure
3414 *
3415 *  Configure the Tx unit of the MAC after a reset.
3416 **/
3417static void igb_configure_tx(struct igb_adapter *adapter)
3418{
3419        int i;
3420
3421        for (i = 0; i < adapter->num_tx_queues; i++)
3422                igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
3423}
3424
3425/**
3426 *  igb_setup_rx_resources - allocate Rx resources (Descriptors)
3427 *  @rx_ring: Rx descriptor ring (for a specific queue) to setup
3428 *
3429 *  Returns 0 on success, negative on failure
3430 **/
3431int igb_setup_rx_resources(struct igb_ring *rx_ring)
3432{
3433        struct device *dev = rx_ring->dev;
3434        int size;
3435
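            /* software state: one igb_rx_buffer per descriptor */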
3436        size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3437
3438        rx_ring->rx_buffer_info = vzalloc(size);
3439        if (!rx_ring->rx_buffer_info)
3440                goto err;
3441
3442        /* Round up to nearest 4K */
3443        rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc);
3444        rx_ring->size = ALIGN(rx_ring->size, 4096);
3445
3446        rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
3447                                           &rx_ring->dma, GFP_KERNEL);
3448        if (!rx_ring->desc)
3449                goto err;
3450
3451        rx_ring->next_to_alloc = 0;
3452        rx_ring->next_to_clean = 0;
3453        rx_ring->next_to_use = 0;
3454
3455        return 0;
3456
3457err:
3458        vfree(rx_ring->rx_buffer_info);
3459        rx_ring->rx_buffer_info = NULL;
3460        dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
3461        return -ENOMEM;
3462}
3463
3464/**
3465 *  igb_setup_all_rx_resources - wrapper to allocate Rx resources
3466 *                               (Descriptors) for all queues
3467 *  @adapter: board private structure
3468 *
3469 *  Return 0 on success, negative on failure
3470 **/
3471static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
3472{
3473        struct pci_dev *pdev = adapter->pdev;
3474        int i, err = 0;
3475
3476        for (i = 0; i < adapter->num_rx_queues; i++) {
3477                err = igb_setup_rx_resources(adapter->rx_ring[i]);
3478                if (err) {
3479                        dev_err(&pdev->dev,
3480                                "Allocation for Rx Queue %u failed\n", i);
3481                        for (i--; i >= 0; i--)
3482                                igb_free_rx_resources(adapter->rx_ring[i]);
3483                        break;
3484                }
3485        }
3486
3487        return err;
3488}
3489
3490/**
3491 *  igb_setup_mrqc - configure the multiple receive queue control registers
3492 *  @adapter: Board private structure
3493 **/
3494static void igb_setup_mrqc(struct igb_adapter *adapter)
3495{
3496        struct e1000_hw *hw = &adapter->hw;
3497        u32 mrqc, rxcsum;
3498        u32 j, num_rx_queues;
3499        u32 rss_key[10];
3500
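            /* program a random RSS hash key into the RSSRK registers */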
3501        netdev_rss_key_fill(rss_key, sizeof(rss_key));
3502        for (j = 0; j < 10; j++)
3503                wr32(E1000_RSSRK(j), rss_key[j]);
3504
3505        num_rx_queues = adapter->rss_queues;
3506
3507        switch (hw->mac.type) {
3508        case e1000_82576:
3509                /* 82576 supports 2 RSS queues for SR-IOV */
3510                if (adapter->vfs_allocated_count)
3511                        num_rx_queues = 2;
3512                break;
3513        default:
3514                break;
3515        }
3516
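            /* spread the redirection table entries evenly across the active RSS queues */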
3517        if (adapter->rss_indir_tbl_init != num_rx_queues) {
3518                for (j = 0; j < IGB_RETA_SIZE; j++)
3519                        adapter->rss_indir_tbl[j] =
3520                        (j * num_rx_queues) / IGB_RETA_SIZE;
3521                adapter->rss_indir_tbl_init = num_rx_queues;
3522        }
3523        igb_write_rss_indir_tbl(adapter);
3524
3525        /* Disable raw packet checksumming so that RSS hash is placed in
3526         * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
3527         * offloads as they are enabled by default
3528         */
3529        rxcsum = rd32(E1000_RXCSUM);
3530        rxcsum |= E1000_RXCSUM_PCSD;
3531
3532        if (adapter->hw.mac.type >= e1000_82576)
3533                /* Enable Receive Checksum Offload for SCTP */
3534                rxcsum |= E1000_RXCSUM_CRCOFL;
3535
3536        /* Don't need to set TUOFL or IPOFL, they default to 1 */
3537        wr32(E1000_RXCSUM, rxcsum);
3538
3539        /* Generate RSS hash based on packet types, TCP/UDP
3540         * port numbers and/or IPv4/v6 src and dst addresses
3541         */
3542        mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
3543               E1000_MRQC_RSS_FIELD_IPV4_TCP |
3544               E1000_MRQC_RSS_FIELD_IPV6 |
3545               E1000_MRQC_RSS_FIELD_IPV6_TCP |
3546               E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
3547
3548        if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
3549                mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
3550        if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
3551                mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
3552
3553        /* If VMDq is enabled then we set the appropriate mode for that, else
3554         * we default to RSS so that an RSS hash is calculated per packet even
3555         * if we are only using one queue
3556         */
3557        if (adapter->vfs_allocated_count) {
3558                if (hw->mac.type > e1000_82575) {
3559                        /* Set the default pool for the PF's first queue */
3560                        u32 vtctl = rd32(E1000_VT_CTL);
3561
3562                        vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
3563                                   E1000_VT_CTL_DISABLE_DEF_POOL);
3564                        vtctl |= adapter->vfs_allocated_count <<
3565                                E1000_VT_CTL_DEFAULT_POOL_SHIFT;
3566                        wr32(E1000_VT_CTL, vtctl);
3567                }
3568                if (adapter->rss_queues > 1)
3569                        mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_MQ;
3570                else
3571                        mrqc |= E1000_MRQC_ENABLE_VMDQ;
3572        } else {
3573                if (hw->mac.type != e1000_i211)
3574                        mrqc |= E1000_MRQC_ENABLE_RSS_MQ;
3575        }
3576        igb_vmm_control(adapter);
3577
3578        wr32(E1000_MRQC, mrqc);
3579}
3580
3581/**
3582 *  igb_setup_rctl - configure the receive control registers
3583 *  @adapter: Board private structure
3584 **/
3585void igb_setup_rctl(struct igb_adapter *adapter)
3586{
3587        struct e1000_hw *hw = &adapter->hw;
3588        u32 rctl;
3589
3590        rctl = rd32(E1000_RCTL);
3591
3592        rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3593        rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
3594
3595        rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
3596                (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3597
3598        /* enable stripping of CRC. It's unlikely this will break BMC
3599         * redirection as it did with e1000. Newer features require
3600         * that the HW strips the CRC.
3601         */
3602        rctl |= E1000_RCTL_SECRC;
3603
3604        /* disable store bad packets and clear size bits. */
3605        rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3606
3607        /* enable LPE to allow for reception of jumbo frames */
3608        rctl |= E1000_RCTL_LPE;
3609
3610        /* disable queue 0 to prevent tail write w/o re-config */
3611        wr32(E1000_RXDCTL(0), 0);
3612
3613        /* Attention!!!  For SR-IOV PF driver operations you must enable
3614         * queue drop for all VF and PF queues to prevent head of line blocking
3615         * if an un-trusted VF does not provide descriptors to hardware.
3616         */
3617        if (adapter->vfs_allocated_count) {
3618                /* set all queue drop enable bits */
3619                wr32(E1000_QDE, ALL_QUEUES);
3620        }
3621
3622        /* This is useful for sniffing bad packets. */
3623        if (adapter->netdev->features & NETIF_F_RXALL) {
3624                /* UPE and MPE will be handled by normal PROMISC logic
3625                 * in e1000e_set_rx_mode
3626                 */
3627                rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
3628                         E1000_RCTL_BAM | /* RX All Bcast Pkts */
3629                         E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
3630
3631                rctl &= ~(E1000_RCTL_DPF | /* Allow filtered pause */
3632                          E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
3633                /* Do not mess with E1000_CTRL_VME, it affects transmit as well,
3634                 * and that breaks VLANs.
3635                 */
3636        }
3637
3638        wr32(E1000_RCTL, rctl);
3639}
3640
3641static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3642                                   int vfn)
3643{
3644        struct e1000_hw *hw = &adapter->hw;
3645        u32 vmolr;
3646
3647        if (size > MAX_JUMBO_FRAME_SIZE)
3648                size = MAX_JUMBO_FRAME_SIZE;
3649
3650        vmolr = rd32(E1000_VMOLR(vfn));
3651        vmolr &= ~E1000_VMOLR_RLPML_MASK;
3652        vmolr |= size | E1000_VMOLR_LPE;
3653        wr32(E1000_VMOLR(vfn), vmolr);
3654
3655        return 0;
3656}
3657
3658static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter,
3659                                         int vfn, bool enable)
3660{
3661        struct e1000_hw *hw = &adapter->hw;
3662        u32 val, reg;
3663
3664        if (hw->mac.type < e1000_82576)
3665                return;
3666
3667        if (hw->mac.type == e1000_i350)
3668                reg = E1000_DVMOLR(vfn);
3669        else
3670                reg = E1000_VMOLR(vfn);
3671
3672        val = rd32(reg);
3673        if (enable)
3674                val |= E1000_VMOLR_STRVLAN;
3675        else
3676                val &= ~(E1000_VMOLR_STRVLAN);
3677        wr32(reg, val);
3678}
3679
3680static inline void igb_set_vmolr(struct igb_adapter *adapter,
3681                                 int vfn, bool aupe)
3682{
3683        struct e1000_hw *hw = &adapter->hw;
3684        u32 vmolr;
3685
3686        /* This register exists only on 82576 and newer, so on older parts
3687         * we should exit and do nothing
3688         */
3689        if (hw->mac.type < e1000_82576)
3690                return;
3691
3692        vmolr = rd32(E1000_VMOLR(vfn));
3693        if (aupe)
3694                vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3695        else
3696                vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3697
3698        /* clear all bits that might not be set */
3699        vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3700
3701        if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3702                vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3703        /* for VMDq only allow the VFs and pool 0 to accept broadcast and
3704         * multicast packets
3705         */
3706        if (vfn <= adapter->vfs_allocated_count)
3707                vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3708
3709        wr32(E1000_VMOLR(vfn), vmolr);
3710}
3711
3712/**
3713 *  igb_configure_rx_ring - Configure a receive ring after Reset
3714 *  @adapter: board private structure
3715 *  @ring: receive ring to be configured
3716 *
3717 *  Configure the Rx unit of the MAC after a reset.
3718 **/
3719void igb_configure_rx_ring(struct igb_adapter *adapter,
3720                           struct igb_ring *ring)
3721{
3722        struct e1000_hw *hw = &adapter->hw;
3723        u64 rdba = ring->dma;
3724        int reg_idx = ring->reg_idx;
3725        u32 srrctl = 0, rxdctl = 0;
3726
3727        /* disable the queue */
3728        wr32(E1000_RXDCTL(reg_idx), 0);
3729
3730        /* Set DMA base address registers */
3731        wr32(E1000_RDBAL(reg_idx),
3732             rdba & 0x00000000ffffffffULL);
3733        wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3734        wr32(E1000_RDLEN(reg_idx),
3735             ring->count * sizeof(union e1000_adv_rx_desc));
3736
3737        /* initialize head and tail */
3738        ring->tail = adapter->io_addr + E1000_RDT(reg_idx);
3739        wr32(E1000_RDH(reg_idx), 0);
3740        writel(0, ring->tail);
3741
3742        /* set descriptor configuration */
3743        srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3744        srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3745        srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3746        if (hw->mac.type >= e1000_82580)
3747                srrctl |= E1000_SRRCTL_TIMESTAMP;
3748        /* Only set Drop Enable if we are supporting multiple queues */
3749        if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3750                srrctl |= E1000_SRRCTL_DROP_EN;
3751
3752        wr32(E1000_SRRCTL(reg_idx), srrctl);
3753
3754        /* set filtering for VMDQ pools */
3755        igb_set_vmolr(adapter, reg_idx & 0x7, true);
3756
3757        rxdctl |= IGB_RX_PTHRESH;
3758        rxdctl |= IGB_RX_HTHRESH << 8;
3759        rxdctl |= IGB_RX_WTHRESH << 16;
3760
3761        /* enable receive descriptor fetching */
3762        rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3763        wr32(E1000_RXDCTL(reg_idx), rxdctl);
3764}
3765
3766/**
3767 *  igb_configure_rx - Configure receive Unit after Reset
3768 *  @adapter: board private structure
3769 *
3770 *  Configure the Rx unit of the MAC after a reset.
3771 **/
3772static void igb_configure_rx(struct igb_adapter *adapter)
3773{
3774        int i;
3775
3776        /* set the correct pool for the PF default MAC address in entry 0 */
3777        igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3778                         adapter->vfs_allocated_count);
3779
3780        /* Setup the HW Rx Head and Tail Descriptor Pointers and
3781         * the Base and Length of the Rx Descriptor Ring
3782         */
3783        for (i = 0; i < adapter->num_rx_queues; i++)
3784                igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3785}
3786
3787/**
3788 *  igb_free_tx_resources - Free Tx Resources per Queue
3789 *  @tx_ring: Tx descriptor ring for a specific queue
3790 *
3791 *  Free all transmit software resources
3792 **/
3793void igb_free_tx_resources(struct igb_ring *tx_ring)
3794{
3795        igb_clean_tx_ring(tx_ring);
3796
3797        vfree(tx_ring->tx_buffer_info);
3798        tx_ring->tx_buffer_info = NULL;
3799
3800        /* if not set, then don't free */
3801        if (!tx_ring->desc)
3802                return;
3803
3804        dma_free_coherent(tx_ring->dev, tx_ring->size,
3805                          tx_ring->desc, tx_ring->dma);
3806
3807        tx_ring->desc = NULL;
3808}
3809
3810/**
3811 *  igb_free_all_tx_resources - Free Tx Resources for All Queues
3812 *  @adapter: board private structure
3813 *
3814 *  Free all transmit software resources
3815 **/
3816static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3817{
3818        int i;
3819
3820        for (i = 0; i < adapter->num_tx_queues; i++)
3821                if (adapter->tx_ring[i])
3822                        igb_free_tx_resources(adapter->tx_ring[i]);
3823}
3824
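    /* release the skb and DMA mapping held by a Tx buffer */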
3825void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3826                                    struct igb_tx_buffer *tx_buffer)
3827{
3828        if (tx_buffer->skb) {
3829                dev_kfree_skb_any(tx_buffer->skb);
3830                if (dma_unmap_len(tx_buffer, len))
3831                        dma_unmap_single(ring->dev,
3832                                         dma_unmap_addr(tx_buffer, dma),
3833                                         dma_unmap_len(tx_buffer, len),
3834                                         DMA_TO_DEVICE);
3835        } else if (dma_unmap_len(tx_buffer, len)) {
3836                dma_unmap_page(ring->dev,
3837                               dma_unmap_addr(tx_buffer, dma),
3838                               dma_unmap_len(tx_buffer, len),
3839                               DMA_TO_DEVICE);
3840        }
3841        tx_buffer->next_to_watch = NULL;
3842        tx_buffer->skb = NULL;
3843        dma_unmap_len_set(tx_buffer, len, 0);
3844        /* buffer_info must be completely set up in the transmit path */
3845}
3846
3847/**
3848 *  igb_clean_tx_ring - Free Tx Buffers
3849 *  @tx_ring: ring to be cleaned
3850 **/
3851static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3852{
3853        struct igb_tx_buffer *buffer_info;
3854        unsigned long size;
3855        u16 i;
3856
3857        if (!tx_ring->tx_buffer_info)
3858                return;
3859        /* Free all the Tx ring sk_buffs */
3860
3861        for (i = 0; i < tx_ring->count; i++) {
3862                buffer_info = &tx_ring->tx_buffer_info[i];
3863                igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3864        }
3865
3866        netdev_tx_reset_queue(txring_txq(tx_ring));
3867
3868        size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3869        memset(tx_ring->tx_buffer_info, 0, size);
3870
3871        /* Zero out the descriptor ring */
3872        memset(tx_ring->desc, 0, tx_ring->size);
3873
3874        tx_ring->next_to_use = 0;
3875        tx_ring->next_to_clean = 0;
3876}
3877
3878/**
3879 *  igb_clean_all_tx_rings - Free Tx Buffers for all queues
3880 *  @adapter: board private structure
3881 **/
3882static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3883{
3884        int i;
3885
3886        for (i = 0; i < adapter->num_tx_queues; i++)
3887                if (adapter->tx_ring[i])
3888                        igb_clean_tx_ring(adapter->tx_ring[i]);
3889}
3890
3891/**
3892 *  igb_free_rx_resources - Free Rx Resources
3893 *  @rx_ring: ring to clean the resources from
3894 *
3895 *  Free all receive software resources
3896 **/
3897void igb_free_rx_resources(struct igb_ring *rx_ring)
3898{
3899        igb_clean_rx_ring(rx_ring);
3900
3901        vfree(rx_ring->rx_buffer_info);
3902        rx_ring->rx_buffer_info = NULL;
3903
3904        /* if not set, then don't free */
3905        if (!rx_ring->desc)
3906                return;
3907
3908        dma_free_coherent(rx_ring->dev, rx_ring->size,
3909                          rx_ring->desc, rx_ring->dma);
3910
3911        rx_ring->desc = NULL;
3912}
3913
3914/**
3915 *  igb_free_all_rx_resources - Free Rx Resources for All Queues
3916 *  @adapter: board private structure
3917 *
3918 *  Free all receive software resources
3919 **/
3920static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3921{
3922        int i;
3923
3924        for (i = 0; i < adapter->num_rx_queues; i++)
3925                if (adapter->rx_ring[i])
3926                        igb_free_rx_resources(adapter->rx_ring[i]);
3927}
3928
3929/**
3930 *  igb_clean_rx_ring - Free Rx Buffers per Queue
3931 *  @rx_ring: ring to free buffers from
3932 **/
3933static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3934{
3935        unsigned long size;
3936        u16 i;
3937
3938        if (rx_ring->skb)
3939                dev_kfree_skb(rx_ring->skb);
3940        rx_ring->skb = NULL;
3941
3942        if (!rx_ring->rx_buffer_info)
3943                return;
3944
3945        /* Free all the Rx ring sk_buffs */
3946        for (i = 0; i < rx_ring->count; i++) {
3947                struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3948
3949                if (!buffer_info->page)
3950                        continue;
3951
3952                /* Invalidate cache lines that may have been written to by
3953                 * device so that we avoid corrupting memory.
3954                 */
3955                dma_sync_single_range_for_cpu(rx_ring->dev,
3956                                              buffer_info->dma,
3957                                              buffer_info->page_offset,
3958                                              IGB_RX_BUFSZ,
3959                                              DMA_FROM_DEVICE);
3960
3961                /* free resources associated with mapping */
3962                dma_unmap_page_attrs(rx_ring->dev,
3963                                     buffer_info->dma,
3964                                     PAGE_SIZE,
3965                                     DMA_FROM_DEVICE,
3966                                     DMA_ATTR_SKIP_CPU_SYNC);
3967                __page_frag_cache_drain(buffer_info->page,
3968                                        buffer_info->pagecnt_bias);
3969
3970                buffer_info->page = NULL;
3971        }
3972
3973        size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3974        memset(rx_ring->rx_buffer_info, 0, size);
3975
3976        /* Zero out the descriptor ring */
3977        memset(rx_ring->desc, 0, rx_ring->size);
3978
3979        rx_ring->next_to_alloc = 0;
3980        rx_ring->next_to_clean = 0;
3981        rx_ring->next_to_use = 0;
3982}
3983
3984/**
3985 *  igb_clean_all_rx_rings - Free Rx Buffers for all queues
3986 *  @adapter: board private structure
3987 **/
3988static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3989{
3990        int i;
3991
3992        for (i = 0; i < adapter->num_rx_queues; i++)
3993                if (adapter->rx_ring[i])
3994                        igb_clean_rx_ring(adapter->rx_ring[i]);
3995}
3996
3997/**
3998 *  igb_set_mac - Change the Ethernet Address of the NIC
3999 *  @netdev: network interface device structure
4000 *  @p: pointer to an address structure
4001 *
4002 *  Returns 0 on success, negative on failure
4003 **/
4004static int igb_set_mac(struct net_device *netdev, void *p)
4005{
4006        struct igb_adapter *adapter = netdev_priv(netdev);
4007        struct e1000_hw *hw = &adapter->hw;
4008        struct sockaddr *addr = p;
4009
4010        if (!is_valid_ether_addr(addr->sa_data))
4011                return -EADDRNOTAVAIL;
4012
4013        memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
4014        memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
4015
4016        /* set the correct pool for the new PF MAC address in entry 0 */
4017        igb_rar_set_qsel(adapter, hw->mac.addr, 0,
4018                         adapter->vfs_allocated_count);
4019
4020        return 0;
4021}
4022
4023/**
4024 *  igb_write_mc_addr_list - write multicast addresses to MTA
4025 *  @netdev: network interface device structure
4026 *
4027 *  Writes multicast address list to the MTA hash table.
4028 *  Returns: -ENOMEM on failure
4029 *           0 on no addresses written
4030 *           X on writing X addresses to MTA
4031 **/
4032static int igb_write_mc_addr_list(struct net_device *netdev)
4033{
4034        struct igb_adapter *adapter = netdev_priv(netdev);
4035        struct e1000_hw *hw = &adapter->hw;
4036        struct netdev_hw_addr *ha;
4037        u8  *mta_list;
4038        int i;
4039
4040        if (netdev_mc_empty(netdev)) {
4041                /* nothing to program, so clear mc list */
4042                igb_update_mc_addr_list(hw, NULL, 0);
4043                igb_restore_vf_multicasts(adapter);
4044                return 0;
4045        }
4046
4047        mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
4048        if (!mta_list)
4049                return -ENOMEM;
4050
4051        /* The shared function expects a packed array of only addresses. */
4052        i = 0;
4053        netdev_for_each_mc_addr(ha, netdev)
4054                memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
4055
4056        igb_update_mc_addr_list(hw, mta_list, i);
4057        kfree(mta_list);
4058
4059        return netdev_mc_count(netdev);
4060}
4061
4062/**
4063 *  igb_write_uc_addr_list - write unicast addresses to RAR table
4064 *  @netdev: network interface device structure
4065 *
4066 *  Writes unicast address list to the RAR table.
4067 *  Returns: -ENOMEM on failure/insufficient address space
4068 *           0 on no addresses written
4069 *           X on writing X addresses to the RAR table
4070 **/
4071static int igb_write_uc_addr_list(struct net_device *netdev)
4072{
4073        struct igb_adapter *adapter = netdev_priv(netdev);
4074        struct e1000_hw *hw = &adapter->hw;
4075        unsigned int vfn = adapter->vfs_allocated_count;
4076        unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
4077        int count = 0;
4078
4079        /* return ENOMEM indicating insufficient memory for addresses */
4080        if (netdev_uc_count(netdev) > rar_entries)
4081                return -ENOMEM;
4082
4083        if (!netdev_uc_empty(netdev) && rar_entries) {
4084                struct netdev_hw_addr *ha;
4085
4086                netdev_for_each_uc_addr(ha, netdev) {
4087                        if (!rar_entries)
4088                                break;
4089                        igb_rar_set_qsel(adapter, ha->addr,
4090                                         rar_entries--,
4091                                         vfn);
4092                        count++;
4093                }
4094        }
4095        /* write the addresses in reverse order to avoid write combining */
4096        for (; rar_entries > 0 ; rar_entries--) {
4097                wr32(E1000_RAH(rar_entries), 0);
4098                wr32(E1000_RAL(rar_entries), 0);
4099        }
4100        wrfl();
4101
4102        return count;
4103}
4104
4105static int igb_vlan_promisc_enable(struct igb_adapter *adapter)
4106{
4107        struct e1000_hw *hw = &adapter->hw;
4108        u32 i, pf_id;
4109
4110        switch (hw->mac.type) {
4111        case e1000_i210:
4112        case e1000_i211:
4113        case e1000_i350:
4114                /* VLAN filtering needed for VLAN prio filter */
4115                if (adapter->netdev->features & NETIF_F_NTUPLE)
4116                        break;
4117                /* fall through */
4118        case e1000_82576:
4119        case e1000_82580:
4120        case e1000_i354:
4121                /* VLAN filtering needed for pool filtering */
4122                if (adapter->vfs_allocated_count)
4123                        break;
4124                /* fall through */
4125        default:
4126                return 1;
4127        }
4128
4129        /* We are already in VLAN promisc, nothing to do */
4130        if (adapter->flags & IGB_FLAG_VLAN_PROMISC)
4131                return 0;
4132
4133        if (!adapter->vfs_allocated_count)
4134                goto set_vfta;
4135
4136        /* Add PF to all active pools */
4137        pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT;
4138
4139        for (i = E1000_VLVF_ARRAY_SIZE; --i;) {
4140                u32 vlvf = rd32(E1000_VLVF(i));
4141
4142                vlvf |= BIT(pf_id);
4143                wr32(E1000_VLVF(i), vlvf);
4144        }
4145
4146set_vfta:
4147        /* Set all bits in the VLAN filter table array */
4148        for (i = E1000_VLAN_FILTER_TBL_SIZE; i--;)
4149                hw->mac.ops.write_vfta(hw, i, ~0U);
4150
4151        /* Set flag so we don't redo unnecessary work */
4152        adapter->flags |= IGB_FLAG_VLAN_PROMISC;
4153
4154        return 0;
4155}
4156
4157#define VFTA_BLOCK_SIZE 8
4158static void igb_scrub_vfta(struct igb_adapter *adapter, u32 vfta_offset)
4159{
4160        struct e1000_hw *hw = &adapter->hw;
4161        u32 vfta[VFTA_BLOCK_SIZE] = { 0 };
4162        u32 vid_start = vfta_offset * 32;
4163        u32 vid_end = vid_start + (VFTA_BLOCK_SIZE * 32);
4164        u32 i, vid, word, bits, pf_id;
4165
4166        /* guarantee that we don't scrub out management VLAN */
4167        vid = adapter->mng_vlan_id;
4168        if (vid >= vid_start && vid < vid_end)
4169                vfta[(vid - vid_start) / 32] |= BIT(vid % 32);
4170
4171        if (!adapter->vfs_allocated_count)
4172                goto set_vfta;
4173
4174        pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT;
4175
4176        for (i = E1000_VLVF_ARRAY_SIZE; --i;) {
4177                u32 vlvf = rd32(E1000_VLVF(i));
4178
4179                /* pull VLAN ID from VLVF */
4180                vid = vlvf & VLAN_VID_MASK;
4181
4182                /* only concern ourselves with a certain range */
4183                if (vid < vid_start || vid >= vid_end)
4184                        continue;
4185
4186                if (vlvf & E1000_VLVF_VLANID_ENABLE) {
4187                        /* record VLAN ID in VFTA */
4188                        vfta[(vid - vid_start) / 32] |= BIT(vid % 32);
4189
4190                        /* if PF is part of this then continue */
4191                        if (test_bit(vid, adapter->active_vlans))
4192                                continue;
4193                }
4194
4195                /* remove PF from the pool */
4196                bits = ~BIT(pf_id);
4197                bits &= rd32(E1000_VLVF(i));
4198                wr32(E1000_VLVF(i), bits);
4199        }
4200
4201set_vfta:
4202        /* extract values from active_vlans and write back to VFTA */
4203        for (i = VFTA_BLOCK_SIZE; i--;) {
4204                vid = (vfta_offset + i) * 32;
4205                word = vid / BITS_PER_LONG;
4206                bits = vid % BITS_PER_LONG;
4207
4208                vfta[i] |= adapter->active_vlans[word] >> bits;
4209
4210                hw->mac.ops.write_vfta(hw, vfta_offset + i, vfta[i]);
4211        }
4212}
4213
4214static void igb_vlan_promisc_disable(struct igb_adapter *adapter)
4215{
4216        u32 i;
4217
4218        /* We are not in VLAN promisc, nothing to do */
4219        if (!(adapter->flags & IGB_FLAG_VLAN_PROMISC))
4220                return;
4221
4222        /* Set flag so we don't redo unnecessary work */
4223        adapter->flags &= ~IGB_FLAG_VLAN_PROMISC;
4224
4225        for (i = 0; i < E1000_VLAN_FILTER_TBL_SIZE; i += VFTA_BLOCK_SIZE)
4226                igb_scrub_vfta(adapter, i);
4227}
4228
4229/**
4230 *  igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
4231 *  @netdev: network interface device structure
4232 *
4233 *  The set_rx_mode entry point is called whenever the unicast or multicast
4234 *  address lists or the network interface flags are updated.  This routine is
4235 *  responsible for configuring the hardware for proper unicast, multicast,
4236 *  promiscuous mode, and all-multi behavior.
4237 **/
4238static void igb_set_rx_mode(struct net_device *netdev)
4239{
4240        struct igb_adapter *adapter = netdev_priv(netdev);
4241        struct e1000_hw *hw = &adapter->hw;
4242        unsigned int vfn = adapter->vfs_allocated_count;
4243        u32 rctl = 0, vmolr = 0;
4244        int count;
4245
4246        /* Check for Promiscuous and All Multicast modes */
4247        if (netdev->flags & IFF_PROMISC) {
4248                rctl |= E1000_RCTL_UPE | E1000_RCTL_MPE;
4249                vmolr |= E1000_VMOLR_MPME;
4250
4251                /* enable use of UTA filter to force packets to default pool */
4252                if (hw->mac.type == e1000_82576)
4253                        vmolr |= E1000_VMOLR_ROPE;
4254        } else {
4255                if (netdev->flags & IFF_ALLMULTI) {
4256                        rctl |= E1000_RCTL_MPE;
4257                        vmolr |= E1000_VMOLR_MPME;
4258                } else {
4259                        /* Write addresses to the MTA; if the attempt fails
4260                         * then we should just turn on promiscuous mode so
4261                         * that we can at least receive multicast traffic
4262                         */
4263                        count = igb_write_mc_addr_list(netdev);
4264                        if (count < 0) {
4265                                rctl |= E1000_RCTL_MPE;
4266                                vmolr |= E1000_VMOLR_MPME;
4267                        } else if (count) {
4268                                vmolr |= E1000_VMOLR_ROMPE;
4269                        }
4270                }
4271        }
4272
4273        /* Write addresses to available RAR registers, if there is not
4274         * sufficient space to store all the addresses then enable
4275         * unicast promiscuous mode
4276         */
4277        count = igb_write_uc_addr_list(netdev);
4278        if (count < 0) {
4279                rctl |= E1000_RCTL_UPE;
4280                vmolr |= E1000_VMOLR_ROPE;
4281        }
4282
4283        /* enable VLAN filtering by default */
4284        rctl |= E1000_RCTL_VFE;
4285
4286        /* disable VLAN filtering for modes that require it */
4287        if ((netdev->flags & IFF_PROMISC) ||
4288            (netdev->features & NETIF_F_RXALL)) {
4289                /* if we fail to set all rules then just clear VFE */
4290                if (igb_vlan_promisc_enable(adapter))
4291                        rctl &= ~E1000_RCTL_VFE;
4292        } else {
4293                igb_vlan_promisc_disable(adapter);
4294        }
4295
4296        /* update state of unicast, multicast, and VLAN filtering modes */
4297        rctl |= rd32(E1000_RCTL) & ~(E1000_RCTL_UPE | E1000_RCTL_MPE |
4298                                     E1000_RCTL_VFE);
4299        wr32(E1000_RCTL, rctl);
4300
4301        /* In order to support SR-IOV and eventually VMDq it is necessary to set
4302         * the VMOLR to enable the appropriate modes.  Without this workaround
4303         * we will have issues with VLAN tag stripping not being done for frames
4304         * that are only arriving because we are the default pool
4305         */
4306        if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
4307                return;
4308
4309        /* set UTA to appropriate mode */
4310        igb_set_uta(adapter, !!(vmolr & E1000_VMOLR_ROPE));
4311
4312        vmolr |= rd32(E1000_VMOLR(vfn)) &
4313                 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
4314
4315        /* enable Rx jumbo frames, no need for restriction */
4316        vmolr &= ~E1000_VMOLR_RLPML_MASK;
4317        vmolr |= MAX_JUMBO_FRAME_SIZE | E1000_VMOLR_LPE;
4318
4319        wr32(E1000_VMOLR(vfn), vmolr);
4320        wr32(E1000_RLPML, MAX_JUMBO_FRAME_SIZE);
4321
4322        igb_restore_vf_multicasts(adapter);
4323}
4324
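    /* accumulate any VF spoof events latched in WVBR; igb_spoof_check() warns about and clears them */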
4325static void igb_check_wvbr(struct igb_adapter *adapter)
4326{
4327        struct e1000_hw *hw = &adapter->hw;
4328        u32 wvbr = 0;
4329
4330        switch (hw->mac.type) {
4331        case e1000_82576:
4332        case e1000_i350:
4333                wvbr = rd32(E1000_WVBR);
4334                if (!wvbr)
4335                        return;
4336                break;
4337        default:
4338                break;
4339        }
4340
4341        adapter->wvbr |= wvbr;
4342}
4343
4344#define IGB_STAGGERED_QUEUE_OFFSET 8
4345
4346static void igb_spoof_check(struct igb_adapter *adapter)
4347{
4348        int j;
4349
4350        if (!adapter->wvbr)
4351                return;
4352
4353        for (j = 0; j < adapter->vfs_allocated_count; j++) {
4354                if (adapter->wvbr & BIT(j) ||
4355                    adapter->wvbr & BIT(j + IGB_STAGGERED_QUEUE_OFFSET)) {
4356                        dev_warn(&adapter->pdev->dev,
4357                                "Spoof event(s) detected on VF %d\n", j);
4358                        adapter->wvbr &=
4359                                ~(BIT(j) |
4360                                  BIT(j + IGB_STAGGERED_QUEUE_OFFSET));
4361                }
4362        }
4363}
4364
4365/* Need to wait a few seconds after link up to get diagnostic information from
4366 * the phy
4367 */
4368static void igb_update_phy_info(unsigned long data)
4369{
4370        struct igb_adapter *adapter = (struct igb_adapter *) data;
4371        igb_get_phy_info(&adapter->hw);
4372}
4373
4374/**
4375 *  igb_has_link - check shared code for link and determine up/down
4376 *  @adapter: pointer to driver private info
4377 **/
4378bool igb_has_link(struct igb_adapter *adapter)
4379{
4380        struct e1000_hw *hw = &adapter->hw;
4381        bool link_active = false;
4382
4383        /* get_link_status is set on LSC (link status change) interrupt or
4384         * rx sequence error interrupt.  get_link_status stays set until
4385         * e1000_check_for_link establishes link, and applies to copper
4386         * adapters ONLY
4387         */
4388        switch (hw->phy.media_type) {
4389        case e1000_media_type_copper:
4390                if (!hw->mac.get_link_status)
4391                        return true;
                    /* fall through */
4392        case e1000_media_type_internal_serdes:
4393                hw->mac.ops.check_for_link(hw);
4394                link_active = !hw->mac.get_link_status;
4395                break;
4396        default:
4397        case e1000_media_type_unknown:
4398                break;
4399        }
4400
4401        if (((hw->mac.type == e1000_i210) ||
4402             (hw->mac.type == e1000_i211)) &&
4403             (hw->phy.id == I210_I_PHY_ID)) {
4404                if (!netif_carrier_ok(adapter->netdev)) {
4405                        adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
4406                } else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) {
4407                        adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE;
4408                        adapter->link_check_timeout = jiffies;
4409                }
4410        }
4411
4412        return link_active;
4413}
4414
4415static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
4416{
4417        bool ret = false;
4418        u32 ctrl_ext, thstat;
4419
4420        /* check for thermal sensor event on i350 copper only */
4421        if (hw->mac.type == e1000_i350) {
4422                thstat = rd32(E1000_THSTAT);
4423                ctrl_ext = rd32(E1000_CTRL_EXT);
4424
4425                if ((hw->phy.media_type == e1000_media_type_copper) &&
4426                    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII))
4427                        ret = !!(thstat & event);
4428        }
4429
4430        return ret;
4431}
4432
4433/**
4434 *  igb_check_lvmmc - check for malformed packets received
4435 *  and indicated in LVMMC register
4436 *  @adapter: pointer to adapter
4437 **/
4438static void igb_check_lvmmc(struct igb_adapter *adapter)
4439{
4440        struct e1000_hw *hw = &adapter->hw;
4441        u32 lvmmc;
4442
4443        lvmmc = rd32(E1000_LVMMC);
4444        if (lvmmc) {
4445                if (unlikely(net_ratelimit())) {
4446                        netdev_warn(adapter->netdev,
4447                                    "malformed Tx packet detected and dropped, LVMMC:0x%08x\n",
4448                                    lvmmc);
4449                }
4450        }
4451}
4452
4453/**
4454 *  igb_watchdog - Timer Call-back
4455 *  @data: pointer to adapter cast into an unsigned long
4456 **/
4457static void igb_watchdog(unsigned long data)
4458{
4459        struct igb_adapter *adapter = (struct igb_adapter *)data;
4460        /* Do the rest outside of interrupt context */
4461        schedule_work(&adapter->watchdog_task);
4462}
4463
4464static void igb_watchdog_task(struct work_struct *work)
4465{
4466        struct igb_adapter *adapter = container_of(work,
4467                                                   struct igb_adapter,
4468                                                   watchdog_task);
4469        struct e1000_hw *hw = &adapter->hw;
4470        struct e1000_phy_info *phy = &hw->phy;
4471        struct net_device *netdev = adapter->netdev;
4472        u32 link;
4473        int i;
4474        u32 connsw;
4475        u16 phy_data, retry_count = 20;
4476
4477        link = igb_has_link(adapter);
4478
4479        if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) {
4480                if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
4481                        adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
4482                else
4483                        link = false;
4484        }
4485
4486        /* Force link down if we have fiber to swap to */
4487        if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
4488                if (hw->phy.media_type == e1000_media_type_copper) {
4489                        connsw = rd32(E1000_CONNSW);
4490                        if (!(connsw & E1000_CONNSW_AUTOSENSE_EN))
4491                                link = 0;
4492                }
4493        }
4494        if (link) {
4495                /* Perform a reset if the media type changed. */
4496                if (hw->dev_spec._82575.media_changed) {
4497                        hw->dev_spec._82575.media_changed = false;
4498                        adapter->flags |= IGB_FLAG_MEDIA_RESET;
4499                        igb_reset(adapter);
4500                }
4501                /* Cancel scheduled suspend requests. */
4502                pm_runtime_resume(netdev->dev.parent);
4503
4504                if (!netif_carrier_ok(netdev)) {
4505                        u32 ctrl;
4506
4507                        hw->mac.ops.get_speed_and_duplex(hw,
4508                                                         &adapter->link_speed,
4509                                                         &adapter->link_duplex);
4510
4511                        ctrl = rd32(E1000_CTRL);
4512                        /* Link status message must follow this format */
4513                        netdev_info(netdev,
4514                               "igb: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
4515                               netdev->name,
4516                               adapter->link_speed,
4517                               adapter->link_duplex == FULL_DUPLEX ?
4518                               "Full" : "Half",
4519                               (ctrl & E1000_CTRL_TFCE) &&
4520                               (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
4521                               (ctrl & E1000_CTRL_RFCE) ?  "RX" :
4522                               (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
4523
4524                        /* disable EEE if enabled */
4525                        if ((adapter->flags & IGB_FLAG_EEE) &&
4526                                (adapter->link_duplex == HALF_DUPLEX)) {
4527                                dev_info(&adapter->pdev->dev,
4528                                "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex.\n");
4529                                adapter->hw.dev_spec._82575.eee_disable = true;
4530                                adapter->flags &= ~IGB_FLAG_EEE;
4531                        }
4532
4533                        /* check if SmartSpeed worked */
4534                        igb_check_downshift(hw);
4535                        if (phy->speed_downgraded)
4536                                netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
4537
4538                        /* check for thermal sensor event */
4539                        if (igb_thermal_sensor_event(hw,
4540                            E1000_THSTAT_LINK_THROTTLE))
4541                                netdev_info(netdev, "The network adapter link speed was downshifted because it overheated\n");
4542
4543                        /* adjust timeout factor according to speed/duplex */
4544                        adapter->tx_timeout_factor = 1;
4545                        switch (adapter->link_speed) {
4546                        case SPEED_10:
4547                                adapter->tx_timeout_factor = 14;
4548                                break;
4549                        case SPEED_100:
4550                                /* maybe add some timeout factor ? */
4551                                break;
4552                        }
4553
4554                        if (adapter->link_speed != SPEED_1000)
4555                                goto no_wait;
4556
4557                        /* wait for Remote receiver status OK */
4558retry_read_status:
4559                        if (!igb_read_phy_reg(hw, PHY_1000T_STATUS,
4560                                              &phy_data)) {
4561                                if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
4562                                    retry_count) {
4563                                        msleep(100);
4564                                        retry_count--;
4565                                        goto retry_read_status;
4566                                } else if (!retry_count) {
4567                                        dev_err(&adapter->pdev->dev, "exceeded max 2 second wait\n");
4568                                }
4569                        } else {
4570                                dev_err(&adapter->pdev->dev, "failed to read 1000Base-T Status Reg\n");
4571                        }
4572no_wait:
4573                        netif_carrier_on(netdev);
4574
4575                        igb_ping_all_vfs(adapter);
4576                        igb_check_vf_rate_limit(adapter);
4577
4578                        /* link state has changed, schedule phy info update */
4579                        if (!test_bit(__IGB_DOWN, &adapter->state))
4580                                mod_timer(&adapter->phy_info_timer,
4581                                          round_jiffies(jiffies + 2 * HZ));
4582                }
4583        } else {
4584                if (netif_carrier_ok(netdev)) {
4585                        adapter->link_speed = 0;
4586                        adapter->link_duplex = 0;
4587
4588                        /* check for thermal sensor event */
4589                        if (igb_thermal_sensor_event(hw,
4590                            E1000_THSTAT_PWR_DOWN)) {
4591                                netdev_err(netdev, "The network adapter was stopped because it overheated\n");
4592                        }
4593
4594                        /* Link status message must follow this format */
4595                        netdev_info(netdev, "igb: %s NIC Link is Down\n",
4596                               netdev->name);
4597                        netif_carrier_off(netdev);
4598
4599                        igb_ping_all_vfs(adapter);
4600
4601                        /* link state has changed, schedule phy info update */
4602                        if (!test_bit(__IGB_DOWN, &adapter->state))
4603                                mod_timer(&adapter->phy_info_timer,
4604                                          round_jiffies(jiffies + 2 * HZ));
4605
4606                        /* link is down, time to check for alternate media */
4607                        if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
4608                                igb_check_swap_media(adapter);
4609                                if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
4610                                        schedule_work(&adapter->reset_task);
4611                                        /* return immediately */
4612                                        return;
4613                                }
4614                        }
4615                        pm_schedule_suspend(netdev->dev.parent,
4616                                            MSEC_PER_SEC * 5);
4617
4618                /* also check for alternate media here */
4619                } else if (!netif_carrier_ok(netdev) &&
4620                           (adapter->flags & IGB_FLAG_MAS_ENABLE)) {
4621                        igb_check_swap_media(adapter);
4622                        if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
4623                                schedule_work(&adapter->reset_task);
4624                                /* return immediately */
4625                                return;
4626                        }
4627                }
4628        }
4629
4630        spin_lock(&adapter->stats64_lock);
4631        igb_update_stats(adapter, &adapter->stats64);
4632        spin_unlock(&adapter->stats64_lock);
4633
4634        for (i = 0; i < adapter->num_tx_queues; i++) {
4635                struct igb_ring *tx_ring = adapter->tx_ring[i];
4636                if (!netif_carrier_ok(netdev)) {
4637                        /* We've lost link, so the controller stops DMA,
4638                         * but we've got queued Tx work that's never going
4639                         * to get done, so reset controller to flush Tx.
4640                         * (Do the reset outside of interrupt context).
4641                         */
4642                        if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
4643                                adapter->tx_timeout_count++;
4644                                schedule_work(&adapter->reset_task);
4645                                /* return immediately since reset is imminent */
4646                                return;
4647                        }
4648                }
4649
4650                /* Force detection of hung controller every watchdog period */
4651                set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
4652        }
4653
4654        /* Cause software interrupt to ensure Rx ring is cleaned */
4655        if (adapter->flags & IGB_FLAG_HAS_MSIX) {
4656                u32 eics = 0;
4657
4658                for (i = 0; i < adapter->num_q_vectors; i++)
4659                        eics |= adapter->q_vector[i]->eims_value;
4660                wr32(E1000_EICS, eics);
4661        } else {
4662                wr32(E1000_ICS, E1000_ICS_RXDMT0);
4663        }
4664
4665        igb_spoof_check(adapter);
4666        igb_ptp_rx_hang(adapter);
4667
4668        /* Check LVMMC register on i350/i354 only */
4669        if ((adapter->hw.mac.type == e1000_i350) ||
4670            (adapter->hw.mac.type == e1000_i354))
4671                igb_check_lvmmc(adapter);
4672
4673        /* Reset the timer */
4674        if (!test_bit(__IGB_DOWN, &adapter->state)) {
4675                if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)
4676                        mod_timer(&adapter->watchdog_timer,
4677                                  round_jiffies(jiffies +  HZ));
4678                else
4679                        mod_timer(&adapter->watchdog_timer,
4680                                  round_jiffies(jiffies + 2 * HZ));
4681        }
4682}
4683
4684enum latency_range {
4685        lowest_latency = 0,
4686        low_latency = 1,
4687        bulk_latency = 2,
4688        latency_invalid = 255
4689};
4690
4691/**
4692 *  igb_update_ring_itr - update the dynamic ITR value based on packet size
4693 *  @q_vector: pointer to q_vector
4694 *
4695 *  Stores a new ITR value based strictly on packet size.  This
4696 *  algorithm is less sophisticated than that used in igb_update_itr,
4697 *  due to the difficulty of synchronizing statistics across multiple
4698 *  receive rings.  The divisors and thresholds used by this function
4699 *  were determined based on theoretical maximum wire speed and testing
4700 *  data, in order to minimize response time while increasing bulk
4701 *  throughput.
4702 *  This functionality is controlled by ethtool's coalescing settings.
4703 *  NOTE:  This function is called only when operating in a multiqueue
4704 *         receive environment.
4705 **/
4706static void igb_update_ring_itr(struct igb_q_vector *q_vector)
4707{
4708        int new_val = q_vector->itr_val;
4709        int avg_wire_size = 0;
4710        struct igb_adapter *adapter = q_vector->adapter;
4711        unsigned int packets;
4712
4713        /* For non-gigabit speeds, just fix the interrupt rate at 4000
4714         * ints/sec - ITR timer value of 120 ticks.
4715         */
4716        if (adapter->link_speed != SPEED_1000) {
4717                new_val = IGB_4K_ITR;
4718                goto set_itr_val;
4719        }
4720
4721        packets = q_vector->rx.total_packets;
4722        if (packets)
4723                avg_wire_size = q_vector->rx.total_bytes / packets;
4724
4725        packets = q_vector->tx.total_packets;
4726        if (packets)
4727                avg_wire_size = max_t(u32, avg_wire_size,
4728                                      q_vector->tx.total_bytes / packets);
4729
4730        /* if avg_wire_size isn't set no work was done */
4731        if (!avg_wire_size)
4732                goto clear_counts;
4733
4734        /* Add 24 bytes to size to account for CRC, preamble, and gap */
4735        avg_wire_size += 24;
4736
4737        /* Don't starve jumbo frames */
4738        avg_wire_size = min(avg_wire_size, 3000);
4739
4740        /* Give a little boost to mid-size frames */
4741        if ((avg_wire_size > 300) && (avg_wire_size < 1200))
4742                new_val = avg_wire_size / 3;
4743        else
4744                new_val = avg_wire_size / 2;
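            /* Worked example (illustrative numbers, not from the sources): an
             * average payload of 576 bytes becomes avg_wire_size = 600 once the
             * 24 bytes of overhead are added, lands in the 300-1200 window and
             * gives new_val = 600 / 3 = 200; a 1500-byte average instead gives
             * (1500 + 24) / 2 = 762, i.e. a longer interrupt interval for bulk
             * traffic.
             */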
4745
4746        /* conservative mode (itr 3) eliminates the lowest_latency setting */
4747        if (new_val < IGB_20K_ITR &&
4748            ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4749             (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4750                new_val = IGB_20K_ITR;
4751
4752set_itr_val:
4753        if (new_val != q_vector->itr_val) {
4754                q_vector->itr_val = new_val;
4755                q_vector->set_itr = 1;
4756        }
4757clear_counts:
4758        q_vector->rx.total_bytes = 0;
4759        q_vector->rx.total_packets = 0;
4760        q_vector->tx.total_bytes = 0;
4761        q_vector->tx.total_packets = 0;
4762}
4763
4764/**
4765 *  igb_update_itr - update the dynamic ITR value based on statistics
4766 *  @q_vector: pointer to q_vector
4767 *  @ring_container: ring info to update the itr for
4768 *
4769 *  Stores a new ITR value based on packets and byte
4770 *  counts during the last interrupt.  The advantage of per interrupt
4771 *  computation is faster updates and more accurate ITR for the current
4772 *  traffic pattern.  Constants in this function were computed
4773 *  based on theoretical maximum wire speed and thresholds were set based
4774 *  on testing data as well as attempting to minimize response time
4775 *  while increasing bulk throughput.
4776 *  This functionality is controlled by ethtool's coalescing settings.
4777 *  NOTE:  These calculations are only valid when operating in a single-
4778 *         queue environment.
4779 **/
4780static void igb_update_itr(struct igb_q_vector *q_vector,
4781                           struct igb_ring_container *ring_container)
4782{
4783        unsigned int packets = ring_container->total_packets;
4784        unsigned int bytes = ring_container->total_bytes;
4785        u8 itrval = ring_container->itr;
4786
4787        /* no packets, exit with status unchanged */
4788        if (packets == 0)
4789                return;
4790
4791        switch (itrval) {
4792        case lowest_latency:
4793                /* handle TSO and jumbo frames */
4794                if (bytes/packets > 8000)
4795                        itrval = bulk_latency;
4796                else if ((packets < 5) && (bytes > 512))
4797                        itrval = low_latency;
4798                break;
4799        case low_latency:  /* 50 usec aka 20000 ints/s */
4800                if (bytes > 10000) {
4801                        /* this if handles the TSO accounting */
4802                        if (bytes/packets > 8000)
4803                                itrval = bulk_latency;
4804                        else if ((packets < 10) || ((bytes/packets) > 1200))
4805                                itrval = bulk_latency;
4806                        else if (packets > 35)
4807                                itrval = lowest_latency;
4808                } else if (bytes/packets > 2000) {
4809                        itrval = bulk_latency;
4810                } else if (packets <= 2 && bytes < 512) {
4811                        itrval = lowest_latency;
4812                }
4813                break;
4814        case bulk_latency: /* 250 usec aka 4000 ints/s */
4815                if (bytes > 25000) {
4816                        if (packets > 35)
4817                                itrval = low_latency;
4818                } else if (bytes < 1500) {
4819                        itrval = low_latency;
4820                }
4821                break;
4822        }
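            /* Worked example (illustrative numbers): in low_latency, an interrupt
             * that serviced 40 packets totalling 60000 bytes has bytes > 10000 and
             * bytes/packets = 1500 > 1200, so itrval moves to bulk_latency; 40
             * packets totalling 4000 bytes match none of the branches above and
             * leave itrval unchanged.
             */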
4823
4824        /* clear work counters since we have the values we need */
4825        ring_container->total_bytes = 0;
4826        ring_container->total_packets = 0;
4827
4828        /* write updated itr to ring container */
4829        ring_container->itr = itrval;
4830}
4831
4832static void igb_set_itr(struct igb_q_vector *q_vector)
4833{
4834        struct igb_adapter *adapter = q_vector->adapter;
4835        u32 new_itr = q_vector->itr_val;
4836        u8 current_itr = 0;
4837
4838        /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4839        if (adapter->link_speed != SPEED_1000) {
4840                current_itr = 0;
4841                new_itr = IGB_4K_ITR;
4842                goto set_itr_now;
4843        }
4844
4845        igb_update_itr(q_vector, &q_vector->tx);
4846        igb_update_itr(q_vector, &q_vector->rx);
4847
4848        current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4849
4850        /* conservative mode (itr 3) eliminates the lowest_latency setting */
4851        if (current_itr == lowest_latency &&
4852            ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4853             (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4854                current_itr = low_latency;
4855
4856        switch (current_itr) {
4857        /* counts and packets in update_itr are dependent on these numbers */
4858        case lowest_latency:
4859                new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4860                break;
4861        case low_latency:
4862                new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4863                break;
4864        case bulk_latency:
4865                new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4866                break;
4867        default:
4868                break;
4869        }
4870
4871set_itr_now:
4872        if (new_itr != q_vector->itr_val) {
4873                /* this attempts to bias the interrupt rate towards Bulk
4874                 * by adding intermediate steps when interrupt rate is
4875                 * increasing
4876                 */
4877                new_itr = new_itr > q_vector->itr_val ?
4878                          max((new_itr * q_vector->itr_val) /
4879                          (new_itr + (q_vector->itr_val >> 2)),
4880                          new_itr) : new_itr;
4881                /* Don't write the value here; it resets the adapter's
4882                 * internal timer, and causes us to delay far longer than
4883                 * we should between interrupts.  Instead, we write the ITR
4884                 * value at the beginning of the next interrupt so the timing
4885                 * ends up being correct.
4886                 */
4887                q_vector->itr_val = new_itr;
4888                q_vector->set_itr = 1;
4889        }
4890}
4891
4892static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4893                            u32 type_tucmd, u32 mss_l4len_idx)
4894{
4895        struct e1000_adv_tx_context_desc *context_desc;
4896        u16 i = tx_ring->next_to_use;
4897
4898        context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4899
4900        i++;
4901        tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4902
4903        /* set bits to identify this as an advanced context descriptor */
4904        type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4905
4906        /* For 82575, context index must be unique per ring. */
4907        if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4908                mss_l4len_idx |= tx_ring->reg_idx << 4;
4909
4910        context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
4911        context_desc->seqnum_seed       = 0;
4912        context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
4913        context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
4914}
4915
4916static int igb_tso(struct igb_ring *tx_ring,
4917                   struct igb_tx_buffer *first,
4918                   u8 *hdr_len)
4919{
4920        u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
4921        struct sk_buff *skb = first->skb;
4922        union {
4923                struct iphdr *v4;
4924                struct ipv6hdr *v6;
4925                unsigned char *hdr;
4926        } ip;
4927        union {
4928                struct tcphdr *tcp;
4929                unsigned char *hdr;
4930        } l4;
4931        u32 paylen, l4_offset;
4932        int err;
4933
4934        if (skb->ip_summed != CHECKSUM_PARTIAL)
4935                return 0;
4936
4937        if (!skb_is_gso(skb))
4938                return 0;
4939
4940        err = skb_cow_head(skb, 0);
4941        if (err < 0)
4942                return err;
4943
4944        ip.hdr = skb_network_header(skb);
4945        l4.hdr = skb_checksum_start(skb);
4946
4947        /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4948        type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4949
4950        /* initialize outer IP header fields */
4951        if (ip.v4->version == 4) {
4952                unsigned char *csum_start = skb_checksum_start(skb);
4953                unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
4954
4955                /* IP header will have to cancel out any data that
4956                 * is not a part of the outer IP header
4957                 */
4958                ip.v4->check = csum_fold(csum_partial(trans_start,
4959                                                      csum_start - trans_start,
4960                                                      0));
4961                type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4962
4963                ip.v4->tot_len = 0;
4964                first->tx_flags |= IGB_TX_FLAGS_TSO |
4965                                   IGB_TX_FLAGS_CSUM |
4966                                   IGB_TX_FLAGS_IPV4;
4967        } else {
4968                ip.v6->payload_len = 0;
4969                first->tx_flags |= IGB_TX_FLAGS_TSO |
4970                                   IGB_TX_FLAGS_CSUM;
4971        }
4972
4973        /* determine offset of inner transport header */
4974        l4_offset = l4.hdr - skb->data;
4975
4976        /* compute length of segmentation header */
4977        *hdr_len = (l4.tcp->doff * 4) + l4_offset;
4978
4979        /* remove payload length from inner checksum */
4980        paylen = skb->len - l4_offset;
4981        csum_replace_by_diff(&l4.tcp->check, htonl(paylen));
4982
4983        /* update gso size and bytecount with header size */
4984        first->gso_segs = skb_shinfo(skb)->gso_segs;
4985        first->bytecount += (first->gso_segs - 1) * *hdr_len;
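            /* Example (hypothetical sizes): a TSO skb carrying 8688 bytes of TCP
             * payload with a 54-byte Ethernet + IPv4 + TCP header and an MSS of
             * 1448 is reported by the stack as gso_segs = 6, so bytecount grows
             * by (6 - 1) * 54 = 270 bytes of replicated headers.
             */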
4986
4987        /* MSS L4LEN IDX */
4988        mss_l4len_idx = (*hdr_len - l4_offset) << E1000_ADVTXD_L4LEN_SHIFT;
4989        mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4990
4991        /* VLAN MACLEN IPLEN */
4992        vlan_macip_lens = l4.hdr - ip.hdr;
4993        vlan_macip_lens |= (ip.hdr - skb->data) << E1000_ADVTXD_MACLEN_SHIFT;
4994        vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4995
4996        igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4997
4998        return 1;
4999}
5000
5001static inline bool igb_ipv6_csum_is_sctp(struct sk_buff *skb)
5002{
5003        unsigned int offset = 0;
5004
5005        ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL);
5006
5007        return offset == skb_checksum_start_offset(skb);
5008}
5009
5010static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
5011{
5012        struct sk_buff *skb = first->skb;
5013        u32 vlan_macip_lens = 0;
5014        u32 type_tucmd = 0;
5015
5016        if (skb->ip_summed != CHECKSUM_PARTIAL) {
5017csum_failed:
5018                if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
5019                        return;
5020                goto no_csum;
5021        }
5022
5023        switch (skb->csum_offset) {
5024        case offsetof(struct tcphdr, check):
5025                type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
5026                /* fall through */
5027        case offsetof(struct udphdr, check):
5028                break;
5029        case offsetof(struct sctphdr, checksum):
5030                /* validate that this is actually an SCTP request */
5031                if (((first->protocol == htons(ETH_P_IP)) &&
5032                     (ip_hdr(skb)->protocol == IPPROTO_SCTP)) ||
5033                    ((first->protocol == htons(ETH_P_IPV6)) &&
5034                     igb_ipv6_csum_is_sctp(skb))) {
5035                        type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP;
5036                        break;
5037                }
5038        default:
5039                skb_checksum_help(skb);
5040                goto csum_failed;
5041        }
5042
5043        /* update TX checksum flag */
5044        first->tx_flags |= IGB_TX_FLAGS_CSUM;
5045        vlan_macip_lens = skb_checksum_start_offset(skb) -
5046                          skb_network_offset(skb);
5047no_csum:
5048        vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
5049        vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
5050
5051        igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, 0);
5052}
5053
5054#define IGB_SET_FLAG(_input, _flag, _result) \
5055        ((_flag <= _result) ? \
5056         ((u32)(_input & _flag) * (_result / _flag)) : \
5057         ((u32)(_input & _flag) / (_flag / _result)))
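    /* Explanatory note: IGB_SET_FLAG() moves a single-bit mask from the _flag
     * position in _input to the _result position without branching, by scaling
     * the masked bit with the power-of-two ratio of the two masks.  With
     * hypothetical masks _flag = BIT(1) and _result = BIT(4), a set bit is
     * multiplied by BIT(4) / BIT(1) = 8 (a shift left by three); when _flag is
     * the larger mask, the division arm shifts right instead.
     */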
5058
5059static u32 igb_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
5060{
5061        /* set type for advanced descriptor with frame checksum insertion */
5062        u32 cmd_type = E1000_ADVTXD_DTYP_DATA |
5063                       E1000_ADVTXD_DCMD_DEXT |
5064                       E1000_ADVTXD_DCMD_IFCS;
5065
5066        /* set HW vlan bit if vlan is present */
5067        cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_VLAN,
5068                                 (E1000_ADVTXD_DCMD_VLE));
5069
5070        /* set segmentation bits for TSO */
5071        cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSO,
5072                                 (E1000_ADVTXD_DCMD_TSE));
5073
5074        /* set timestamp bit if present */
5075        cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSTAMP,
5076                                 (E1000_ADVTXD_MAC_TSTAMP));
5077
5078        /* insert frame checksum */
5079        cmd_type ^= IGB_SET_FLAG(skb->no_fcs, 1, E1000_ADVTXD_DCMD_IFCS);
5080
5081        return cmd_type;
5082}
5083
5084static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
5085                                 union e1000_adv_tx_desc *tx_desc,
5086                                 u32 tx_flags, unsigned int paylen)
5087{
5088        u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
5089
5090        /* 82575 requires a unique index per ring */
5091        if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
5092                olinfo_status |= tx_ring->reg_idx << 4;
5093
5094        /* insert L4 checksum */
5095        olinfo_status |= IGB_SET_FLAG(tx_flags,
5096                                      IGB_TX_FLAGS_CSUM,
5097                                      (E1000_TXD_POPTS_TXSM << 8));
5098
5099        /* insert IPv4 checksum */
5100        olinfo_status |= IGB_SET_FLAG(tx_flags,
5101                                      IGB_TX_FLAGS_IPV4,
5102                                      (E1000_TXD_POPTS_IXSM << 8));
5103
5104        tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
5105}
5106
5107static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
5108{
5109        struct net_device *netdev = tx_ring->netdev;
5110
5111        netif_stop_subqueue(netdev, tx_ring->queue_index);
5112
5113        /* Herbert's original patch had:
5114         *  smp_mb__after_netif_stop_queue();
5115         * but since that doesn't exist yet, just open code it.
5116         */
5117        smp_mb();
5118
5119        /* We need to check again in case another CPU has just
5120         * made room available.
5121         */
5122        if (igb_desc_unused(tx_ring) < size)
5123                return -EBUSY;
5124
5125        /* A reprieve! */
5126        netif_wake_subqueue(netdev, tx_ring->queue_index);
5127
5128        u64_stats_update_begin(&tx_ring->tx_syncp2);
5129        tx_ring->tx_stats.restart_queue2++;
5130        u64_stats_update_end(&tx_ring->tx_syncp2);
5131
5132        return 0;
5133}
5134
5135static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
5136{
5137        if (igb_desc_unused(tx_ring) >= size)
5138                return 0;
5139        return __igb_maybe_stop_tx(tx_ring, size);
5140}
5141
5142static void igb_tx_map(struct igb_ring *tx_ring,
5143                       struct igb_tx_buffer *first,
5144                       const u8 hdr_len)
5145{
5146        struct sk_buff *skb = first->skb;
5147        struct igb_tx_buffer *tx_buffer;
5148        union e1000_adv_tx_desc *tx_desc;
5149        struct skb_frag_struct *frag;
5150        dma_addr_t dma;
5151        unsigned int data_len, size;
5152        u32 tx_flags = first->tx_flags;
5153        u32 cmd_type = igb_tx_cmd_type(skb, tx_flags);
5154        u16 i = tx_ring->next_to_use;
5155
5156        tx_desc = IGB_TX_DESC(tx_ring, i);
5157
5158        igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
5159
5160        size = skb_headlen(skb);
5161        data_len = skb->data_len;
5162
5163        dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
5164
5165        tx_buffer = first;
5166
5167        for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
5168                if (dma_mapping_error(tx_ring->dev, dma))
5169                        goto dma_error;
5170
5171                /* record length, and DMA address */
5172                dma_unmap_len_set(tx_buffer, len, size);
5173                dma_unmap_addr_set(tx_buffer, dma, dma);
5174
5175                tx_desc->read.buffer_addr = cpu_to_le64(dma);
5176
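                /* Descriptive note: a mapping longer than IGB_MAX_DATA_PER_TXD is
                 * split here; each pass of the loop below emits one descriptor for
                 * a maximal chunk and advances the DMA address and remaining size
                 * before the final descriptor for this buffer is filled in.
                 */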
5177                while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
5178                        tx_desc->read.cmd_type_len =
5179                                cpu_to_le32(cmd_type ^ IGB_MAX_DATA_PER_TXD);
5180
5181                        i++;
5182                        tx_desc++;
5183                        if (i == tx_ring->count) {
5184                                tx_desc = IGB_TX_DESC(tx_ring, 0);
5185                                i = 0;
5186                        }
5187                        tx_desc->read.olinfo_status = 0;
5188
5189                        dma += IGB_MAX_DATA_PER_TXD;
5190                        size -= IGB_MAX_DATA_PER_TXD;
5191
5192                        tx_desc->read.buffer_addr = cpu_to_le64(dma);
5193                }
5194
5195                if (likely(!data_len))
5196                        break;
5197
5198                tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
5199
5200                i++;
5201                tx_desc++;
5202                if (i == tx_ring->count) {
5203                        tx_desc = IGB_TX_DESC(tx_ring, 0);
5204                        i = 0;
5205                }
5206                tx_desc->read.olinfo_status = 0;
5207
5208                size = skb_frag_size(frag);
5209                data_len -= size;
5210
5211                dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
5212                                       size, DMA_TO_DEVICE);
5213
5214                tx_buffer = &tx_ring->tx_buffer_info[i];
5215        }
5216
5217        /* write last descriptor with RS and EOP bits */
5218        cmd_type |= size | IGB_TXD_DCMD;
5219        tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
5220
5221        netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
5222
5223        /* set the timestamp */
5224        first->time_stamp = jiffies;
5225
5226        /* Force memory writes to complete before letting h/w know there
5227         * are new descriptors to fetch.  (Only applicable for weak-ordered
5228         * memory model archs, such as IA-64).
5229         *
5230         * We also need this memory barrier to make certain all of the
5231         * status bits have been updated before next_to_watch is written.
5232         */
5233        wmb();
5234
5235        /* set next_to_watch value indicating a packet is present */
5236        first->next_to_watch = tx_desc;
5237
5238        i++;
5239        if (i == tx_ring->count)
5240                i = 0;
5241
5242        tx_ring->next_to_use = i;
5243
5244        /* Make sure there is space in the ring for the next send. */
5245        igb_maybe_stop_tx(tx_ring, DESC_NEEDED);
5246
5247        if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
5248                writel(i, tx_ring->tail);
5249
5250                /* We need this if more than one processor can write to our tail
5251                 * at a time; it synchronizes IO on IA64/Altix systems
5252                 */
5253                mmiowb();
5254        }
5255        return;
5256
5257dma_error:
5258        dev_err(tx_ring->dev, "TX DMA map failed\n");
5259
5260        /* clear dma mappings for failed tx_buffer_info map */
5261        for (;;) {
5262                tx_buffer = &tx_ring->tx_buffer_info[i];
5263                igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
5264                if (tx_buffer == first)
5265                        break;
5266                if (i == 0)
5267                        i = tx_ring->count;
5268                i--;
5269        }
5270
5271        tx_ring->next_to_use = i;
5272}
5273
5274netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
5275                                struct igb_ring *tx_ring)
5276{
5277        struct igb_tx_buffer *first;
5278        int tso;
5279        u32 tx_flags = 0;
5280        unsigned short f;
5281        u16 count = TXD_USE_COUNT(skb_headlen(skb));
5282        __be16 protocol = vlan_get_protocol(skb);
5283        u8 hdr_len = 0;
5284
5285        /* need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD,
5286         *       + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD,
5287         *       + 2 desc gap to keep tail from touching head,
5288         *       + 1 desc for context descriptor,
5289         * otherwise try next time
5290         */
5291        for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
5292                count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
5293
5294        if (igb_maybe_stop_tx(tx_ring, count + 3)) {
5295                /* this is a hard error */
5296                return NETDEV_TX_BUSY;
5297        }
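            /* Worked example (illustrative, assuming TXD_USE_COUNT() rounds a
             * length up to whole IGB_MAX_DATA_PER_TXD chunks): a linear 1514-byte
             * skb with no fragments needs count = 1 data descriptor, and the
             * check above reserves count + 3 slots to cover the context
             * descriptor and the two-descriptor gap described in the comment.
             */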
5298
5299        /* record the location of the first descriptor for this packet */
5300        first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
5301        first->skb = skb;
5302        first->bytecount = skb->len;
5303        first->gso_segs = 1;
5304
5305        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
5306                struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
5307
5308                if (!test_and_set_bit_lock(__IGB_PTP_TX_IN_PROGRESS,
5309                                           &adapter->state)) {
5310                        skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
5311                        tx_flags |= IGB_TX_FLAGS_TSTAMP;
5312
5313                        adapter->ptp_tx_skb = skb_get(skb);
5314                        adapter->ptp_tx_start = jiffies;
5315                        if (adapter->hw.mac.type == e1000_82576)
5316                                schedule_work(&adapter->ptp_tx_work);
5317                }
5318        }
5319
5320        skb_tx_timestamp(skb);
5321
5322        if (skb_vlan_tag_present(skb)) {
5323                tx_flags |= IGB_TX_FLAGS_VLAN;
5324                tx_flags |= (skb_vlan_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
5325        }
5326
5327        /* record initial flags and protocol */
5328        first->tx_flags = tx_flags;
5329        first->protocol = protocol;
5330
5331        tso = igb_tso(tx_ring, first, &hdr_len);
5332        if (tso < 0)
5333                goto out_drop;
5334        else if (!tso)
5335                igb_tx_csum(tx_ring, first);
5336
5337        igb_tx_map(tx_ring, first, hdr_len);
5338
5339        return NETDEV_TX_OK;
5340
5341out_drop:
5342        igb_unmap_and_free_tx_resource(tx_ring, first);
5343
5344        return NETDEV_TX_OK;
5345}
5346
5347static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
5348                                                    struct sk_buff *skb)
5349{
5350        unsigned int r_idx = skb->queue_mapping;
5351
5352        if (r_idx >= adapter->num_tx_queues)
5353                r_idx = r_idx % adapter->num_tx_queues;
5354
5355        return adapter->tx_ring[r_idx];
5356}
5357
5358static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
5359                                  struct net_device *netdev)
5360{
5361        struct igb_adapter *adapter = netdev_priv(netdev);
5362
5363        /* The minimum packet size with TCTL.PSP set is 17 bytes, so pad the
5364         * skb to meet this minimum size requirement.
5365         */
5366        if (skb_put_padto(skb, 17))
5367                return NETDEV_TX_OK;
5368
5369        return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
5370}
5371
5372/**
5373 *  igb_tx_timeout - Respond to a Tx Hang
5374 *  @netdev: network interface device structure
5375 **/
5376static void igb_tx_timeout(struct net_device *netdev)
5377{
5378        struct igb_adapter *adapter = netdev_priv(netdev);
5379        struct e1000_hw *hw = &adapter->hw;
5380
5381        /* Do the reset outside of interrupt context */
5382        adapter->tx_timeout_count++;
5383
5384        if (hw->mac.type >= e1000_82580)
5385                hw->dev_spec._82575.global_device_reset = true;
5386
5387        schedule_work(&adapter->reset_task);
5388        wr32(E1000_EICS,
5389             (adapter->eims_enable_mask & ~adapter->eims_other));
5390}
5391
5392static void igb_reset_task(struct work_struct *work)
5393{
5394        struct igb_adapter *adapter;
5395        adapter = container_of(work, struct igb_adapter, reset_task);
5396
5397        igb_dump(adapter);
5398        netdev_err(adapter->netdev, "Reset adapter\n");
5399        igb_reinit_locked(adapter);
5400}
5401
5402/**
5403 *  igb_get_stats64 - Get System Network Statistics
5404 *  @netdev: network interface device structure
5405 *  @stats: rtnl_link_stats64 pointer
5406 **/
5407static void igb_get_stats64(struct net_device *netdev,
5408                            struct rtnl_link_stats64 *stats)
5409{
5410        struct igb_adapter *adapter = netdev_priv(netdev);
5411
5412        spin_lock(&adapter->stats64_lock);
5413        igb_update_stats(adapter, &adapter->stats64);
5414        memcpy(stats, &adapter->stats64, sizeof(*stats));
5415        spin_unlock(&adapter->stats64_lock);
5416}
5417
5418/**
5419 *  igb_change_mtu - Change the Maximum Transfer Unit
5420 *  @netdev: network interface device structure
5421 *  @new_mtu: new value for maximum frame size
5422 *
5423 *  Returns 0 on success, negative on failure
5424 **/
5425static int igb_change_mtu(struct net_device *netdev, int new_mtu)
5426{
5427        struct igb_adapter *adapter = netdev_priv(netdev);
5428        struct pci_dev *pdev = adapter->pdev;
5429        int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
5430
5431        /* adjust max frame to be at least the size of a standard frame */
5432        if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
5433                max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
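            /* Example: the default MTU of 1500 yields max_frame = 1500 + 14
             * (ETH_HLEN) + 4 (ETH_FCS_LEN) + 4 (VLAN_HLEN) = 1522 bytes, which is
             * already above the standard-frame floor enforced just above.
             */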
5434
5435        while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
5436                usleep_range(1000, 2000);
5437
5438        /* igb_down has a dependency on max_frame_size */
5439        adapter->max_frame_size = max_frame;
5440
5441        if (netif_running(netdev))
5442                igb_down(adapter);
5443
5444        dev_info(&pdev->dev, "changing MTU from %d to %d\n",
5445                 netdev->mtu, new_mtu);
5446        netdev->mtu = new_mtu;
5447
5448        if (netif_running(netdev))
5449                igb_up(adapter);
5450        else
5451                igb_reset(adapter);
5452
5453        clear_bit(__IGB_RESETTING, &adapter->state);
5454
5455        return 0;
5456}
5457
5458/**
5459 *  igb_update_stats - Update the board statistics counters
5460 *  @adapter: board private structure
5461 **/
5462void igb_update_stats(struct igb_adapter *adapter,
5463                      struct rtnl_link_stats64 *net_stats)
5464{
5465        struct e1000_hw *hw = &adapter->hw;
5466        struct pci_dev *pdev = adapter->pdev;
5467        u32 reg, mpc;
5468        int i;
5469        u64 bytes, packets;
5470        unsigned int start;
5471        u64 _bytes, _packets;
5472
5473        /* Prevent stats update while adapter is being reset, or if the pci
5474         * connection is down.
5475         */
5476        if (adapter->link_speed == 0)
5477                return;
5478        if (pci_channel_offline(pdev))
5479                return;
5480
5481        bytes = 0;
5482        packets = 0;
5483
5484        rcu_read_lock();
5485        for (i = 0; i < adapter->num_rx_queues; i++) {
5486                struct igb_ring *ring = adapter->rx_ring[i];
5487                u32 rqdpc = rd32(E1000_RQDPC(i));
5488                if (hw->mac.type >= e1000_i210)
5489                        wr32(E1000_RQDPC(i), 0);
5490
5491                if (rqdpc) {
5492                        ring->rx_stats.drops += rqdpc;
5493                        net_stats->rx_fifo_errors += rqdpc;
5494                }
5495
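                /* Descriptive note: the fetch_begin/fetch_retry pair below re-reads
                 * the ring counters if a concurrent writer updated them mid-read,
                 * keeping the 64-bit totals consistent on 32-bit hosts.
                 */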
5496                do {
5497                        start = u64_stats_fetch_begin_irq(&ring->rx_syncp);
5498                        _bytes = ring->rx_stats.bytes;
5499                        _packets = ring->rx_stats.packets;
5500                } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start));
5501                bytes += _bytes;
5502                packets += _packets;
5503        }
5504
5505        net_stats->rx_bytes = bytes;
5506        net_stats->rx_packets = packets;
5507
5508        bytes = 0;
5509        packets = 0;
5510        for (i = 0; i < adapter->num_tx_queues; i++) {
5511                struct igb_ring *ring = adapter->tx_ring[i];
5512                do {
5513                        start = u64_stats_fetch_begin_irq(&ring->tx_syncp);
5514                        _bytes = ring->tx_stats.bytes;
5515                        _packets = ring->tx_stats.packets;
5516                } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start));
5517                bytes += _bytes;
5518                packets += _packets;
5519        }
5520        net_stats->tx_bytes = bytes;
5521        net_stats->tx_packets = packets;
5522        rcu_read_unlock();
5523
5524        /* read stats registers */
5525        adapter->stats.crcerrs += rd32(E1000_CRCERRS);
5526        adapter->stats.gprc += rd32(E1000_GPRC);
5527        adapter->stats.gorc += rd32(E1000_GORCL);
5528        rd32(E1000_GORCH); /* clear GORCL */
5529        adapter->stats.bprc += rd32(E1000_BPRC);
5530        adapter->stats.mprc += rd32(E1000_MPRC);
5531        adapter->stats.roc += rd32(E1000_ROC);
5532
5533        adapter->stats.prc64 += rd32(E1000_PRC64);
5534        adapter->stats.prc127 += rd32(E1000_PRC127);
5535        adapter->stats.prc255 += rd32(E1000_PRC255);
5536        adapter->stats.prc511 += rd32(E1000_PRC511);
5537        adapter->stats.prc1023 += rd32(E1000_PRC1023);
5538        adapter->stats.prc1522 += rd32(E1000_PRC1522);
5539        adapter->stats.symerrs += rd32(E1000_SYMERRS);
5540        adapter->stats.sec += rd32(E1000_SEC);
5541
5542        mpc = rd32(E1000_MPC);
5543        adapter->stats.mpc += mpc;
5544        net_stats->rx_fifo_errors += mpc;
5545        adapter->stats.scc += rd32(E1000_SCC);
5546        adapter->stats.ecol += rd32(E1000_ECOL);
5547        adapter->stats.mcc += rd32(E1000_MCC);
5548        adapter->stats.latecol += rd32(E1000_LATECOL);
5549        adapter->stats.dc += rd32(E1000_DC);
5550        adapter->stats.rlec += rd32(E1000_RLEC);
5551        adapter->stats.xonrxc += rd32(E1000_XONRXC);
5552        adapter->stats.xontxc += rd32(E1000_XONTXC);
5553        adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
5554        adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
5555        adapter->stats.fcruc += rd32(E1000_FCRUC);
5556        adapter->stats.gptc += rd32(E1000_GPTC);
5557        adapter->stats.gotc += rd32(E1000_GOTCL);
5558        rd32(E1000_GOTCH); /* clear GOTCL */
5559        adapter->stats.rnbc += rd32(E1000_RNBC);
5560        adapter->stats.ruc += rd32(E1000_RUC);
5561        adapter->stats.rfc += rd32(E1000_RFC);
5562        adapter->stats.rjc += rd32(E1000_RJC);
5563        adapter->stats.tor += rd32(E1000_TORH);
5564        adapter->stats.tot += rd32(E1000_TOTH);
5565        adapter->stats.tpr += rd32(E1000_TPR);
5566
5567        adapter->stats.ptc64 += rd32(E1000_PTC64);
5568        adapter->stats.ptc127 += rd32(E1000_PTC127);
5569        adapter->stats.ptc255 += rd32(E1000_PTC255);
5570        adapter->stats.ptc511 += rd32(E1000_PTC511);
5571        adapter->stats.ptc1023 += rd32(E1000_PTC1023);
5572        adapter->stats.ptc1522 += rd32(E1000_PTC1522);
5573
5574        adapter->stats.mptc += rd32(E1000_MPTC);
5575        adapter->stats.bptc += rd32(E1000_BPTC);
5576
5577        adapter->stats.tpt += rd32(E1000_TPT);
5578        adapter->stats.colc += rd32(E1000_COLC);
5579
5580        adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
5581        /* read internal phy specific stats */
5582        reg = rd32(E1000_CTRL_EXT);
5583        if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
5584                adapter->stats.rxerrc += rd32(E1000_RXERRC);
5585
5586                /* this stat has invalid values on i210/i211 */
5587                if ((hw->mac.type != e1000_i210) &&
5588                    (hw->mac.type != e1000_i211))
5589                        adapter->stats.tncrs += rd32(E1000_TNCRS);
5590        }
5591
5592        adapter->stats.tsctc += rd32(E1000_TSCTC);
5593        adapter->stats.tsctfc += rd32(E1000_TSCTFC);
5594
5595        adapter->stats.iac += rd32(E1000_IAC);
5596        adapter->stats.icrxoc += rd32(E1000_ICRXOC);
5597        adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
5598        adapter->stats.icrxatc += rd32(E1000_ICRXATC);
5599        adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
5600        adapter->stats.ictxatc += rd32(E1000_ICTXATC);
5601        adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
5602        adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
5603        adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
5604
5605        /* Fill out the OS statistics structure */
5606        net_stats->multicast = adapter->stats.mprc;
5607        net_stats->collisions = adapter->stats.colc;
5608
5609        /* Rx Errors */
5610
5611        /* RLEC on some newer hardware can be incorrect so build
5612         * our own version based on RUC and ROC
5613         */
5614        net_stats->rx_errors = adapter->stats.rxerrc +
5615                adapter->stats.crcerrs + adapter->stats.algnerrc +
5616                adapter->stats.ruc + adapter->stats.roc +
5617                adapter->stats.cexterr;
5618        net_stats->rx_length_errors = adapter->stats.ruc +
5619                                      adapter->stats.roc;
5620        net_stats->rx_crc_errors = adapter->stats.crcerrs;
5621        net_stats->rx_frame_errors = adapter->stats.algnerrc;
5622        net_stats->rx_missed_errors = adapter->stats.mpc;
5623
5624        /* Tx Errors */
5625        net_stats->tx_errors = adapter->stats.ecol +
5626                               adapter->stats.latecol;
5627        net_stats->tx_aborted_errors = adapter->stats.ecol;
5628        net_stats->tx_window_errors = adapter->stats.latecol;
5629        net_stats->tx_carrier_errors = adapter->stats.tncrs;
5630
5631        /* Tx Dropped needs to be maintained elsewhere */
5632
5633        /* Management Stats */
5634        adapter->stats.mgptc += rd32(E1000_MGTPTC);
5635        adapter->stats.mgprc += rd32(E1000_MGTPRC);
5636        adapter->stats.mgpdc += rd32(E1000_MGTPDC);
5637
5638        /* OS2BMC Stats */
5639        reg = rd32(E1000_MANC);
5640        if (reg & E1000_MANC_EN_BMC2OS) {
5641                adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
5642                adapter->stats.o2bspc += rd32(E1000_O2BSPC);
5643                adapter->stats.b2ospc += rd32(E1000_B2OSPC);
5644                adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
5645        }
5646}
5647
5648static void igb_tsync_interrupt(struct igb_adapter *adapter)
5649{
5650        struct e1000_hw *hw = &adapter->hw;
5651        struct ptp_clock_event event;
5652        struct timespec64 ts;
5653        u32 ack = 0, tsauxc, sec, nsec, tsicr = rd32(E1000_TSICR);
5654
5655        if (tsicr & TSINTR_SYS_WRAP) {
5656                event.type = PTP_CLOCK_PPS;
5657                if (adapter->ptp_caps.pps)
5658                        ptp_clock_event(adapter->ptp_clock, &event);
5659                else
5660                        dev_err(&adapter->pdev->dev, "unexpected SYS WRAP\n");
5661                ack |= TSINTR_SYS_WRAP;
5662        }
5663
5664        if (tsicr & E1000_TSICR_TXTS) {
5665                /* retrieve hardware timestamp */
5666                schedule_work(&adapter->ptp_tx_work);
5667                ack |= E1000_TSICR_TXTS;
5668        }
5669
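            /* Descriptive note: each target-time interrupt below re-arms the next
             * period by adding the configured period to the previous start time,
             * writing the new target into TRGTTIML/H and re-enabling the
             * corresponding TSAUXC_EN_TTx bit.
             */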
5670        if (tsicr & TSINTR_TT0) {
5671                spin_lock(&adapter->tmreg_lock);
5672                ts = timespec64_add(adapter->perout[0].start,
5673                                    adapter->perout[0].period);
5674                /* u32 conversion of tv_sec is safe until y2106 */
5675                wr32(E1000_TRGTTIML0, ts.tv_nsec);
5676                wr32(E1000_TRGTTIMH0, (u32)ts.tv_sec);
5677                tsauxc = rd32(E1000_TSAUXC);
5678                tsauxc |= TSAUXC_EN_TT0;
5679                wr32(E1000_TSAUXC, tsauxc);
5680                adapter->perout[0].start = ts;
5681                spin_unlock(&adapter->tmreg_lock);
5682                ack |= TSINTR_TT0;
5683        }
5684
5685        if (tsicr & TSINTR_TT1) {
5686                spin_lock(&adapter->tmreg_lock);
5687                ts = timespec64_add(adapter->perout[1].start,
5688                                    adapter->perout[1].period);
5689                wr32(E1000_TRGTTIML1, ts.tv_nsec);
5690                wr32(E1000_TRGTTIMH1, (u32)ts.tv_sec);
5691                tsauxc = rd32(E1000_TSAUXC);
5692                tsauxc |= TSAUXC_EN_TT1;
5693                wr32(E1000_TSAUXC, tsauxc);
5694                adapter->perout[1].start = ts;
5695                spin_unlock(&adapter->tmreg_lock);
5696                ack |= TSINTR_TT1;
5697        }
5698
5699        if (tsicr & TSINTR_AUTT0) {
5700                nsec = rd32(E1000_AUXSTMPL0);
5701                sec  = rd32(E1000_AUXSTMPH0);
5702                event.type = PTP_CLOCK_EXTTS;
5703                event.index = 0;
5704                event.timestamp = sec * 1000000000ULL + nsec;
5705                ptp_clock_event(adapter->ptp_clock, &event);
5706                ack |= TSINTR_AUTT0;
5707        }
5708
5709        if (tsicr & TSINTR_AUTT1) {
5710                nsec = rd32(E1000_AUXSTMPL1);
5711                sec  = rd32(E1000_AUXSTMPH1);
5712                event.type = PTP_CLOCK_EXTTS;
5713                event.index = 1;
5714                event.timestamp = sec * 1000000000ULL + nsec;
5715                ptp_clock_event(adapter->ptp_clock, &event);
5716                ack |= TSINTR_AUTT1;
5717        }
5718
5719        /* acknowledge the interrupts */
5720        wr32(E1000_TSICR, ack);
5721}
5722
5723static irqreturn_t igb_msix_other(int irq, void *data)
5724{
5725        struct igb_adapter *adapter = data;
5726        struct e1000_hw *hw = &adapter->hw;
5727        u32 icr = rd32(E1000_ICR);
5728        /* reading ICR causes bit 31 of EICR to be cleared */
5729
5730        if (icr & E1000_ICR_DRSTA)
5731                schedule_work(&adapter->reset_task);
5732
5733        if (icr & E1000_ICR_DOUTSYNC) {
5734                /* HW is reporting DMA is out of sync */
5735                adapter->stats.doosync++;
5736                /* The DMA Out of Sync is also an indication of a spoof event
5737                 * in IOV mode. Check the Wrong VM Behavior register to
5738                 * see if it is really a spoof event.
5739                 */
5740                igb_check_wvbr(adapter);
5741        }
5742
5743        /* Check for a mailbox event */
5744        if (icr & E1000_ICR_VMMB)
5745                igb_msg_task(adapter);
5746
5747        if (icr & E1000_ICR_LSC) {
5748                hw->mac.get_link_status = 1;
5749                /* guard against interrupt when we're going down */
5750                if (!test_bit(__IGB_DOWN, &adapter->state))
5751                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
5752        }
5753
5754        if (icr & E1000_ICR_TS)
5755                igb_tsync_interrupt(adapter);
5756
5757        wr32(E1000_EIMS, adapter->eims_other);
5758
5759        return IRQ_HANDLED;
5760}
5761
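/**
 *  igb_write_itr - write a pending ITR update to hardware
 *  @q_vector: pointer to q_vector the update applies to
 *
 *  Writes the interrupt throttle value calculated for this q_vector to
 *  its ITR register, adjusting the encoding for 82575 hardware, and
 *  clears the pending set_itr flag.  Does nothing if no update is
 *  pending.
 **/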
5762static void igb_write_itr(struct igb_q_vector *q_vector)
5763{
5764        struct igb_adapter *adapter = q_vector->adapter;
5765        u32 itr_val = q_vector->itr_val & 0x7FFC;
5766
5767        if (!q_vector->set_itr)
5768                return;
5769
5770        if (!itr_val)
5771                itr_val = 0x4;
5772
5773        if (adapter->hw.mac.type == e1000_82575)
5774                itr_val |= itr_val << 16;
5775        else
5776                itr_val |= E1000_EITR_CNT_IGNR;
5777
5778        writel(itr_val, q_vector->itr_register);
5779        q_vector->set_itr = 0;
5780}
5781
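/**
 *  igb_msix_ring - MSI-X handler for a single q_vector
 *  @irq: interrupt number
 *  @data: pointer to the q_vector that raised the interrupt
 *
 *  Writes any pending ITR update and schedules NAPI polling for the
 *  q_vector.
 **/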
5782static irqreturn_t igb_msix_ring(int irq, void *data)
5783{
5784        struct igb_q_vector *q_vector = data;
5785
5786        /* Write the ITR value calculated from the previous interrupt. */
5787        igb_write_itr(q_vector);
5788
5789        napi_schedule(&q_vector->napi);
5790
5791        return IRQ_HANDLED;
5792}
5793
5794#ifdef CONFIG_IGB_DCA
5795static void igb_update_tx_dca(struct igb_adapter *adapter,
5796                              struct igb_ring *tx_ring,
5797                              int cpu)
5798{
5799        struct e1000_hw *hw = &adapter->hw;
5800        u32 txctrl = dca3_get_tag(tx_ring->dev, cpu);
5801
5802        if (hw->mac.type != e1000_82575)
5803                txctrl <<= E1000_DCA_TXCTRL_CPUID_SHIFT;
5804
5805        /* We can enable relaxed ordering for reads, but not writes when
5806         * DCA is enabled.  This is due to a known issue in some chipsets
5807         * which will cause the DCA tag to be cleared.
5808         */
5809        txctrl |= E1000_DCA_TXCTRL_DESC_RRO_EN |
5810                  E1000_DCA_TXCTRL_DATA_RRO_EN |
5811                  E1000_DCA_TXCTRL_DESC_DCA_EN;
5812
5813        wr32(E1000_DCA_TXCTRL(tx_ring->reg_idx), txctrl);
5814}
5815
5816static void igb_update_rx_dca(struct igb_adapter *adapter,
5817                              struct igb_ring *rx_ring,
5818                              int cpu)
5819{
5820        struct e1000_hw *hw = &adapter->hw;
5821        u32 rxctrl = dca3_get_tag(&adapter->pdev->dev, cpu);
5822
5823        if (hw->mac.type != e1000_82575)
5824                rxctrl <<= E1000_DCA_RXCTRL_CPUID_SHIFT;
5825
5826        /* We can enable relaxed ordering for reads, but not writes when
5827         * DCA is enabled.  This is due to a known issue in some chipsets
5828         * which will cause the DCA tag to be cleared.
5829         */
5830        rxctrl |= E1000_DCA_RXCTRL_DESC_RRO_EN |
5831                  E1000_DCA_RXCTRL_DESC_DCA_EN;
5832
5833        wr32(E1000_DCA_RXCTRL(rx_ring->reg_idx), rxctrl);
5834}
5835
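/**
 *  igb_update_dca - update DCA tags for the rings of a q_vector
 *  @q_vector: pointer to q_vector whose rings should be updated
 *
 *  If the q_vector is now running on a different CPU, reprogram the
 *  DCA Tx/Rx control registers so descriptor writes target the new
 *  CPU's cache.
 **/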
5836static void igb_update_dca(struct igb_q_vector *q_vector)
5837{
5838        struct igb_adapter *adapter = q_vector->adapter;
5839        int cpu = get_cpu();
5840
5841        if (q_vector->cpu == cpu)
5842                goto out_no_update;
5843
5844        if (q_vector->tx.ring)
5845                igb_update_tx_dca(adapter, q_vector->tx.ring, cpu);
5846
5847        if (q_vector->rx.ring)
5848                igb_update_rx_dca(adapter, q_vector->rx.ring, cpu);
5849
5850        q_vector->cpu = cpu;
5851out_no_update:
5852        put_cpu();
5853}
5854
5855static void igb_setup_dca(struct igb_adapter *adapter)
5856{
5857        struct e1000_hw *hw = &adapter->hw;
5858        int i;
5859
5860        if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
5861                return;
5862
5863        /* Always use CB2 mode, difference is masked in the CB driver. */
5864        wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
5865
5866        for (i = 0; i < adapter->num_q_vectors; i++) {
5867                adapter->q_vector[i]->cpu = -1;
5868                igb_update_dca(adapter->q_vector[i]);
5869        }
5870}
5871
5872static int __igb_notify_dca(struct device *dev, void *data)
5873{
5874        struct net_device *netdev = dev_get_drvdata(dev);
5875        struct igb_adapter *adapter = netdev_priv(netdev);
5876        struct pci_dev *pdev = adapter->pdev;
5877        struct e1000_hw *hw = &adapter->hw;
5878        unsigned long event = *(unsigned long *)data;
5879
5880        switch (event) {
5881        case DCA_PROVIDER_ADD:
5882                /* if already enabled, don't do it again */
5883                if (adapter->flags & IGB_FLAG_DCA_ENABLED)
5884                        break;
5885                if (dca_add_requester(dev) == 0) {
5886                        adapter->flags |= IGB_FLAG_DCA_ENABLED;
5887                        dev_info(&pdev->dev, "DCA enabled\n");
5888                        igb_setup_dca(adapter);
5889                        break;
5890                }
5891                /* Fall Through since DCA is disabled. */
5892        case DCA_PROVIDER_REMOVE:
5893                if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
5894                        /* without this a class_device is left
5895                         * hanging around in the sysfs model
5896                         */
5897                        dca_remove_requester(dev);
5898                        dev_info(&pdev->dev, "DCA disabled\n");
5899                        adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
5900                        wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
5901                }
5902                break;
5903        }
5904
5905        return 0;
5906}
5907
5908static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
5909                          void *p)
5910{
5911        int ret_val;
5912
5913        ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
5914                                         __igb_notify_dca);
5915
5916        return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
5917}
5918#endif /* CONFIG_IGB_DCA */
5919
5920#ifdef CONFIG_PCI_IOV
5921static int igb_vf_configure(struct igb_adapter *adapter, int vf)
5922{
5923        unsigned char mac_addr[ETH_ALEN];
5924
5925        eth_zero_addr(mac_addr);
5926        igb_set_vf_mac(adapter, vf, mac_addr);
5927
5928        /* By default spoof check is enabled for all VFs */
5929        adapter->vf_data[vf].spoofchk_enabled = true;
5930
5931        return 0;
5932}
5933
5934#endif
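/**
 *  igb_ping_all_vfs - notify all VFs through the mailbox
 *  @adapter: board private structure
 *
 *  Sends a PF control message to every allocated VF, including the CTS
 *  (clear to send) bit for VFs that have completed their reset
 *  handshake.
 **/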
5935static void igb_ping_all_vfs(struct igb_adapter *adapter)
5936{
5937        struct e1000_hw *hw = &adapter->hw;
5938        u32 ping;
5939        int i;
5940
5941        for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5942                ping = E1000_PF_CONTROL_MSG;
5943                if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5944                        ping |= E1000_VT_MSGTYPE_CTS;
5945                igb_write_mbx(hw, &ping, 1, i);
5946        }
5947}
5948
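/**
 *  igb_set_vf_promisc - handle a VF promiscuous mode request
 *  @adapter: board private structure
 *  @msgbuf: mailbox message received from the VF
 *  @vf: VF the message came from
 *
 *  Updates the VMOLR register for the VF.  Multicast promiscuous mode
 *  is honored; when it is cleared the stored multicast hashes are
 *  written back to the MTA.  Unsupported flags left in the message
 *  cause -EINVAL to be returned.
 **/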
5949static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5950{
5951        struct e1000_hw *hw = &adapter->hw;
5952        u32 vmolr = rd32(E1000_VMOLR(vf));
5953        struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5954
5955        vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5956                            IGB_VF_FLAG_MULTI_PROMISC);
5957        vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5958
5959        if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5960                vmolr |= E1000_VMOLR_MPME;
5961                vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5962                *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5963        } else {
5964                /* if we have hashes and we are clearing a multicast promisc
5965                 * flag we need to write the hashes to the MTA as this step
5966                 * was previously skipped
5967                 */
5968                if (vf_data->num_vf_mc_hashes > 30) {
5969                        vmolr |= E1000_VMOLR_MPME;
5970                } else if (vf_data->num_vf_mc_hashes) {
5971                        int j;
5972
5973                        vmolr |= E1000_VMOLR_ROMPE;
5974                        for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5975                                igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5976                }
5977        }
5978
5979        wr32(E1000_VMOLR(vf), vmolr);
5980
5981        /* there are flags left unprocessed, likely not supported */
5982        if (*msgbuf & E1000_VT_MSGINFO_MASK)
5983                return -EINVAL;
5984
5985        return 0;
5986}
5987
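/**
 *  igb_set_vf_multicasts - store a VF multicast filter request
 *  @adapter: board private structure
 *  @msgbuf: mailbox message containing the hash list
 *  @vf: VF the message came from
 *
 *  Records how many multicast addresses the VF requested, saves up to
 *  30 of the hash values for later restoration and refreshes the MTA
 *  via igb_set_rx_mode().
 **/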
5988static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5989                                  u32 *msgbuf, u32 vf)
5990{
5991        int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5992        u16 *hash_list = (u16 *)&msgbuf[1];
5993        struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5994        int i;
5995
5996        /* salt away the number of multicast addresses assigned
5997         * to this VF for later use to restore when the PF multicast
5998         * list changes
5999         */
6000        vf_data->num_vf_mc_hashes = n;
6001
6002        /* only up to 30 hash values supported */
6003        if (n > 30)
6004                n = 30;
6005
6006        /* store the hashes for later use */
6007        for (i = 0; i < n; i++)
6008                vf_data->vf_mc_hashes[i] = hash_list[i];
6009
6010        /* Flush and reset the mta with the new values */
6011        igb_set_rx_mode(adapter->netdev);
6012
6013        return 0;
6014}
6015
6016static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
6017{
6018        struct e1000_hw *hw = &adapter->hw;
6019        struct vf_data_storage *vf_data;
6020        int i, j;
6021
6022        for (i = 0; i < adapter->vfs_allocated_count; i++) {
6023                u32 vmolr = rd32(E1000_VMOLR(i));
6024
6025                vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
6026
6027                vf_data = &adapter->vf_data[i];
6028
6029                if ((vf_data->num_vf_mc_hashes > 30) ||
6030                    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
6031                        vmolr |= E1000_VMOLR_MPME;
6032                } else if (vf_data->num_vf_mc_hashes) {
6033                        vmolr |= E1000_VMOLR_ROMPE;
6034                        for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
6035                                igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
6036                }
6037                wr32(E1000_VMOLR(i), vmolr);
6038        }
6039}
6040
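/**
 *  igb_clear_vf_vfta - remove a VF from all VLAN filter entries
 *  @adapter: board private structure
 *  @vf: VF to remove from the VLVF pools
 *
 *  Walks the VLVF array and drops the VF's pool bit from every entry
 *  it is a member of.  When neither another VF pool nor the PF still
 *  uses an entry, the corresponding VFTA bit is cleared and the entry
 *  is released.
 **/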
6041static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
6042{
6043        struct e1000_hw *hw = &adapter->hw;
6044        u32 pool_mask, vlvf_mask, i;
6045
6046        /* create mask for VF and other pools */
6047        pool_mask = E1000_VLVF_POOLSEL_MASK;
6048        vlvf_mask = BIT(E1000_VLVF_POOLSEL_SHIFT + vf);
6049
6050        /* drop PF from pool bits */
6051        pool_mask &= ~BIT(E1000_VLVF_POOLSEL_SHIFT +
6052                             adapter->vfs_allocated_count);
6053
6054        /* Find the vlan filter for this id */
6055        for (i = E1000_VLVF_ARRAY_SIZE; i--;) {
6056                u32 vlvf = rd32(E1000_VLVF(i));
6057                u32 vfta_mask, vid, vfta;
6058
6059                /* remove the vf from the pool */
6060                if (!(vlvf & vlvf_mask))
6061                        continue;
6062
6063                /* clear out bit from VLVF */
6064                vlvf ^= vlvf_mask;
6065
6066                /* if other pools are present, just remove ourselves */
6067                if (vlvf & pool_mask)
6068                        goto update_vlvfb;
6069
6070                /* if PF is present, leave VFTA */
6071                if (vlvf & E1000_VLVF_POOLSEL_MASK)
6072                        goto update_vlvf;
6073
6074                vid = vlvf & E1000_VLVF_VLANID_MASK;
6075                vfta_mask = BIT(vid % 32);
6076
6077                /* clear bit from VFTA */
6078                vfta = adapter->shadow_vfta[vid / 32];
6079                if (vfta & vfta_mask)
6080                        hw->mac.ops.write_vfta(hw, vid / 32, vfta ^ vfta_mask);
6081update_vlvf:
6082                /* clear pool selection enable */
6083                if (adapter->flags & IGB_FLAG_VLAN_PROMISC)
6084                        vlvf &= E1000_VLVF_POOLSEL_MASK;
6085                else
6086                        vlvf = 0;
6087update_vlvfb:
6088                /* clear pool bits */
6089                wr32(E1000_VLVF(i), vlvf);
6090        }
6091}
6092
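/**
 *  igb_find_vlvf_entry - locate the VLVF entry for a VLAN ID
 *  @hw: pointer to the HW structure
 *  @vlan: VLAN ID to search for
 *
 *  Returns the index of the VLVF entry matching @vlan, or 0 if the
 *  VLAN is 0 or no matching entry was found.
 **/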
6093static int igb_find_vlvf_entry(struct e1000_hw *hw, u32 vlan)
6094{
6095        u32 vlvf;
6096        int idx;
6097
6098        /* short cut the special case */
6099        if (vlan == 0)
6100                return 0;
6101
6102        /* Search for the VLAN id in the VLVF entries */
6103        for (idx = E1000_VLVF_ARRAY_SIZE; --idx;) {
6104                vlvf = rd32(E1000_VLVF(idx));
6105                if ((vlvf & VLAN_VID_MASK) == vlan)
6106                        break;
6107        }
6108
6109        return idx;
6110}
6111
6112static void igb_update_pf_vlvf(struct igb_adapter *adapter, u32 vid)
6113{
6114        struct e1000_hw *hw = &adapter->hw;
6115        u32 bits, pf_id;
6116        int idx;
6117
6118        idx = igb_find_vlvf_entry(hw, vid);
6119        if (!idx)
6120                return;
6121
6122        /* See if any other pools are set for this VLAN filter
6123         * entry other than the PF.
6124         */
6125        pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT;
6126        bits = ~BIT(pf_id) & E1000_VLVF_POOLSEL_MASK;
6127        bits &= rd32(E1000_VLVF(idx));
6128
6129        /* Disable the filter so this falls into the default pool. */
6130        if (!bits) {
6131                if (adapter->flags & IGB_FLAG_VLAN_PROMISC)
6132                        wr32(E1000_VLVF(idx), BIT(pf_id));
6133                else
6134                        wr32(E1000_VLVF(idx), 0);
6135        }
6136}
6137
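/**
 *  igb_set_vf_vlan - add or remove a VLAN filter on behalf of a VF
 *  @adapter: board private structure
 *  @vid: VLAN ID being added or removed
 *  @add: true to add the filter, false to remove it
 *  @vf: VF (pool) the filter applies to
 *
 *  Makes sure the PF keeps its own filter for VLANs it is monitoring
 *  before updating the VF's VFTA/VLVF state, and drops the PF pool bit
 *  again when that is needed to free up VLVF resources.
 **/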
6138static s32 igb_set_vf_vlan(struct igb_adapter *adapter, u32 vid,
6139                           bool add, u32 vf)
6140{
6141        int pf_id = adapter->vfs_allocated_count;
6142        struct e1000_hw *hw = &adapter->hw;
6143        int err;
6144
6145        /* If VLAN overlaps with one the PF is currently monitoring make
6146         * sure that we are able to allocate a VLVF entry.  This may be
6147         * redundant but it guarantees PF will maintain visibility to
6148         * the VLAN.
6149         */
6150        if (add && test_bit(vid, adapter->active_vlans)) {
6151                err = igb_vfta_set(hw, vid, pf_id, true, false);
6152                if (err)
6153                        return err;
6154        }
6155
6156        err = igb_vfta_set(hw, vid, vf, add, false);
6157
6158        if (add && !err)
6159                return err;
6160
6161        /* If we failed to add the VF VLAN or we are removing the VF VLAN
6162         * we may need to drop the PF pool bit in order to allow us to free
6163         * up the VLVF resources.
6164         */
6165        if (test_bit(vid, adapter->active_vlans) ||
6166            (adapter->flags & IGB_FLAG_VLAN_PROMISC))
6167                igb_update_pf_vlvf(adapter, vid);
6168
6169        return err;
6170}
6171
6172static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
6173{
6174        struct e1000_hw *hw = &adapter->hw;
6175
6176        if (vid)
6177                wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
6178        else
6179                wr32(E1000_VMVIR(vf), 0);
6180}
6181
6182static int igb_enable_port_vlan(struct igb_adapter *adapter, int vf,
6183                                u16 vlan, u8 qos)
6184{
6185        int err;
6186
6187        err = igb_set_vf_vlan(adapter, vlan, true, vf);
6188        if (err)
6189                return err;
6190
6191        igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
6192        igb_set_vmolr(adapter, vf, !vlan);
6193
6194        /* revoke access to previous VLAN */
6195        if (vlan != adapter->vf_data[vf].pf_vlan)
6196                igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan,
6197                                false, vf);
6198
6199        adapter->vf_data[vf].pf_vlan = vlan;
6200        adapter->vf_data[vf].pf_qos = qos;
6201        igb_set_vf_vlan_strip(adapter, vf, true);
6202        dev_info(&adapter->pdev->dev,
6203                 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
6204        if (test_bit(__IGB_DOWN, &adapter->state)) {
6205                dev_warn(&adapter->pdev->dev,
6206                         "The VF VLAN has been set, but the PF device is not up.\n");
6207                dev_warn(&adapter->pdev->dev,
6208                         "Bring the PF device up before attempting to use the VF device.\n");
6209        }
6210
6211        return err;
6212}
6213
6214static int igb_disable_port_vlan(struct igb_adapter *adapter, int vf)
6215{
6216        /* Restore tagless access via VLAN 0 */
6217        igb_set_vf_vlan(adapter, 0, true, vf);
6218
6219        igb_set_vmvir(adapter, 0, vf);
6220        igb_set_vmolr(adapter, vf, true);
6221
6222        /* Remove any PF assigned VLAN */
6223        if (adapter->vf_data[vf].pf_vlan)
6224                igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan,
6225                                false, vf);
6226
6227        adapter->vf_data[vf].pf_vlan = 0;
6228        adapter->vf_data[vf].pf_qos = 0;
6229        igb_set_vf_vlan_strip(adapter, vf, false);
6230
6231        return 0;
6232}
6233
6234static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf,
6235                               u16 vlan, u8 qos, __be16 vlan_proto)
6236{
6237        struct igb_adapter *adapter = netdev_priv(netdev);
6238
6239        if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
6240                return -EINVAL;
6241
6242        if (vlan_proto != htons(ETH_P_8021Q))
6243                return -EPROTONOSUPPORT;
6244
6245        return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) :
6246                               igb_disable_port_vlan(adapter, vf);
6247}
6248
6249static int igb_set_vf_vlan_msg(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
6250{
6251        int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
6252        int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
6253        int ret;
6254
6255        if (adapter->vf_data[vf].pf_vlan)
6256                return -1;
6257
6258        /* VLAN 0 is a special case, don't allow it to be removed */
6259        if (!vid && !add)
6260                return 0;
6261
6262        ret = igb_set_vf_vlan(adapter, vid, !!add, vf);
6263        if (!ret)
6264                igb_set_vf_vlan_strip(adapter, vf, !!vid);
6265        return ret;
6266}
6267
6268static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
6269{
6270        struct vf_data_storage *vf_data = &adapter->vf_data[vf];
6271
6272        /* clear flags - except flag that indicates PF has set the MAC */
6273        vf_data->flags &= IGB_VF_FLAG_PF_SET_MAC;
6274        vf_data->last_nack = jiffies;
6275
6276        /* reset vlans for device */
6277        igb_clear_vf_vfta(adapter, vf);
6278        igb_set_vf_vlan(adapter, vf_data->pf_vlan, true, vf);
6279        igb_set_vmvir(adapter, vf_data->pf_vlan |
6280                               (vf_data->pf_qos << VLAN_PRIO_SHIFT), vf);
6281        igb_set_vmolr(adapter, vf, !vf_data->pf_vlan);
6282        igb_set_vf_vlan_strip(adapter, vf, !!(vf_data->pf_vlan));
6283
6284        /* reset multicast table array for vf */
6285        adapter->vf_data[vf].num_vf_mc_hashes = 0;
6286
6287        /* Flush and reset the mta with the new values */
6288        igb_set_rx_mode(adapter->netdev);
6289}
6290
6291static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
6292{
6293        unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
6294
6295        /* clear mac address as we were hotplug removed/added */
6296        if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
6297                eth_zero_addr(vf_mac);
6298
6299        /* process remaining reset events */
6300        igb_vf_reset(adapter, vf);
6301}
6302
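/**
 *  igb_vf_reset_msg - handle a VF reset request
 *  @adapter: board private structure
 *  @vf: VF that requested the reset
 *
 *  Performs the same cleanup as a function level reset, programs the
 *  VF MAC address into its RAR entry, enables transmit and receive for
 *  the VF and replies through the mailbox with an ACK (plus the MAC
 *  address) or a NACK if no address is assigned.
 **/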
6303static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
6304{
6305        struct e1000_hw *hw = &adapter->hw;
6306        unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
6307        int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6308        u32 reg, msgbuf[3];
6309        u8 *addr = (u8 *)(&msgbuf[1]);
6310
6311        /* process all the same items cleared in a function level reset */
6312        igb_vf_reset(adapter, vf);
6313
6314        /* set vf mac address */
6315        igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
6316
6317        /* enable transmit and receive for vf */
6318        reg = rd32(E1000_VFTE);
6319        wr32(E1000_VFTE, reg | BIT(vf));
6320        reg = rd32(E1000_VFRE);
6321        wr32(E1000_VFRE, reg | BIT(vf));
6322
6323        adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
6324
6325        /* reply to reset with ack and vf mac address */
6326        if (!is_zero_ether_addr(vf_mac)) {
6327                msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
6328                memcpy(addr, vf_mac, ETH_ALEN);
6329        } else {
6330                msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_NACK;
6331        }
6332        igb_write_mbx(hw, msgbuf, 3, vf);
6333}
6334
6335static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
6336{
6337        /* The VF MAC Address is stored in a packed array of bytes
6338         * starting at the second 32 bit word of the msg array
6339         */
6340        unsigned char *addr = (unsigned char *)&msg[1];
6341        int err = -1;
6342
6343        if (is_valid_ether_addr(addr))
6344                err = igb_set_vf_mac(adapter, vf, addr);
6345
6346        return err;
6347}
6348
6349static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
6350{
6351        struct e1000_hw *hw = &adapter->hw;
6352        struct vf_data_storage *vf_data = &adapter->vf_data[vf];
6353        u32 msg = E1000_VT_MSGTYPE_NACK;
6354
6355        /* if device isn't clear to send it shouldn't be reading either */
6356        if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
6357            time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
6358                igb_write_mbx(hw, &msg, 1, vf);
6359                vf_data->last_nack = jiffies;
6360        }
6361}
6362
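/**
 *  igb_rcv_msg_from_vf - read and dispatch one VF mailbox message
 *  @adapter: board private structure
 *  @vf: VF with a message pending
 *
 *  Reads the mailbox and dispatches the request (MAC address, promisc,
 *  multicast, LPE or VLAN configuration).  Until the VF has completed
 *  a reset only reset requests are honored.  The VF is answered with
 *  an ACK or NACK depending on the result.
 **/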
6363static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
6364{
6365        struct pci_dev *pdev = adapter->pdev;
6366        u32 msgbuf[E1000_VFMAILBOX_SIZE];
6367        struct e1000_hw *hw = &adapter->hw;
6368        struct vf_data_storage *vf_data = &adapter->vf_data[vf];
6369        s32 retval;
6370
6371        retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
6372
6373        if (retval) {
6374                /* if receive failed revoke VF CTS stats and restart init */
6375                dev_err(&pdev->dev, "Error receiving message from VF\n");
6376                vf_data->flags &= ~IGB_VF_FLAG_CTS;
6377                if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
6378                        return;
6379                goto out;
6380        }
6381
6382        /* this is a message we already processed, do nothing */
6383        if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
6384                return;
6385
6386        /* until the vf completes a reset it should not be
6387         * allowed to start any configuration.
6388         */
6389        if (msgbuf[0] == E1000_VF_RESET) {
6390                igb_vf_reset_msg(adapter, vf);
6391                return;
6392        }
6393
6394        if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
6395                if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
6396                        return;
6397                retval = -1;
6398                goto out;
6399        }
6400
6401        switch ((msgbuf[0] & 0xFFFF)) {
6402        case E1000_VF_SET_MAC_ADDR:
6403                retval = -EINVAL;
6404                if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
6405                        retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
6406                else
6407                        dev_warn(&pdev->dev,
6408                                 "VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n",
6409                                 vf);
6410                break;
6411        case E1000_VF_SET_PROMISC:
6412                retval = igb_set_vf_promisc(adapter, msgbuf, vf);
6413                break;
6414        case E1000_VF_SET_MULTICAST:
6415                retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
6416                break;
6417        case E1000_VF_SET_LPE:
6418                retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
6419                break;
6420        case E1000_VF_SET_VLAN:
6421                retval = -1;
6422                if (vf_data->pf_vlan)
6423                        dev_warn(&pdev->dev,
6424                                 "VF %d attempted to override administratively set VLAN tag\nReload the VF driver to resume operations\n",
6425                                 vf);
6426                else
6427                        retval = igb_set_vf_vlan_msg(adapter, msgbuf, vf);
6428                break;
6429        default:
6430                dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
6431                retval = -1;
6432                break;
6433        }
6434
6435        msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
6436out:
6437        /* notify the VF of the results of what it sent us */
6438        if (retval)
6439                msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
6440        else
6441                msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
6442
6443        igb_write_mbx(hw, msgbuf, 1, vf);
6444}
6445
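/**
 *  igb_msg_task - service pending VF mailbox events
 *  @adapter: board private structure
 *
 *  For each allocated VF, processes any outstanding reset requests,
 *  messages and acks reported by the mailbox.
 **/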
6446static void igb_msg_task(struct igb_adapter *adapter)
6447{
6448        struct e1000_hw *hw = &adapter->hw;
6449        u32 vf;
6450
6451        for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
6452                /* process any reset requests */
6453                if (!igb_check_for_rst(hw, vf))
6454                        igb_vf_reset_event(adapter, vf);
6455
6456                /* process any messages pending */
6457                if (!igb_check_for_msg(hw, vf))
6458                        igb_rcv_msg_from_vf(adapter, vf);
6459
6460                /* process any acks */
6461                if (!igb_check_for_ack(hw, vf))
6462                        igb_rcv_ack_from_vf(adapter, vf);
6463        }
6464}
6465
6466/**
6467 *  igb_set_uta - Set unicast filter table address
6468 *  @adapter: board private structure
6469 *  @set: boolean indicating if we are setting or clearing bits
6470 *
6471 *  The unicast table address is a register array of 32-bit registers.
6472 *  The table is meant to be used in a way similar to how the MTA is used;
6473 *  however, due to certain limitations in the hardware it is necessary to
6474 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
6475 *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
6476 **/
6477static void igb_set_uta(struct igb_adapter *adapter, bool set)
6478{
6479        struct e1000_hw *hw = &adapter->hw;
6480        u32 uta = set ? ~0 : 0;
6481        int i;
6482
6483        /* we only need to do this if VMDq is enabled */
6484        if (!adapter->vfs_allocated_count)
6485                return;
6486
6487        for (i = hw->mac.uta_reg_count; i--;)
6488                array_wr32(E1000_UTA, i, uta);
6489}
6490
6491/**
6492 *  igb_intr_msi - Interrupt Handler
6493 *  @irq: interrupt number
6494 *  @data: pointer to a network interface device structure
6495 **/
6496static irqreturn_t igb_intr_msi(int irq, void *data)
6497{
6498        struct igb_adapter *adapter = data;
6499        struct igb_q_vector *q_vector = adapter->q_vector[0];
6500        struct e1000_hw *hw = &adapter->hw;
6501        /* read ICR disables interrupts using IAM */
6502        u32 icr = rd32(E1000_ICR);
6503
6504        igb_write_itr(q_vector);
6505
6506        if (icr & E1000_ICR_DRSTA)
6507                schedule_work(&adapter->reset_task);
6508
6509        if (icr & E1000_ICR_DOUTSYNC) {
6510                /* HW is reporting DMA is out of sync */
6511                adapter->stats.doosync++;
6512        }
6513
6514        if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
6515                hw->mac.get_link_status = 1;
6516                if (!test_bit(__IGB_DOWN, &adapter->state))
6517                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
6518        }
6519
6520        if (icr & E1000_ICR_TS)
6521                igb_tsync_interrupt(adapter);
6522
6523        napi_schedule(&q_vector->napi);
6524
6525        return IRQ_HANDLED;
6526}
6527
6528/**
6529 *  igb_intr - Legacy Interrupt Handler
6530 *  @irq: interrupt number
6531 *  @data: pointer to a network interface device structure
6532 **/
6533static irqreturn_t igb_intr(int irq, void *data)
6534{
6535        struct igb_adapter *adapter = data;
6536        struct igb_q_vector *q_vector = adapter->q_vector[0];
6537        struct e1000_hw *hw = &adapter->hw;
6538        /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
6539         * need for the IMC write
6540         */
6541        u32 icr = rd32(E1000_ICR);
6542
6543        /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
6544         * not set, then the adapter didn't send an interrupt
6545         */
6546        if (!(icr & E1000_ICR_INT_ASSERTED))
6547                return IRQ_NONE;
6548
6549        igb_write_itr(q_vector);
6550
6551        if (icr & E1000_ICR_DRSTA)
6552                schedule_work(&adapter->reset_task);
6553
6554        if (icr & E1000_ICR_DOUTSYNC) {
6555                /* HW is reporting DMA is out of sync */
6556                adapter->stats.doosync++;
6557        }
6558
6559        if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
6560                hw->mac.get_link_status = 1;
6561                /* guard against interrupt when we're going down */
6562                if (!test_bit(__IGB_DOWN, &adapter->state))
6563                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
6564        }
6565
6566        if (icr & E1000_ICR_TS)
6567                igb_tsync_interrupt(adapter);
6568
6569        napi_schedule(&q_vector->napi);
6570
6571        return IRQ_HANDLED;
6572}
6573
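/**
 *  igb_ring_irq_enable - re-enable interrupts for a q_vector
 *  @q_vector: q_vector that finished polling
 *
 *  Updates the ITR for the q_vector when dynamic moderation is in use,
 *  then re-arms its interrupt (the EIMS bit in MSI-X mode, otherwise
 *  the full interrupt mask) unless the adapter is going down.
 **/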
6574static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
6575{
6576        struct igb_adapter *adapter = q_vector->adapter;
6577        struct e1000_hw *hw = &adapter->hw;
6578
6579        if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
6580            (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
6581                if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
6582                        igb_set_itr(q_vector);
6583                else
6584                        igb_update_ring_itr(q_vector);
6585        }
6586
6587        if (!test_bit(__IGB_DOWN, &adapter->state)) {
6588                if (adapter->flags & IGB_FLAG_HAS_MSIX)
6589                        wr32(E1000_EIMS, q_vector->eims_value);
6590                else
6591                        igb_irq_enable(adapter);
6592        }
6593}
6594
6595/**
6596 *  igb_poll - NAPI Rx polling callback
6597 *  @napi: napi polling structure
6598 *  @budget: count of how many packets we should handle
6599 **/
6600static int igb_poll(struct napi_struct *napi, int budget)
6601{
6602        struct igb_q_vector *q_vector = container_of(napi,
6603                                                     struct igb_q_vector,
6604                                                     napi);
6605        bool clean_complete = true;
6606        int work_done = 0;
6607
6608#ifdef CONFIG_IGB_DCA
6609        if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
6610                igb_update_dca(q_vector);
6611#endif
6612        if (q_vector->tx.ring)
6613                clean_complete = igb_clean_tx_irq(q_vector, budget);
6614
6615        if (q_vector->rx.ring) {
6616                int cleaned = igb_clean_rx_irq(q_vector, budget);
6617
6618                work_done += cleaned;
6619                if (cleaned >= budget)
6620                        clean_complete = false;
6621        }
6622
6623        /* If all work not completed, return budget and keep polling */
6624        if (!clean_complete)
6625                return budget;
6626
6627        /* If not enough Rx work done, exit the polling mode */
6628        napi_complete_done(napi, work_done);
6629        igb_ring_irq_enable(q_vector);
6630
6631        return 0;
6632}
6633
6634/**
6635 *  igb_clean_tx_irq - Reclaim resources after transmit completes
6636 *  @q_vector: pointer to q_vector containing needed info
6637 *  @napi_budget: Used to determine if we are in netpoll
6638 *
6639 *  returns true if ring is completely cleaned
6640 **/
6641static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
6642{
6643        struct igb_adapter *adapter = q_vector->adapter;
6644        struct igb_ring *tx_ring = q_vector->tx.ring;
6645        struct igb_tx_buffer *tx_buffer;
6646        union e1000_adv_tx_desc *tx_desc;
6647        unsigned int total_bytes = 0, total_packets = 0;
6648        unsigned int budget = q_vector->tx.work_limit;
6649        unsigned int i = tx_ring->next_to_clean;
6650
6651        if (test_bit(__IGB_DOWN, &adapter->state))
6652                return true;
6653
6654        tx_buffer = &tx_ring->tx_buffer_info[i];
6655        tx_desc = IGB_TX_DESC(tx_ring, i);
6656        i -= tx_ring->count;
6657
6658        do {
6659                union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
6660
6661                /* if next_to_watch is not set then there is no work pending */
6662                if (!eop_desc)
6663                        break;
6664
6665                /* prevent any other reads prior to eop_desc */
6666                read_barrier_depends();
6667
6668                /* if DD is not set pending work has not been completed */
6669                if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
6670                        break;
6671
6672                /* clear next_to_watch to prevent false hangs */
6673                tx_buffer->next_to_watch = NULL;
6674
6675                /* update the statistics for this packet */
6676                total_bytes += tx_buffer->bytecount;
6677                total_packets += tx_buffer->gso_segs;
6678
6679                /* free the skb */
6680                napi_consume_skb(tx_buffer->skb, napi_budget);
6681
6682                /* unmap skb header data */
6683                dma_unmap_single(tx_ring->dev,
6684                                 dma_unmap_addr(tx_buffer, dma),
6685                                 dma_unmap_len(tx_buffer, len),
6686                                 DMA_TO_DEVICE);
6687
6688                /* clear tx_buffer data */
6689                tx_buffer->skb = NULL;
6690                dma_unmap_len_set(tx_buffer, len, 0);
6691
6692                /* clear last DMA location and unmap remaining buffers */
6693                while (tx_desc != eop_desc) {
6694                        tx_buffer++;
6695                        tx_desc++;
6696                        i++;
6697                        if (unlikely(!i)) {
6698                                i -= tx_ring->count;
6699                                tx_buffer = tx_ring->tx_buffer_info;
6700                                tx_desc = IGB_TX_DESC(tx_ring, 0);
6701                        }
6702
6703                        /* unmap any remaining paged data */
6704                        if (dma_unmap_len(tx_buffer, len)) {
6705                                dma_unmap_page(tx_ring->dev,
6706                                               dma_unmap_addr(tx_buffer, dma),
6707                                               dma_unmap_len(tx_buffer, len),
6708                                               DMA_TO_DEVICE);
6709                                dma_unmap_len_set(tx_buffer, len, 0);
6710                        }
6711                }
6712
6713                /* move us one more past the eop_desc for start of next pkt */
6714                tx_buffer++;
6715                tx_desc++;
6716                i++;
6717                if (unlikely(!i)) {
6718                        i -= tx_ring->count;
6719                        tx_buffer = tx_ring->tx_buffer_info;
6720                        tx_desc = IGB_TX_DESC(tx_ring, 0);
6721                }
6722
6723                /* issue prefetch for next Tx descriptor */
6724                prefetch(tx_desc);
6725
6726                /* update budget accounting */
6727                budget--;
6728        } while (likely(budget));
6729
6730        netdev_tx_completed_queue(txring_txq(tx_ring),
6731                                  total_packets, total_bytes);
6732        i += tx_ring->count;
6733        tx_ring->next_to_clean = i;
6734        u64_stats_update_begin(&tx_ring->tx_syncp);
6735        tx_ring->tx_stats.bytes += total_bytes;
6736        tx_ring->tx_stats.packets += total_packets;
6737        u64_stats_update_end(&tx_ring->tx_syncp);
6738        q_vector->tx.total_bytes += total_bytes;
6739        q_vector->tx.total_packets += total_packets;
6740
6741        if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
6742                struct e1000_hw *hw = &adapter->hw;
6743
6744                /* Detect a transmit hang in hardware; this serializes the
6745                 * check with the clearing of time_stamp and movement of i
6746                 */
6747                clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
6748                if (tx_buffer->next_to_watch &&
6749                    time_after(jiffies, tx_buffer->time_stamp +
6750                               (adapter->tx_timeout_factor * HZ)) &&
6751                    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
6752
6753                        /* detected Tx unit hang */
6754                        dev_err(tx_ring->dev,
6755                                "Detected Tx Unit Hang\n"
6756                                "  Tx Queue             <%d>\n"
6757                                "  TDH                  <%x>\n"
6758                                "  TDT                  <%x>\n"
6759                                "  next_to_use          <%x>\n"
6760                                "  next_to_clean        <%x>\n"
6761                                "buffer_info[next_to_clean]\n"
6762                                "  time_stamp           <%lx>\n"
6763                                "  next_to_watch        <%p>\n"
6764                                "  jiffies              <%lx>\n"
6765                                "  desc.status          <%x>\n",
6766                                tx_ring->queue_index,
6767                                rd32(E1000_TDH(tx_ring->reg_idx)),
6768                                readl(tx_ring->tail),
6769                                tx_ring->next_to_use,
6770                                tx_ring->next_to_clean,
6771                                tx_buffer->time_stamp,
6772                                tx_buffer->next_to_watch,
6773                                jiffies,
6774                                tx_buffer->next_to_watch->wb.status);
6775                        netif_stop_subqueue(tx_ring->netdev,
6776                                            tx_ring->queue_index);
6777
6778                        /* we are about to reset, no point in enabling stuff */
6779                        return true;
6780                }
6781        }
6782
6783#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
6784        if (unlikely(total_packets &&
6785            netif_carrier_ok(tx_ring->netdev) &&
6786            igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
6787                /* Make sure that anybody stopping the queue after this
6788                 * sees the new next_to_clean.
6789                 */
6790                smp_mb();
6791                if (__netif_subqueue_stopped(tx_ring->netdev,
6792                                             tx_ring->queue_index) &&
6793                    !(test_bit(__IGB_DOWN, &adapter->state))) {
6794                        netif_wake_subqueue(tx_ring->netdev,
6795                                            tx_ring->queue_index);
6796
6797                        u64_stats_update_begin(&tx_ring->tx_syncp);
6798                        tx_ring->tx_stats.restart_queue++;
6799                        u64_stats_update_end(&tx_ring->tx_syncp);
6800                }
6801        }
6802
6803        return !!budget;
6804}
6805
6806/**
6807 *  igb_reuse_rx_page - page flip buffer and store it back on the ring
6808 *  @rx_ring: rx descriptor ring to store buffers on
6809 *  @old_buff: donor buffer to have page reused
6810 *
6811 *  Synchronizes page for reuse by the adapter
6812 **/
6813static void igb_reuse_rx_page(struct igb_ring *rx_ring,
6814                              struct igb_rx_buffer *old_buff)
6815{
6816        struct igb_rx_buffer *new_buff;
6817        u16 nta = rx_ring->next_to_alloc;
6818
6819        new_buff = &rx_ring->rx_buffer_info[nta];
6820
6821        /* update, and store next to alloc */
6822        nta++;
6823        rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
6824
6825        /* transfer page from old buffer to new buffer */
6826        *new_buff = *old_buff;
6827}
6828
6829static inline bool igb_page_is_reserved(struct page *page)
6830{
6831        return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
6832}
6833
6834static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
6835                                  struct page *page,
6836                                  unsigned int truesize)
6837{
6838        unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
6839
6840        /* avoid re-using remote pages */
6841        if (unlikely(igb_page_is_reserved(page)))
6842                return false;
6843
6844#if (PAGE_SIZE < 8192)
6845        /* if we are only owner of page we can reuse it */
6846        if (unlikely(page_ref_count(page) != pagecnt_bias))
6847                return false;
6848
6849        /* flip page offset to other buffer */
6850        rx_buffer->page_offset ^= IGB_RX_BUFSZ;
6851#else
6852        /* move offset up to the next cache line */
6853        rx_buffer->page_offset += truesize;
6854
6855        if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ))
6856                return false;
6857#endif
6858
6859        /* If we have drained the page fragment pool we need to update
6860         * the pagecnt_bias and page count so that we fully restock the
6861         * number of references the driver holds.
6862         */
6863        if (unlikely(pagecnt_bias == 1)) {
6864                page_ref_add(page, USHRT_MAX);
6865                rx_buffer->pagecnt_bias = USHRT_MAX;
6866        }
6867
6868        return true;
6869}
6870
6871/**
6872 *  igb_add_rx_frag - Add contents of Rx buffer to sk_buff
6873 *  @rx_ring: rx descriptor ring to transact packets on
6874 *  @rx_buffer: buffer containing page to add
 *  @size: size of the buffer data to add to the skb
6875 *  @rx_desc: descriptor containing length of buffer written by hardware
6876 *  @skb: sk_buff to place the data into
6877 *
6878 *  This function will add the data contained in rx_buffer->page to the skb.
6879 *  This is done either through a direct copy, if the data in the buffer is
6880 *  less than the skb header size, or by attaching the page as a frag to
6881 *  the skb.
6882 *
6883 *  The function will then update the page offset if necessary and return
6884 *  true if the buffer can be reused by the adapter.
6885 **/
6886static bool igb_add_rx_frag(struct igb_ring *rx_ring,
6887                            struct igb_rx_buffer *rx_buffer,
6888                            unsigned int size,
6889                            union e1000_adv_rx_desc *rx_desc,
6890                            struct sk_buff *skb)
6891{
6892        struct page *page = rx_buffer->page;
6893        unsigned char *va = page_address(page) + rx_buffer->page_offset;
6894#if (PAGE_SIZE < 8192)
6895        unsigned int truesize = IGB_RX_BUFSZ;
6896#else
6897        unsigned int truesize = SKB_DATA_ALIGN(size);
6898#endif
6899        unsigned int pull_len;
6900
6901        if (unlikely(skb_is_nonlinear(skb)))
6902                goto add_tail_frag;
6903
6904        if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) {
6905                igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
6906                va += IGB_TS_HDR_LEN;
6907                size -= IGB_TS_HDR_LEN;
6908        }
6909
6910        if (likely(size <= IGB_RX_HDR_LEN)) {
6911                memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
6912
6913                /* page is not reserved, we can reuse buffer as-is */
6914                if (likely(!igb_page_is_reserved(page)))
6915                        return true;
6916
6917                /* this page cannot be reused so discard it */
6918                return false;
6919        }
6920
6921        /* we need the header to contain the greater of either ETH_HLEN or
6922         * 60 bytes if the skb->len is less than 60 for skb_pad.
6923         */
6924        pull_len = eth_get_headlen(va, IGB_RX_HDR_LEN);
6925
6926        /* align pull length to size of long to optimize memcpy performance */
6927        memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
6928
6929        /* update all of the pointers */
6930        va += pull_len;
6931        size -= pull_len;
6932
6933add_tail_frag:
6934        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
6935                        (unsigned long)va & ~PAGE_MASK, size, truesize);
6936
6937        return igb_can_reuse_rx_page(rx_buffer, page, truesize);
6938}
6939
6940static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
6941                                           union e1000_adv_rx_desc *rx_desc,
6942                                           struct sk_buff *skb)
6943{
6944        unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
6945        struct igb_rx_buffer *rx_buffer;
6946        struct page *page;
6947
6948        rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
6949        page = rx_buffer->page;
6950        prefetchw(page);
6951
6952        /* we are reusing so sync this buffer for CPU use */
6953        dma_sync_single_range_for_cpu(rx_ring->dev,
6954                                      rx_buffer->dma,
6955                                      rx_buffer->page_offset,
6956                                      size,
6957                                      DMA_FROM_DEVICE);
6958
6959        if (likely(!skb)) {
6960                void *page_addr = page_address(page) +
6961                                  rx_buffer->page_offset;
6962
6963                /* prefetch first cache line of first page */
6964                prefetch(page_addr);
6965#if L1_CACHE_BYTES < 128
6966                prefetch(page_addr + L1_CACHE_BYTES);
6967#endif
6968
6969                /* allocate a skb to store the frags */
6970                skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);
6971                if (unlikely(!skb)) {
6972                        rx_ring->rx_stats.alloc_failed++;
6973                        return NULL;
6974                }
6975
6976                /* we will be copying header into skb->data in
6977                 * pskb_may_pull so it is in our interest to prefetch
6978                 * it now to avoid a possible cache miss
6979                 */
6980                prefetchw(skb->data);
6981        }
6982
6983        /* pull page into skb */
6984        if (igb_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
6985                /* hand second half of page back to the ring */
6986                igb_reuse_rx_page(rx_ring, rx_buffer);
6987        } else {
6988                /* We are not reusing the buffer so unmap it and free
6989                 * any references we are holding to it
6990                 */
6991                dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
6992                                     PAGE_SIZE, DMA_FROM_DEVICE,
6993                                     DMA_ATTR_SKIP_CPU_SYNC);
6994                __page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
6995        }
6996
6997        /* clear contents of rx_buffer */
6998        rx_buffer->page = NULL;
6999
7000        return skb;
7001}
7002
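/**
 *  igb_rx_checksum - indicate hardware checksum results to the stack
 *  @ring: rx ring the descriptor was received on
 *  @rx_desc: Rx descriptor for the packet
 *  @skb: skb currently being populated
 *
 *  Marks the skb CHECKSUM_UNNECESSARY when hardware validated the
 *  TCP/UDP checksum, counts checksum errors (working around an SCTP
 *  errata on small packets) and otherwise leaves verification to the
 *  stack.  Respects the Rx checksum setting configured via ethtool.
 **/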
7003static inline void igb_rx_checksum(struct igb_ring *ring,
7004                                   union e1000_adv_rx_desc *rx_desc,
7005                                   struct sk_buff *skb)
7006{
7007        skb_checksum_none_assert(skb);
7008
7009        /* Ignore Checksum bit is set */
7010        if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
7011                return;
7012
7013        /* Rx checksum disabled via ethtool */
7014        if (!(ring->netdev->features & NETIF_F_RXCSUM))
7015                return;
7016
7017        /* TCP/UDP checksum error bit is set */
7018        if (igb_test_staterr(rx_desc,
7019                             E1000_RXDEXT_STATERR_TCPE |
7020                             E1000_RXDEXT_STATERR_IPE)) {
7021                /* work around errata with sctp packets where the TCPE aka
7022                 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
7023                 * packets, (aka let the stack check the crc32c)
7024                 */
7025                if (!((skb->len == 60) &&
7026                      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
7027                        u64_stats_update_begin(&ring->rx_syncp);
7028                        ring->rx_stats.csum_err++;
7029                        u64_stats_update_end(&ring->rx_syncp);
7030                }
7031                /* let the stack verify checksum errors */
7032                return;
7033        }
7034        /* It must be a TCP or UDP packet with a valid checksum */
7035        if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
7036                                      E1000_RXD_STAT_UDPCS))
7037                skb->ip_summed = CHECKSUM_UNNECESSARY;
7038
7039        dev_dbg(ring->dev, "cksum success: bits %08X\n",
7040                le32_to_cpu(rx_desc->wb.upper.status_error));
7041}
7042
7043static inline void igb_rx_hash(struct igb_ring *ring,
7044                               union e1000_adv_rx_desc *rx_desc,
7045                               struct sk_buff *skb)
7046{
7047        if (ring->netdev->features & NETIF_F_RXHASH)
7048                skb_set_hash(skb,
7049                             le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
7050                             PKT_HASH_TYPE_L3);
7051}
7052
7053/**
7054 *  igb_is_non_eop - process handling of non-EOP buffers
7055 *  @rx_ring: Rx ring being processed
7056 *  @rx_desc: Rx descriptor for current buffer
7058 *
7059 *  This function updates next to clean.  If the buffer is an EOP buffer
7060 *  this function exits returning false, otherwise it advances next to
7061 *  clean past the current descriptor and returns true to indicate that
7062 *  this is in fact a non-EOP buffer.
7063 **/
7064static bool igb_is_non_eop(struct igb_ring *rx_ring,
7065                           union e1000_adv_rx_desc *rx_desc)
7066{
7067        u32 ntc = rx_ring->next_to_clean + 1;
7068
7069        /* fetch, update, and store next to clean */
7070        ntc = (ntc < rx_ring->count) ? ntc : 0;
7071        rx_ring->next_to_clean = ntc;
7072
7073        prefetch(IGB_RX_DESC(rx_ring, ntc));
7074
7075        if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)))
7076                return false;
7077
7078        return true;
7079}
7080
7081/**
7082 *  igb_cleanup_headers - Correct corrupted or empty headers
7083 *  @rx_ring: rx descriptor ring packet is being transacted on
7084 *  @rx_desc: pointer to the EOP Rx descriptor
7085 *  @skb: pointer to current skb being fixed
7086 *
7087 *  Address the case where we are pulling data in on pages only
7088 *  and as such no data is present in the skb header.
7089 *
7090 *  In addition if skb is not at least 60 bytes we need to pad it so that
7091 *  it is large enough to qualify as a valid Ethernet frame.
7092 *
7093 *  Returns true if an error was encountered and skb was freed.
7094 **/
7095static bool igb_cleanup_headers(struct igb_ring *rx_ring,
7096                                union e1000_adv_rx_desc *rx_desc,
7097                                struct sk_buff *skb)
7098{
7099        if (unlikely((igb_test_staterr(rx_desc,
7100                                       E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) {
7101                struct net_device *netdev = rx_ring->netdev;
7102                if (!(netdev->features & NETIF_F_RXALL)) {
7103                        dev_kfree_skb_any(skb);
7104                        return true;
7105                }
7106        }
7107
7108        /* if eth_skb_pad returns an error the skb was freed */
7109        if (eth_skb_pad(skb))
7110                return true;
7111
7112        return false;
7113}
7114
7115/**
7116 *  igb_process_skb_fields - Populate skb header fields from Rx descriptor
7117 *  @rx_ring: rx descriptor ring packet is being transacted on
7118 *  @rx_desc: pointer to the EOP Rx descriptor
7119 *  @skb: pointer to current skb being populated
7120 *
7121 *  This function checks the ring, descriptor, and packet information in
7122 *  order to populate the hash, checksum, VLAN, timestamp, protocol, and
7123 *  other fields within the skb.
7124 **/
7125static void igb_process_skb_fields(struct igb_ring *rx_ring,
7126                                   union e1000_adv_rx_desc *rx_desc,
7127                                   struct sk_buff *skb)
7128{
7129        struct net_device *dev = rx_ring->netdev;
7130
7131        igb_rx_hash(rx_ring, rx_desc, skb);
7132
7133        igb_rx_checksum(rx_ring, rx_desc, skb);
7134
7135        if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS) &&
7136            !igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))
7137                igb_ptp_rx_rgtstamp(rx_ring->q_vector, skb);
7138
7139        if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
7140            igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
7141                u16 vid;
7142
7143                if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
7144                    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
7145                        vid = be16_to_cpu(rx_desc->wb.upper.vlan);
7146                else
7147                        vid = le16_to_cpu(rx_desc->wb.upper.vlan);
7148
7149                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
7150        }
7151
7152        skb_record_rx_queue(skb, rx_ring->queue_index);
7153
7154        skb->protocol = eth_type_trans(skb, rx_ring->netdev);
7155}
7156
7157static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
7158{
7159        struct igb_ring *rx_ring = q_vector->rx.ring;
7160        struct sk_buff *skb = rx_ring->skb;
7161        unsigned int total_bytes = 0, total_packets = 0;
7162        u16 cleaned_count = igb_desc_unused(rx_ring);
7163
7164        while (likely(total_packets < budget)) {
7165                union e1000_adv_rx_desc *rx_desc;
7166
7167                /* return some buffers to hardware, one at a time is too slow */
7168                if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
7169                        igb_alloc_rx_buffers(rx_ring, cleaned_count);
7170                        cleaned_count = 0;
7171                }
7172
7173                rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
7174
7175                if (!rx_desc->wb.upper.status_error)
7176                        break;
7177
7178                /* This memory barrier is needed to keep us from reading
7179                 * any other fields out of the rx_desc until we know the
7180                 * descriptor has been written back
7181                 */
7182                dma_rmb();
7183
7184                /* retrieve a buffer from the ring */
7185                skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
7186
7187                /* exit if we failed to retrieve a buffer */
7188                if (!skb)
7189                        break;
7190
7191                cleaned_count++;
7192
7193                /* fetch next buffer in frame if non-eop */
7194                if (igb_is_non_eop(rx_ring, rx_desc))
7195                        continue;
7196
7197                /* verify the packet layout is correct */
7198                if (igb_cleanup_headers(rx_ring, rx_desc, skb)) {
7199                        skb = NULL;
7200                        continue;
7201                }
7202
7203                /* probably a little skewed due to removing CRC */
7204                total_bytes += skb->len;
7205
7206                /* populate checksum, timestamp, VLAN, and protocol */
7207                igb_process_skb_fields(rx_ring, rx_desc, skb);
7208
7209                napi_gro_receive(&q_vector->napi, skb);
7210
7211                /* reset skb pointer */
7212                skb = NULL;
7213
7214                /* update budget accounting */
7215                total_packets++;
7216        }
7217
7218        /* place incomplete frames back on ring for completion */
7219        rx_ring->skb = skb;
7220
7221        u64_stats_update_begin(&rx_ring->rx_syncp);
7222        rx_ring->rx_stats.packets += total_packets;
7223        rx_ring->rx_stats.bytes += total_bytes;
7224        u64_stats_update_end(&rx_ring->rx_syncp);
7225        q_vector->rx.total_packets += total_packets;
7226        q_vector->rx.total_bytes += total_bytes;
7227
7228        if (cleaned_count)
7229                igb_alloc_rx_buffers(rx_ring, cleaned_count);
7230
7231        return total_packets;
7232}
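/* igb_clean_rx_irq() runs from the driver's NAPI poll callback (igb_poll(),
 * earlier in this file) and returns the number of packets it completed; the
 * caller compares that count against the budget to decide whether Rx work is
 * finished and interrupts may be re-enabled.  Descriptors are handed back to
 * hardware in batches of IGB_RX_BUFFER_WRITE rather than one at a time, and a
 * partially received multi-buffer frame is parked in rx_ring->skb so the next
 * poll can continue where this one left off.
 */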
7233
7234static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
7235                                  struct igb_rx_buffer *bi)
7236{
7237        struct page *page = bi->page;
7238        dma_addr_t dma;
7239
7240        /* since we are recycling buffers we should seldom need to alloc */
7241        if (likely(page))
7242                return true;
7243
7244        /* alloc new page for storage */
7245        page = dev_alloc_page();
7246        if (unlikely(!page)) {
7247                rx_ring->rx_stats.alloc_failed++;
7248                return false;
7249        }
7250
7251        /* map page for use */
7252        dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
7253                                 DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
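        /* DMA_ATTR_SKIP_CPU_SYNC defers the CPU/device ownership syncs to the
         * hot path: igb_alloc_rx_buffers() below syncs only the IGB_RX_BUFSZ
         * region that is actually handed to hardware, which keeps recycling
         * these pages cheap on non-coherent architectures.
         */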
7254
7255        /* if mapping failed free memory back to system since
7256         * there isn't much point in holding memory we can't use
7257         */
7258        if (dma_mapping_error(rx_ring->dev, dma)) {
7259                __free_page(page);
7260
7261                rx_ring->rx_stats.alloc_failed++;
7262                return false;
7263        }
7264
7265        bi->dma = dma;
7266        bi->page = page;
7267        bi->page_offset = 0;
7268        bi->pagecnt_bias = 1;
7269
7270        return true;
7271}
7272
7273/**
7274 *  igb_alloc_rx_buffers - Replace used receive buffers; packet split
7275 *  @rx_ring: rx descriptor ring to place buffers on
 *  @cleaned_count: number of buffers to replace
7276 **/
7277void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
7278{
7279        union e1000_adv_rx_desc *rx_desc;
7280        struct igb_rx_buffer *bi;
7281        u16 i = rx_ring->next_to_use;
7282
7283        /* nothing to do */
7284        if (!cleaned_count)
7285                return;
7286
7287        rx_desc = IGB_RX_DESC(rx_ring, i);
7288        bi = &rx_ring->rx_buffer_info[i];
7289        i -= rx_ring->count;
7290
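        /* i is a u16 biased downward by rx_ring->count, so the wrap test in
         * the loop body is simply "!i" (the unsigned arithmetic wraps through
         * zero exactly when the real index passes the end of the ring).  For
         * illustration with hypothetical numbers: count = 256 and
         * next_to_use = 250 leave i six increments away from wrapping; when
         * it does wrap, rx_desc and bi are rewound to the start of the ring
         * and i is re-biased.  The bias is removed again after the loop.
         */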
7291        do {
7292                if (!igb_alloc_mapped_page(rx_ring, bi))
7293                        break;
7294
7295                /* sync the buffer for use by the device */
7296                dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
7297                                                 bi->page_offset,
7298                                                 IGB_RX_BUFSZ,
7299                                                 DMA_FROM_DEVICE);
7300
7301                /* Refresh the desc even if buffer_addrs didn't change
7302                 * because each write-back erases this info.
7303                 */
7304                rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
7305
7306                rx_desc++;
7307                bi++;
7308                i++;
7309                if (unlikely(!i)) {
7310                        rx_desc = IGB_RX_DESC(rx_ring, 0);
7311                        bi = rx_ring->rx_buffer_info;
7312                        i -= rx_ring->count;
7313                }
7314
7315                /* clear the status bits for the next_to_use descriptor */
7316                rx_desc->wb.upper.status_error = 0;
7317
7318                cleaned_count--;
7319        } while (cleaned_count);
7320
7321        i += rx_ring->count;
7322
7323        if (rx_ring->next_to_use != i) {
7324                /* record the next descriptor to use */
7325                rx_ring->next_to_use = i;
7326
7327                /* update next to alloc since we have filled the ring */
7328                rx_ring->next_to_alloc = i;
7329
7330                /* Force memory writes to complete before letting h/w
7331                 * know there are new descriptors to fetch.  (Only
7332                 * applicable for weak-ordered memory model archs,
7333                 * such as IA-64).
7334                 */
7335                wmb();
7336                writel(i, rx_ring->tail);
7337        }
7338}
7339
7340/**
7341 * igb_mii_ioctl - Handle MII ioctls on the PHY
7342 * @netdev: pointer to the netdev the ioctl was issued on
7343 * @ifr: interface request structure holding the mii_ioctl_data
7344 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
7345 **/
7346static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
7347{
7348        struct igb_adapter *adapter = netdev_priv(netdev);
7349        struct mii_ioctl_data *data = if_mii(ifr);
7350
7351        if (adapter->hw.phy.media_type != e1000_media_type_copper)
7352                return -EOPNOTSUPP;
7353
7354        switch (cmd) {
7355        case SIOCGMIIPHY:
7356                data->phy_id = adapter->hw.phy.addr;
7357                break;
7358        case SIOCGMIIREG:
7359                if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
7360                                     &data->val_out))
7361                        return -EIO;
7362                break;
7363        case SIOCSMIIREG:
7364        default:
7365                return -EOPNOTSUPP;
7366        }
7367        return 0;
7368}
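/* Roughly how these requests reach the handler above (hypothetical user-space
 * sketch, not part of the driver): the caller fills struct ifreq with the
 * interface name, issues ioctl(fd, SIOCGMIIPHY, &ifr) to learn the PHY
 * address, sets mii->reg_num (e.g. MII_BMSR) in the embedded mii_ioctl_data,
 * and then issues ioctl(fd, SIOCGMIIREG, &ifr); the register value comes back
 * in mii->val_out.  Register writes (SIOCSMIIREG) are rejected here with
 * -EOPNOTSUPP.
 */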
7369
7370/**
7371 * igb_ioctl - Dispatch the MII and hardware timestamping ioctls
7372 * @netdev: pointer to the netdev the ioctl was issued on
7373 * @ifr: interface request structure
7374 * @cmd: ioctl command to execute
7375 **/
7376static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
7377{
7378        switch (cmd) {
7379        case SIOCGMIIPHY:
7380        case SIOCGMIIREG:
7381        case SIOCSMIIREG:
7382                return igb_mii_ioctl(netdev, ifr, cmd);
7383        case SIOCGHWTSTAMP:
7384                return igb_ptp_get_ts_config(netdev, ifr);
7385        case SIOCSHWTSTAMP:
7386                return igb_ptp_set_ts_config(netdev, ifr);
7387        default:
7388                return -EOPNOTSUPP;
7389        }
7390}
7391
7392void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value)
7393{
7394        struct igb_adapter *adapter = hw->back;
7395
7396        pci_read_config_word(adapter->pdev, reg, value);
7397}
7398
7399void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value)
7400{
7401        struct igb_adapter *adapter = hw->back;
7402
7403        pci_write_config_word(adapter->pdev, reg, *value);
7404}
7405
7406s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
7407{
7408        struct igb_adapter *adapter = hw->back;
7409
7410        if (pcie_capability_read_word(adapter->pdev, reg, value))
7411                return -E1000_ERR_CONFIG;
7412
7413        return 0;
7414}
7415
7416s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
7417{
7418        struct igb_adapter *adapter = hw->back;
7419
7420        if (pcie_capability_write_word(adapter->pdev, reg, *value))
7421                return -E1000_ERR_CONFIG;
7422
7423        return 0;
7424}
7425
7426static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
7427{
7428        struct igb_adapter *adapter = netdev_priv(netdev);
7429        struct e1000_hw *hw = &adapter->hw;
7430        u32 ctrl, rctl;
7431        bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
7432
7433        if (enable) {
7434                /* enable VLAN tag insert/strip */
7435                ctrl = rd32(E1000_CTRL);
7436                ctrl |= E1000_CTRL_VME;
7437                wr32(E1000_CTRL, ctrl);
7438
7439                /* Disable CFI check */
7440                rctl = rd32(E1000_RCTL);
7441                rctl &= ~E1000_RCTL_CFIEN;
7442                wr32(E1000_RCTL, rctl);
7443        } else {
7444                /* disable VLAN tag insert/strip */
7445                ctrl = rd32(E1000_CTRL);
7446                ctrl &= ~E1000_CTRL_VME;
7447                wr32(E1000_CTRL, ctrl);
7448        }
7449
7450        igb_set_vf_vlan_strip(adapter, adapter->vfs_allocated_count, enable);
7451}
7452
7453static int igb_vlan_rx_add_vid(struct net_device *netdev,
7454                               __be16 proto, u16 vid)
7455{
7456        struct igb_adapter *adapter = netdev_priv(netdev);
7457        struct e1000_hw *hw = &adapter->hw;
7458        int pf_id = adapter->vfs_allocated_count;
7459
7460        /* add the filter since PF can receive vlans w/o entry in vlvf */
7461        if (!vid || !(adapter->flags & IGB_FLAG_VLAN_PROMISC))
7462                igb_vfta_set(hw, vid, pf_id, true, !!vid);
7463
7464        set_bit(vid, adapter->active_vlans);
7465
7466        return 0;
7467}
7468
7469static int igb_vlan_rx_kill_vid(struct net_device *netdev,
7470                                __be16 proto, u16 vid)
7471{
7472        struct igb_adapter *adapter = netdev_priv(netdev);
7473        int pf_id = adapter->vfs_allocated_count;
7474        struct e1000_hw *hw = &adapter->hw;
7475
7476        /* remove VID from filter table */
7477        if (vid && !(adapter->flags & IGB_FLAG_VLAN_PROMISC))
7478                igb_vfta_set(hw, vid, pf_id, false, true);
7479
7480        clear_bit(vid, adapter->active_vlans);
7481
7482        return 0;
7483}
7484
7485static void igb_restore_vlan(struct igb_adapter *adapter)
7486{
7487        u16 vid = 1;
7488
7489        igb_vlan_mode(adapter->netdev, adapter->netdev->features);
7490        igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0);
7491
7492        for_each_set_bit_from(vid, adapter->active_vlans, VLAN_N_VID)
7493                igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid);
7494}
7495
7496int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
7497{
7498        struct pci_dev *pdev = adapter->pdev;
7499        struct e1000_mac_info *mac = &adapter->hw.mac;
7500
7501        mac->autoneg = 0;
7502
7503        /* Make sure dplx is at most 1 bit and lsb of speed is not set
7504         * for the switch() below to work
7505         */
7506        if ((spd & 1) || (dplx & ~1))
7507                goto err_inval;
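        /* With that check in place the sums below decode uniquely: SPEED_*
         * values are even (10, 100, 1000) and DUPLEX_HALF/DUPLEX_FULL are
         * 0/1, so e.g. SPEED_100 + DUPLEX_FULL == 101 can only mean 100 Mbps
         * full duplex.
         */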
7508
7509        /* Fiber NICs only allow 1000 Mbps Full duplex
7510         * and 100 Mbps Full duplex for 100BaseFX SFPs
7511         */
7512        if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
7513                switch (spd + dplx) {
7514                case SPEED_10 + DUPLEX_HALF:
7515                case SPEED_10 + DUPLEX_FULL:
7516                case SPEED_100 + DUPLEX_HALF:
7517                        goto err_inval;
7518                default:
7519                        break;
7520                }
7521        }
7522
7523        switch (spd + dplx) {
7524        case SPEED_10 + DUPLEX_HALF:
7525                mac->forced_speed_duplex = ADVERTISE_10_HALF;
7526                break;
7527        case SPEED_10 + DUPLEX_FULL:
7528                mac->forced_speed_duplex = ADVERTISE_10_FULL;
7529                break;
7530        case SPEED_100 + DUPLEX_HALF:
7531                mac->forced_speed_duplex = ADVERTISE_100_HALF;
7532                break;
7533        case SPEED_100 + DUPLEX_FULL:
7534                mac->forced_speed_duplex = ADVERTISE_100_FULL;
7535                break;
7536        case SPEED_1000 + DUPLEX_FULL:
7537                mac->autoneg = 1;
7538                adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
7539                break;
7540        case SPEED_1000 + DUPLEX_HALF: /* not supported */
7541        default:
7542                goto err_inval;
7543        }
7544
7545        /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
7546        adapter->hw.phy.mdix = AUTO_ALL_MODES;
7547
7548        return 0;
7549
7550err_inval:
7551        dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
7552        return -EINVAL;
7553}
7554
7555static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
7556                          bool runtime)
7557{
7558        struct net_device *netdev = pci_get_drvdata(pdev);
7559        struct igb_adapter *adapter = netdev_priv(netdev);
7560        struct e1000_hw *hw = &adapter->hw;
7561        u32 ctrl, rctl, status;
7562        u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
7563#ifdef CONFIG_PM
7564        int retval = 0;
7565#endif
7566
7567        rtnl_lock();
7568        netif_device_detach(netdev);
7569
7570        if (netif_running(netdev))
7571                __igb_close(netdev, true);
7572
7573        igb_ptp_suspend(adapter);
7574
7575        igb_clear_interrupt_scheme(adapter);
7576        rtnl_unlock();
7577
7578#ifdef CONFIG_PM
7579        retval = pci_save_state(pdev);
7580        if (retval)
7581                return retval;
7582#endif
7583
7584        status = rd32(E1000_STATUS);
7585        if (status & E1000_STATUS_LU)
7586                wufc &= ~E1000_WUFC_LNKC;
7587
7588        if (wufc) {
7589                igb_setup_rctl(adapter);
7590                igb_set_rx_mode(netdev);
7591
7592                /* turn on all-multi mode if wake on multicast is enabled */
7593                if (wufc & E1000_WUFC_MC) {
7594                        rctl = rd32(E1000_RCTL);
7595                        rctl |= E1000_RCTL_MPE;
7596                        wr32(E1000_RCTL, rctl);
7597                }
7598
7599                ctrl = rd32(E1000_CTRL);
7600                /* advertise wake from D3Cold */
7601                #define E1000_CTRL_ADVD3WUC 0x00100000
7602                /* phy power management enable */
7603                #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
7604                ctrl |= E1000_CTRL_ADVD3WUC;
7605                wr32(E1000_CTRL, ctrl);
7606
7607                /* Allow time for pending master requests to run */
7608                igb_disable_pcie_master(hw);
7609
7610                wr32(E1000_WUC, E1000_WUC_PME_EN);
7611                wr32(E1000_WUFC, wufc);
7612        } else {
7613                wr32(E1000_WUC, 0);
7614                wr32(E1000_WUFC, 0);
7615        }
7616
7617        *enable_wake = wufc || adapter->en_mng_pt;
7618        if (!*enable_wake)
7619                igb_power_down_link(adapter);
7620        else
7621                igb_power_up_link(adapter);
7622
7623        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
7624         * would have already happened in close and is redundant.
7625         */
7626        igb_release_hw_control(adapter);
7627
7628        pci_disable_device(pdev);
7629
7630        return 0;
7631}
7632
7633#ifdef CONFIG_PM
7634#ifdef CONFIG_PM_SLEEP
7635static int igb_suspend(struct device *dev)
7636{
7637        int retval;
7638        bool wake;
7639        struct pci_dev *pdev = to_pci_dev(dev);
7640
7641        retval = __igb_shutdown(pdev, &wake, 0);
7642        if (retval)
7643                return retval;
7644
7645        if (wake) {
7646                pci_prepare_to_sleep(pdev);
7647        } else {
7648                pci_wake_from_d3(pdev, false);
7649                pci_set_power_state(pdev, PCI_D3hot);
7650        }
7651
7652        return 0;
7653}
7654#endif /* CONFIG_PM_SLEEP */
7655
7656static int igb_resume(struct device *dev)
7657{
7658        struct pci_dev *pdev = to_pci_dev(dev);
7659        struct net_device *netdev = pci_get_drvdata(pdev);
7660        struct igb_adapter *adapter = netdev_priv(netdev);
7661        struct e1000_hw *hw = &adapter->hw;
7662        int err;
7663
7664        pci_set_power_state(pdev, PCI_D0);
7665        pci_restore_state(pdev);
7666        pci_save_state(pdev);
7667
7668        if (!pci_device_is_present(pdev))
7669                return -ENODEV;
7670        err = pci_enable_device_mem(pdev);
7671        if (err) {
7672                dev_err(&pdev->dev,
7673                        "igb: Cannot enable PCI device from suspend\n");
7674                return err;
7675        }
7676        pci_set_master(pdev);
7677
7678        pci_enable_wake(pdev, PCI_D3hot, 0);
7679        pci_enable_wake(pdev, PCI_D3cold, 0);
7680
7681        if (igb_init_interrupt_scheme(adapter, true)) {
7682                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
7683                return -ENOMEM;
7684        }
7685
7686        igb_reset(adapter);
7687
7688        /* let the f/w know that the h/w is now under the control of the
7689         * driver.
7690         */
7691        igb_get_hw_control(adapter);
7692
7693        wr32(E1000_WUS, ~0);
7694
7695        rtnl_lock();
7696        if (!err && netif_running(netdev))
7697                err = __igb_open(netdev, true);
7698
7699        if (!err)
7700                netif_device_attach(netdev);
7701        rtnl_unlock();
7702
7703        return err;
7704}
7705
7706static int igb_runtime_idle(struct device *dev)
7707{
7708        struct pci_dev *pdev = to_pci_dev(dev);
7709        struct net_device *netdev = pci_get_drvdata(pdev);
7710        struct igb_adapter *adapter = netdev_priv(netdev);
7711
7712        if (!igb_has_link(adapter))
7713                pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
7714
7715        return -EBUSY;
7716}
7717
7718static int igb_runtime_suspend(struct device *dev)
7719{
7720        struct pci_dev *pdev = to_pci_dev(dev);
7721        int retval;
7722        bool wake;
7723
7724        retval = __igb_shutdown(pdev, &wake, 1);
7725        if (retval)
7726                return retval;
7727
7728        if (wake) {
7729                pci_prepare_to_sleep(pdev);
7730        } else {
7731                pci_wake_from_d3(pdev, false);
7732                pci_set_power_state(pdev, PCI_D3hot);
7733        }
7734
7735        return 0;
7736}
7737
7738static int igb_runtime_resume(struct device *dev)
7739{
7740        return igb_resume(dev);
7741}
7742#endif /* CONFIG_PM */
7743
7744static void igb_shutdown(struct pci_dev *pdev)
7745{
7746        bool wake;
7747
7748        __igb_shutdown(pdev, &wake, 0);
7749
7750        if (system_state == SYSTEM_POWER_OFF) {
7751                pci_wake_from_d3(pdev, wake);
7752                pci_set_power_state(pdev, PCI_D3hot);
7753        }
7754}
7755
7756#ifdef CONFIG_PCI_IOV
7757static int igb_sriov_reinit(struct pci_dev *dev)
7758{
7759        struct net_device *netdev = pci_get_drvdata(dev);
7760        struct igb_adapter *adapter = netdev_priv(netdev);
7761        struct pci_dev *pdev = adapter->pdev;
7762
7763        rtnl_lock();
7764
7765        if (netif_running(netdev))
7766                igb_close(netdev);
7767        else
7768                igb_reset(adapter);
7769
7770        igb_clear_interrupt_scheme(adapter);
7771
7772        igb_init_queue_configuration(adapter);
7773
7774        if (igb_init_interrupt_scheme(adapter, true)) {
7775                rtnl_unlock();
7776                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
7777                return -ENOMEM;
7778        }
7779
7780        if (netif_running(netdev))
7781                igb_open(netdev);
7782
7783        rtnl_unlock();
7784
7785        return 0;
7786}
7787
7788static int igb_pci_disable_sriov(struct pci_dev *dev)
7789{
7790        int err = igb_disable_sriov(dev);
7791
7792        if (!err)
7793                err = igb_sriov_reinit(dev);
7794
7795        return err;
7796}
7797
7798static int igb_pci_enable_sriov(struct pci_dev *dev, int num_vfs)
7799{
7800        int err = igb_enable_sriov(dev, num_vfs);
7801
7802        if (err)
7803                goto out;
7804
7805        err = igb_sriov_reinit(dev);
7806        if (!err)
7807                return num_vfs;
7808
7809out:
7810        return err;
7811}
7812
7813#endif
7814static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
7815{
7816#ifdef CONFIG_PCI_IOV
7817        if (num_vfs == 0)
7818                return igb_pci_disable_sriov(dev);
7819        else
7820                return igb_pci_enable_sriov(dev, num_vfs);
7821#endif
7822        return 0;
7823}
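/* igb_pci_sriov_configure() is wired up as the driver's .sriov_configure
 * callback, so the PCI core invokes it when the administrator writes to the
 * device's sriov_numvfs attribute in sysfs; a non-zero count creates that
 * many VFs (followed by the interrupt/queue re-init above) and writing 0
 * tears them down again.  With CONFIG_PCI_IOV disabled the request is simply
 * ignored.
 */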
7824
7825#ifdef CONFIG_NET_POLL_CONTROLLER
7826/* Polling 'interrupt' - used by things like netconsole to send skbs
7827 * without having to re-enable interrupts. It's not called while
7828 * the interrupt routine is executing.
7829 */
7830static void igb_netpoll(struct net_device *netdev)
7831{
7832        struct igb_adapter *adapter = netdev_priv(netdev);
7833        struct e1000_hw *hw = &adapter->hw;
7834        struct igb_q_vector *q_vector;
7835        int i;
7836
7837        for (i = 0; i < adapter->num_q_vectors; i++) {
7838                q_vector = adapter->q_vector[i];
7839                if (adapter->flags & IGB_FLAG_HAS_MSIX)
7840                        wr32(E1000_EIMC, q_vector->eims_value);
7841                else
7842                        igb_irq_disable(adapter);
7843                napi_schedule(&q_vector->napi);
7844        }
7845}
7846#endif /* CONFIG_NET_POLL_CONTROLLER */
7847
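/* The three callbacks below implement the PCI Advanced Error Reporting (AER)
 * recovery sequence for this device: the core first reports the error via
 * igb_io_error_detected(), then (unless the link is permanently dead) resets
 * the slot and calls igb_io_slot_reset() to reinitialize the hardware, and
 * finally calls igb_io_resume() once traffic may flow again.
 */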
7848/**
7849 *  igb_io_error_detected - called when PCI error is detected
7850 *  @pdev: Pointer to PCI device
7851 *  @state: The current pci connection state
7852 *
7853 *  This function is called after a PCI bus error affecting
7854 *  this device has been detected.
7855 **/
7856static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
7857                                              pci_channel_state_t state)
7858{
7859        struct net_device *netdev = pci_get_drvdata(pdev);
7860        struct igb_adapter *adapter = netdev_priv(netdev);
7861
7862        netif_device_detach(netdev);
7863
7864        if (state == pci_channel_io_perm_failure)
7865                return PCI_ERS_RESULT_DISCONNECT;
7866
7867        if (netif_running(netdev))
7868                igb_down(adapter);
7869        pci_disable_device(pdev);
7870
7871        /* Request a slot reset. */
7872        return PCI_ERS_RESULT_NEED_RESET;
7873}
7874
7875/**
7876 *  igb_io_slot_reset - called after the pci bus has been reset.
7877 *  @pdev: Pointer to PCI device
7878 *
7879 *  Restart the card from scratch, as if from a cold-boot. Implementation
7880 *  resembles the first-half of the igb_resume routine.
7881 **/
7882static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
7883{
7884        struct net_device *netdev = pci_get_drvdata(pdev);
7885        struct igb_adapter *adapter = netdev_priv(netdev);
7886        struct e1000_hw *hw = &adapter->hw;
7887        pci_ers_result_t result;
7888        int err;
7889
7890        if (pci_enable_device_mem(pdev)) {
7891                dev_err(&pdev->dev,
7892                        "Cannot re-enable PCI device after reset.\n");
7893                result = PCI_ERS_RESULT_DISCONNECT;
7894        } else {
7895                pci_set_master(pdev);
7896                pci_restore_state(pdev);
7897                pci_save_state(pdev);
7898
7899                pci_enable_wake(pdev, PCI_D3hot, 0);
7900                pci_enable_wake(pdev, PCI_D3cold, 0);
7901
7902                /* In case of PCI error, the adapter loses its HW address
7903                 * so we should re-assign it here.
7904                 */
7905                hw->hw_addr = adapter->io_addr;
7906
7907                igb_reset(adapter);
7908                wr32(E1000_WUS, ~0);
7909                result = PCI_ERS_RESULT_RECOVERED;
7910        }
7911
7912        err = pci_cleanup_aer_uncorrect_error_status(pdev);
7913        if (err) {
7914                dev_err(&pdev->dev,
7915                        "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n",
7916                        err);
7917                /* non-fatal, continue */
7918        }
7919
7920        return result;
7921}
7922
7923/**
7924 *  igb_io_resume - called when traffic can start flowing again.
7925 *  @pdev: Pointer to PCI device
7926 *
7927 *  This callback is called when the error recovery driver tells us that
7928 *  it's OK to resume normal operation. Implementation resembles the
7929 *  second-half of the igb_resume routine.
7930 */
7931static void igb_io_resume(struct pci_dev *pdev)
7932{
7933        struct net_device *netdev = pci_get_drvdata(pdev);
7934        struct igb_adapter *adapter = netdev_priv(netdev);
7935
7936        if (netif_running(netdev)) {
7937                if (igb_up(adapter)) {
7938                        dev_err(&pdev->dev, "igb_up failed after reset\n");
7939                        return;
7940                }
7941        }
7942
7943        netif_device_attach(netdev);
7944
7945        /* let the f/w know that the h/w is now under the control of the
7946         * driver.
7947         */
7948        igb_get_hw_control(adapter);
7949}
7950
7951static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
7952                             u8 qsel)
7953{
7954        struct e1000_hw *hw = &adapter->hw;
7955        u32 rar_low, rar_high;
7956
7957        /* HW expects these to be in network order when they are plugged
7958         * into the registers, which are little endian.  To guarantee that
7959         * ordering we do an leXX_to_cpup here so the value is ready for
7960         * the byteswap that occurs with writel.
7961         */
7962        rar_low = le32_to_cpup((__le32 *)(addr));
7963        rar_high = le16_to_cpup((__le16 *)(addr + 4));
7964
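        /* Worked example with a made-up address 00:1b:21:aa:bb:cc: the first
         * four bytes become rar_low = 0xaa211b00 and the last two become
         * rar_high = 0x0000ccbb; writel() then stores the values little
         * endian, so the bytes land in the register in the original wire
         * order.
         */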
7965        /* Indicate to hardware the Address is Valid. */
7966        rar_high |= E1000_RAH_AV;
7967
7968        if (hw->mac.type == e1000_82575)
7969                rar_high |= E1000_RAH_POOL_1 * qsel;
7970        else
7971                rar_high |= E1000_RAH_POOL_1 << qsel;
7972
7973        wr32(E1000_RAL(index), rar_low);
7974        wrfl();
7975        wr32(E1000_RAH(index), rar_high);
7976        wrfl();
7977}
7978
7979static int igb_set_vf_mac(struct igb_adapter *adapter,
7980                          int vf, unsigned char *mac_addr)
7981{
7982        struct e1000_hw *hw = &adapter->hw;
7983        /* VF MAC addresses start at the end of the receive addresses and
7984         * move towards the first; as a result a collision should not be possible
7985         */
7986        int rar_entry = hw->mac.rar_entry_count - (vf + 1);
7987
7988        memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
7989
7990        igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7991
7992        return 0;
7993}
7994
7995static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7996{
7997        struct igb_adapter *adapter = netdev_priv(netdev);
7998        if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7999                return -EINVAL;
8000        adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
8001        dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
8002        dev_info(&adapter->pdev->dev,
8003                 "Reload the VF driver to make this change effective.");
8004        if (test_bit(__IGB_DOWN, &adapter->state)) {
8005                dev_warn(&adapter->pdev->dev,
8006                         "The VF MAC address has been set, but the PF device is not up.\n");
8007                dev_warn(&adapter->pdev->dev,
8008                         "Bring the PF device up before attempting to use the VF device.\n");
8009        }
8010        return igb_set_vf_mac(adapter, vf, mac);
8011}
8012
8013static int igb_link_mbps(int internal_link_speed)
8014{
8015        switch (internal_link_speed) {
8016        case SPEED_100:
8017                return 100;
8018        case SPEED_1000:
8019                return 1000;
8020        default:
8021                return 0;
8022        }
8023}
8024
8025static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
8026                                  int link_speed)
8027{
8028        int rf_dec, rf_int;
8029        u32 bcnrc_val;
8030
8031        if (tx_rate != 0) {
8032                /* Calculate the rate factor values to set */
8033                rf_int = link_speed / tx_rate;
8034                rf_dec = (link_speed - (rf_int * tx_rate));
8035                rf_dec = (rf_dec * BIT(E1000_RTTBCNRC_RF_INT_SHIFT)) /
8036                         tx_rate;
8037
8038                bcnrc_val = E1000_RTTBCNRC_RS_ENA;
8039                bcnrc_val |= ((rf_int << E1000_RTTBCNRC_RF_INT_SHIFT) &
8040                              E1000_RTTBCNRC_RF_INT_MASK);
8041                bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
8042        } else {
8043                bcnrc_val = 0;
8044        }
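        /* For illustration (hypothetical numbers, assuming the usual
         * E1000_RTTBCNRC_RF_INT_SHIFT of 14): link_speed = 1000 and
         * tx_rate = 300 give rf_int = 3 and rf_dec = (100 * 16384) / 300 =
         * 5461, i.e. a rate factor of roughly 3.33 = link_speed / tx_rate,
         * so the hardware throttles this VF's queue to about 300 Mbps.
         */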
8045
8046        wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
8047        /* Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
8048         * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
8049         */
8050        wr32(E1000_RTTBCNRM, 0x14);
8051        wr32(E1000_RTTBCNRC, bcnrc_val);
8052}
8053
8054static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
8055{
8056        int actual_link_speed, i;
8057        bool reset_rate = false;
8058
8059        /* VF TX rate limit was not set or not supported */
8060        if ((adapter->vf_rate_link_speed == 0) ||
8061            (adapter->hw.mac.type != e1000_82576))
8062                return;
8063
8064        actual_link_speed = igb_link_mbps(adapter->link_speed);
8065        if (actual_link_speed != adapter->vf_rate_link_speed) {
8066                reset_rate = true;
8067                adapter->vf_rate_link_speed = 0;
8068                dev_info(&adapter->pdev->dev,
8069                         "Link speed has been changed. VF Transmit rate is disabled\n");
8070        }
8071
8072        for (i = 0; i < adapter->vfs_allocated_count; i++) {
8073                if (reset_rate)
8074                        adapter->vf_data[i].tx_rate = 0;
8075
8076                igb_set_vf_rate_limit(&adapter->hw, i,
8077                                      adapter->vf_data[i].tx_rate,
8078                                      actual_link_speed);
8079        }
8080}
8081
8082static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf,
8083                             int min_tx_rate, int max_tx_rate)
8084{
8085        struct igb_adapter *adapter = netdev_priv(netdev);
8086        struct e1000_hw *hw = &adapter->hw;
8087        int actual_link_speed;
8088
8089        if (hw->mac.type != e1000_82576)
8090                return -EOPNOTSUPP;
8091
8092        if (min_tx_rate)
8093                return -EINVAL;
8094
8095        actual_link_speed = igb_link_mbps(adapter->link_speed);
8096        if ((vf >= adapter->vfs_allocated_count) ||
8097            (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
8098            (max_tx_rate < 0) ||
8099            (max_tx_rate > actual_link_speed))
8100                return -EINVAL;
8101
8102        adapter->vf_rate_link_speed = actual_link_speed;
8103        adapter->vf_data[vf].tx_rate = (u16)max_tx_rate;
8104        igb_set_vf_rate_limit(hw, vf, max_tx_rate, actual_link_speed);
8105
8106        return 0;
8107}
8108
8109static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
8110                                   bool setting)
8111{
8112        struct igb_adapter *adapter = netdev_priv(netdev);
8113        struct e1000_hw *hw = &adapter->hw;
8114        u32 reg_val, reg_offset;
8115
8116        if (!adapter->vfs_allocated_count)
8117                return -EOPNOTSUPP;
8118
8119        if (vf >= adapter->vfs_allocated_count)
8120                return -EINVAL;
8121
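        /* DTXSWC (82576) / TXSWC (later MACs) packs one MAC anti-spoof enable
         * bit per VF in its low bits and the matching VLAN anti-spoof enable
         * at BIT(vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT); both are flipped
         * together below so spoof checking covers source MAC and VLAN tag.
         */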
8122        reg_offset = (hw->mac.type == e1000_82576) ? E1000_DTXSWC : E1000_TXSWC;
8123        reg_val = rd32(reg_offset);
8124        if (setting)
8125                reg_val |= (BIT(vf) |
8126                            BIT(vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT));
8127        else
8128                reg_val &= ~(BIT(vf) |
8129                             BIT(vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT));
8130        wr32(reg_offset, reg_val);
8131
8132        adapter->vf_data[vf].spoofchk_enabled = setting;
8133        return 0;
8134}
8135
8136static int igb_ndo_get_vf_config(struct net_device *netdev,
8137                                 int vf, struct ifla_vf_info *ivi)
8138{
8139        struct igb_adapter *adapter = netdev_priv(netdev);
8140        if (vf >= adapter->vfs_allocated_count)
8141                return -EINVAL;
8142        ivi->vf = vf;
8143        memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
8144        ivi->max_tx_rate = adapter->vf_data[vf].tx_rate;
8145        ivi->min_tx_rate = 0;
8146        ivi->vlan = adapter->vf_data[vf].pf_vlan;
8147        ivi->qos = adapter->vf_data[vf].pf_qos;
8148        ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled;
8149        return 0;
8150}
8151
8152static void igb_vmm_control(struct igb_adapter *adapter)
8153{
8154        struct e1000_hw *hw = &adapter->hw;
8155        u32 reg;
8156
8157        switch (hw->mac.type) {
8158        case e1000_82575:
8159        case e1000_i210:
8160        case e1000_i211:
8161        case e1000_i354:
8162        default:
8163                /* replication is not supported for 82575 */
8164                return;
8165        case e1000_82576:
8166                /* notify HW that the MAC is adding vlan tags */
8167                reg = rd32(E1000_DTXCTL);
8168                reg |= E1000_DTXCTL_VLAN_ADDED;
8169                wr32(E1000_DTXCTL, reg);
8170                /* Fall through */
8171        case e1000_82580:
8172                /* enable replication vlan tag stripping */
8173                reg = rd32(E1000_RPLOLR);
8174                reg |= E1000_RPLOLR_STRVLAN;
8175                wr32(E1000_RPLOLR, reg);
8176                /* Fall through */
8177        case e1000_i350:
8178                /* none of the above registers are supported by i350 */
8179                break;
8180        }
8181
8182        if (adapter->vfs_allocated_count) {
8183                igb_vmdq_set_loopback_pf(hw, true);
8184                igb_vmdq_set_replication_pf(hw, true);
8185                igb_vmdq_set_anti_spoofing_pf(hw, true,
8186                                              adapter->vfs_allocated_count);
8187        } else {
8188                igb_vmdq_set_loopback_pf(hw, false);
8189                igb_vmdq_set_replication_pf(hw, false);
8190        }
8191}
8192
8193static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
8194{
8195        struct e1000_hw *hw = &adapter->hw;
8196        u32 dmac_thr;
8197        u16 hwm;
8198
8199        if (hw->mac.type > e1000_82580) {
8200                if (adapter->flags & IGB_FLAG_DMAC) {
8201                        u32 reg;
8202
8203                        /* force threshold to 0. */
8204                        wr32(E1000_DMCTXTH, 0);
8205
8206                        /* DMA Coalescing high water mark needs to be greater
8207                         * than the Rx threshold. Set hwm to PBA - max frame
8208                         * size in 16B units, capping it at PBA - 6KB.
8209                         */
8210                        hwm = 64 * (pba - 6);
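                        /* pba is expressed in KB and the FCRTC field wants
                         * 16-byte units, so 64 * (pba - 6) is just
                         * (pba - 6KB) * 1024 / 16; e.g. a hypothetical pba
                         * of 34 gives hwm = 1792, i.e. a 28KB high water
                         * mark.
                         */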
8211                        reg = rd32(E1000_FCRTC);
8212                        reg &= ~E1000_FCRTC_RTH_COAL_MASK;
8213                        reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
8214                                & E1000_FCRTC_RTH_COAL_MASK);
8215                        wr32(E1000_FCRTC, reg);
8216
8217                        /* Set the DMA Coalescing Rx threshold to PBA - 2 * max
8218                         * frame size, capping it at PBA - 10KB.
8219                         */
8220                        dmac_thr = pba - 10;
8221                        reg = rd32(E1000_DMACR);
8222                        reg &= ~E1000_DMACR_DMACTHR_MASK;
8223                        reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
8224                                & E1000_DMACR_DMACTHR_MASK);
8225
8226                        /* transition to L0s or L1 if available */
8227                        reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
8228
8229                        /* watchdog timer = ~1000 usec, expressed in 32 usec intervals */
8230                        reg |= (1000 >> 5);
8231
8232                        /* Disable BMC-to-OS Watchdog Enable */
8233                        if (hw->mac.type != e1000_i354)
8234                                reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
8235
8236                        wr32(E1000_DMACR, reg);
8237
8238                        /* no lower threshold to disable
8239                         * coalescing (smart fifo) - UTRESH=0
8240                         */
8241                        wr32(E1000_DMCRTRH, 0);
8242
8243                        reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
8244
8245                        wr32(E1000_DMCTLX, reg);
8246
8247                        /* free space in tx packet buffer to wake from
8248                         * DMA coal
8249                         */
8250                        wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
8251                             (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
8252
8253                        /* make low power state decision controlled
8254                         * by DMA coal
8255                         */
8256                        reg = rd32(E1000_PCIEMISC);
8257                        reg &= ~E1000_PCIEMISC_LX_DECISION;
8258                        wr32(E1000_PCIEMISC, reg);
8259                } /* endif adapter->dmac is not disabled */
8260        } else if (hw->mac.type == e1000_82580) {
8261                u32 reg = rd32(E1000_PCIEMISC);
8262
8263                wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
8264                wr32(E1000_DMACR, 0);
8265        }
8266}
8267
8268/**
8269 *  igb_read_i2c_byte - Reads 8 bit word over I2C
8270 *  @hw: pointer to hardware structure
8271 *  @byte_offset: byte offset to read
8272 *  @dev_addr: device address
8273 *  @data: value read
8274 *
8275 *  Performs byte read operation over I2C interface at
8276 *  a specified device address.
8277 **/
8278s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
8279                      u8 dev_addr, u8 *data)
8280{
8281        struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
8282        struct i2c_client *this_client = adapter->i2c_client;
8283        s32 status;
8284        u16 swfw_mask = 0;
8285
8286        if (!this_client)
8287                return E1000_ERR_I2C;
8288
8289        swfw_mask = E1000_SWFW_PHY0_SM;
8290
8291        if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
8292                return E1000_ERR_SWFW_SYNC;
8293
8294        status = i2c_smbus_read_byte_data(this_client, byte_offset);
8295        hw->mac.ops.release_swfw_sync(hw, swfw_mask);
8296
8297        if (status < 0)
8298                return E1000_ERR_I2C;
8299
8300        *data = status;
8301        return 0;
8303}
8304
8305/**
8306 *  igb_write_i2c_byte - Writes 8 bit word over I2C
8307 *  @hw: pointer to hardware structure
8308 *  @byte_offset: byte offset to write
8309 *  @dev_addr: device address
8310 *  @data: value to write
8311 *
8312 *  Performs byte write operation over I2C interface at
8313 *  a specified device address.
8314 **/
8315s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
8316                       u8 dev_addr, u8 data)
8317{
8318        struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
8319        struct i2c_client *this_client = adapter->i2c_client;
8320        s32 status;
8321        u16 swfw_mask = E1000_SWFW_PHY0_SM;
8322
8323        if (!this_client)
8324                return E1000_ERR_I2C;
8325
8326        if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
8327                return E1000_ERR_SWFW_SYNC;
8328        status = i2c_smbus_write_byte_data(this_client, byte_offset, data);
8329        hw->mac.ops.release_swfw_sync(hw, swfw_mask);
8330
8331        if (status)
8332                return E1000_ERR_I2C;
8333
8334        return 0;
8335}
8337
8338int igb_reinit_queues(struct igb_adapter *adapter)
8339{
8340        struct net_device *netdev = adapter->netdev;
8341        struct pci_dev *pdev = adapter->pdev;
8342        int err = 0;
8343
8344        if (netif_running(netdev))
8345                igb_close(netdev);
8346
8347        igb_reset_interrupt_capability(adapter);
8348
8349        if (igb_init_interrupt_scheme(adapter, true)) {
8350                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
8351                return -ENOMEM;
8352        }
8353
8354        if (netif_running(netdev))
8355                err = igb_open(netdev);
8356
8357        return err;
8358}
8359
8360static void igb_nfc_filter_exit(struct igb_adapter *adapter)
8361{
8362        struct igb_nfc_filter *rule;
8363
8364        spin_lock(&adapter->nfc_lock);
8365
8366        hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
8367                igb_erase_filter(adapter, rule);
8368
8369        spin_unlock(&adapter->nfc_lock);
8370}
8371
8372static void igb_nfc_filter_restore(struct igb_adapter *adapter)
8373{
8374        struct igb_nfc_filter *rule;
8375
8376        spin_lock(&adapter->nfc_lock);
8377
8378        hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
8379                igb_add_filter(adapter, rule);
8380
8381        spin_unlock(&adapter->nfc_lock);
8382}
8383/* igb_main.c */
8384