linux/drivers/net/igb/igb_main.c
   1/*******************************************************************************
   2
   3  Intel(R) Gigabit Ethernet Linux driver
   4  Copyright(c) 2007-2009 Intel Corporation.
   5
   6  This program is free software; you can redistribute it and/or modify it
   7  under the terms and conditions of the GNU General Public License,
   8  version 2, as published by the Free Software Foundation.
   9
  10  This program is distributed in the hope it will be useful, but WITHOUT
  11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13  more details.
  14
  15  You should have received a copy of the GNU General Public License along with
  16  this program; if not, write to the Free Software Foundation, Inc.,
  17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  18
  19  The full GNU General Public License is included in this distribution in
  20  the file called "COPYING".
  21
  22  Contact Information:
  23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
  25
  26*******************************************************************************/
  27
  28#include <linux/module.h>
  29#include <linux/types.h>
  30#include <linux/init.h>
  31#include <linux/vmalloc.h>
  32#include <linux/pagemap.h>
  33#include <linux/netdevice.h>
  34#include <linux/ipv6.h>
  35#include <linux/slab.h>
  36#include <net/checksum.h>
  37#include <net/ip6_checksum.h>
  38#include <linux/net_tstamp.h>
  39#include <linux/mii.h>
  40#include <linux/ethtool.h>
  41#include <linux/if_vlan.h>
  42#include <linux/pci.h>
  43#include <linux/pci-aspm.h>
  44#include <linux/delay.h>
  45#include <linux/interrupt.h>
  46#include <linux/if_ether.h>
  47#include <linux/aer.h>
  48#ifdef CONFIG_IGB_DCA
  49#include <linux/dca.h>
  50#endif
  51#include "igb.h"
  52
  53#define DRV_VERSION "2.1.0-k2"
  54char igb_driver_name[] = "igb";
  55char igb_driver_version[] = DRV_VERSION;
  56static const char igb_driver_string[] =
  57                                "Intel(R) Gigabit Ethernet Network Driver";
  58static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
  59
  60static const struct e1000_info *igb_info_tbl[] = {
  61        [board_82575] = &e1000_82575_info,
  62};
  63
  64static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
  65        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
  66        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
  67        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
  68        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
  69        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
  70        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
  71        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
  72        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
  73        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
  74        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
  75        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
  76        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
  77        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
  78        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
  79        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
  80        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
  81        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
  82        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
  83        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
  84        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
  85        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
  86        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
  87        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
  88        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
  89        /* required last entry */
  90        {0, }
  91};
  92
  93MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
  94
  95void igb_reset(struct igb_adapter *);
  96static int igb_setup_all_tx_resources(struct igb_adapter *);
  97static int igb_setup_all_rx_resources(struct igb_adapter *);
  98static void igb_free_all_tx_resources(struct igb_adapter *);
  99static void igb_free_all_rx_resources(struct igb_adapter *);
 100static void igb_setup_mrqc(struct igb_adapter *);
 101static int igb_probe(struct pci_dev *, const struct pci_device_id *);
 102static void __devexit igb_remove(struct pci_dev *pdev);
 103static int igb_sw_init(struct igb_adapter *);
 104static int igb_open(struct net_device *);
 105static int igb_close(struct net_device *);
 106static void igb_configure_tx(struct igb_adapter *);
 107static void igb_configure_rx(struct igb_adapter *);
 108static void igb_clean_all_tx_rings(struct igb_adapter *);
 109static void igb_clean_all_rx_rings(struct igb_adapter *);
 110static void igb_clean_tx_ring(struct igb_ring *);
 111static void igb_clean_rx_ring(struct igb_ring *);
 112static void igb_set_rx_mode(struct net_device *);
 113static void igb_update_phy_info(unsigned long);
 114static void igb_watchdog(unsigned long);
 115static void igb_watchdog_task(struct work_struct *);
 116static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
 117static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
 118                                                 struct rtnl_link_stats64 *stats);
 119static int igb_change_mtu(struct net_device *, int);
 120static int igb_set_mac(struct net_device *, void *);
 121static void igb_set_uta(struct igb_adapter *adapter);
 122static irqreturn_t igb_intr(int irq, void *);
 123static irqreturn_t igb_intr_msi(int irq, void *);
 124static irqreturn_t igb_msix_other(int irq, void *);
 125static irqreturn_t igb_msix_ring(int irq, void *);
 126#ifdef CONFIG_IGB_DCA
 127static void igb_update_dca(struct igb_q_vector *);
 128static void igb_setup_dca(struct igb_adapter *);
 129#endif /* CONFIG_IGB_DCA */
 130static bool igb_clean_tx_irq(struct igb_q_vector *);
 131static int igb_poll(struct napi_struct *, int);
 132static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
 133static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
 134static void igb_tx_timeout(struct net_device *);
 135static void igb_reset_task(struct work_struct *);
 136static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
 137static void igb_vlan_rx_add_vid(struct net_device *, u16);
 138static void igb_vlan_rx_kill_vid(struct net_device *, u16);
 139static void igb_restore_vlan(struct igb_adapter *);
 140static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
 141static void igb_ping_all_vfs(struct igb_adapter *);
 142static void igb_msg_task(struct igb_adapter *);
 143static void igb_vmm_control(struct igb_adapter *);
 144static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
 145static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
 146static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
 147static int igb_ndo_set_vf_vlan(struct net_device *netdev,
 148                               int vf, u16 vlan, u8 qos);
 149static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
 150static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
 151                                 struct ifla_vf_info *ivi);
 152
 153#ifdef CONFIG_PM
 154static int igb_suspend(struct pci_dev *, pm_message_t);
 155static int igb_resume(struct pci_dev *);
 156#endif
 157static void igb_shutdown(struct pci_dev *);
 158#ifdef CONFIG_IGB_DCA
 159static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
 160static struct notifier_block dca_notifier = {
 161        .notifier_call  = igb_notify_dca,
 162        .next           = NULL,
 163        .priority       = 0
 164};
 165#endif
 166#ifdef CONFIG_NET_POLL_CONTROLLER
 167/* for netdump / net console */
 168static void igb_netpoll(struct net_device *);
 169#endif
 170#ifdef CONFIG_PCI_IOV
 171static unsigned int max_vfs = 0;
 172module_param(max_vfs, uint, 0);
 173MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
 174                 "per physical function");
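    /* Usage sketch (assumed invocation, not part of this file): loading the
     * driver with "modprobe igb max_vfs=7" would request seven virtual
     * functions per port, subject to what the hardware supports. */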
 175#endif /* CONFIG_PCI_IOV */
 176
 177static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
 178                     pci_channel_state_t);
 179static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
 180static void igb_io_resume(struct pci_dev *);
 181
 182static struct pci_error_handlers igb_err_handler = {
 183        .error_detected = igb_io_error_detected,
 184        .slot_reset = igb_io_slot_reset,
 185        .resume = igb_io_resume,
 186};
 187
 188
 189static struct pci_driver igb_driver = {
 190        .name     = igb_driver_name,
 191        .id_table = igb_pci_tbl,
 192        .probe    = igb_probe,
 193        .remove   = __devexit_p(igb_remove),
 194#ifdef CONFIG_PM
 195        /* Power Management Hooks */
 196        .suspend  = igb_suspend,
 197        .resume   = igb_resume,
 198#endif
 199        .shutdown = igb_shutdown,
 200        .err_handler = &igb_err_handler
 201};
 202
 203MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
 204MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
 205MODULE_LICENSE("GPL");
 206MODULE_VERSION(DRV_VERSION);
 207
 208struct igb_reg_info {
 209        u32 ofs;
 210        char *name;
 211};
 212
 213static const struct igb_reg_info igb_reg_info_tbl[] = {
 214
 215        /* General Registers */
 216        {E1000_CTRL, "CTRL"},
 217        {E1000_STATUS, "STATUS"},
 218        {E1000_CTRL_EXT, "CTRL_EXT"},
 219
 220        /* Interrupt Registers */
 221        {E1000_ICR, "ICR"},
 222
 223        /* RX Registers */
 224        {E1000_RCTL, "RCTL"},
 225        {E1000_RDLEN(0), "RDLEN"},
 226        {E1000_RDH(0), "RDH"},
 227        {E1000_RDT(0), "RDT"},
 228        {E1000_RXDCTL(0), "RXDCTL"},
 229        {E1000_RDBAL(0), "RDBAL"},
 230        {E1000_RDBAH(0), "RDBAH"},
 231
 232        /* TX Registers */
 233        {E1000_TCTL, "TCTL"},
 234        {E1000_TDBAL(0), "TDBAL"},
 235        {E1000_TDBAH(0), "TDBAH"},
 236        {E1000_TDLEN(0), "TDLEN"},
 237        {E1000_TDH(0), "TDH"},
 238        {E1000_TDT(0), "TDT"},
 239        {E1000_TXDCTL(0), "TXDCTL"},
 240        {E1000_TDFH, "TDFH"},
 241        {E1000_TDFT, "TDFT"},
 242        {E1000_TDFHS, "TDFHS"},
 243        {E1000_TDFPC, "TDFPC"},
 244
 245        /* List Terminator */
 246        {}
 247};
 248
 249/*
 250 * igb_regdump - register printout routine
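     * For per-queue registers the values of all four queues are read and
     * printed on a single line; other registers are printed individually.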
 251 */
 252static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
 253{
 254        int n = 0;
 255        char rname[16];
 256        u32 regs[8];
 257
 258        switch (reginfo->ofs) {
 259        case E1000_RDLEN(0):
 260                for (n = 0; n < 4; n++)
 261                        regs[n] = rd32(E1000_RDLEN(n));
 262                break;
 263        case E1000_RDH(0):
 264                for (n = 0; n < 4; n++)
 265                        regs[n] = rd32(E1000_RDH(n));
 266                break;
 267        case E1000_RDT(0):
 268                for (n = 0; n < 4; n++)
 269                        regs[n] = rd32(E1000_RDT(n));
 270                break;
 271        case E1000_RXDCTL(0):
 272                for (n = 0; n < 4; n++)
 273                        regs[n] = rd32(E1000_RXDCTL(n));
 274                break;
 275        case E1000_RDBAL(0):
 276                for (n = 0; n < 4; n++)
 277                        regs[n] = rd32(E1000_RDBAL(n));
 278                break;
 279        case E1000_RDBAH(0):
 280                for (n = 0; n < 4; n++)
 281                        regs[n] = rd32(E1000_RDBAH(n));
 282                break;
 283        case E1000_TDBAL(0):
 284                for (n = 0; n < 4; n++)
 285                        regs[n] = rd32(E1000_TDBAL(n));
 286                break;
 287        case E1000_TDBAH(0):
 288                for (n = 0; n < 4; n++)
 289                        regs[n] = rd32(E1000_TDBAH(n));
 290                break;
 291        case E1000_TDLEN(0):
 292                for (n = 0; n < 4; n++)
 293                        regs[n] = rd32(E1000_TDLEN(n));
 294                break;
 295        case E1000_TDH(0):
 296                for (n = 0; n < 4; n++)
 297                        regs[n] = rd32(E1000_TDH(n));
 298                break;
 299        case E1000_TDT(0):
 300                for (n = 0; n < 4; n++)
 301                        regs[n] = rd32(E1000_TDT(n));
 302                break;
 303        case E1000_TXDCTL(0):
 304                for (n = 0; n < 4; n++)
 305                        regs[n] = rd32(E1000_TXDCTL(n));
 306                break;
 307        default:
 308                printk(KERN_INFO "%-15s %08x\n",
 309                        reginfo->name, rd32(reginfo->ofs));
 310                return;
 311        }
 312
 313        snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
 314        printk(KERN_INFO "%-15s ", rname);
 315        for (n = 0; n < 4; n++)
 316                printk(KERN_CONT "%08x ", regs[n]);
 317        printk(KERN_CONT "\n");
 318}
 319
 320/*
 321 * igb_dump - Print registers, tx-rings and rx-rings
 322 */
 323static void igb_dump(struct igb_adapter *adapter)
 324{
 325        struct net_device *netdev = adapter->netdev;
 326        struct e1000_hw *hw = &adapter->hw;
 327        struct igb_reg_info *reginfo;
 328        int n = 0;
 329        struct igb_ring *tx_ring;
 330        union e1000_adv_tx_desc *tx_desc;
 331        struct my_u0 { u64 a; u64 b; } *u0;
 332        struct igb_buffer *buffer_info;
 333        struct igb_ring *rx_ring;
 334        union e1000_adv_rx_desc *rx_desc;
 335        u32 staterr;
 336        int i = 0;
 337
 338        if (!netif_msg_hw(adapter))
 339                return;
 340
 341        /* Print netdevice Info */
 342        if (netdev) {
 343                dev_info(&adapter->pdev->dev, "Net device Info\n");
 344                printk(KERN_INFO "Device Name     state            "
 345                        "trans_start      last_rx\n");
 346                printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
 347                       netdev->name,
 348                       netdev->state,
 349                       netdev->trans_start,
 350                       netdev->last_rx);
 351        }
 352
 353        /* Print Registers */
 354        dev_info(&adapter->pdev->dev, "Register Dump\n");
 355        printk(KERN_INFO " Register Name   Value\n");
 356        for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
 357             reginfo->name; reginfo++) {
 358                igb_regdump(hw, reginfo);
 359        }
 360
 361        /* Print TX Ring Summary */
 362        if (!netdev || !netif_running(netdev))
 363                goto exit;
 364
 365        dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
 366        printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
 367                " leng ntw timestamp\n");
 368        for (n = 0; n < adapter->num_tx_queues; n++) {
 369                tx_ring = adapter->tx_ring[n];
 370                buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
 371                printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
 372                           n, tx_ring->next_to_use, tx_ring->next_to_clean,
 373                           (u64)buffer_info->dma,
 374                           buffer_info->length,
 375                           buffer_info->next_to_watch,
 376                           (u64)buffer_info->time_stamp);
 377        }
 378
 379        /* Print TX Rings */
 380        if (!netif_msg_tx_done(adapter))
 381                goto rx_ring_summary;
 382
 383        dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
 384
 385        /* Transmit Descriptor Formats
 386         *
 387         * Advanced Transmit Descriptor
 388         *   +--------------------------------------------------------------+
 389         * 0 |         Buffer Address [63:0]                                |
 390         *   +--------------------------------------------------------------+
 391         * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
 392         *   +--------------------------------------------------------------+
 393         *   63      46 45    40 39 38 36 35 32 31   24             15       0
 394         */
 395
 396        for (n = 0; n < adapter->num_tx_queues; n++) {
 397                tx_ring = adapter->tx_ring[n];
 398                printk(KERN_INFO "------------------------------------\n");
 399                printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
 400                printk(KERN_INFO "------------------------------------\n");
 401                printk(KERN_INFO "T [desc]     [address 63:0  ] "
 402                        "[PlPOCIStDDM Ln] [bi->dma       ] "
 403                        "leng  ntw timestamp        bi->skb\n");
 404
 405                for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
 406                        tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
 407                        buffer_info = &tx_ring->buffer_info[i];
 408                        u0 = (struct my_u0 *)tx_desc;
 409                        printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
 410                                " %04X  %3X %016llX %p", i,
 411                                le64_to_cpu(u0->a),
 412                                le64_to_cpu(u0->b),
 413                                (u64)buffer_info->dma,
 414                                buffer_info->length,
 415                                buffer_info->next_to_watch,
 416                                (u64)buffer_info->time_stamp,
 417                                buffer_info->skb);
 418                        if (i == tx_ring->next_to_use &&
 419                                i == tx_ring->next_to_clean)
 420                                printk(KERN_CONT " NTC/U\n");
 421                        else if (i == tx_ring->next_to_use)
 422                                printk(KERN_CONT " NTU\n");
 423                        else if (i == tx_ring->next_to_clean)
 424                                printk(KERN_CONT " NTC\n");
 425                        else
 426                                printk(KERN_CONT "\n");
 427
 428                        if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
 429                                print_hex_dump(KERN_INFO, "",
 430                                        DUMP_PREFIX_ADDRESS,
 431                                        16, 1, phys_to_virt(buffer_info->dma),
 432                                        buffer_info->length, true);
 433                }
 434        }
 435
 436        /* Print RX Rings Summary */
 437rx_ring_summary:
 438        dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
 439        printk(KERN_INFO "Queue [NTU] [NTC]\n");
 440        for (n = 0; n < adapter->num_rx_queues; n++) {
 441                rx_ring = adapter->rx_ring[n];
 442                printk(KERN_INFO " %5d %5X %5X\n", n,
 443                           rx_ring->next_to_use, rx_ring->next_to_clean);
 444        }
 445
 446        /* Print RX Rings */
 447        if (!netif_msg_rx_status(adapter))
 448                goto exit;
 449
 450        dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
 451
 452        /* Advanced Receive Descriptor (Read) Format
 453         *    63                                           1        0
 454         *    +-----------------------------------------------------+
 455         *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
 456         *    +----------------------------------------------+------+
 457         *  8 |       Header Buffer Address [63:1]           |  DD  |
 458         *    +-----------------------------------------------------+
 459         *
 460         *
 461         * Advanced Receive Descriptor (Write-Back) Format
 462         *
 463         *   63       48 47    32 31  30      21 20 17 16   4 3     0
 464         *   +------------------------------------------------------+
 465         * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
 466         *   | Checksum   Ident  |   |           |    | Type | Type |
 467         *   +------------------------------------------------------+
 468         * 8 | VLAN Tag | Length | Extended Error | Extended Status |
 469         *   +------------------------------------------------------+
 470         *   63       48 47    32 31            20 19               0
 471         */
 472
 473        for (n = 0; n < adapter->num_rx_queues; n++) {
 474                rx_ring = adapter->rx_ring[n];
 475                printk(KERN_INFO "------------------------------------\n");
 476                printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
 477                printk(KERN_INFO "------------------------------------\n");
 478                printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
 479                        "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
 480                        "<-- Adv Rx Read format\n");
 481                printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
 482                        "[vl er S cks ln] ---------------- [bi->skb] "
 483                        "<-- Adv Rx Write-Back format\n");
 484
 485                for (i = 0; i < rx_ring->count; i++) {
 486                        buffer_info = &rx_ring->buffer_info[i];
 487                        rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
 488                        u0 = (struct my_u0 *)rx_desc;
 489                        staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
 490                        if (staterr & E1000_RXD_STAT_DD) {
 491                                /* Descriptor Done */
 492                                printk(KERN_INFO "RWB[0x%03X]     %016llX "
 493                                        "%016llX ---------------- %p", i,
 494                                        le64_to_cpu(u0->a),
 495                                        le64_to_cpu(u0->b),
 496                                        buffer_info->skb);
 497                        } else {
 498                                printk(KERN_INFO "R  [0x%03X]     %016llX "
 499                                        "%016llX %016llX %p", i,
 500                                        le64_to_cpu(u0->a),
 501                                        le64_to_cpu(u0->b),
 502                                        (u64)buffer_info->dma,
 503                                        buffer_info->skb);
 504
 505                                if (netif_msg_pktdata(adapter)) {
 506                                        print_hex_dump(KERN_INFO, "",
 507                                                DUMP_PREFIX_ADDRESS,
 508                                                16, 1,
 509                                                phys_to_virt(buffer_info->dma),
 510                                                rx_ring->rx_buffer_len, true);
 511                                        if (rx_ring->rx_buffer_len
 512                                                < IGB_RXBUFFER_1024)
 513                                                print_hex_dump(KERN_INFO, "",
 514                                                  DUMP_PREFIX_ADDRESS,
 515                                                  16, 1,
 516                                                  phys_to_virt(
 517                                                    buffer_info->page_dma +
 518                                                    buffer_info->page_offset),
 519                                                  PAGE_SIZE/2, true);
 520                                }
 521                        }
 522
 523                        if (i == rx_ring->next_to_use)
 524                                printk(KERN_CONT " NTU\n");
 525                        else if (i == rx_ring->next_to_clean)
 526                                printk(KERN_CONT " NTC\n");
 527                        else
 528                                printk(KERN_CONT "\n");
 529
 530                }
 531        }
 532
 533exit:
 534        return;
 535}
 536
 537
 538/**
 539 * igb_read_clock - read raw cycle counter (to be used by time counter)
 540 */
 541static cycle_t igb_read_clock(const struct cyclecounter *tc)
 542{
 543        struct igb_adapter *adapter =
 544                container_of(tc, struct igb_adapter, cycles);
 545        struct e1000_hw *hw = &adapter->hw;
 546        u64 stamp = 0;
 547        int shift = 0;
 548
 549        /*
 550         * The timestamp latches on lowest register read. For the 82580
 551         * the lowest register is SYSTIMR instead of SYSTIML.  However, since we
 552         * never adjusted TIMINCA, SYSTIMR always reads as all 0s, so it is ignored.
 553         */
 554        if (hw->mac.type == e1000_82580) {
 555                stamp = rd32(E1000_SYSTIMR) >> 8;
 556                shift = IGB_82580_TSYNC_SHIFT;
 557        }
 558
 559        stamp |= (u64)rd32(E1000_SYSTIML) << shift;
 560        stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
 561        return stamp;
 562}
 563
 564/**
 565 * igb_get_hw_dev - return device
 566 * used by hardware layer to print debugging information
 567 **/
 568struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
 569{
 570        struct igb_adapter *adapter = hw->back;
 571        return adapter->netdev;
 572}
 573
 574/**
 575 * igb_init_module - Driver Registration Routine
 576 *
 577 * igb_init_module is the first routine called when the driver is
 578 * loaded. All it does is register with the PCI subsystem.
 579 **/
 580static int __init igb_init_module(void)
 581{
 582        int ret;
 583        printk(KERN_INFO "%s - version %s\n",
 584               igb_driver_string, igb_driver_version);
 585
 586        printk(KERN_INFO "%s\n", igb_copyright);
 587
 588#ifdef CONFIG_IGB_DCA
 589        dca_register_notify(&dca_notifier);
 590#endif
 591        ret = pci_register_driver(&igb_driver);
 592        return ret;
 593}
 594
 595module_init(igb_init_module);
 596
 597/**
 598 * igb_exit_module - Driver Exit Cleanup Routine
 599 *
 600 * igb_exit_module is called just before the driver is removed
 601 * from memory.
 602 **/
 603static void __exit igb_exit_module(void)
 604{
 605#ifdef CONFIG_IGB_DCA
 606        dca_unregister_notify(&dca_notifier);
 607#endif
 608        pci_unregister_driver(&igb_driver);
 609}
 610
 611module_exit(igb_exit_module);
 612
 613#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
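    /* The mapping above interleaves ring indices across the two halves of
     * the queue space: Q_IDX_82576(0) = 0, (1) = 8, (2) = 1, (3) = 9,
     * (4) = 2, ... so even indices land in queues 0-7 and odd indices in
     * queues 8-15. */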
 614/**
 615 * igb_cache_ring_register - Descriptor ring to register mapping
 616 * @adapter: board private structure to initialize
 617 *
 618 * Once we know the feature-set enabled for the device, we'll cache
 619 * the register offset the descriptor ring is assigned to.
 620 **/
 621static void igb_cache_ring_register(struct igb_adapter *adapter)
 622{
 623        int i = 0, j = 0;
 624        u32 rbase_offset = adapter->vfs_allocated_count;
 625
 626        switch (adapter->hw.mac.type) {
 627        case e1000_82576:
 628                /* The queues are allocated for virtualization such that VF 0
 629                 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
 630                 * In order to avoid collision we start at the first free queue
 631                 * and continue consuming queues in the same sequence
 632                 */
 633                if (adapter->vfs_allocated_count) {
 634                        for (; i < adapter->rss_queues; i++)
 635                                adapter->rx_ring[i]->reg_idx = rbase_offset +
 636                                                               Q_IDX_82576(i);
 637                }
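                    /* Fall through - remaining rings get sequential register indices */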
 638        case e1000_82575:
 639        case e1000_82580:
 640        case e1000_i350:
 641        default:
 642                for (; i < adapter->num_rx_queues; i++)
 643                        adapter->rx_ring[i]->reg_idx = rbase_offset + i;
 644                for (; j < adapter->num_tx_queues; j++)
 645                        adapter->tx_ring[j]->reg_idx = rbase_offset + j;
 646                break;
 647        }
 648}
 649
 650static void igb_free_queues(struct igb_adapter *adapter)
 651{
 652        int i;
 653
 654        for (i = 0; i < adapter->num_tx_queues; i++) {
 655                kfree(adapter->tx_ring[i]);
 656                adapter->tx_ring[i] = NULL;
 657        }
 658        for (i = 0; i < adapter->num_rx_queues; i++) {
 659                kfree(adapter->rx_ring[i]);
 660                adapter->rx_ring[i] = NULL;
 661        }
 662        adapter->num_rx_queues = 0;
 663        adapter->num_tx_queues = 0;
 664}
 665
 666/**
 667 * igb_alloc_queues - Allocate memory for all rings
 668 * @adapter: board private structure to initialize
 669 *
 670 * We allocate one ring per queue at run-time since we don't know the
 671 * number of queues at compile-time.
 672 **/
 673static int igb_alloc_queues(struct igb_adapter *adapter)
 674{
 675        struct igb_ring *ring;
 676        int i;
 677
 678        for (i = 0; i < adapter->num_tx_queues; i++) {
 679                ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
 680                if (!ring)
 681                        goto err;
 682                ring->count = adapter->tx_ring_count;
 683                ring->queue_index = i;
 684                ring->dev = &adapter->pdev->dev;
 685                ring->netdev = adapter->netdev;
 686                /* For 82575, context index must be unique per ring. */
 687                if (adapter->hw.mac.type == e1000_82575)
 688                        ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
 689                adapter->tx_ring[i] = ring;
 690        }
 691
 692        for (i = 0; i < adapter->num_rx_queues; i++) {
 693                ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
 694                if (!ring)
 695                        goto err;
 696                ring->count = adapter->rx_ring_count;
 697                ring->queue_index = i;
 698                ring->dev = &adapter->pdev->dev;
 699                ring->netdev = adapter->netdev;
 700                ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
 701                ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
 702                /* set flag indicating ring supports SCTP checksum offload */
 703                if (adapter->hw.mac.type >= e1000_82576)
 704                        ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
 705                adapter->rx_ring[i] = ring;
 706        }
 707
 708        igb_cache_ring_register(adapter);
 709
 710        return 0;
 711
 712err:
 713        igb_free_queues(adapter);
 714
 715        return -ENOMEM;
 716}
 717
 718#define IGB_N0_QUEUE -1
 719static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
 720{
 721        u32 msixbm = 0;
 722        struct igb_adapter *adapter = q_vector->adapter;
 723        struct e1000_hw *hw = &adapter->hw;
 724        u32 ivar, index;
 725        int rx_queue = IGB_N0_QUEUE;
 726        int tx_queue = IGB_N0_QUEUE;
 727
 728        if (q_vector->rx_ring)
 729                rx_queue = q_vector->rx_ring->reg_idx;
 730        if (q_vector->tx_ring)
 731                tx_queue = q_vector->tx_ring->reg_idx;
 732
 733        switch (hw->mac.type) {
 734        case e1000_82575:
 735                /* The 82575 assigns vectors using a bitmask, which matches the
 736                   bitmask for the EICR/EIMS/EIMC registers.  To assign one
 737                   or more queues to a vector, we write the appropriate bits
 738                   into the MSIXBM register for that vector. */
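                    /* e.g. RX queue 2 would set (E1000_EICR_RX_QUEUE0 << 2) in
                       the MSIXBM entry for this vector, per the code below. */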
 739                if (rx_queue > IGB_N0_QUEUE)
 740                        msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
 741                if (tx_queue > IGB_N0_QUEUE)
 742                        msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
 743                if (!adapter->msix_entries && msix_vector == 0)
 744                        msixbm |= E1000_EIMS_OTHER;
 745                array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
 746                q_vector->eims_value = msixbm;
 747                break;
 748        case e1000_82576:
 749                /* 82576 uses a table-based method for assigning vectors.
 750                   Each queue has a single entry in the table to which we write
 751                   a vector number along with a "valid" bit.  Sadly, the layout
 752                   of the table is somewhat counterintuitive. */
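                    /* Byte layout of each IVAR0 entry, as implied by the code
                       below: byte 0 = RX queue n, byte 1 = TX queue n,
                       byte 2 = RX queue n + 8, byte 3 = TX queue n + 8. */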
 753                if (rx_queue > IGB_N0_QUEUE) {
 754                        index = (rx_queue & 0x7);
 755                        ivar = array_rd32(E1000_IVAR0, index);
 756                        if (rx_queue < 8) {
 757                                /* vector goes into low byte of register */
 758                                ivar = ivar & 0xFFFFFF00;
 759                                ivar |= msix_vector | E1000_IVAR_VALID;
 760                        } else {
 761                                /* vector goes into third byte of register */
 762                                ivar = ivar & 0xFF00FFFF;
 763                                ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
 764                        }
 765                        array_wr32(E1000_IVAR0, index, ivar);
 766                }
 767                if (tx_queue > IGB_N0_QUEUE) {
 768                        index = (tx_queue & 0x7);
 769                        ivar = array_rd32(E1000_IVAR0, index);
 770                        if (tx_queue < 8) {
 771                                /* vector goes into second byte of register */
 772                                ivar = ivar & 0xFFFF00FF;
 773                                ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
 774                        } else {
 775                                /* vector goes into high byte of register */
 776                                ivar = ivar & 0x00FFFFFF;
 777                                ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
 778                        }
 779                        array_wr32(E1000_IVAR0, index, ivar);
 780                }
 781                q_vector->eims_value = 1 << msix_vector;
 782                break;
 783        case e1000_82580:
 784        case e1000_i350:
 785                /* 82580 uses the same table-based approach as 82576 but
 786                   has fewer IVAR entries, so each entry carries two queues. */
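                    /* Here index = queue >> 1: even-numbered queues use bytes
                       0 (RX) and 1 (TX) of an entry, odd-numbered queues use
                       bytes 2 (RX) and 3 (TX), as the assignments below show. */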
 787                if (rx_queue > IGB_N0_QUEUE) {
 788                        index = (rx_queue >> 1);
 789                        ivar = array_rd32(E1000_IVAR0, index);
 790                        if (rx_queue & 0x1) {
 791                                /* vector goes into third byte of register */
 792                                ivar = ivar & 0xFF00FFFF;
 793                                ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
 794                        } else {
 795                                /* vector goes into low byte of register */
 796                                ivar = ivar & 0xFFFFFF00;
 797                                ivar |= msix_vector | E1000_IVAR_VALID;
 798                        }
 799                        array_wr32(E1000_IVAR0, index, ivar);
 800                }
 801                if (tx_queue > IGB_N0_QUEUE) {
 802                        index = (tx_queue >> 1);
 803                        ivar = array_rd32(E1000_IVAR0, index);
 804                        if (tx_queue & 0x1) {
 805                                /* vector goes into high byte of register */
 806                                ivar = ivar & 0x00FFFFFF;
 807                                ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
 808                        } else {
 809                                /* vector goes into second byte of register */
 810                                ivar = ivar & 0xFFFF00FF;
 811                                ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
 812                        }
 813                        array_wr32(E1000_IVAR0, index, ivar);
 814                }
 815                q_vector->eims_value = 1 << msix_vector;
 816                break;
 817        default:
 818                BUG();
 819                break;
 820        }
 821
 822        /* add q_vector eims value to global eims_enable_mask */
 823        adapter->eims_enable_mask |= q_vector->eims_value;
 824
 825        /* configure q_vector to set itr on first interrupt */
 826        q_vector->set_itr = 1;
 827}
 828
 829/**
 830 * igb_configure_msix - Configure MSI-X hardware
 831 *
 832 * igb_configure_msix sets up the hardware to properly
 833 * generate MSI-X interrupts.
 834 **/
 835static void igb_configure_msix(struct igb_adapter *adapter)
 836{
 837        u32 tmp;
 838        int i, vector = 0;
 839        struct e1000_hw *hw = &adapter->hw;
 840
 841        adapter->eims_enable_mask = 0;
 842
 843        /* set vector for other causes, i.e. link changes */
 844        switch (hw->mac.type) {
 845        case e1000_82575:
 846                tmp = rd32(E1000_CTRL_EXT);
 847                /* enable MSI-X PBA support*/
 848                tmp |= E1000_CTRL_EXT_PBA_CLR;
 849
 850                /* Auto-Mask interrupts upon ICR read. */
 851                tmp |= E1000_CTRL_EXT_EIAME;
 852                tmp |= E1000_CTRL_EXT_IRCA;
 853
 854                wr32(E1000_CTRL_EXT, tmp);
 855
 856                /* enable msix_other interrupt */
 857                array_wr32(E1000_MSIXBM(0), vector++,
 858                                      E1000_EIMS_OTHER);
 859                adapter->eims_other = E1000_EIMS_OTHER;
 860
 861                break;
 862
 863        case e1000_82576:
 864        case e1000_82580:
 865        case e1000_i350:
 866                /* Turn on MSI-X capability first, or our settings
 867                 * won't stick.  And it will take days to debug. */
 868                wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
 869                                E1000_GPIE_PBA | E1000_GPIE_EIAME |
 870                                E1000_GPIE_NSICR);
 871
 872                /* enable msix_other interrupt */
 873                adapter->eims_other = 1 << vector;
 874                tmp = (vector++ | E1000_IVAR_VALID) << 8;
 875
 876                wr32(E1000_IVAR_MISC, tmp);
 877                break;
 878        default:
 879                /* do nothing, since nothing else supports MSI-X */
 880                break;
 881        } /* switch (hw->mac.type) */
 882
 883        adapter->eims_enable_mask |= adapter->eims_other;
 884
 885        for (i = 0; i < adapter->num_q_vectors; i++)
 886                igb_assign_vector(adapter->q_vector[i], vector++);
 887
 888        wrfl();
 889}
 890
 891/**
 892 * igb_request_msix - Initialize MSI-X interrupts
 893 *
 894 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 895 * kernel.
 896 **/
 897static int igb_request_msix(struct igb_adapter *adapter)
 898{
 899        struct net_device *netdev = adapter->netdev;
 900        struct e1000_hw *hw = &adapter->hw;
 901        int i, err = 0, vector = 0;
 902
 903        err = request_irq(adapter->msix_entries[vector].vector,
 904                          igb_msix_other, 0, netdev->name, adapter);
 905        if (err)
 906                goto out;
 907        vector++;
 908
 909        for (i = 0; i < adapter->num_q_vectors; i++) {
 910                struct igb_q_vector *q_vector = adapter->q_vector[i];
 911
 912                q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
 913
 914                if (q_vector->rx_ring && q_vector->tx_ring)
 915                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
 916                                q_vector->rx_ring->queue_index);
 917                else if (q_vector->tx_ring)
 918                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
 919                                q_vector->tx_ring->queue_index);
 920                else if (q_vector->rx_ring)
 921                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
 922                                q_vector->rx_ring->queue_index);
 923                else
 924                        sprintf(q_vector->name, "%s-unused", netdev->name);
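                    /* These names show up in /proc/interrupts once the IRQ is
                       requested below, e.g. "eth0-TxRx-0" (the interface name
                       here is illustrative). */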
 925
 926                err = request_irq(adapter->msix_entries[vector].vector,
 927                                  igb_msix_ring, 0, q_vector->name,
 928                                  q_vector);
 929                if (err)
 930                        goto out;
 931                vector++;
 932        }
 933
 934        igb_configure_msix(adapter);
 935        return 0;
 936out:
 937        return err;
 938}
 939
 940static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
 941{
 942        if (adapter->msix_entries) {
 943                pci_disable_msix(adapter->pdev);
 944                kfree(adapter->msix_entries);
 945                adapter->msix_entries = NULL;
 946        } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
 947                pci_disable_msi(adapter->pdev);
 948        }
 949}
 950
 951/**
 952 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 953 * @adapter: board private structure to initialize
 954 *
 955 * This function frees the memory allocated to the q_vectors.  In addition if
 956 * NAPI is enabled it will delete any references to the NAPI struct prior
 957 * to freeing the q_vector.
 958 **/
 959static void igb_free_q_vectors(struct igb_adapter *adapter)
 960{
 961        int v_idx;
 962
 963        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
 964                struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
 965                adapter->q_vector[v_idx] = NULL;
 966                if (!q_vector)
 967                        continue;
 968                netif_napi_del(&q_vector->napi);
 969                kfree(q_vector);
 970        }
 971        adapter->num_q_vectors = 0;
 972}
 973
 974/**
 975 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 976 *
 977 * This function resets the device so that it has 0 rx queues, tx queues, and
 978 * MSI-X interrupts allocated.
 979 */
 980static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
 981{
 982        igb_free_queues(adapter);
 983        igb_free_q_vectors(adapter);
 984        igb_reset_interrupt_capability(adapter);
 985}
 986
 987/**
 988 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 989 *
 990 * Attempt to configure interrupts using the best available
 991 * capabilities of the hardware and kernel.
 992 **/
 993static int igb_set_interrupt_capability(struct igb_adapter *adapter)
 994{
 995        int err;
 996        int numvecs, i;
 997
 998        /* Number of supported queues. */
 999        adapter->num_rx_queues = adapter->rss_queues;
1000        if (adapter->vfs_allocated_count)
1001                adapter->num_tx_queues = 1;
1002        else
1003                adapter->num_tx_queues = adapter->rss_queues;
1004
1005        /* start with one vector for every rx queue */
1006        numvecs = adapter->num_rx_queues;
1007
1008        /* if tx handler is separate add 1 for every tx queue */
1009        if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1010                numvecs += adapter->num_tx_queues;
1011
1012        /* store the number of vectors reserved for queues */
1013        adapter->num_q_vectors = numvecs;
1014
1015        /* add 1 vector for link status interrupts */
1016        numvecs++;
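            /* Illustrative count: with rss_queues = 4 and queue pairing enabled
             * this gives 4 queue vectors + 1 link vector = 5 MSI-X entries;
             * without pairing (and no VFs) it would be 4 + 4 + 1 = 9. */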
1017        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1018                                        GFP_KERNEL);
1019        if (!adapter->msix_entries)
1020                goto msi_only;
1021
1022        for (i = 0; i < numvecs; i++)
1023                adapter->msix_entries[i].entry = i;
1024
1025        err = pci_enable_msix(adapter->pdev,
1026                              adapter->msix_entries,
1027                              numvecs);
1028        if (err == 0)
1029                goto out;
1030
1031        igb_reset_interrupt_capability(adapter);
1032
1033        /* If we can't do MSI-X, try MSI */
1034msi_only:
1035#ifdef CONFIG_PCI_IOV
1036        /* disable SR-IOV for non MSI-X configurations */
1037        if (adapter->vf_data) {
1038                struct e1000_hw *hw = &adapter->hw;
1039                /* disable iov and allow time for transactions to clear */
1040                pci_disable_sriov(adapter->pdev);
1041                msleep(500);
1042
1043                kfree(adapter->vf_data);
1044                adapter->vf_data = NULL;
1045                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1046                msleep(100);
1047                dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1048        }
1049#endif
1050        adapter->vfs_allocated_count = 0;
1051        adapter->rss_queues = 1;
1052        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1053        adapter->num_rx_queues = 1;
1054        adapter->num_tx_queues = 1;
1055        adapter->num_q_vectors = 1;
1056        if (!pci_enable_msi(adapter->pdev))
1057                adapter->flags |= IGB_FLAG_HAS_MSI;
1058out:
1059        /* Notify the stack of the (possibly) reduced queue counts. */
1060        netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1061        return netif_set_real_num_rx_queues(adapter->netdev,
1062                                            adapter->num_rx_queues);
1063}
1064
1065/**
1066 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1067 * @adapter: board private structure to initialize
1068 *
1069 * We allocate one q_vector per queue interrupt.  If allocation fails we
1070 * return -ENOMEM.
1071 **/
1072static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1073{
1074        struct igb_q_vector *q_vector;
1075        struct e1000_hw *hw = &adapter->hw;
1076        int v_idx;
1077
1078        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1079                q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1080                if (!q_vector)
1081                        goto err_out;
1082                q_vector->adapter = adapter;
1083                q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1084                q_vector->itr_val = IGB_START_ITR;
1085                netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1086                adapter->q_vector[v_idx] = q_vector;
1087        }
1088        return 0;
1089
1090err_out:
1091        igb_free_q_vectors(adapter);
1092        return -ENOMEM;
1093}
1094
1095static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1096                                      int ring_idx, int v_idx)
1097{
1098        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1099
1100        q_vector->rx_ring = adapter->rx_ring[ring_idx];
1101        q_vector->rx_ring->q_vector = q_vector;
1102        q_vector->itr_val = adapter->rx_itr_setting;
1103        if (q_vector->itr_val && q_vector->itr_val <= 3)
1104                q_vector->itr_val = IGB_START_ITR;
1105}
1106
1107static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1108                                      int ring_idx, int v_idx)
1109{
1110        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1111
1112        q_vector->tx_ring = adapter->tx_ring[ring_idx];
1113        q_vector->tx_ring->q_vector = q_vector;
1114        q_vector->itr_val = adapter->tx_itr_setting;
1115        if (q_vector->itr_val && q_vector->itr_val <= 3)
1116                q_vector->itr_val = IGB_START_ITR;
1117}
1118
1119/**
1120 * igb_map_ring_to_vector - maps allocated queues to vectors
1121 *
1122 * This function maps the recently allocated queues to vectors.
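     * When there are fewer vectors than RX plus TX rings (queue pairing), the
     * RX and TX rings with the same index share one vector, e.g. 4 RX + 4 TX
     * rings on 4 vectors gives vector i both rx_ring[i] and tx_ring[i].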
1123 **/
1124static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1125{
1126        int i;
1127        int v_idx = 0;
1128
1129        if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1130            (adapter->num_q_vectors < adapter->num_tx_queues))
1131                return -ENOMEM;
1132
1133        if (adapter->num_q_vectors >=
1134            (adapter->num_rx_queues + adapter->num_tx_queues)) {
1135                for (i = 0; i < adapter->num_rx_queues; i++)
1136                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1137                for (i = 0; i < adapter->num_tx_queues; i++)
1138                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1139        } else {
1140                for (i = 0; i < adapter->num_rx_queues; i++) {
1141                        if (i < adapter->num_tx_queues)
1142                                igb_map_tx_ring_to_vector(adapter, i, v_idx);
1143                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1144                }
1145                for (; i < adapter->num_tx_queues; i++)
1146                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1147        }
1148        return 0;
1149}
1150
1151/**
1152 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1153 *
1154 * This function initializes the interrupts and allocates all of the queues.
1155 **/
1156static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1157{
1158        struct pci_dev *pdev = adapter->pdev;
1159        int err;
1160
1161        err = igb_set_interrupt_capability(adapter);
1162        if (err)
1163                return err;
1164
1165        err = igb_alloc_q_vectors(adapter);
1166        if (err) {
1167                dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1168                goto err_alloc_q_vectors;
1169        }
1170
1171        err = igb_alloc_queues(adapter);
1172        if (err) {
1173                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1174                goto err_alloc_queues;
1175        }
1176
1177        err = igb_map_ring_to_vector(adapter);
1178        if (err) {
1179                dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1180                goto err_map_queues;
1181        }
1182
1183
1184        return 0;
1185err_map_queues:
1186        igb_free_queues(adapter);
1187err_alloc_queues:
1188        igb_free_q_vectors(adapter);
1189err_alloc_q_vectors:
1190        igb_reset_interrupt_capability(adapter);
1191        return err;
1192}
1193
1194/**
1195 * igb_request_irq - initialize interrupts
1196 *
1197 * Attempts to configure interrupts using the best available
1198 * capabilities of the hardware and kernel.
1199 **/
1200static int igb_request_irq(struct igb_adapter *adapter)
1201{
1202        struct net_device *netdev = adapter->netdev;
1203        struct pci_dev *pdev = adapter->pdev;
1204        int err = 0;
1205
1206        if (adapter->msix_entries) {
1207                err = igb_request_msix(adapter);
1208                if (!err)
1209                        goto request_done;
1210                /* fall back to MSI */
1211                igb_clear_interrupt_scheme(adapter);
1212                if (!pci_enable_msi(adapter->pdev))
1213                        adapter->flags |= IGB_FLAG_HAS_MSI;
1214                igb_free_all_tx_resources(adapter);
1215                igb_free_all_rx_resources(adapter);
1216                adapter->num_tx_queues = 1;
1217                adapter->num_rx_queues = 1;
1218                adapter->num_q_vectors = 1;
1219                err = igb_alloc_q_vectors(adapter);
1220                if (err) {
1221                        dev_err(&pdev->dev,
1222                                "Unable to allocate memory for vectors\n");
1223                        goto request_done;
1224                }
1225                err = igb_alloc_queues(adapter);
1226                if (err) {
1227                        dev_err(&pdev->dev,
1228                                "Unable to allocate memory for queues\n");
1229                        igb_free_q_vectors(adapter);
1230                        goto request_done;
1231                }
1232                igb_setup_all_tx_resources(adapter);
1233                igb_setup_all_rx_resources(adapter);
1234        } else {
1235                igb_assign_vector(adapter->q_vector[0], 0);
1236        }
1237
1238        if (adapter->flags & IGB_FLAG_HAS_MSI) {
1239                err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1240                                  netdev->name, adapter);
1241                if (!err)
1242                        goto request_done;
1243
1244                /* fall back to legacy interrupts */
1245                igb_reset_interrupt_capability(adapter);
1246                adapter->flags &= ~IGB_FLAG_HAS_MSI;
1247        }
1248
1249        err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1250                          netdev->name, adapter);
1251
1252        if (err)
1253                dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1254                        err);
1255
1256request_done:
1257        return err;
1258}
1259
1260static void igb_free_irq(struct igb_adapter *adapter)
1261{
1262        if (adapter->msix_entries) {
1263                int vector = 0, i;
1264
1265                free_irq(adapter->msix_entries[vector++].vector, adapter);
1266
1267                for (i = 0; i < adapter->num_q_vectors; i++) {
1268                        struct igb_q_vector *q_vector = adapter->q_vector[i];
1269                        free_irq(adapter->msix_entries[vector++].vector,
1270                                 q_vector);
1271                }
1272        } else {
1273                free_irq(adapter->pdev->irq, adapter);
1274        }
1275}
1276
1277/**
1278 * igb_irq_disable - Mask off interrupt generation on the NIC
1279 * @adapter: board private structure
1280 **/
1281static void igb_irq_disable(struct igb_adapter *adapter)
1282{
1283        struct e1000_hw *hw = &adapter->hw;
1284
1285        /*
1286         * we need to be careful when disabling interrupts.  The VFs are also
1287         * mapped into these registers, and clearing the bits can cause
1288         * issues for the VF drivers, so we only clear what we set.
1289         */
1290        if (adapter->msix_entries) {
1291                u32 regval = rd32(E1000_EIAM);
1292                wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1293                wr32(E1000_EIMC, adapter->eims_enable_mask);
1294                regval = rd32(E1000_EIAC);
1295                wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1296        }
1297
1298        wr32(E1000_IAM, 0);
1299        wr32(E1000_IMC, ~0);
1300        wrfl();
1301        if (adapter->msix_entries) {
1302                int i;
1303                for (i = 0; i < adapter->num_q_vectors; i++)
1304                        synchronize_irq(adapter->msix_entries[i].vector);
1305        } else {
1306                synchronize_irq(adapter->pdev->irq);
1307        }
1308}
1309
1310/**
1311 * igb_irq_enable - Enable default interrupt generation settings
1312 * @adapter: board private structure
1313 **/
1314static void igb_irq_enable(struct igb_adapter *adapter)
1315{
1316        struct e1000_hw *hw = &adapter->hw;
1317
1318        if (adapter->msix_entries) {
1319                u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1320                u32 regval = rd32(E1000_EIAC);
1321                wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1322                regval = rd32(E1000_EIAM);
1323                wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1324                wr32(E1000_EIMS, adapter->eims_enable_mask);
1325                if (adapter->vfs_allocated_count) {
1326                        wr32(E1000_MBVFIMR, 0xFF);
1327                        ims |= E1000_IMS_VMMB;
1328                }
1329                if (adapter->hw.mac.type == e1000_82580)
1330                        ims |= E1000_IMS_DRSTA;
1331
1332                wr32(E1000_IMS, ims);
1333        } else {
1334                wr32(E1000_IMS, IMS_ENABLE_MASK |
1335                                E1000_IMS_DRSTA);
1336                wr32(E1000_IAM, IMS_ENABLE_MASK |
1337                                E1000_IMS_DRSTA);
1338        }
1339}
1340
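/**
 * igb_update_mng_vlan - update the manageability VLAN filter
 * @adapter: board private structure
 *
 * Adds the firmware manageability cookie's VLAN ID to the VFTA when the
 * cookie reports one, and removes the previous management VLAN from the
 * filter table once it is no longer in use by the stack.
 **/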
1341static void igb_update_mng_vlan(struct igb_adapter *adapter)
1342{
1343        struct e1000_hw *hw = &adapter->hw;
1344        u16 vid = adapter->hw.mng_cookie.vlan_id;
1345        u16 old_vid = adapter->mng_vlan_id;
1346
1347        if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1348                /* add VID to filter table */
1349                igb_vfta_set(hw, vid, true);
1350                adapter->mng_vlan_id = vid;
1351        } else {
1352                adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1353        }
1354
1355        if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1356            (vid != old_vid) &&
1357            !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1358                /* remove VID from filter table */
1359                igb_vfta_set(hw, old_vid, false);
1360        }
1361}
1362
1363/**
1364 * igb_release_hw_control - release control of the h/w to f/w
1365 * @adapter: address of board private structure
1366 *
1367 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1368 * For ASF and Pass Through versions of f/w this means that the
1369 * driver is no longer loaded.
1370 *
1371 **/
1372static void igb_release_hw_control(struct igb_adapter *adapter)
1373{
1374        struct e1000_hw *hw = &adapter->hw;
1375        u32 ctrl_ext;
1376
1377        /* Let firmware take over control of h/w */
1378        ctrl_ext = rd32(E1000_CTRL_EXT);
1379        wr32(E1000_CTRL_EXT,
1380                        ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1381}
1382
1383/**
1384 * igb_get_hw_control - get control of the h/w from f/w
1385 * @adapter: address of board private structure
1386 *
1387 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1388 * For ASF and Pass Through versions of f/w this means that
1389 * the driver is loaded.
1390 *
1391 **/
1392static void igb_get_hw_control(struct igb_adapter *adapter)
1393{
1394        struct e1000_hw *hw = &adapter->hw;
1395        u32 ctrl_ext;
1396
1397        /* Let firmware know the driver has taken over */
1398        ctrl_ext = rd32(E1000_CTRL_EXT);
1399        wr32(E1000_CTRL_EXT,
1400                        ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1401}
1402
1403/**
1404 * igb_configure - configure the hardware for RX and TX
1405 * @adapter: private board structure
1406 **/
1407static void igb_configure(struct igb_adapter *adapter)
1408{
1409        struct net_device *netdev = adapter->netdev;
1410        int i;
1411
1412        igb_get_hw_control(adapter);
1413        igb_set_rx_mode(netdev);
1414
1415        igb_restore_vlan(adapter);
1416
1417        igb_setup_tctl(adapter);
1418        igb_setup_mrqc(adapter);
1419        igb_setup_rctl(adapter);
1420
1421        igb_configure_tx(adapter);
1422        igb_configure_rx(adapter);
1423
1424        igb_rx_fifo_flush_82575(&adapter->hw);
1425
1426        /* call igb_desc_unused which always leaves
1427         * at least 1 descriptor unused to make sure
1428         * next_to_use != next_to_clean */
1429        for (i = 0; i < adapter->num_rx_queues; i++) {
1430                struct igb_ring *ring = adapter->rx_ring[i];
1431                igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1432        }
1433}
1434
1435/**
1436 * igb_power_up_link - Power up the phy/serdes link
1437 * @adapter: address of board private structure
1438 **/
1439void igb_power_up_link(struct igb_adapter *adapter)
1440{
1441        if (adapter->hw.phy.media_type == e1000_media_type_copper)
1442                igb_power_up_phy_copper(&adapter->hw);
1443        else
1444                igb_power_up_serdes_link_82575(&adapter->hw);
1445}
1446
1447/**
1448 * igb_power_down_link - Power down the phy/serdes link
1449 * @adapter: address of board private structure
1450 */
1451static void igb_power_down_link(struct igb_adapter *adapter)
1452{
1453        if (adapter->hw.phy.media_type == e1000_media_type_copper)
1454                igb_power_down_phy_copper_82575(&adapter->hw);
1455        else
1456                igb_shutdown_serdes_link_82575(&adapter->hw);
1457}
1458
1459/**
1460 * igb_up - Open the interface and prepare it to handle traffic
1461 * @adapter: board private structure
1462 **/
1463int igb_up(struct igb_adapter *adapter)
1464{
1465        struct e1000_hw *hw = &adapter->hw;
1466        int i;
1467
1468        /* hardware has been reset, we need to reload some things */
1469        igb_configure(adapter);
1470
1471        clear_bit(__IGB_DOWN, &adapter->state);
1472
1473        for (i = 0; i < adapter->num_q_vectors; i++) {
1474                struct igb_q_vector *q_vector = adapter->q_vector[i];
1475                napi_enable(&q_vector->napi);
1476        }
1477        if (adapter->msix_entries)
1478                igb_configure_msix(adapter);
1479        else
1480                igb_assign_vector(adapter->q_vector[0], 0);
1481
1482        /* Clear any pending interrupts. */
1483        rd32(E1000_ICR);
1484        igb_irq_enable(adapter);
1485
1486        /* notify VFs that reset has been completed */
1487        if (adapter->vfs_allocated_count) {
1488                u32 reg_data = rd32(E1000_CTRL_EXT);
1489                reg_data |= E1000_CTRL_EXT_PFRSTD;
1490                wr32(E1000_CTRL_EXT, reg_data);
1491        }
1492
1493        netif_tx_start_all_queues(adapter->netdev);
1494
1495        /* start the watchdog. */
1496        hw->mac.get_link_status = 1;
1497        schedule_work(&adapter->watchdog_task);
1498
1499        return 0;
1500}
1501
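/**
 * igb_down - Quiesce the hardware and shut down the interface
 * @adapter: board private structure
 *
 * Disables receives and transmits, stops NAPI and interrupts, kills the
 * watchdog and PHY timers, records final statistics and resets the
 * hardware before the rings are cleaned.
 **/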
1502void igb_down(struct igb_adapter *adapter)
1503{
1504        struct net_device *netdev = adapter->netdev;
1505        struct e1000_hw *hw = &adapter->hw;
1506        u32 tctl, rctl;
1507        int i;
1508
1509        /* signal that we're down so the interrupt handler does not
1510         * reschedule our watchdog timer */
1511        set_bit(__IGB_DOWN, &adapter->state);
1512
1513        /* disable receives in the hardware */
1514        rctl = rd32(E1000_RCTL);
1515        wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1516        /* flush and sleep below */
1517
1518        netif_tx_stop_all_queues(netdev);
1519
1520        /* disable transmits in the hardware */
1521        tctl = rd32(E1000_TCTL);
1522        tctl &= ~E1000_TCTL_EN;
1523        wr32(E1000_TCTL, tctl);
1524        /* flush both disables and wait for them to finish */
1525        wrfl();
1526        msleep(10);
1527
1528        for (i = 0; i < adapter->num_q_vectors; i++) {
1529                struct igb_q_vector *q_vector = adapter->q_vector[i];
1530                napi_disable(&q_vector->napi);
1531        }
1532
1533        igb_irq_disable(adapter);
1534
1535        del_timer_sync(&adapter->watchdog_timer);
1536        del_timer_sync(&adapter->phy_info_timer);
1537
1538        netif_carrier_off(netdev);
1539
1540        /* record the stats before reset */
1541        spin_lock(&adapter->stats64_lock);
1542        igb_update_stats(adapter, &adapter->stats64);
1543        spin_unlock(&adapter->stats64_lock);
1544
1545        adapter->link_speed = 0;
1546        adapter->link_duplex = 0;
1547
1548        if (!pci_channel_offline(adapter->pdev))
1549                igb_reset(adapter);
1550        igb_clean_all_tx_rings(adapter);
1551        igb_clean_all_rx_rings(adapter);
1552#ifdef CONFIG_IGB_DCA
1553
1554        /* since we reset the hardware, the DCA settings were cleared */
1555        igb_setup_dca(adapter);
1556#endif
1557}
1558
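/**
 * igb_reinit_locked - bring the interface down and back up
 * @adapter: board private structure
 *
 * Serializes against concurrent resets via the __IGB_RESETTING bit.
 **/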
1559void igb_reinit_locked(struct igb_adapter *adapter)
1560{
1561        WARN_ON(in_interrupt());
1562        while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1563                msleep(1);
1564        igb_down(adapter);
1565        igb_up(adapter);
1566        clear_bit(__IGB_RESETTING, &adapter->state);
1567}
1568
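/**
 * igb_reset - reset the hardware and restore runtime settings
 * @adapter: board private structure
 *
 * Repartitions the packet buffer, reprograms flow control, notifies any
 * VFs, performs the MAC reset/init and re-applies the settings the reset
 * clears (manageability VLAN, VLAN ethertype, PHY info).
 **/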
1569void igb_reset(struct igb_adapter *adapter)
1570{
1571        struct pci_dev *pdev = adapter->pdev;
1572        struct e1000_hw *hw = &adapter->hw;
1573        struct e1000_mac_info *mac = &hw->mac;
1574        struct e1000_fc_info *fc = &hw->fc;
1575        u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1576        u16 hwm;
1577
1578        /* Repartition the PBA for MTUs greater than 9k.
1579         * A CTRL.RST is required for the change to take effect.
1580         */
1581        switch (mac->type) {
1582        case e1000_i350:
1583        case e1000_82580:
1584                pba = rd32(E1000_RXPBS);
1585                pba = igb_rxpbs_adjust_82580(pba);
1586                break;
1587        case e1000_82576:
1588                pba = rd32(E1000_RXPBS);
1589                pba &= E1000_RXPBS_SIZE_MASK_82576;
1590                break;
1591        case e1000_82575:
1592        default:
1593                pba = E1000_PBA_34K;
1594                break;
1595        }
1596
1597        if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1598            (mac->type < e1000_82576)) {
1599                /* adjust PBA for jumbo frames */
1600                wr32(E1000_PBA, pba);
1601
1602                /* To maintain wire speed transmits, the Tx FIFO should be
1603                 * large enough to accommodate two full transmit packets,
1604                 * rounded up to the next 1KB and expressed in KB.  Likewise,
1605                 * the Rx FIFO should be large enough to accommodate at least
1606                 * one full receive packet and is similarly rounded up and
1607                 * expressed in KB. */
1608                pba = rd32(E1000_PBA);
1609                /* upper 16 bits hold the Tx packet buffer allocation size in KB */
1610                tx_space = pba >> 16;
1611                /* lower 16 bits hold the Rx packet buffer allocation size in KB */
1612                pba &= 0xffff;
1613                /* the Tx FIFO also stores 16 bytes of descriptor information per
1614                 * packet, but don't count the Ethernet FCS because hardware appends it */
1615                min_tx_space = (adapter->max_frame_size +
1616                                sizeof(union e1000_adv_tx_desc) -
1617                                ETH_FCS_LEN) * 2;
1618                min_tx_space = ALIGN(min_tx_space, 1024);
1619                min_tx_space >>= 10;
1620                /* software strips receive CRC, so leave room for it */
1621                min_rx_space = adapter->max_frame_size;
1622                min_rx_space = ALIGN(min_rx_space, 1024);
1623                min_rx_space >>= 10;
1624
1625                /* If current Tx allocation is less than the min Tx FIFO size,
1626                 * and the min Tx FIFO size is less than the current Rx FIFO
1627                 * allocation, take space away from current Rx allocation */
1628                if (tx_space < min_tx_space &&
1629                    ((min_tx_space - tx_space) < pba)) {
1630                        pba = pba - (min_tx_space - tx_space);
1631
1632                        /* if short on rx space, rx wins and must trump tx
1633                         * adjustment */
1634                        if (pba < min_rx_space)
1635                                pba = min_rx_space;
1636                }
1637                wr32(E1000_PBA, pba);
1638        }
1639
1640        /* flow control settings */
1641        /* The high water mark must be low enough to fit one full frame
1642         * (or the size used for early receive) above it in the Rx FIFO.
1643         * Set it to the lower of:
1644         * - 90% of the Rx FIFO size, or
1645         * - the full Rx FIFO size minus one full frame */
1646        hwm = min(((pba << 10) * 9 / 10),
1647                        ((pba << 10) - 2 * adapter->max_frame_size));
1648
1649        fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1650        fc->low_water = fc->high_water - 16;
1651        fc->pause_time = 0xFFFF;
1652        fc->send_xon = 1;
1653        fc->current_mode = fc->requested_mode;
1654
1655        /* disable receive for all VFs and wait one second */
1656        if (adapter->vfs_allocated_count) {
1657                int i;
1658                for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1659                        adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1660
1661                /* ping all the active vfs to let them know we are going down */
1662                igb_ping_all_vfs(adapter);
1663
1664                /* disable transmits and receives */
1665                wr32(E1000_VFRE, 0);
1666                wr32(E1000_VFTE, 0);
1667        }
1668
1669        /* Allow time for pending master requests to run */
1670        hw->mac.ops.reset_hw(hw);
1671        wr32(E1000_WUC, 0);
1672
1673        if (hw->mac.ops.init_hw(hw))
1674                dev_err(&pdev->dev, "Hardware Error\n");
1675
1676        if (hw->mac.type == e1000_82580) {
1677                u32 reg = rd32(E1000_PCIEMISC);
1678                wr32(E1000_PCIEMISC,
1679                                reg & ~E1000_PCIEMISC_LX_DECISION);
1680        }
1681        if (!netif_running(adapter->netdev))
1682                igb_power_down_link(adapter);
1683
1684        igb_update_mng_vlan(adapter);
1685
1686        /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1687        wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1688
1689        igb_get_phy_info(hw);
1690}
1691
1692static const struct net_device_ops igb_netdev_ops = {
1693        .ndo_open               = igb_open,
1694        .ndo_stop               = igb_close,
1695        .ndo_start_xmit         = igb_xmit_frame_adv,
1696        .ndo_get_stats64        = igb_get_stats64,
1697        .ndo_set_rx_mode        = igb_set_rx_mode,
1698        .ndo_set_multicast_list = igb_set_rx_mode,
1699        .ndo_set_mac_address    = igb_set_mac,
1700        .ndo_change_mtu         = igb_change_mtu,
1701        .ndo_do_ioctl           = igb_ioctl,
1702        .ndo_tx_timeout         = igb_tx_timeout,
1703        .ndo_validate_addr      = eth_validate_addr,
1704        .ndo_vlan_rx_register   = igb_vlan_rx_register,
1705        .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1706        .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1707        .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1708        .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1709        .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1710        .ndo_get_vf_config      = igb_ndo_get_vf_config,
1711#ifdef CONFIG_NET_POLL_CONTROLLER
1712        .ndo_poll_controller    = igb_netpoll,
1713#endif
1714};
1715
1716/**
1717 * igb_probe - Device Initialization Routine
1718 * @pdev: PCI device information struct
1719 * @ent: entry in igb_pci_tbl
1720 *
1721 * Returns 0 on success, negative on failure
1722 *
1723 * igb_probe initializes an adapter identified by a pci_dev structure.
1724 * The OS initialization, configuring of the adapter private structure,
1725 * and a hardware reset occur.
1726 **/
1727static int __devinit igb_probe(struct pci_dev *pdev,
1728                               const struct pci_device_id *ent)
1729{
1730        struct net_device *netdev;
1731        struct igb_adapter *adapter;
1732        struct e1000_hw *hw;
1733        u16 eeprom_data = 0;
1734        s32 ret_val;
1735        static int global_quad_port_a; /* global quad port a indication */
1736        const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1737        unsigned long mmio_start, mmio_len;
1738        int err, pci_using_dac;
1739        u16 eeprom_apme_mask = IGB_EEPROM_APME;
1740        u8 part_str[E1000_PBANUM_LENGTH];
1741
1742        /* Catch broken hardware that put the wrong VF device ID in
1743         * the PCIe SR-IOV capability.
1744         */
1745        if (pdev->is_virtfn) {
1746                WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1747                     pci_name(pdev), pdev->vendor, pdev->device);
1748                return -EINVAL;
1749        }
1750
1751        err = pci_enable_device_mem(pdev);
1752        if (err)
1753                return err;
1754
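        /* Prefer a 64-bit DMA mask; fall back to a 32-bit mask (no DAC) if
         * the platform cannot provide 64-bit streaming and coherent mappings. */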
1755        pci_using_dac = 0;
1756        err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1757        if (!err) {
1758                err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1759                if (!err)
1760                        pci_using_dac = 1;
1761        } else {
1762                err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1763                if (err) {
1764                        err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1765                        if (err) {
1766                                dev_err(&pdev->dev, "No usable DMA "
1767                                        "configuration, aborting\n");
1768                                goto err_dma;
1769                        }
1770                }
1771        }
1772
1773        err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1774                                           IORESOURCE_MEM),
1775                                           igb_driver_name);
1776        if (err)
1777                goto err_pci_reg;
1778
1779        pci_enable_pcie_error_reporting(pdev);
1780
1781        pci_set_master(pdev);
1782        pci_save_state(pdev);
1783
1784        err = -ENOMEM;
1785        netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1786                                   IGB_ABS_MAX_TX_QUEUES);
1787        if (!netdev)
1788                goto err_alloc_etherdev;
1789
1790        SET_NETDEV_DEV(netdev, &pdev->dev);
1791
1792        pci_set_drvdata(pdev, netdev);
1793        adapter = netdev_priv(netdev);
1794        adapter->netdev = netdev;
1795        adapter->pdev = pdev;
1796        hw = &adapter->hw;
1797        hw->back = adapter;
1798        adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1799
1800        mmio_start = pci_resource_start(pdev, 0);
1801        mmio_len = pci_resource_len(pdev, 0);
1802
1803        err = -EIO;
1804        hw->hw_addr = ioremap(mmio_start, mmio_len);
1805        if (!hw->hw_addr)
1806                goto err_ioremap;
1807
1808        netdev->netdev_ops = &igb_netdev_ops;
1809        igb_set_ethtool_ops(netdev);
1810        netdev->watchdog_timeo = 5 * HZ;
1811
1812        strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1813
1814        netdev->mem_start = mmio_start;
1815        netdev->mem_end = mmio_start + mmio_len;
1816
1817        /* PCI config space info */
1818        hw->vendor_id = pdev->vendor;
1819        hw->device_id = pdev->device;
1820        hw->revision_id = pdev->revision;
1821        hw->subsystem_vendor_id = pdev->subsystem_vendor;
1822        hw->subsystem_device_id = pdev->subsystem_device;
1823
1824        /* Copy the default MAC, PHY and NVM function pointers */
1825        memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1826        memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1827        memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1828        /* Initialize skew-specific constants */
1829        err = ei->get_invariants(hw);
1830        if (err)
1831                goto err_sw_init;
1832
1833        /* setup the private structure */
1834        err = igb_sw_init(adapter);
1835        if (err)
1836                goto err_sw_init;
1837
1838        igb_get_bus_info_pcie(hw);
1839
1840        hw->phy.autoneg_wait_to_complete = false;
1841
1842        /* Copper options */
1843        if (hw->phy.media_type == e1000_media_type_copper) {
1844                hw->phy.mdix = AUTO_ALL_MODES;
1845                hw->phy.disable_polarity_correction = false;
1846                hw->phy.ms_type = e1000_ms_hw_default;
1847        }
1848
1849        if (igb_check_reset_block(hw))
1850                dev_info(&pdev->dev,
1851                        "PHY reset is blocked due to SOL/IDER session.\n");
1852
1853        netdev->features = NETIF_F_SG |
1854                           NETIF_F_IP_CSUM |
1855                           NETIF_F_HW_VLAN_TX |
1856                           NETIF_F_HW_VLAN_RX |
1857                           NETIF_F_HW_VLAN_FILTER;
1858
1859        netdev->features |= NETIF_F_IPV6_CSUM;
1860        netdev->features |= NETIF_F_TSO;
1861        netdev->features |= NETIF_F_TSO6;
1862        netdev->features |= NETIF_F_GRO;
1863
1864        netdev->vlan_features |= NETIF_F_TSO;
1865        netdev->vlan_features |= NETIF_F_TSO6;
1866        netdev->vlan_features |= NETIF_F_IP_CSUM;
1867        netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1868        netdev->vlan_features |= NETIF_F_SG;
1869
1870        if (pci_using_dac) {
1871                netdev->features |= NETIF_F_HIGHDMA;
1872                netdev->vlan_features |= NETIF_F_HIGHDMA;
1873        }
1874
1875        if (hw->mac.type >= e1000_82576)
1876                netdev->features |= NETIF_F_SCTP_CSUM;
1877
1878        adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1879
1880        /* before reading the NVM, reset the controller to put the device in a
1881         * known good starting state */
1882        hw->mac.ops.reset_hw(hw);
1883
1884        /* make sure the NVM is good */
1885        if (igb_validate_nvm_checksum(hw) < 0) {
1886                dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1887                err = -EIO;
1888                goto err_eeprom;
1889        }
1890
1891        /* copy the MAC address out of the NVM */
1892        if (hw->mac.ops.read_mac_addr(hw))
1893                dev_err(&pdev->dev, "NVM Read Error\n");
1894
1895        memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1896        memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1897
1898        if (!is_valid_ether_addr(netdev->perm_addr)) {
1899                dev_err(&pdev->dev, "Invalid MAC Address\n");
1900                err = -EIO;
1901                goto err_eeprom;
1902        }
1903
1904        setup_timer(&adapter->watchdog_timer, igb_watchdog,
1905                    (unsigned long) adapter);
1906        setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1907                    (unsigned long) adapter);
1908
1909        INIT_WORK(&adapter->reset_task, igb_reset_task);
1910        INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1911
1912        /* Initialize link properties that are user-changeable */
1913        adapter->fc_autoneg = true;
1914        hw->mac.autoneg = true;
1915        hw->phy.autoneg_advertised = 0x2f;
1916
1917        hw->fc.requested_mode = e1000_fc_default;
1918        hw->fc.current_mode = e1000_fc_default;
1919
1920        igb_validate_mdi_setting(hw);
1921
1922        /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1923         * enable the ACPI Magic Packet filter.
1924         */
1925
1926        if (hw->bus.func == 0)
1927                hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1928        else if (hw->mac.type == e1000_82580)
1929                hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1930                                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1931                                 &eeprom_data);
1932        else if (hw->bus.func == 1)
1933                hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1934
1935        if (eeprom_data & eeprom_apme_mask)
1936                adapter->eeprom_wol |= E1000_WUFC_MAG;
1937
1938        /* now that we have the eeprom settings, apply the special cases where
1939         * the eeprom may be wrong or the board simply won't support wake on
1940         * lan on a particular port */
1941        switch (pdev->device) {
1942        case E1000_DEV_ID_82575GB_QUAD_COPPER:
1943                adapter->eeprom_wol = 0;
1944                break;
1945        case E1000_DEV_ID_82575EB_FIBER_SERDES:
1946        case E1000_DEV_ID_82576_FIBER:
1947        case E1000_DEV_ID_82576_SERDES:
1948                /* Wake events are only supported on port A for dual-fiber
1949                 * adapters, regardless of the EEPROM setting */
1950                if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1951                        adapter->eeprom_wol = 0;
1952                break;
1953        case E1000_DEV_ID_82576_QUAD_COPPER:
1954        case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1955                /* if quad port adapter, disable WoL on all but port A */
1956                if (global_quad_port_a != 0)
1957                        adapter->eeprom_wol = 0;
1958                else
1959                        adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1960                /* Reset for multiple quad port adapters */
1961                if (++global_quad_port_a == 4)
1962                        global_quad_port_a = 0;
1963                break;
1964        }
1965
1966        /* initialize the wol settings based on the eeprom settings */
1967        adapter->wol = adapter->eeprom_wol;
1968        device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1969
1970        /* reset the hardware with the new settings */
1971        igb_reset(adapter);
1972
1973        /* let the f/w know that the h/w is now under the control of the
1974         * driver. */
1975        igb_get_hw_control(adapter);
1976
1977        strcpy(netdev->name, "eth%d");
1978        err = register_netdev(netdev);
1979        if (err)
1980                goto err_register;
1981
1982        /* carrier off reporting is important to ethtool even BEFORE open */
1983        netif_carrier_off(netdev);
1984
1985#ifdef CONFIG_IGB_DCA
1986        if (dca_add_requester(&pdev->dev) == 0) {
1987                adapter->flags |= IGB_FLAG_DCA_ENABLED;
1988                dev_info(&pdev->dev, "DCA enabled\n");
1989                igb_setup_dca(adapter);
1990        }
1991
1992#endif
1993        dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1994        /* print bus type/speed/width info */
1995        dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1996                 netdev->name,
1997                 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1998                  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
1999                                                            "unknown"),
2000                 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2001                  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2002                  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2003                   "unknown"),
2004                 netdev->dev_addr);
2005
2006        ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2007        if (ret_val)
2008                strcpy(part_str, "Unknown");
2009        dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2010        dev_info(&pdev->dev,
2011                "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2012                adapter->msix_entries ? "MSI-X" :
2013                (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2014                adapter->num_rx_queues, adapter->num_tx_queues);
2015
2016        return 0;
2017
2018err_register:
2019        igb_release_hw_control(adapter);
2020err_eeprom:
2021        if (!igb_check_reset_block(hw))
2022                igb_reset_phy(hw);
2023
2024        if (hw->flash_address)
2025                iounmap(hw->flash_address);
2026err_sw_init:
2027        igb_clear_interrupt_scheme(adapter);
2028        iounmap(hw->hw_addr);
2029err_ioremap:
2030        free_netdev(netdev);
2031err_alloc_etherdev:
2032        pci_release_selected_regions(pdev,
2033                                     pci_select_bars(pdev, IORESOURCE_MEM));
2034err_pci_reg:
2035err_dma:
2036        pci_disable_device(pdev);
2037        return err;
2038}
2039
2040/**
2041 * igb_remove - Device Removal Routine
2042 * @pdev: PCI device information struct
2043 *
2044 * igb_remove is called by the PCI subsystem to alert the driver
2045 * that it should release a PCI device.  This could be caused by a
2046 * Hot-Plug event, or because the driver is going to be removed from
2047 * memory.
2048 **/
2049static void __devexit igb_remove(struct pci_dev *pdev)
2050{
2051        struct net_device *netdev = pci_get_drvdata(pdev);
2052        struct igb_adapter *adapter = netdev_priv(netdev);
2053        struct e1000_hw *hw = &adapter->hw;
2054
2055        /*
2056         * The watchdog timer may be rescheduled, so explicitly
2057         * prevent it from being rescheduled while the device is being removed.
2058         */
2059        set_bit(__IGB_DOWN, &adapter->state);
2060        del_timer_sync(&adapter->watchdog_timer);
2061        del_timer_sync(&adapter->phy_info_timer);
2062
2063        cancel_work_sync(&adapter->reset_task);
2064        cancel_work_sync(&adapter->watchdog_task);
2065
2066#ifdef CONFIG_IGB_DCA
2067        if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2068                dev_info(&pdev->dev, "DCA disabled\n");
2069                dca_remove_requester(&pdev->dev);
2070                adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2071                wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2072        }
2073#endif
2074
2075        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2076         * would have already happened in close and is redundant. */
2077        igb_release_hw_control(adapter);
2078
2079        unregister_netdev(netdev);
2080
2081        igb_clear_interrupt_scheme(adapter);
2082
2083#ifdef CONFIG_PCI_IOV
2084        /* reclaim resources allocated to VFs */
2085        if (adapter->vf_data) {
2086                /* disable iov and allow time for transactions to clear */
2087                pci_disable_sriov(pdev);
2088                msleep(500);
2089
2090                kfree(adapter->vf_data);
2091                adapter->vf_data = NULL;
2092                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2093                msleep(100);
2094                dev_info(&pdev->dev, "IOV Disabled\n");
2095        }
2096#endif
2097
2098        iounmap(hw->hw_addr);
2099        if (hw->flash_address)
2100                iounmap(hw->flash_address);
2101        pci_release_selected_regions(pdev,
2102                                     pci_select_bars(pdev, IORESOURCE_MEM));
2103
2104        free_netdev(netdev);
2105
2106        pci_disable_pcie_error_reporting(pdev);
2107
2108        pci_disable_device(pdev);
2109}
2110
2111/**
2112 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2113 * @adapter: board private structure to initialize
2114 *
2115 * This function initializes the vf specific data storage and then attempts to
2116 * allocate the VFs.  The reason for this ordering is that it is much
2117 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2118 * the memory for the VFs.
2119 **/
2120static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2121{
2122#ifdef CONFIG_PCI_IOV
2123        struct pci_dev *pdev = adapter->pdev;
2124
2125        if (adapter->vfs_allocated_count) {
2126                adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2127                                           sizeof(struct vf_data_storage),
2128                                           GFP_KERNEL);
2129                /* if allocation failed then we do not support SR-IOV */
2130                if (!adapter->vf_data) {
2131                        adapter->vfs_allocated_count = 0;
2132                        dev_err(&pdev->dev, "Unable to allocate memory for VF "
2133                                "Data Storage\n");
2134                }
2135        }
2136
2137        if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2138                kfree(adapter->vf_data);
2139                adapter->vf_data = NULL;
2140#endif /* CONFIG_PCI_IOV */
2141                adapter->vfs_allocated_count = 0;
2142#ifdef CONFIG_PCI_IOV
2143        } else {
2144                unsigned char mac_addr[ETH_ALEN];
2145                int i;
2146                dev_info(&pdev->dev, "%d vfs allocated\n",
2147                         adapter->vfs_allocated_count);
2148                for (i = 0; i < adapter->vfs_allocated_count; i++) {
2149                        random_ether_addr(mac_addr);
2150                        igb_set_vf_mac(adapter, i, mac_addr);
2151                }
2152        }
2153#endif /* CONFIG_PCI_IOV */
2154}
2155
2156
2157/**
2158 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2159 * @adapter: board private structure to initialize
2160 *
2161 * igb_init_hw_timer initializes the function pointer and values for the hw
2162 * timer found in hardware.
2163 **/
2164static void igb_init_hw_timer(struct igb_adapter *adapter)
2165{
2166        struct e1000_hw *hw = &adapter->hw;
2167
2168        switch (hw->mac.type) {
2169        case e1000_i350:
2170        case e1000_82580:
2171                memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2172                adapter->cycles.read = igb_read_clock;
2173                adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2174                adapter->cycles.mult = 1;
2175                /*
2176                 * The 82580 timesync advances the system timer by 8ns every 8ns
2177                 * and the value cannot be shifted.  Instead we need to shift
2178                 * the registers to generate a 64bit timer value.  As a result
2179                 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2180                 * 24 in order to generate a larger value for synchronization.
2181                 */
2182                adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2183                /* disable system timer temporarily by setting bit 31 */
2184                wr32(E1000_TSAUXC, 0x80000000);
2185                wrfl();
2186
2187                /* Set registers so that rollover occurs soon to test this. */
2188                wr32(E1000_SYSTIMR, 0x00000000);
2189                wr32(E1000_SYSTIML, 0x80000000);
2190                wr32(E1000_SYSTIMH, 0x000000FF);
2191                wrfl();
2192
2193                /* enable system timer by clearing bit 31 */
2194                wr32(E1000_TSAUXC, 0x0);
2195                wrfl();
2196
2197                timecounter_init(&adapter->clock,
2198                                 &adapter->cycles,
2199                                 ktime_to_ns(ktime_get_real()));
2200                /*
2201                 * Synchronize our NIC clock against system wall clock. NIC
2202                 * time stamp reading requires ~3us per sample, each sample
2203                 * was pretty stable even under load => only require 10
2204                 * samples for each offset comparison.
2205                 */
2206                memset(&adapter->compare, 0, sizeof(adapter->compare));
2207                adapter->compare.source = &adapter->clock;
2208                adapter->compare.target = ktime_get_real;
2209                adapter->compare.num_samples = 10;
2210                timecompare_update(&adapter->compare, 0);
2211                break;
2212        case e1000_82576:
2213                /*
2214                 * Initialize hardware timer: we keep it running just in case
2215                 * that some program needs it later on.
2216                 */
2217                memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2218                adapter->cycles.read = igb_read_clock;
2219                adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2220                adapter->cycles.mult = 1;
2221                /*
2222                 * Scale the NIC clock cycle by a large factor so that
2223                 * relatively small clock corrections can be added or
2224                 * subtracted at each clock tick. The drawbacks of a large
2225                 * factor are a) that the clock register overflows more quickly
2226                 * (not such a big deal) and b) that the increment per tick has
2227                 * to fit into 24 bits.  As a result we need to use a shift of
2228                 * 19 so we can fit a value of 16 into the TIMINCA register.
2229                 */
2230                adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2231                wr32(E1000_TIMINCA,
2232                                (1 << E1000_TIMINCA_16NS_SHIFT) |
2233                                (16 << IGB_82576_TSYNC_SHIFT));
2234
2235                /* Set registers so that rollover occurs soon to test this. */
2236                wr32(E1000_SYSTIML, 0x00000000);
2237                wr32(E1000_SYSTIMH, 0xFF800000);
2238                wrfl();
2239
2240                timecounter_init(&adapter->clock,
2241                                 &adapter->cycles,
2242                                 ktime_to_ns(ktime_get_real()));
2243                /*
2244                 * Synchronize our NIC clock against system wall clock. NIC
2245                 * time stamp reading requires ~3us per sample, each sample
2246                 * was pretty stable even under load => only require 10
2247                 * samples for each offset comparison.
2248                 */
2249                memset(&adapter->compare, 0, sizeof(adapter->compare));
2250                adapter->compare.source = &adapter->clock;
2251                adapter->compare.target = ktime_get_real;
2252                adapter->compare.num_samples = 10;
2253                timecompare_update(&adapter->compare, 0);
2254                break;
2255        case e1000_82575:
2256                /* 82575 does not support timesync */
2257        default:
2258                break;
2259        }
2260
2261}
2262
2263/**
2264 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2265 * @adapter: board private structure to initialize
2266 *
2267 * igb_sw_init initializes the Adapter private data structure.
2268 * Fields are initialized based on PCI device information and
2269 * OS network device settings (MTU size).
2270 **/
2271static int __devinit igb_sw_init(struct igb_adapter *adapter)
2272{
2273        struct e1000_hw *hw = &adapter->hw;
2274        struct net_device *netdev = adapter->netdev;
2275        struct pci_dev *pdev = adapter->pdev;
2276
2277        pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2278
2279        adapter->tx_ring_count = IGB_DEFAULT_TXD;
2280        adapter->rx_ring_count = IGB_DEFAULT_RXD;
2281        adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2282        adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2283
2284        adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2285        adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2286
2287        spin_lock_init(&adapter->stats64_lock);
2288#ifdef CONFIG_PCI_IOV
2289        if (hw->mac.type == e1000_82576)
2290                adapter->vfs_allocated_count = (max_vfs > 7) ? 7 : max_vfs;
2291
2292#endif /* CONFIG_PCI_IOV */
2293        adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2294
2295        /*
2296         * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2297         * then we should combine the queues into a queue pair in order to
2298         * conserve interrupts due to limited supply
2299         */
2300        if ((adapter->rss_queues > 4) ||
2301            ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2302                adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2303
2304        /* This call may decrease the number of queues */
2305        if (igb_init_interrupt_scheme(adapter)) {
2306                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2307                return -ENOMEM;
2308        }
2309
2310        igb_init_hw_timer(adapter);
2311        igb_probe_vfs(adapter);
2312
2313        /* Explicitly disable IRQ since the NIC can be in any state. */
2314        igb_irq_disable(adapter);
2315
2316        set_bit(__IGB_DOWN, &adapter->state);
2317        return 0;
2318}
2319
2320/**
2321 * igb_open - Called when a network interface is made active
2322 * @netdev: network interface device structure
2323 *
2324 * Returns 0 on success, negative value on failure
2325 *
2326 * The open entry point is called when a network interface is made
2327 * active by the system (IFF_UP).  At this point all resources needed
2328 * for transmit and receive operations are allocated, the interrupt
2329 * handler is registered with the OS, the watchdog timer is started,
2330 * and the stack is notified that the interface is ready.
2331 **/
2332static int igb_open(struct net_device *netdev)
2333{
2334        struct igb_adapter *adapter = netdev_priv(netdev);
2335        struct e1000_hw *hw = &adapter->hw;
2336        int err;
2337        int i;
2338
2339        /* disallow open during test */
2340        if (test_bit(__IGB_TESTING, &adapter->state))
2341                return -EBUSY;
2342
2343        netif_carrier_off(netdev);
2344
2345        /* allocate transmit descriptors */
2346        err = igb_setup_all_tx_resources(adapter);
2347        if (err)
2348                goto err_setup_tx;
2349
2350        /* allocate receive descriptors */
2351        err = igb_setup_all_rx_resources(adapter);
2352        if (err)
2353                goto err_setup_rx;
2354
2355        igb_power_up_link(adapter);
2356
2357        /* before we allocate an interrupt, we must be ready to handle it.
2358         * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2359         * as soon as we call pci_request_irq, so we have to setup our
2360         * clean_rx handler before we do so.  */
2361        igb_configure(adapter);
2362
2363        err = igb_request_irq(adapter);
2364        if (err)
2365                goto err_req_irq;
2366
2367        /* From here on the code is the same as igb_up() */
2368        clear_bit(__IGB_DOWN, &adapter->state);
2369
2370        for (i = 0; i < adapter->num_q_vectors; i++) {
2371                struct igb_q_vector *q_vector = adapter->q_vector[i];
2372                napi_enable(&q_vector->napi);
2373        }
2374
2375        /* Clear any pending interrupts. */
2376        rd32(E1000_ICR);
2377
2378        igb_irq_enable(adapter);
2379
2380        /* notify VFs that reset has been completed */
2381        if (adapter->vfs_allocated_count) {
2382                u32 reg_data = rd32(E1000_CTRL_EXT);
2383                reg_data |= E1000_CTRL_EXT_PFRSTD;
2384                wr32(E1000_CTRL_EXT, reg_data);
2385        }
2386
2387        netif_tx_start_all_queues(netdev);
2388
2389        /* start the watchdog. */
2390        hw->mac.get_link_status = 1;
2391        schedule_work(&adapter->watchdog_task);
2392
2393        return 0;
2394
2395err_req_irq:
2396        igb_release_hw_control(adapter);
2397        igb_power_down_link(adapter);
2398        igb_free_all_rx_resources(adapter);
2399err_setup_rx:
2400        igb_free_all_tx_resources(adapter);
2401err_setup_tx:
2402        igb_reset(adapter);
2403
2404        return err;
2405}
2406
2407/**
2408 * igb_close - Disables a network interface
2409 * @netdev: network interface device structure
2410 *
2411 * Returns 0, this is not allowed to fail
2412 *
2413 * The close entry point is called when an interface is de-activated
2414 * by the OS.  The hardware is still under the driver's control, but
2415 * needs to be disabled.  A global MAC reset is issued to stop the
2416 * hardware, and all transmit and receive resources are freed.
2417 **/
2418static int igb_close(struct net_device *netdev)
2419{
2420        struct igb_adapter *adapter = netdev_priv(netdev);
2421
2422        WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2423        igb_down(adapter);
2424
2425        igb_free_irq(adapter);
2426
2427        igb_free_all_tx_resources(adapter);
2428        igb_free_all_rx_resources(adapter);
2429
2430        return 0;
2431}
2432
2433/**
2434 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2435 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2436 *
2437 * Return 0 on success, negative on failure
2438 **/
2439int igb_setup_tx_resources(struct igb_ring *tx_ring)
2440{
2441        struct device *dev = tx_ring->dev;
2442        int size;
2443
2444        size = sizeof(struct igb_buffer) * tx_ring->count;
2445        tx_ring->buffer_info = vzalloc(size);
2446        if (!tx_ring->buffer_info)
2447                goto err;
2448
2449        /* round up to nearest 4K */
2450        tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2451        tx_ring->size = ALIGN(tx_ring->size, 4096);
2452
2453        tx_ring->desc = dma_alloc_coherent(dev,
2454                                           tx_ring->size,
2455                                           &tx_ring->dma,
2456                                           GFP_KERNEL);
2457
2458        if (!tx_ring->desc)
2459                goto err;
2460
2461        tx_ring->next_to_use = 0;
2462        tx_ring->next_to_clean = 0;
2463        return 0;
2464
2465err:
2466        vfree(tx_ring->buffer_info);
2467        dev_err(dev,
2468                "Unable to allocate memory for the transmit descriptor ring\n");
2469        return -ENOMEM;
2470}
2471
2472/**
2473 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2474 *                                (Descriptors) for all queues
2475 * @adapter: board private structure
2476 *
2477 * Return 0 on success, negative on failure
2478 **/
2479static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2480{
2481        struct pci_dev *pdev = adapter->pdev;
2482        int i, err = 0;
2483
2484        for (i = 0; i < adapter->num_tx_queues; i++) {
2485                err = igb_setup_tx_resources(adapter->tx_ring[i]);
2486                if (err) {
2487                        dev_err(&pdev->dev,
2488                                "Allocation for Tx Queue %u failed\n", i);
2489                        for (i--; i >= 0; i--)
2490                                igb_free_tx_resources(adapter->tx_ring[i]);
2491                        break;
2492                }
2493        }
2494
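        /* Map every possible stack queue index onto an allocated Tx ring,
         * wrapping round-robin when fewer rings were actually set up. */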
2495        for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2496                int r_idx = i % adapter->num_tx_queues;
2497                adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2498        }
2499        return err;
2500}
2501
2502/**
2503 * igb_setup_tctl - configure the transmit control registers
2504 * @adapter: Board private structure
2505 **/
2506void igb_setup_tctl(struct igb_adapter *adapter)
2507{
2508        struct e1000_hw *hw = &adapter->hw;
2509        u32 tctl;
2510
2511        /* disable queue 0 which is enabled by default on 82575 and 82576 */
2512        wr32(E1000_TXDCTL(0), 0);
2513
2514        /* Program the Transmit Control Register */
2515        tctl = rd32(E1000_TCTL);
2516        tctl &= ~E1000_TCTL_CT;
2517        tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2518                (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2519
2520        igb_config_collision_dist(hw);
2521
2522        /* Enable transmits */
2523        tctl |= E1000_TCTL_EN;
2524
2525        wr32(E1000_TCTL, tctl);
2526}
2527
2528/**
2529 * igb_configure_tx_ring - Configure transmit ring after Reset
2530 * @adapter: board private structure
2531 * @ring: tx ring to configure
2532 *
2533 * Configure a transmit ring after a reset.
2534 **/
2535void igb_configure_tx_ring(struct igb_adapter *adapter,
2536                           struct igb_ring *ring)
2537{
2538        struct e1000_hw *hw = &adapter->hw;
2539        u32 txdctl;
2540        u64 tdba = ring->dma;
2541        int reg_idx = ring->reg_idx;
2542
2543        /* disable the queue */
2544        txdctl = rd32(E1000_TXDCTL(reg_idx));
2545        wr32(E1000_TXDCTL(reg_idx),
2546                        txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2547        wrfl();
2548        mdelay(10);
2549
2550        wr32(E1000_TDLEN(reg_idx),
2551                        ring->count * sizeof(union e1000_adv_tx_desc));
2552        wr32(E1000_TDBAL(reg_idx),
2553                        tdba & 0x00000000ffffffffULL);
2554        wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2555
2556        ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2557        ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2558        writel(0, ring->head);
2559        writel(0, ring->tail);
2560
2561        txdctl |= IGB_TX_PTHRESH;
2562        txdctl |= IGB_TX_HTHRESH << 8;
2563        txdctl |= IGB_TX_WTHRESH << 16;
2564
2565        txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2566        wr32(E1000_TXDCTL(reg_idx), txdctl);
2567}
2568
2569/**
2570 * igb_configure_tx - Configure transmit Unit after Reset
2571 * @adapter: board private structure
2572 *
2573 * Configure the Tx unit of the MAC after a reset.
2574 **/
2575static void igb_configure_tx(struct igb_adapter *adapter)
2576{
2577        int i;
2578
2579        for (i = 0; i < adapter->num_tx_queues; i++)
2580                igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2581}
2582
2583/**
2584 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2585 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2586 *
2587 * Returns 0 on success, negative on failure
2588 **/
2589int igb_setup_rx_resources(struct igb_ring *rx_ring)
2590{
2591        struct device *dev = rx_ring->dev;
2592        int size, desc_len;
2593
2594        size = sizeof(struct igb_buffer) * rx_ring->count;
2595        rx_ring->buffer_info = vzalloc(size);
2596        if (!rx_ring->buffer_info)
2597                goto err;
2598
2599        desc_len = sizeof(union e1000_adv_rx_desc);
2600
2601        /* Round up to nearest 4K */
2602        rx_ring->size = rx_ring->count * desc_len;
2603        rx_ring->size = ALIGN(rx_ring->size, 4096);
2604
2605        rx_ring->desc = dma_alloc_coherent(dev,
2606                                           rx_ring->size,
2607                                           &rx_ring->dma,
2608                                           GFP_KERNEL);
2609
2610        if (!rx_ring->desc)
2611                goto err;
2612
2613        rx_ring->next_to_clean = 0;
2614        rx_ring->next_to_use = 0;
2615
2616        return 0;
2617
2618err:
2619        vfree(rx_ring->buffer_info);
2620        rx_ring->buffer_info = NULL;
2621        dev_err(dev, "Unable to allocate memory for the receive descriptor"
2622                " ring\n");
2623        return -ENOMEM;
2624}
2625
2626/**
2627 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2628 *                                (Descriptors) for all queues
2629 * @adapter: board private structure
2630 *
2631 * Return 0 on success, negative on failure
2632 **/
2633static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2634{
2635        struct pci_dev *pdev = adapter->pdev;
2636        int i, err = 0;
2637
2638        for (i = 0; i < adapter->num_rx_queues; i++) {
2639                err = igb_setup_rx_resources(adapter->rx_ring[i]);
2640                if (err) {
2641                        dev_err(&pdev->dev,
2642                                "Allocation for Rx Queue %u failed\n", i);
2643                        for (i--; i >= 0; i--)
2644                                igb_free_rx_resources(adapter->rx_ring[i]);
2645                        break;
2646                }
2647        }
2648
2649        return err;
2650}
2651
2652/**
2653 * igb_setup_mrqc - configure the multiple receive queue control registers
2654 * @adapter: Board private structure
2655 **/
2656static void igb_setup_mrqc(struct igb_adapter *adapter)
2657{
2658        struct e1000_hw *hw = &adapter->hw;
2659        u32 mrqc, rxcsum;
2660        u32 j, num_rx_queues, shift = 0, shift2 = 0;
2661        union e1000_reta {
2662                u32 dword;
2663                u8  bytes[4];
2664        } reta;
2665        static const u8 rsshash[40] = {
2666                0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2667                0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2668                0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2669                0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2670
2671        /* Fill out hash function seeds */
2672        for (j = 0; j < 10; j++) {
2673                u32 rsskey = rsshash[(j * 4)];
2674                rsskey |= rsshash[(j * 4) + 1] << 8;
2675                rsskey |= rsshash[(j * 4) + 2] << 16;
2676                rsskey |= rsshash[(j * 4) + 3] << 24;
2677                array_wr32(E1000_RSSRK(0), j, rsskey);
2678        }
2679
2680        num_rx_queues = adapter->rss_queues;
2681
2682        if (adapter->vfs_allocated_count) {
2683                /* 82575 and 82576 support 2 RSS queues for VMDq */
2684                switch (hw->mac.type) {
2685                case e1000_i350:
2686                case e1000_82580:
2687                        num_rx_queues = 1;
2688                        shift = 0;
2689                        break;
2690                case e1000_82576:
2691                        shift = 3;
2692                        num_rx_queues = 2;
2693                        break;
2694                case e1000_82575:
2695                        shift = 2;
2696                        shift2 = 6;
2697                default:
2698                        break;
2699                }
2700        } else {
2701                if (hw->mac.type == e1000_82575)
2702                        shift = 6;
2703        }
2704
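        /*
         * Program the 128-entry RSS redirection table: four byte-wide queue
         * entries are packed into each 32-bit RETA register write, shifted
         * as required by the MAC type / VMDq pool layout selected above.
         */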
2705        for (j = 0; j < (32 * 4); j++) {
2706                reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2707                if (shift2)
2708                        reta.bytes[j & 3] |= num_rx_queues << shift2;
2709                if ((j & 3) == 3)
2710                        wr32(E1000_RETA(j >> 2), reta.dword);
2711        }
2712
2713        /*
2714         * Disable raw packet checksumming so that RSS hash is placed in
2715         * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2716         * offloads as they are enabled by default
2717         */
2718        rxcsum = rd32(E1000_RXCSUM);
2719        rxcsum |= E1000_RXCSUM_PCSD;
2720
2721        if (adapter->hw.mac.type >= e1000_82576)
2722                /* Enable Receive Checksum Offload for SCTP */
2723                rxcsum |= E1000_RXCSUM_CRCOFL;
2724
2725        /* Don't need to set TUOFL or IPOFL, they default to 1 */
2726        wr32(E1000_RXCSUM, rxcsum);
2727
2728        /* If VMDq is enabled then we set the appropriate mode for that, else
2729         * we default to RSS so that an RSS hash is calculated per packet even
2730         * if we are only using one queue */
2731        if (adapter->vfs_allocated_count) {
2732                if (hw->mac.type > e1000_82575) {
2733                        /* Set the default pool for the PF's first queue */
2734                        u32 vtctl = rd32(E1000_VT_CTL);
2735                        vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2736                                   E1000_VT_CTL_DISABLE_DEF_POOL);
2737                        vtctl |= adapter->vfs_allocated_count <<
2738                                E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2739                        wr32(E1000_VT_CTL, vtctl);
2740                }
2741                if (adapter->rss_queues > 1)
2742                        mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2743                else
2744                        mrqc = E1000_MRQC_ENABLE_VMDQ;
2745        } else {
2746                mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2747        }
2748        igb_vmm_control(adapter);
2749
2750        /*
2751         * Generate RSS hash based on TCP port numbers and/or
2752         * IPv4/v6 src and dst addresses since UDP cannot be
2753         * hashed reliably due to IP fragmentation
2754         */
2755        mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2756                E1000_MRQC_RSS_FIELD_IPV4_TCP |
2757                E1000_MRQC_RSS_FIELD_IPV6 |
2758                E1000_MRQC_RSS_FIELD_IPV6_TCP |
2759                E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2760
2761        wr32(E1000_MRQC, mrqc);
2762}
2763
2764/**
2765 * igb_setup_rctl - configure the receive control registers
2766 * @adapter: Board private structure
2767 **/
2768void igb_setup_rctl(struct igb_adapter *adapter)
2769{
2770        struct e1000_hw *hw = &adapter->hw;
2771        u32 rctl;
2772
2773        rctl = rd32(E1000_RCTL);
2774
2775        rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2776        rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2777
2778        rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2779                (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2780
2781        /*
2782         * enable stripping of CRC. It's unlikely this will break BMC
2783         * redirection as it did with e1000. Newer features require
2784         * that the HW strips the CRC.
2785         */
2786        rctl |= E1000_RCTL_SECRC;
2787
2788        /* disable store bad packets and clear size bits. */
2789        rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2790
2791        /* enable LPE to prevent packets larger than max_frame_size */
2792        rctl |= E1000_RCTL_LPE;
2793
2794        /* disable queue 0 to prevent tail write w/o re-config */
2795        wr32(E1000_RXDCTL(0), 0);
2796
2797        /* Attention!!!  For SR-IOV PF driver operations you must enable
2798         * queue drop for all VF and PF queues to prevent head-of-line blocking
2799         * if an untrusted VF does not provide descriptors to hardware.
2800         */
2801        if (adapter->vfs_allocated_count) {
2802                /* set all queue drop enable bits */
2803                wr32(E1000_QDE, ALL_QUEUES);
2804        }
2805
2806        wr32(E1000_RCTL, rctl);
2807}
2808
2809static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2810                                   int vfn)
2811{
2812        struct e1000_hw *hw = &adapter->hw;
2813        u32 vmolr;
2814
2815        /* if this is a VF and it has VLANs enabled, increase the
2816         * size to make room for the vlan tag */
2817        if (vfn < adapter->vfs_allocated_count &&
2818            adapter->vf_data[vfn].vlans_enabled)
2819                size += VLAN_TAG_SIZE;
2820
2821        vmolr = rd32(E1000_VMOLR(vfn));
2822        vmolr &= ~E1000_VMOLR_RLPML_MASK;
2823        vmolr |= size | E1000_VMOLR_LPE;
2824        wr32(E1000_VMOLR(vfn), vmolr);
2825
2826        return 0;
2827}
2828
2829/**
2830 * igb_rlpml_set - set maximum receive packet size
2831 * @adapter: board private structure
2832 *
2833 * Configure maximum receivable packet size.
2834 **/
2835static void igb_rlpml_set(struct igb_adapter *adapter)
2836{
2837        u32 max_frame_size = adapter->max_frame_size;
2838        struct e1000_hw *hw = &adapter->hw;
2839        u16 pf_id = adapter->vfs_allocated_count;
2840
2841        if (adapter->vlgrp)
2842                max_frame_size += VLAN_TAG_SIZE;
2843
2844        /* if vfs are enabled we set RLPML to the largest possible request
2845         * size and set the VMOLR RLPML to the size we need */
2846        if (pf_id) {
2847                igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2848                max_frame_size = MAX_JUMBO_FRAME_SIZE;
2849        }
2850
2851        wr32(E1000_RLPML, max_frame_size);
2852}
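
/*
 * Illustrative sketch, not part of the upstream driver: the largest on-wire
 * frame the RLPML value above must cover for a given MTU.  It mirrors the
 * max_frame calculation used in igb_change_mtu() (MTU + Ethernet header +
 * FCS) plus the optional VLAN tag accounted for above when a vlan group is
 * registered.  The helper name is made up for illustration only.
 */
static inline u32 igb_example_rlpml_for_mtu(int mtu, bool vlan_in_use)
{
        u32 frame = mtu + ETH_HLEN + ETH_FCS_LEN;       /* L2 header + CRC */

        if (vlan_in_use)
                frame += VLAN_TAG_SIZE;                 /* 802.1Q tag */

        return frame;
}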
2853
2854static inline void igb_set_vmolr(struct igb_adapter *adapter,
2855                                 int vfn, bool aupe)
2856{
2857        struct e1000_hw *hw = &adapter->hw;
2858        u32 vmolr;
2859
2860        /*
2861         * This register exists only on 82576 and newer, so if the hardware is
2862         * older there is nothing to do and we exit
2863         */
2864        if (hw->mac.type < e1000_82576)
2865                return;
2866
2867        vmolr = rd32(E1000_VMOLR(vfn));
2868        vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2869        if (aupe)
2870                vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2871        else
2872                vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2873
2874        /* clear all bits that might not be set */
2875        vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2876
2877        if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2878                vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2879        /*
2880         * for VMDq only allow the VFs and pool 0 to accept broadcast and
2881         * multicast packets
2882         */
2883        if (vfn <= adapter->vfs_allocated_count)
2884                vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2885
2886        wr32(E1000_VMOLR(vfn), vmolr);
2887}
2888
2889/**
2890 * igb_configure_rx_ring - Configure a receive ring after Reset
2891 * @adapter: board private structure
2892 * @ring: receive ring to be configured
2893 *
2894 * Configure the Rx unit of the MAC after a reset.
2895 **/
2896void igb_configure_rx_ring(struct igb_adapter *adapter,
2897                           struct igb_ring *ring)
2898{
2899        struct e1000_hw *hw = &adapter->hw;
2900        u64 rdba = ring->dma;
2901        int reg_idx = ring->reg_idx;
2902        u32 srrctl, rxdctl;
2903
2904        /* disable the queue */
2905        rxdctl = rd32(E1000_RXDCTL(reg_idx));
2906        wr32(E1000_RXDCTL(reg_idx),
2907                        rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2908
2909        /* Set DMA base address registers */
2910        wr32(E1000_RDBAL(reg_idx),
2911             rdba & 0x00000000ffffffffULL);
2912        wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2913        wr32(E1000_RDLEN(reg_idx),
2914                       ring->count * sizeof(union e1000_adv_rx_desc));
2915
2916        /* initialize head and tail */
2917        ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2918        ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2919        writel(0, ring->head);
2920        writel(0, ring->tail);
2921
2922        /* set descriptor configuration */
2923        if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2924                srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2925                         E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2926#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2927                srrctl |= IGB_RXBUFFER_16384 >>
2928                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2929#else
2930                srrctl |= (PAGE_SIZE / 2) >>
2931                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2932#endif
2933                srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2934        } else {
2935                srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2936                         E1000_SRRCTL_BSIZEPKT_SHIFT;
2937                srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2938        }
2939        if (hw->mac.type == e1000_82580)
2940                srrctl |= E1000_SRRCTL_TIMESTAMP;
2941        /* Only set Drop Enable if we are supporting multiple queues */
2942        if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2943                srrctl |= E1000_SRRCTL_DROP_EN;
2944
2945        wr32(E1000_SRRCTL(reg_idx), srrctl);
2946
2947        /* set filtering for VMDQ pools */
2948        igb_set_vmolr(adapter, reg_idx & 0x7, true);
2949
2950        /* enable receive descriptor fetching */
2951        rxdctl = rd32(E1000_RXDCTL(reg_idx));
2952        rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2953        rxdctl &= 0xFFF00000;
2954        rxdctl |= IGB_RX_PTHRESH;
2955        rxdctl |= IGB_RX_HTHRESH << 8;
2956        rxdctl |= IGB_RX_WTHRESH << 16;
2957        wr32(E1000_RXDCTL(reg_idx), rxdctl);
2958}
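
/*
 * Illustrative sketch, not part of the upstream driver: the value the
 * one-buffer path above programs into the SRRCTL packet buffer size field.
 * It assumes E1000_SRRCTL_BSIZEPKT_SHIFT is 10, i.e. the field expresses the
 * buffer length in 1 KB units after rounding up.  The helper name is made up
 * for illustration only.
 */
static inline u32 igb_example_srrctl_bsizepkt(u32 rx_buffer_len)
{
        /* round the buffer length up to the next 1 KB boundary ... */
        u32 aligned = ALIGN(rx_buffer_len, 1024);

        /* ... and express it in 1 KB units for the BSIZEPKT field */
        return aligned >> 10;
}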
2959
2960/**
2961 * igb_configure_rx - Configure receive Unit after Reset
2962 * @adapter: board private structure
2963 *
2964 * Configure the Rx unit of the MAC after a reset.
2965 **/
2966static void igb_configure_rx(struct igb_adapter *adapter)
2967{
2968        int i;
2969
2970        /* set UTA to appropriate mode */
2971        igb_set_uta(adapter);
2972
2973        /* set the correct pool for the PF default MAC address in entry 0 */
2974        igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2975                         adapter->vfs_allocated_count);
2976
2977        /* Setup the HW Rx Head and Tail Descriptor Pointers and
2978         * the Base and Length of the Rx Descriptor Ring */
2979        for (i = 0; i < adapter->num_rx_queues; i++)
2980                igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2981}
2982
2983/**
2984 * igb_free_tx_resources - Free Tx Resources per Queue
2985 * @tx_ring: Tx descriptor ring for a specific queue
2986 *
2987 * Free all transmit software resources
2988 **/
2989void igb_free_tx_resources(struct igb_ring *tx_ring)
2990{
2991        igb_clean_tx_ring(tx_ring);
2992
2993        vfree(tx_ring->buffer_info);
2994        tx_ring->buffer_info = NULL;
2995
2996        /* if not set, then don't free */
2997        if (!tx_ring->desc)
2998                return;
2999
3000        dma_free_coherent(tx_ring->dev, tx_ring->size,
3001                          tx_ring->desc, tx_ring->dma);
3002
3003        tx_ring->desc = NULL;
3004}
3005
3006/**
3007 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3008 * @adapter: board private structure
3009 *
3010 * Free all transmit software resources
3011 **/
3012static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3013{
3014        int i;
3015
3016        for (i = 0; i < adapter->num_tx_queues; i++)
3017                igb_free_tx_resources(adapter->tx_ring[i]);
3018}
3019
3020void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3021                                    struct igb_buffer *buffer_info)
3022{
3023        if (buffer_info->dma) {
3024                if (buffer_info->mapped_as_page)
3025                        dma_unmap_page(tx_ring->dev,
3026                                        buffer_info->dma,
3027                                        buffer_info->length,
3028                                        DMA_TO_DEVICE);
3029                else
3030                        dma_unmap_single(tx_ring->dev,
3031                                        buffer_info->dma,
3032                                        buffer_info->length,
3033                                        DMA_TO_DEVICE);
3034                buffer_info->dma = 0;
3035        }
3036        if (buffer_info->skb) {
3037                dev_kfree_skb_any(buffer_info->skb);
3038                buffer_info->skb = NULL;
3039        }
3040        buffer_info->time_stamp = 0;
3041        buffer_info->length = 0;
3042        buffer_info->next_to_watch = 0;
3043        buffer_info->mapped_as_page = false;
3044}
3045
3046/**
3047 * igb_clean_tx_ring - Free Tx Buffers
3048 * @tx_ring: ring to be cleaned
3049 **/
3050static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3051{
3052        struct igb_buffer *buffer_info;
3053        unsigned long size;
3054        unsigned int i;
3055
3056        if (!tx_ring->buffer_info)
3057                return;
3058        /* Free all the Tx ring sk_buffs */
3059
3060        for (i = 0; i < tx_ring->count; i++) {
3061                buffer_info = &tx_ring->buffer_info[i];
3062                igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3063        }
3064
3065        size = sizeof(struct igb_buffer) * tx_ring->count;
3066        memset(tx_ring->buffer_info, 0, size);
3067
3068        /* Zero out the descriptor ring */
3069        memset(tx_ring->desc, 0, tx_ring->size);
3070
3071        tx_ring->next_to_use = 0;
3072        tx_ring->next_to_clean = 0;
3073}
3074
3075/**
3076 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3077 * @adapter: board private structure
3078 **/
3079static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3080{
3081        int i;
3082
3083        for (i = 0; i < adapter->num_tx_queues; i++)
3084                igb_clean_tx_ring(adapter->tx_ring[i]);
3085}
3086
3087/**
3088 * igb_free_rx_resources - Free Rx Resources
3089 * @rx_ring: ring to clean the resources from
3090 *
3091 * Free all receive software resources
3092 **/
3093void igb_free_rx_resources(struct igb_ring *rx_ring)
3094{
3095        igb_clean_rx_ring(rx_ring);
3096
3097        vfree(rx_ring->buffer_info);
3098        rx_ring->buffer_info = NULL;
3099
3100        /* if not set, then don't free */
3101        if (!rx_ring->desc)
3102                return;
3103
3104        dma_free_coherent(rx_ring->dev, rx_ring->size,
3105                          rx_ring->desc, rx_ring->dma);
3106
3107        rx_ring->desc = NULL;
3108}
3109
3110/**
3111 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3112 * @adapter: board private structure
3113 *
3114 * Free all receive software resources
3115 **/
3116static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3117{
3118        int i;
3119
3120        for (i = 0; i < adapter->num_rx_queues; i++)
3121                igb_free_rx_resources(adapter->rx_ring[i]);
3122}
3123
3124/**
3125 * igb_clean_rx_ring - Free Rx Buffers per Queue
3126 * @rx_ring: ring to free buffers from
3127 **/
3128static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3129{
3130        struct igb_buffer *buffer_info;
3131        unsigned long size;
3132        unsigned int i;
3133
3134        if (!rx_ring->buffer_info)
3135                return;
3136
3137        /* Free all the Rx ring sk_buffs */
3138        for (i = 0; i < rx_ring->count; i++) {
3139                buffer_info = &rx_ring->buffer_info[i];
3140                if (buffer_info->dma) {
3141                        dma_unmap_single(rx_ring->dev,
3142                                         buffer_info->dma,
3143                                         rx_ring->rx_buffer_len,
3144                                         DMA_FROM_DEVICE);
3145                        buffer_info->dma = 0;
3146                }
3147
3148                if (buffer_info->skb) {
3149                        dev_kfree_skb(buffer_info->skb);
3150                        buffer_info->skb = NULL;
3151                }
3152                if (buffer_info->page_dma) {
3153                        dma_unmap_page(rx_ring->dev,
3154                                       buffer_info->page_dma,
3155                                       PAGE_SIZE / 2,
3156                                       DMA_FROM_DEVICE);
3157                        buffer_info->page_dma = 0;
3158                }
3159                if (buffer_info->page) {
3160                        put_page(buffer_info->page);
3161                        buffer_info->page = NULL;
3162                        buffer_info->page_offset = 0;
3163                }
3164        }
3165
3166        size = sizeof(struct igb_buffer) * rx_ring->count;
3167        memset(rx_ring->buffer_info, 0, size);
3168
3169        /* Zero out the descriptor ring */
3170        memset(rx_ring->desc, 0, rx_ring->size);
3171
3172        rx_ring->next_to_clean = 0;
3173        rx_ring->next_to_use = 0;
3174}
3175
3176/**
3177 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3178 * @adapter: board private structure
3179 **/
3180static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3181{
3182        int i;
3183
3184        for (i = 0; i < adapter->num_rx_queues; i++)
3185                igb_clean_rx_ring(adapter->rx_ring[i]);
3186}
3187
3188/**
3189 * igb_set_mac - Change the Ethernet Address of the NIC
3190 * @netdev: network interface device structure
3191 * @p: pointer to an address structure
3192 *
3193 * Returns 0 on success, negative on failure
3194 **/
3195static int igb_set_mac(struct net_device *netdev, void *p)
3196{
3197        struct igb_adapter *adapter = netdev_priv(netdev);
3198        struct e1000_hw *hw = &adapter->hw;
3199        struct sockaddr *addr = p;
3200
3201        if (!is_valid_ether_addr(addr->sa_data))
3202                return -EADDRNOTAVAIL;
3203
3204        memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3205        memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3206
3207        /* set the correct pool for the new PF MAC address in entry 0 */
3208        igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3209                         adapter->vfs_allocated_count);
3210
3211        return 0;
3212}
3213
3214/**
3215 * igb_write_mc_addr_list - write multicast addresses to MTA
3216 * @netdev: network interface device structure
3217 *
3218 * Writes multicast address list to the MTA hash table.
3219 * Returns: -ENOMEM on failure
3220 *                0 on no addresses written
3221 *                X on writing X addresses to MTA
3222 **/
3223static int igb_write_mc_addr_list(struct net_device *netdev)
3224{
3225        struct igb_adapter *adapter = netdev_priv(netdev);
3226        struct e1000_hw *hw = &adapter->hw;
3227        struct netdev_hw_addr *ha;
3228        u8  *mta_list;
3229        int i;
3230
3231        if (netdev_mc_empty(netdev)) {
3232                /* nothing to program, so clear mc list */
3233                igb_update_mc_addr_list(hw, NULL, 0);
3234                igb_restore_vf_multicasts(adapter);
3235                return 0;
3236        }
3237
3238        mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3239        if (!mta_list)
3240                return -ENOMEM;
3241
3242        /* The shared function expects a packed array of only addresses. */
3243        i = 0;
3244        netdev_for_each_mc_addr(ha, netdev)
3245                memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3246
3247        igb_update_mc_addr_list(hw, mta_list, i);
3248        kfree(mta_list);
3249
3250        return netdev_mc_count(netdev);
3251}
3252
3253/**
3254 * igb_write_uc_addr_list - write unicast addresses to RAR table
3255 * @netdev: network interface device structure
3256 *
3257 * Writes unicast address list to the RAR table.
3258 * Returns: -ENOMEM on failure/insufficient address space
3259 *                0 on no addresses written
3260 *                X on writing X addresses to the RAR table
3261 **/
3262static int igb_write_uc_addr_list(struct net_device *netdev)
3263{
3264        struct igb_adapter *adapter = netdev_priv(netdev);
3265        struct e1000_hw *hw = &adapter->hw;
3266        unsigned int vfn = adapter->vfs_allocated_count;
3267        unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3268        int count = 0;
3269
3270        /* return ENOMEM indicating insufficient memory for addresses */
3271        if (netdev_uc_count(netdev) > rar_entries)
3272                return -ENOMEM;
3273
3274        if (!netdev_uc_empty(netdev) && rar_entries) {
3275                struct netdev_hw_addr *ha;
3276
3277                netdev_for_each_uc_addr(ha, netdev) {
3278                        if (!rar_entries)
3279                                break;
3280                        igb_rar_set_qsel(adapter, ha->addr,
3281                                         rar_entries--,
3282                                         vfn);
3283                        count++;
3284                }
3285        }
3286        /* write the addresses in reverse order to avoid write combining */
3287        for (; rar_entries > 0 ; rar_entries--) {
3288                wr32(E1000_RAH(rar_entries), 0);
3289                wr32(E1000_RAL(rar_entries), 0);
3290        }
3291        wrfl();
3292
3293        return count;
3294}
3295
3296/**
3297 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3298 * @netdev: network interface device structure
3299 *
3300 * The set_rx_mode entry point is called whenever the unicast or multicast
3301 * address lists or the network interface flags are updated.  This routine is
3302 * responsible for configuring the hardware for proper unicast, multicast,
3303 * promiscuous mode, and all-multi behavior.
3304 **/
3305static void igb_set_rx_mode(struct net_device *netdev)
3306{
3307        struct igb_adapter *adapter = netdev_priv(netdev);
3308        struct e1000_hw *hw = &adapter->hw;
3309        unsigned int vfn = adapter->vfs_allocated_count;
3310        u32 rctl, vmolr = 0;
3311        int count;
3312
3313        /* Check for Promiscuous and All Multicast modes */
3314        rctl = rd32(E1000_RCTL);
3315
3316        /* clear the affected bits */
3317        rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3318
3319        if (netdev->flags & IFF_PROMISC) {
3320                rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3321                vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3322        } else {
3323                if (netdev->flags & IFF_ALLMULTI) {
3324                        rctl |= E1000_RCTL_MPE;
3325                        vmolr |= E1000_VMOLR_MPME;
3326                } else {
3327                        /*
3328                         * Write addresses to the MTA; if the attempt fails
3329                         * then just turn on multicast promiscuous mode so
3330                         * that we can at least receive multicast traffic
3331                         */
3332                        count = igb_write_mc_addr_list(netdev);
3333                        if (count < 0) {
3334                                rctl |= E1000_RCTL_MPE;
3335                                vmolr |= E1000_VMOLR_MPME;
3336                        } else if (count) {
3337                                vmolr |= E1000_VMOLR_ROMPE;
3338                        }
3339                }
3340                /*
3341                 * Write addresses to the available RAR registers; if there is
3342                 * not enough space to store all of them, then enable unicast
3343                 * promiscuous mode
3344                 */
3345                count = igb_write_uc_addr_list(netdev);
3346                if (count < 0) {
3347                        rctl |= E1000_RCTL_UPE;
3348                        vmolr |= E1000_VMOLR_ROPE;
3349                }
3350                rctl |= E1000_RCTL_VFE;
3351        }
3352        wr32(E1000_RCTL, rctl);
3353
3354        /*
3355         * In order to support SR-IOV and eventually VMDq it is necessary to set
3356         * the VMOLR to enable the appropriate modes.  Without this workaround
3357         * we will have issues with VLAN tag stripping not being done for frames
3358         * that are only arriving because we are the default pool
3359         */
3360        if (hw->mac.type < e1000_82576)
3361                return;
3362
3363        vmolr |= rd32(E1000_VMOLR(vfn)) &
3364                 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3365        wr32(E1000_VMOLR(vfn), vmolr);
3366        igb_restore_vf_multicasts(adapter);
3367}
3368
3369static void igb_check_wvbr(struct igb_adapter *adapter)
3370{
3371        struct e1000_hw *hw = &adapter->hw;
3372        u32 wvbr = 0;
3373
3374        switch (hw->mac.type) {
3375        case e1000_82576:
3376        case e1000_i350:
3377                if (!(wvbr = rd32(E1000_WVBR)))
3378                        return;
3379                break;
3380        default:
3381                break;
3382        }
3383
3384        adapter->wvbr |= wvbr;
3385}
3386
3387#define IGB_STAGGERED_QUEUE_OFFSET 8
3388
3389static void igb_spoof_check(struct igb_adapter *adapter)
3390{
3391        int j;
3392
3393        if (!adapter->wvbr)
3394                return;
3395
3396        for (j = 0; j < adapter->vfs_allocated_count; j++) {
3397                if (adapter->wvbr & (1 << j) ||
3398                    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3399                        dev_warn(&adapter->pdev->dev,
3400                                "Spoof event(s) detected on VF %d\n", j);
3401                        adapter->wvbr &=
3402                                ~((1 << j) |
3403                                  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3404                }
3405        }
3406}
3407
3408/* Need to wait a few seconds after link up to get diagnostic information from
3409 * the phy */
3410static void igb_update_phy_info(unsigned long data)
3411{
3412        struct igb_adapter *adapter = (struct igb_adapter *) data;
3413        igb_get_phy_info(&adapter->hw);
3414}
3415
3416/**
3417 * igb_has_link - check shared code for link and determine up/down
3418 * @adapter: pointer to driver private info
3419 **/
3420bool igb_has_link(struct igb_adapter *adapter)
3421{
3422        struct e1000_hw *hw = &adapter->hw;
3423        bool link_active = false;
3424        s32 ret_val = 0;
3425
3426        /* get_link_status is set on LSC (link status) interrupt or
3427         * rx sequence error interrupt.  get_link_status will stay
3428         * false until the e1000_check_for_link establishes link
3429         * for copper adapters ONLY
3430         */
3431        switch (hw->phy.media_type) {
3432        case e1000_media_type_copper:
3433                if (hw->mac.get_link_status) {
3434                        ret_val = hw->mac.ops.check_for_link(hw);
3435                        link_active = !hw->mac.get_link_status;
3436                } else {
3437                        link_active = true;
3438                }
3439                break;
3440        case e1000_media_type_internal_serdes:
3441                ret_val = hw->mac.ops.check_for_link(hw);
3442                link_active = hw->mac.serdes_has_link;
3443                break;
3444        default:
3445        case e1000_media_type_unknown:
3446                break;
3447        }
3448
3449        return link_active;
3450}
3451
3452/**
3453 * igb_watchdog - Timer Call-back
3454 * @data: pointer to adapter cast into an unsigned long
3455 **/
3456static void igb_watchdog(unsigned long data)
3457{
3458        struct igb_adapter *adapter = (struct igb_adapter *)data;
3459        /* Do the rest outside of interrupt context */
3460        schedule_work(&adapter->watchdog_task);
3461}
3462
3463static void igb_watchdog_task(struct work_struct *work)
3464{
3465        struct igb_adapter *adapter = container_of(work,
3466                                                   struct igb_adapter,
3467                                                   watchdog_task);
3468        struct e1000_hw *hw = &adapter->hw;
3469        struct net_device *netdev = adapter->netdev;
3470        u32 link;
3471        int i;
3472
3473        link = igb_has_link(adapter);
3474        if (link) {
3475                if (!netif_carrier_ok(netdev)) {
3476                        u32 ctrl;
3477                        hw->mac.ops.get_speed_and_duplex(hw,
3478                                                         &adapter->link_speed,
3479                                                         &adapter->link_duplex);
3480
3481                        ctrl = rd32(E1000_CTRL);
3482                        /* Link status message must follow this format */
3483                        printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3484                                 "Flow Control: %s\n",
3485                               netdev->name,
3486                               adapter->link_speed,
3487                               adapter->link_duplex == FULL_DUPLEX ?
3488                                 "Full Duplex" : "Half Duplex",
3489                               ((ctrl & E1000_CTRL_TFCE) &&
3490                                (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3491                               ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3492                               ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3493
3494                        /* adjust timeout factor according to speed/duplex */
3495                        adapter->tx_timeout_factor = 1;
3496                        switch (adapter->link_speed) {
3497                        case SPEED_10:
3498                                adapter->tx_timeout_factor = 14;
3499                                break;
3500                        case SPEED_100:
3501                                /* maybe add some timeout factor ? */
3502                                break;
3503                        }
3504
3505                        netif_carrier_on(netdev);
3506
3507                        igb_ping_all_vfs(adapter);
3508
3509                        /* link state has changed, schedule phy info update */
3510                        if (!test_bit(__IGB_DOWN, &adapter->state))
3511                                mod_timer(&adapter->phy_info_timer,
3512                                          round_jiffies(jiffies + 2 * HZ));
3513                }
3514        } else {
3515                if (netif_carrier_ok(netdev)) {
3516                        adapter->link_speed = 0;
3517                        adapter->link_duplex = 0;
3518                        /* Link status message must follow this format */
3519                        printk(KERN_INFO "igb: %s NIC Link is Down\n",
3520                               netdev->name);
3521                        netif_carrier_off(netdev);
3522
3523                        igb_ping_all_vfs(adapter);
3524
3525                        /* link state has changed, schedule phy info update */
3526                        if (!test_bit(__IGB_DOWN, &adapter->state))
3527                                mod_timer(&adapter->phy_info_timer,
3528                                          round_jiffies(jiffies + 2 * HZ));
3529                }
3530        }
3531
3532        spin_lock(&adapter->stats64_lock);
3533        igb_update_stats(adapter, &adapter->stats64);
3534        spin_unlock(&adapter->stats64_lock);
3535
3536        for (i = 0; i < adapter->num_tx_queues; i++) {
3537                struct igb_ring *tx_ring = adapter->tx_ring[i];
3538                if (!netif_carrier_ok(netdev)) {
3539                        /* We've lost link, so the controller stops DMA,
3540                         * but we've got queued Tx work that's never going
3541                         * to get done, so reset controller to flush Tx.
3542                         * (Do the reset outside of interrupt context). */
3543                        if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3544                                adapter->tx_timeout_count++;
3545                                schedule_work(&adapter->reset_task);
3546                                /* return immediately since reset is imminent */
3547                                return;
3548                        }
3549                }
3550
3551                /* Force detection of hung controller every watchdog period */
3552                tx_ring->detect_tx_hung = true;
3553        }
3554
3555        /* Cause software interrupt to ensure rx ring is cleaned */
3556        if (adapter->msix_entries) {
3557                u32 eics = 0;
3558                for (i = 0; i < adapter->num_q_vectors; i++) {
3559                        struct igb_q_vector *q_vector = adapter->q_vector[i];
3560                        eics |= q_vector->eims_value;
3561                }
3562                wr32(E1000_EICS, eics);
3563        } else {
3564                wr32(E1000_ICS, E1000_ICS_RXDMT0);
3565        }
3566
3567        igb_spoof_check(adapter);
3568
3569        /* Reset the timer */
3570        if (!test_bit(__IGB_DOWN, &adapter->state))
3571                mod_timer(&adapter->watchdog_timer,
3572                          round_jiffies(jiffies + 2 * HZ));
3573}
3574
3575enum latency_range {
3576        lowest_latency = 0,
3577        low_latency = 1,
3578        bulk_latency = 2,
3579        latency_invalid = 255
3580};
3581
3582/**
3583 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3584 *
3585 *      Stores a new ITR value based strictly on packet size.  This
3586 *      algorithm is less sophisticated than that used in igb_update_itr,
3587 *      due to the difficulty of synchronizing statistics across multiple
3588 *      receive rings.  The divisors and thresholds used by this function
3589 *      were determined based on theoretical maximum wire speed and testing
3590 *      data, in order to minimize response time while increasing bulk
3591 *      throughput.
3592 *      This functionality is controlled by the InterruptThrottleRate module
3593 *      parameter (see igb_param.c)
3594 *      NOTE:  This function is called only when operating in a multiqueue
3595 *             receive environment.
3596 * @q_vector: pointer to q_vector
3597 **/
3598static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3599{
3600        int new_val = q_vector->itr_val;
3601        int avg_wire_size = 0;
3602        struct igb_adapter *adapter = q_vector->adapter;
3603        struct igb_ring *ring;
3604        unsigned int packets;
3605
3606        /* For non-gigabit speeds, just fix the interrupt rate at 4000
3607         * ints/sec, which corresponds to an itr_val of 976.
3608         */
3609        if (adapter->link_speed != SPEED_1000) {
3610                new_val = 976;
3611                goto set_itr_val;
3612        }
3613
3614        ring = q_vector->rx_ring;
3615        if (ring) {
3616                packets = ACCESS_ONCE(ring->total_packets);
3617
3618                if (packets)
3619                        avg_wire_size = ring->total_bytes / packets;
3620        }
3621
3622        ring = q_vector->tx_ring;
3623        if (ring) {
3624                packets = ACCESS_ONCE(ring->total_packets);
3625
3626                if (packets)
3627                        avg_wire_size = max_t(u32, avg_wire_size,
3628                                              ring->total_bytes / packets);
3629        }
3630
3631        /* if avg_wire_size isn't set no work was done */
3632        if (!avg_wire_size)
3633                goto clear_counts;
3634
3635        /* Add 24 bytes to size to account for CRC, preamble, and gap */
3636        avg_wire_size += 24;
3637
3638        /* Don't starve jumbo frames */
3639        avg_wire_size = min(avg_wire_size, 3000);
3640
3641        /* Give a little boost to mid-size frames */
3642        if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3643                new_val = avg_wire_size / 3;
3644        else
3645                new_val = avg_wire_size / 2;
3646
3647        /* when in itr mode 3 do not exceed 20K ints/sec */
3648        if (adapter->rx_itr_setting == 3 && new_val < 196)
3649                new_val = 196;
3650
3651set_itr_val:
3652        if (new_val != q_vector->itr_val) {
3653                q_vector->itr_val = new_val;
3654                q_vector->set_itr = 1;
3655        }
3656clear_counts:
3657        if (q_vector->rx_ring) {
3658                q_vector->rx_ring->total_bytes = 0;
3659                q_vector->rx_ring->total_packets = 0;
3660        }
3661        if (q_vector->tx_ring) {
3662                q_vector->tx_ring->total_bytes = 0;
3663                q_vector->tx_ring->total_packets = 0;
3664        }
3665}
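
/*
 * Illustrative sketch, not part of the upstream driver: the approximate
 * relationship between an itr_val chosen above and the resulting interrupt
 * rate.  Going by the pairs used in igb_set_itr() below (56 ~ 70000,
 * 196 ~ 20000, 980 ~ 4000 ints/sec), each itr_val unit is roughly 250 ns,
 * so rate ~= 4,000,000 / itr_val.  The helper name is made up for
 * illustration only.
 */
static inline unsigned int igb_example_itr_to_ints_per_sec(unsigned int itr_val)
{
        if (!itr_val)
                return 0;       /* 0 means interrupt throttling is disabled */

        return 4000000 / itr_val;       /* e.g. 976 -> ~4098 ints/sec */
}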
3666
3667/**
3668 * igb_update_itr - update the dynamic ITR value based on statistics
3669 *      Stores a new ITR value based on packets and byte
3670 *      counts during the last interrupt.  The advantage of per interrupt
3671 *      computation is faster updates and more accurate ITR for the current
3672 *      traffic pattern.  Constants in this function were computed
3673 *      based on theoretical maximum wire speed and thresholds were set based
3674 *      on testing data as well as attempting to minimize response time
3675 *      while increasing bulk throughput.
3676 *      this functionality is controlled by the InterruptThrottleRate module
3677 *      parameter (see igb_param.c)
3678 *      NOTE:  These calculations are only valid when operating in a single-
3679 *             queue environment.
3680 * @adapter: pointer to adapter
3681 * @itr_setting: current q_vector->itr_val
3682 * @packets: the number of packets during this measurement interval
3683 * @bytes: the number of bytes during this measurement interval
3684 **/
3685static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3686                                   int packets, int bytes)
3687{
3688        unsigned int retval = itr_setting;
3689
3690        if (packets == 0)
3691                goto update_itr_done;
3692
3693        switch (itr_setting) {
3694        case lowest_latency:
3695                /* handle TSO and jumbo frames */
3696                if (bytes/packets > 8000)
3697                        retval = bulk_latency;
3698                else if ((packets < 5) && (bytes > 512))
3699                        retval = low_latency;
3700                break;
3701        case low_latency:  /* 50 usec aka 20000 ints/s */
3702                if (bytes > 10000) {
3703                        /* this if handles the TSO accounting */
3704                        if (bytes/packets > 8000) {
3705                                retval = bulk_latency;
3706                        } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3707                                retval = bulk_latency;
3708                        } else if ((packets > 35)) {
3709                                retval = lowest_latency;
3710                        }
3711                } else if (bytes/packets > 2000) {
3712                        retval = bulk_latency;
3713                } else if (packets <= 2 && bytes < 512) {
3714                        retval = lowest_latency;
3715                }
3716                break;
3717        case bulk_latency: /* 250 usec aka 4000 ints/s */
3718                if (bytes > 25000) {
3719                        if (packets > 35)
3720                                retval = low_latency;
3721                } else if (bytes < 1500) {
3722                        retval = low_latency;
3723                }
3724                break;
3725        }
3726
3727update_itr_done:
3728        return retval;
3729}
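
/*
 * Illustrative sketch, not part of the upstream driver: a worked example of
 * the classification above.  While in low_latency, an interval that saw 40
 * packets totalling 60000 bytes has bytes > 10000 and bytes/packets == 1500,
 * which exceeds the 1200-byte cutoff, so igb_update_itr() returns
 * bulk_latency.  The helper name is made up for illustration only.
 */
static inline unsigned int igb_example_itr_step(struct igb_adapter *adapter)
{
        /* 40 packets / 60000 bytes seen since the last interrupt */
        return igb_update_itr(adapter, low_latency, 40, 60000);
}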
3730
3731static void igb_set_itr(struct igb_adapter *adapter)
3732{
3733        struct igb_q_vector *q_vector = adapter->q_vector[0];
3734        u16 current_itr;
3735        u32 new_itr = q_vector->itr_val;
3736
3737        /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3738        if (adapter->link_speed != SPEED_1000) {
3739                current_itr = 0;
3740                new_itr = 4000;
3741                goto set_itr_now;
3742        }
3743
3744        adapter->rx_itr = igb_update_itr(adapter,
3745                                    adapter->rx_itr,
3746                                    q_vector->rx_ring->total_packets,
3747                                    q_vector->rx_ring->total_bytes);
3748
3749        adapter->tx_itr = igb_update_itr(adapter,
3750                                    adapter->tx_itr,
3751                                    q_vector->tx_ring->total_packets,
3752                                    q_vector->tx_ring->total_bytes);
3753        current_itr = max(adapter->rx_itr, adapter->tx_itr);
3754
3755        /* conservative mode (itr 3) eliminates the lowest_latency setting */
3756        if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3757                current_itr = low_latency;
3758
3759        switch (current_itr) {
3760        /* counts and packets in update_itr are dependent on these numbers */
3761        case lowest_latency:
3762                new_itr = 56;  /* aka 70,000 ints/sec */
3763                break;
3764        case low_latency:
3765                new_itr = 196; /* aka 20,000 ints/sec */
3766                break;
3767        case bulk_latency:
3768                new_itr = 980; /* aka 4,000 ints/sec */
3769                break;
3770        default:
3771                break;
3772        }
3773
3774set_itr_now:
3775        q_vector->rx_ring->total_bytes = 0;
3776        q_vector->rx_ring->total_packets = 0;
3777        q_vector->tx_ring->total_bytes = 0;
3778        q_vector->tx_ring->total_packets = 0;
3779
3780        if (new_itr != q_vector->itr_val) {
3781                /* this attempts to bias the interrupt rate towards Bulk
3782                 * by adding intermediate steps when interrupt rate is
3783                 * increasing */
3784                new_itr = new_itr > q_vector->itr_val ?
3785                             max((new_itr * q_vector->itr_val) /
3786                                 (new_itr + (q_vector->itr_val >> 2)),
3787                                 new_itr) :
3788                             new_itr;
3789                /* Don't write the value here; it resets the adapter's
3790                 * internal timer, and causes us to delay far longer than
3791                 * we should between interrupts.  Instead, we write the ITR
3792                 * value at the beginning of the next interrupt so the timing
3793                 * ends up being correct.
3794                 */
3795                q_vector->itr_val = new_itr;
3796                q_vector->set_itr = 1;
3797        }
3798}
3799
3800#define IGB_TX_FLAGS_CSUM               0x00000001
3801#define IGB_TX_FLAGS_VLAN               0x00000002
3802#define IGB_TX_FLAGS_TSO                0x00000004
3803#define IGB_TX_FLAGS_IPV4               0x00000008
3804#define IGB_TX_FLAGS_TSTAMP             0x00000010
3805#define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3806#define IGB_TX_FLAGS_VLAN_SHIFT                 16
3807
3808static inline int igb_tso_adv(struct igb_ring *tx_ring,
3809                              struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3810{
3811        struct e1000_adv_tx_context_desc *context_desc;
3812        unsigned int i;
3813        int err;
3814        struct igb_buffer *buffer_info;
3815        u32 info = 0, tu_cmd = 0;
3816        u32 mss_l4len_idx;
3817        u8 l4len;
3818
3819        if (skb_header_cloned(skb)) {
3820                err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3821                if (err)
3822                        return err;
3823        }
3824
3825        l4len = tcp_hdrlen(skb);
3826        *hdr_len += l4len;
3827
3828        if (skb->protocol == htons(ETH_P_IP)) {
3829                struct iphdr *iph = ip_hdr(skb);
3830                iph->tot_len = 0;
3831                iph->check = 0;
3832                tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3833                                                         iph->daddr, 0,
3834                                                         IPPROTO_TCP,
3835                                                         0);
3836        } else if (skb_is_gso_v6(skb)) {
3837                ipv6_hdr(skb)->payload_len = 0;
3838                tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3839                                                       &ipv6_hdr(skb)->daddr,
3840                                                       0, IPPROTO_TCP, 0);
3841        }
3842
3843        i = tx_ring->next_to_use;
3844
3845        buffer_info = &tx_ring->buffer_info[i];
3846        context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3847        /* VLAN MACLEN IPLEN */
3848        if (tx_flags & IGB_TX_FLAGS_VLAN)
3849                info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3850        info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3851        *hdr_len += skb_network_offset(skb);
3852        info |= skb_network_header_len(skb);
3853        *hdr_len += skb_network_header_len(skb);
3854        context_desc->vlan_macip_lens = cpu_to_le32(info);
3855
3856        /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3857        tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3858
3859        if (skb->protocol == htons(ETH_P_IP))
3860                tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3861        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3862
3863        context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3864
3865        /* MSS L4LEN IDX */
3866        mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3867        mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3868
3869        /* For 82575, context index must be unique per ring. */
3870        if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3871                mss_l4len_idx |= tx_ring->reg_idx << 4;
3872
3873        context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3874        context_desc->seqnum_seed = 0;
3875
3876        buffer_info->time_stamp = jiffies;
3877        buffer_info->next_to_watch = i;
3878        buffer_info->dma = 0;
3879        i++;
3880        if (i == tx_ring->count)
3881                i = 0;
3882
3883        tx_ring->next_to_use = i;
3884
3885        return true;
3886}
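
/*
 * Illustrative sketch, not part of the upstream driver: the hdr_len that
 * igb_tso_adv() accumulates above is simply the L2 + L3 + L4 header bytes.
 * For a plain TCP/IPv4 frame with no options that is 14 + 20 + 20 = 54; the
 * hardware replicates these headers in front of every TSO segment.  The
 * helper name is made up for illustration only.
 */
static inline u8 igb_example_tso_hdr_len(u8 mac_len, u8 ip_len, u8 l4_len)
{
        return mac_len + ip_len + l4_len;       /* e.g. 14 + 20 + 20 = 54 */
}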
3887
3888static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3889                                   struct sk_buff *skb, u32 tx_flags)
3890{
3891        struct e1000_adv_tx_context_desc *context_desc;
3892        struct device *dev = tx_ring->dev;
3893        struct igb_buffer *buffer_info;
3894        u32 info = 0, tu_cmd = 0;
3895        unsigned int i;
3896
3897        if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3898            (tx_flags & IGB_TX_FLAGS_VLAN)) {
3899                i = tx_ring->next_to_use;
3900                buffer_info = &tx_ring->buffer_info[i];
3901                context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3902
3903                if (tx_flags & IGB_TX_FLAGS_VLAN)
3904                        info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3905
3906                info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3907                if (skb->ip_summed == CHECKSUM_PARTIAL)
3908                        info |= skb_network_header_len(skb);
3909
3910                context_desc->vlan_macip_lens = cpu_to_le32(info);
3911
3912                tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3913
3914                if (skb->ip_summed == CHECKSUM_PARTIAL) {
3915                        __be16 protocol;
3916
3917                        if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3918                                const struct vlan_ethhdr *vhdr =
3919                                          (const struct vlan_ethhdr*)skb->data;
3920
3921                                protocol = vhdr->h_vlan_encapsulated_proto;
3922                        } else {
3923                                protocol = skb->protocol;
3924                        }
3925
3926                        switch (protocol) {
3927                        case cpu_to_be16(ETH_P_IP):
3928                                tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3929                                if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3930                                        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3931                                else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3932                                        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3933                                break;
3934                        case cpu_to_be16(ETH_P_IPV6):
3935                                /* XXX what about other V6 headers?? */
3936                                if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3937                                        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3938                                else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3939                                        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3940                                break;
3941                        default:
3942                                if (unlikely(net_ratelimit()))
3943                                        dev_warn(dev,
3944                                            "partial checksum but proto=%x!\n",
3945                                            skb->protocol);
3946                                break;
3947                        }
3948                }
3949
3950                context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3951                context_desc->seqnum_seed = 0;
3952                if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3953                        context_desc->mss_l4len_idx =
3954                                cpu_to_le32(tx_ring->reg_idx << 4);
3955
3956                buffer_info->time_stamp = jiffies;
3957                buffer_info->next_to_watch = i;
3958                buffer_info->dma = 0;
3959
3960                i++;
3961                if (i == tx_ring->count)
3962                        i = 0;
3963                tx_ring->next_to_use = i;
3964
3965                return true;
3966        }
3967        return false;
3968}
3969
3970#define IGB_MAX_TXD_PWR 16
3971#define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3972
3973static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3974                                 unsigned int first)
3975{
3976        struct igb_buffer *buffer_info;
3977        struct device *dev = tx_ring->dev;
3978        unsigned int hlen = skb_headlen(skb);
3979        unsigned int count = 0, i;
3980        unsigned int f;
3981        u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3982
3983        i = tx_ring->next_to_use;
3984
3985        buffer_info = &tx_ring->buffer_info[i];
3986        BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
3987        buffer_info->length = hlen;
3988        /* set time_stamp *before* dma to help avoid a possible race */
3989        buffer_info->time_stamp = jiffies;
3990        buffer_info->next_to_watch = i;
3991        buffer_info->dma = dma_map_single(dev, skb->data, hlen,
3992                                          DMA_TO_DEVICE);
3993        if (dma_mapping_error(dev, buffer_info->dma))
3994                goto dma_error;
3995
3996        for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3997                struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
3998                unsigned int len = frag->size;
3999
4000                count++;
4001                i++;
4002                if (i == tx_ring->count)
4003                        i = 0;
4004
4005                buffer_info = &tx_ring->buffer_info[i];
4006                BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4007                buffer_info->length = len;
4008                buffer_info->time_stamp = jiffies;
4009                buffer_info->next_to_watch = i;
4010                buffer_info->mapped_as_page = true;
4011                buffer_info->dma = dma_map_page(dev,
4012                                                frag->page,
4013                                                frag->page_offset,
4014                                                len,
4015                                                DMA_TO_DEVICE);
4016                if (dma_mapping_error(dev, buffer_info->dma))
4017                        goto dma_error;
4018
4019        }
4020
4021        tx_ring->buffer_info[i].skb = skb;
4022        tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4023        /* multiply data chunks by size of headers */
4024        tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4025        tx_ring->buffer_info[i].gso_segs = gso_segs;
4026        tx_ring->buffer_info[first].next_to_watch = i;
4027
4028        return ++count;
4029
4030dma_error:
4031        dev_err(dev, "TX DMA map failed\n");
4032
4033        /* clear timestamp and dma mappings for failed buffer_info mapping */
4034        buffer_info->dma = 0;
4035        buffer_info->time_stamp = 0;
4036        buffer_info->length = 0;
4037        buffer_info->next_to_watch = 0;
4038        buffer_info->mapped_as_page = false;
4039
4040        /* clear timestamp and dma mappings for remaining portion of packet */
4041        while (count--) {
4042                if (i == 0)
4043                        i = tx_ring->count;
4044                i--;
4045                buffer_info = &tx_ring->buffer_info[i];
4046                igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4047        }
4048
4049        return 0;
4050}
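
/*
 * Illustrative sketch, not part of the upstream driver: the bytecount stored
 * above for Tx statistics.  A TSO skb is cut into gso_segs segments and the
 * hlen header bytes are repeated in front of each one, so the bytes that hit
 * the wire are skb->len plus (gso_segs - 1) extra header copies.  Non-TSO
 * frames have gso_segs == 1 and contribute skb->len alone.  The helper name
 * is made up for illustration only.
 */
static inline unsigned int igb_example_tx_bytecount(unsigned int skb_len,
                                                    unsigned int hlen,
                                                    u16 gso_segs)
{
        return skb_len + (gso_segs - 1) * hlen;
}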
4051
4052static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4053                                    u32 tx_flags, int count, u32 paylen,
4054                                    u8 hdr_len)
4055{
4056        union e1000_adv_tx_desc *tx_desc;
4057        struct igb_buffer *buffer_info;
4058        u32 olinfo_status = 0, cmd_type_len;
4059        unsigned int i = tx_ring->next_to_use;
4060
4061        cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4062                        E1000_ADVTXD_DCMD_DEXT);
4063
4064        if (tx_flags & IGB_TX_FLAGS_VLAN)
4065                cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4066
4067        if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4068                cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4069
4070        if (tx_flags & IGB_TX_FLAGS_TSO) {
4071                cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4072
4073                /* insert tcp checksum */
4074                olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4075
4076                /* insert ip checksum */
4077                if (tx_flags & IGB_TX_FLAGS_IPV4)
4078                        olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4079
4080        } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4081                olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4082        }
4083
4084        if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4085            (tx_flags & (IGB_TX_FLAGS_CSUM |
4086                         IGB_TX_FLAGS_TSO |
4087                         IGB_TX_FLAGS_VLAN)))
4088                olinfo_status |= tx_ring->reg_idx << 4;
4089
4090        olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4091
4092        do {
4093                buffer_info = &tx_ring->buffer_info[i];
4094                tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4095                tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4096                tx_desc->read.cmd_type_len =
4097                        cpu_to_le32(cmd_type_len | buffer_info->length);
4098                tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4099                count--;
4100                i++;
4101                if (i == tx_ring->count)
4102                        i = 0;
4103        } while (count > 0);
4104
4105        tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4106        /* Force memory writes to complete before letting h/w
4107         * know there are new descriptors to fetch.  (Only
4108         * applicable for weak-ordered memory model archs,
4109         * such as IA-64). */
4110        wmb();
4111
4112        tx_ring->next_to_use = i;
4113        writel(i, tx_ring->tail);
4114        /* we need this if more than one processor can write to our tail
4115         * at a time; it synchronizes IO on IA64/Altix systems */
4116        mmiowb();
4117}
4118
4119static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4120{
4121        struct net_device *netdev = tx_ring->netdev;
4122
4123        netif_stop_subqueue(netdev, tx_ring->queue_index);
4124
4125        /* Herbert's original patch had:
4126         *  smp_mb__after_netif_stop_queue();
4127         * but since that doesn't exist yet, just open code it. */
4128        smp_mb();
4129
4130        /* We need to check again in case another CPU has just
4131         * made room available. */
4132        if (igb_desc_unused(tx_ring) < size)
4133                return -EBUSY;
4134
4135        /* A reprieve! */
4136        netif_wake_subqueue(netdev, tx_ring->queue_index);
4137
4138        u64_stats_update_begin(&tx_ring->tx_syncp2);
4139        tx_ring->tx_stats.restart_queue2++;
4140        u64_stats_update_end(&tx_ring->tx_syncp2);
4141
4142        return 0;
4143}
4144
4145static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4146{
4147        if (igb_desc_unused(tx_ring) >= size)
4148                return 0;
4149        return __igb_maybe_stop_tx(tx_ring, size);
4150}
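
/*
 * Illustrative sketch, not part of the upstream driver: the circular-ring
 * arithmetic behind the igb_desc_unused() checks above.  With next_to_use
 * chasing next_to_clean around a ring of 'count' descriptors, the free space
 * is the ring size minus the descriptors currently in flight, minus one slot
 * so the two indices never collide.  This mirrors (but is not) the helper in
 * igb.h; the function name is made up for illustration only.
 */
static inline u16 igb_example_ring_space(u16 count, u16 next_to_use,
                                         u16 next_to_clean)
{
        u16 used = (next_to_use >= next_to_clean) ?
                        next_to_use - next_to_clean :
                        count + next_to_use - next_to_clean;

        return count - used - 1;
}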
4151
4152netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4153                                    struct igb_ring *tx_ring)
4154{
4155        int tso = 0, count;
4156        u32 tx_flags = 0;
4157        u16 first;
4158        u8 hdr_len = 0;
4159
4160        /* need: 1 descriptor per page,
4161         *       + 2 desc gap to keep tail from touching head,
4162         *       + 1 desc for skb->data,
4163         *       + 1 desc for context descriptor,
4164         * otherwise try next time */
4165        if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4166                /* this is a hard error */
4167                return NETDEV_TX_BUSY;
4168        }
4169
4170        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4171                skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4172                tx_flags |= IGB_TX_FLAGS_TSTAMP;
4173        }
4174
4175        if (vlan_tx_tag_present(skb)) {
4176                tx_flags |= IGB_TX_FLAGS_VLAN;
4177                tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4178        }
4179
4180        if (skb->protocol == htons(ETH_P_IP))
4181                tx_flags |= IGB_TX_FLAGS_IPV4;
4182
4183        first = tx_ring->next_to_use;
4184        if (skb_is_gso(skb)) {
4185                tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4186
4187                if (tso < 0) {
4188                        dev_kfree_skb_any(skb);
4189                        return NETDEV_TX_OK;
4190                }
4191        }
4192
4193        if (tso)
4194                tx_flags |= IGB_TX_FLAGS_TSO;
4195        else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4196                 (skb->ip_summed == CHECKSUM_PARTIAL))
4197                tx_flags |= IGB_TX_FLAGS_CSUM;
4198
4199        /*
4200         * count reflects descriptors mapped; if 0 or less then a mapping
4201         * error has occurred and we need to rewind the descriptor queue
4202         */
4203        count = igb_tx_map_adv(tx_ring, skb, first);
4204        if (!count) {
4205                dev_kfree_skb_any(skb);
4206                tx_ring->buffer_info[first].time_stamp = 0;
4207                tx_ring->next_to_use = first;
4208                return NETDEV_TX_OK;
4209        }
4210
4211        igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4212
4213        /* Make sure there is space in the ring for the next send. */
4214        igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4215
4216        return NETDEV_TX_OK;
4217}
4218
4219static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4220                                      struct net_device *netdev)
4221{
4222        struct igb_adapter *adapter = netdev_priv(netdev);
4223        struct igb_ring *tx_ring;
4224        int r_idx = 0;
4225
4226        if (test_bit(__IGB_DOWN, &adapter->state)) {
4227                dev_kfree_skb_any(skb);
4228                return NETDEV_TX_OK;
4229        }
4230
4231        if (skb->len <= 0) {
4232                dev_kfree_skb_any(skb);
4233                return NETDEV_TX_OK;
4234        }
4235
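            /* Pick a tx ring: the AND wraps the stack's queue_mapping into our
             * ring range (this relies on IGB_ABS_MAX_TX_QUEUES being a power of two). */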
4236        r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4237        tx_ring = adapter->multi_tx_table[r_idx];
4238
4239        /* This goes back to the question of how to logically map a tx queue
4240         * to a flow.  Right now, performance is slightly degraded when
4241         * using multiple tx queues.  If the stack breaks away from a
4242         * single qdisc implementation, we can look at this again. */
4243        return igb_xmit_frame_ring_adv(skb, tx_ring);
4244}
4245
4246/**
4247 * igb_tx_timeout - Respond to a Tx Hang
4248 * @netdev: network interface device structure
4249 **/
4250static void igb_tx_timeout(struct net_device *netdev)
4251{
4252        struct igb_adapter *adapter = netdev_priv(netdev);
4253        struct e1000_hw *hw = &adapter->hw;
4254
4255        /* Do the reset outside of interrupt context */
4256        adapter->tx_timeout_count++;
4257
4258        if (hw->mac.type == e1000_82580)
4259                hw->dev_spec._82575.global_device_reset = true;
4260
4261        schedule_work(&adapter->reset_task);
4262        wr32(E1000_EICS,
4263             (adapter->eims_enable_mask & ~adapter->eims_other));
4264}
4265
4266static void igb_reset_task(struct work_struct *work)
4267{
4268        struct igb_adapter *adapter;
4269        adapter = container_of(work, struct igb_adapter, reset_task);
4270
4271        igb_dump(adapter);
4272        netdev_err(adapter->netdev, "Reset adapter\n");
4273        igb_reinit_locked(adapter);
4274}
4275
4276/**
4277 * igb_get_stats64 - Get System Network Statistics
4278 * @netdev: network interface device structure
4279 * @stats: rtnl_link_stats64 pointer
4280 *
4281 **/
4282static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4283                                                 struct rtnl_link_stats64 *stats)
4284{
4285        struct igb_adapter *adapter = netdev_priv(netdev);
4286
4287        spin_lock(&adapter->stats64_lock);
4288        igb_update_stats(adapter, &adapter->stats64);
4289        memcpy(stats, &adapter->stats64, sizeof(*stats));
4290        spin_unlock(&adapter->stats64_lock);
4291
4292        return stats;
4293}
4294
4295/**
4296 * igb_change_mtu - Change the Maximum Transfer Unit
4297 * @netdev: network interface device structure
4298 * @new_mtu: new value for maximum frame size
4299 *
4300 * Returns 0 on success, negative on failure
4301 **/
4302static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4303{
4304        struct igb_adapter *adapter = netdev_priv(netdev);
4305        struct pci_dev *pdev = adapter->pdev;
4306        int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4307        u32 rx_buffer_len, i;
4308
4309        if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4310                dev_err(&pdev->dev, "Invalid MTU setting\n");
4311                return -EINVAL;
4312        }
4313
4314        if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4315                dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4316                return -EINVAL;
4317        }
4318
4319        while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4320                msleep(1);
4321
4322        /* igb_down has a dependency on max_frame_size */
4323        adapter->max_frame_size = max_frame;
4324
4325        /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4326         * means we reserve 2 more; this pushes us to allocate from the next
4327         * larger slab size,
4328         * e.g. RXBUFFER_2048 --> size-4096 slab
4329         */
4330
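            /* the 82580 can prepend a timestamp header to received packets,
             * so budget for it when choosing a buffer size */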
4331        if (adapter->hw.mac.type == e1000_82580)
4332                max_frame += IGB_TS_HDR_LEN;
4333
4334        if (max_frame <= IGB_RXBUFFER_1024)
4335                rx_buffer_len = IGB_RXBUFFER_1024;
4336        else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4337                rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4338        else
4339                rx_buffer_len = IGB_RXBUFFER_128;
4340
4341        if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4342             (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4343                rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4344
4345        if ((adapter->hw.mac.type == e1000_82580) &&
4346            (rx_buffer_len == IGB_RXBUFFER_128))
4347                rx_buffer_len += IGB_RXBUFFER_64;
4348
4349        if (netif_running(netdev))
4350                igb_down(adapter);
4351
4352        dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4353                 netdev->mtu, new_mtu);
4354        netdev->mtu = new_mtu;
4355
4356        for (i = 0; i < adapter->num_rx_queues; i++)
4357                adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4358
4359        if (netif_running(netdev))
4360                igb_up(adapter);
4361        else
4362                igb_reset(adapter);
4363
4364        clear_bit(__IGB_RESETTING, &adapter->state);
4365
4366        return 0;
4367}
4368
4369/**
4370 * igb_update_stats - Update the board statistics counters
4371 * @adapter: board private structure
4372 **/
4373
4374void igb_update_stats(struct igb_adapter *adapter,
4375                      struct rtnl_link_stats64 *net_stats)
4376{
4377        struct e1000_hw *hw = &adapter->hw;
4378        struct pci_dev *pdev = adapter->pdev;
4379        u32 reg, mpc;
4380        u16 phy_tmp;
4381        int i;
4382        u64 bytes, packets;
4383        unsigned int start;
4384        u64 _bytes, _packets;
4385
4386#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4387
4388        /*
4389         * Prevent stats update while adapter is being reset, or if the pci
4390         * connection is down.
4391         */
4392        if (adapter->link_speed == 0)
4393                return;
4394        if (pci_channel_offline(pdev))
4395                return;
4396
4397        bytes = 0;
4398        packets = 0;
4399        for (i = 0; i < adapter->num_rx_queues; i++) {
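                    /* RQDPC counts packets dropped by this queue for lack of
                     * descriptors; only the low 12 bits are valid, hence the mask */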
4400                u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4401                struct igb_ring *ring = adapter->rx_ring[i];
4402
4403                ring->rx_stats.drops += rqdpc_tmp;
4404                net_stats->rx_fifo_errors += rqdpc_tmp;
4405
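                    /* u64_stats retry loop: re-read until we get a consistent
                     * 64-bit snapshot of the ring counters (the writer may be
                     * updating them concurrently on 32-bit hosts) */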
4406                do {
4407                        start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4408                        _bytes = ring->rx_stats.bytes;
4409                        _packets = ring->rx_stats.packets;
4410                } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4411                bytes += _bytes;
4412                packets += _packets;
4413        }
4414
4415        net_stats->rx_bytes = bytes;
4416        net_stats->rx_packets = packets;
4417
4418        bytes = 0;
4419        packets = 0;
4420        for (i = 0; i < adapter->num_tx_queues; i++) {
4421                struct igb_ring *ring = adapter->tx_ring[i];
4422                do {
4423                        start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4424                        _bytes = ring->tx_stats.bytes;
4425                        _packets = ring->tx_stats.packets;
4426                } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4427                bytes += _bytes;
4428                packets += _packets;
4429        }
4430        net_stats->tx_bytes = bytes;
4431        net_stats->tx_packets = packets;
4432
4433        /* read stats registers */
4434        adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4435        adapter->stats.gprc += rd32(E1000_GPRC);
4436        adapter->stats.gorc += rd32(E1000_GORCL);
4437        rd32(E1000_GORCH); /* clear GORCL */
4438        adapter->stats.bprc += rd32(E1000_BPRC);
4439        adapter->stats.mprc += rd32(E1000_MPRC);
4440        adapter->stats.roc += rd32(E1000_ROC);
4441
4442        adapter->stats.prc64 += rd32(E1000_PRC64);
4443        adapter->stats.prc127 += rd32(E1000_PRC127);
4444        adapter->stats.prc255 += rd32(E1000_PRC255);
4445        adapter->stats.prc511 += rd32(E1000_PRC511);
4446        adapter->stats.prc1023 += rd32(E1000_PRC1023);
4447        adapter->stats.prc1522 += rd32(E1000_PRC1522);
4448        adapter->stats.symerrs += rd32(E1000_SYMERRS);
4449        adapter->stats.sec += rd32(E1000_SEC);
4450
4451        mpc = rd32(E1000_MPC);
4452        adapter->stats.mpc += mpc;
4453        net_stats->rx_fifo_errors += mpc;
4454        adapter->stats.scc += rd32(E1000_SCC);
4455        adapter->stats.ecol += rd32(E1000_ECOL);
4456        adapter->stats.mcc += rd32(E1000_MCC);
4457        adapter->stats.latecol += rd32(E1000_LATECOL);
4458        adapter->stats.dc += rd32(E1000_DC);
4459        adapter->stats.rlec += rd32(E1000_RLEC);
4460        adapter->stats.xonrxc += rd32(E1000_XONRXC);
4461        adapter->stats.xontxc += rd32(E1000_XONTXC);
4462        adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4463        adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4464        adapter->stats.fcruc += rd32(E1000_FCRUC);
4465        adapter->stats.gptc += rd32(E1000_GPTC);
4466        adapter->stats.gotc += rd32(E1000_GOTCL);
4467        rd32(E1000_GOTCH); /* clear GOTCL */
4468        adapter->stats.rnbc += rd32(E1000_RNBC);
4469        adapter->stats.ruc += rd32(E1000_RUC);
4470        adapter->stats.rfc += rd32(E1000_RFC);
4471        adapter->stats.rjc += rd32(E1000_RJC);
4472        adapter->stats.tor += rd32(E1000_TORH);
4473        adapter->stats.tot += rd32(E1000_TOTH);
4474        adapter->stats.tpr += rd32(E1000_TPR);
4475
4476        adapter->stats.ptc64 += rd32(E1000_PTC64);
4477        adapter->stats.ptc127 += rd32(E1000_PTC127);
4478        adapter->stats.ptc255 += rd32(E1000_PTC255);
4479        adapter->stats.ptc511 += rd32(E1000_PTC511);
4480        adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4481        adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4482
4483        adapter->stats.mptc += rd32(E1000_MPTC);
4484        adapter->stats.bptc += rd32(E1000_BPTC);
4485
4486        adapter->stats.tpt += rd32(E1000_TPT);
4487        adapter->stats.colc += rd32(E1000_COLC);
4488
4489        adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4490        /* read internal phy specific stats */
4491        reg = rd32(E1000_CTRL_EXT);
4492        if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4493                adapter->stats.rxerrc += rd32(E1000_RXERRC);
4494                adapter->stats.tncrs += rd32(E1000_TNCRS);
4495        }
4496
4497        adapter->stats.tsctc += rd32(E1000_TSCTC);
4498        adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4499
4500        adapter->stats.iac += rd32(E1000_IAC);
4501        adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4502        adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4503        adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4504        adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4505        adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4506        adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4507        adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4508        adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4509
4510        /* Fill out the OS statistics structure */
4511        net_stats->multicast = adapter->stats.mprc;
4512        net_stats->collisions = adapter->stats.colc;
4513
4514        /* Rx Errors */
4515
4516        /* RLEC on some newer hardware can be incorrect, so build
4517         * our own version based on RUC and ROC */
4518        net_stats->rx_errors = adapter->stats.rxerrc +
4519                adapter->stats.crcerrs + adapter->stats.algnerrc +
4520                adapter->stats.ruc + adapter->stats.roc +
4521                adapter->stats.cexterr;
4522        net_stats->rx_length_errors = adapter->stats.ruc +
4523                                      adapter->stats.roc;
4524        net_stats->rx_crc_errors = adapter->stats.crcerrs;
4525        net_stats->rx_frame_errors = adapter->stats.algnerrc;
4526        net_stats->rx_missed_errors = adapter->stats.mpc;
4527
4528        /* Tx Errors */
4529        net_stats->tx_errors = adapter->stats.ecol +
4530                               adapter->stats.latecol;
4531        net_stats->tx_aborted_errors = adapter->stats.ecol;
4532        net_stats->tx_window_errors = adapter->stats.latecol;
4533        net_stats->tx_carrier_errors = adapter->stats.tncrs;
4534
4535        /* Tx Dropped needs to be maintained elsewhere */
4536
4537        /* Phy Stats */
4538        if (hw->phy.media_type == e1000_media_type_copper) {
4539                if ((adapter->link_speed == SPEED_1000) &&
4540                   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4541                        phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4542                        adapter->phy_stats.idle_errors += phy_tmp;
4543                }
4544        }
4545
4546        /* Management Stats */
4547        adapter->stats.mgptc += rd32(E1000_MGTPTC);
4548        adapter->stats.mgprc += rd32(E1000_MGTPRC);
4549        adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4550}
4551
4552static irqreturn_t igb_msix_other(int irq, void *data)
4553{
4554        struct igb_adapter *adapter = data;
4555        struct e1000_hw *hw = &adapter->hw;
4556        u32 icr = rd32(E1000_ICR);
4557        /* reading ICR causes bit 31 of EICR to be cleared */
4558
4559        if (icr & E1000_ICR_DRSTA)
4560                schedule_work(&adapter->reset_task);
4561
4562        if (icr & E1000_ICR_DOUTSYNC) {
4563                /* HW is reporting DMA is out of sync */
4564                adapter->stats.doosync++;
4565                /* The DMA Out of Sync is also an indication of a spoof event
4566                 * in IOV mode. Check the Wrong VM Behavior register to
4567                 * see if it is really a spoof event. */
4568                igb_check_wvbr(adapter);
4569        }
4570
4571        /* Check for a mailbox event */
4572        if (icr & E1000_ICR_VMMB)
4573                igb_msg_task(adapter);
4574
4575        if (icr & E1000_ICR_LSC) {
4576                hw->mac.get_link_status = 1;
4577                /* guard against interrupt when we're going down */
4578                if (!test_bit(__IGB_DOWN, &adapter->state))
4579                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
4580        }
4581
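            /* re-enable the causes handled here (link change, DMA out of sync,
             * and the VF mailbox when SR-IOV is in use) and our EIMS bit */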
4582        if (adapter->vfs_allocated_count)
4583                wr32(E1000_IMS, E1000_IMS_LSC |
4584                                E1000_IMS_VMMB |
4585                                E1000_IMS_DOUTSYNC);
4586        else
4587                wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4588        wr32(E1000_EIMS, adapter->eims_other);
4589
4590        return IRQ_HANDLED;
4591}
4592
4593static void igb_write_itr(struct igb_q_vector *q_vector)
4594{
4595        struct igb_adapter *adapter = q_vector->adapter;
4596        u32 itr_val = q_vector->itr_val & 0x7FFC;
4597
4598        if (!q_vector->set_itr)
4599                return;
4600
4601        if (!itr_val)
4602                itr_val = 0x4;
4603
4604        if (adapter->hw.mac.type == e1000_82575)
4605                itr_val |= itr_val << 16;
4606        else
4607                itr_val |= 0x8000000;
4608
4609        writel(itr_val, q_vector->itr_register);
4610        q_vector->set_itr = 0;
4611}
4612
4613static irqreturn_t igb_msix_ring(int irq, void *data)
4614{
4615        struct igb_q_vector *q_vector = data;
4616
4617        /* Write the ITR value calculated from the previous interrupt. */
4618        igb_write_itr(q_vector);
4619
4620        napi_schedule(&q_vector->napi);
4621
4622        return IRQ_HANDLED;
4623}
4624
4625#ifdef CONFIG_IGB_DCA
4626static void igb_update_dca(struct igb_q_vector *q_vector)
4627{
4628        struct igb_adapter *adapter = q_vector->adapter;
4629        struct e1000_hw *hw = &adapter->hw;
4630        int cpu = get_cpu();
4631
4632        if (q_vector->cpu == cpu)
4633                goto out_no_update;
4634
4635        if (q_vector->tx_ring) {
4636                int q = q_vector->tx_ring->reg_idx;
4637                u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4638                if (hw->mac.type == e1000_82575) {
4639                        dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4640                        dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4641                } else {
4642                        dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4643                        dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4644                                      E1000_DCA_TXCTRL_CPUID_SHIFT;
4645                }
4646                dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4647                wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4648        }
4649        if (q_vector->rx_ring) {
4650                int q = q_vector->rx_ring->reg_idx;
4651                u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4652                if (hw->mac.type == e1000_82575) {
4653                        dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4654                        dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4655                } else {
4656                        dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4657                        dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4658                                      E1000_DCA_RXCTRL_CPUID_SHIFT;
4659                }
4660                dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4661                dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4662                dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4663                wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4664        }
4665        q_vector->cpu = cpu;
4666out_no_update:
4667        put_cpu();
4668}
4669
4670static void igb_setup_dca(struct igb_adapter *adapter)
4671{
4672        struct e1000_hw *hw = &adapter->hw;
4673        int i;
4674
4675        if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4676                return;
4677
4678        /* Always use CB2 mode; the difference is masked in the CB driver. */
4679        wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4680
4681        for (i = 0; i < adapter->num_q_vectors; i++) {
4682                adapter->q_vector[i]->cpu = -1;
4683                igb_update_dca(adapter->q_vector[i]);
4684        }
4685}
4686
4687static int __igb_notify_dca(struct device *dev, void *data)
4688{
4689        struct net_device *netdev = dev_get_drvdata(dev);
4690        struct igb_adapter *adapter = netdev_priv(netdev);
4691        struct pci_dev *pdev = adapter->pdev;
4692        struct e1000_hw *hw = &adapter->hw;
4693        unsigned long event = *(unsigned long *)data;
4694
4695        switch (event) {
4696        case DCA_PROVIDER_ADD:
4697                /* if already enabled, don't do it again */
4698                if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4699                        break;
4700                if (dca_add_requester(dev) == 0) {
4701                        adapter->flags |= IGB_FLAG_DCA_ENABLED;
4702                        dev_info(&pdev->dev, "DCA enabled\n");
4703                        igb_setup_dca(adapter);
4704                        break;
4705                }
4706                /* Fall Through since DCA is disabled. */
4707        case DCA_PROVIDER_REMOVE:
4708                if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4709                        /* without this, a class_device is left
4710                         * hanging around in the sysfs model */
4711                        dca_remove_requester(dev);
4712                        dev_info(&pdev->dev, "DCA disabled\n");
4713                        adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4714                        wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4715                }
4716                break;
4717        }
4718
4719        return 0;
4720}
4721
4722static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4723                          void *p)
4724{
4725        int ret_val;
4726
4727        ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4728                                         __igb_notify_dca);
4729
4730        return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4731}
4732#endif /* CONFIG_IGB_DCA */
4733
4734static void igb_ping_all_vfs(struct igb_adapter *adapter)
4735{
4736        struct e1000_hw *hw = &adapter->hw;
4737        u32 ping;
4738        int i;
4739
4740        for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4741                ping = E1000_PF_CONTROL_MSG;
4742                if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4743                        ping |= E1000_VT_MSGTYPE_CTS;
4744                igb_write_mbx(hw, &ping, 1, i);
4745        }
4746}
4747
4748static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4749{
4750        struct e1000_hw *hw = &adapter->hw;
4751        u32 vmolr = rd32(E1000_VMOLR(vf));
4752        struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4753
4754        vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4755                            IGB_VF_FLAG_MULTI_PROMISC);
4756        vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4757
4758        if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4759                vmolr |= E1000_VMOLR_MPME;
4760                vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4761                *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4762        } else {
4763                /*
4764                 * if we have hashes and we are clearing a multicast promisc
4765                 * flag, we need to write the hashes to the MTA, as this step
4766                 * was previously skipped
4767                 */
4768                if (vf_data->num_vf_mc_hashes > 30) {
4769                        vmolr |= E1000_VMOLR_MPME;
4770                } else if (vf_data->num_vf_mc_hashes) {
4771                        int j;
4772                        vmolr |= E1000_VMOLR_ROMPE;
4773                        for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4774                                igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4775                }
4776        }
4777
4778        wr32(E1000_VMOLR(vf), vmolr);
4779
4780        /* if there are flags left unprocessed, they are likely not supported */
4781        if (*msgbuf & E1000_VT_MSGINFO_MASK)
4782                return -EINVAL;
4783
4784        return 0;
4786}
4787
4788static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4789                                  u32 *msgbuf, u32 vf)
4790{
4791        int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4792        u16 *hash_list = (u16 *)&msgbuf[1];
4793        struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4794        int i;
4795
4796        /* salt away the number of multicast addresses assigned
4797         * to this VF so they can be restored when the PF multicast
4798         * list changes
4799         */
4800        vf_data->num_vf_mc_hashes = n;
4801
4802        /* only up to 30 hash values supported */
4803        if (n > 30)
4804                n = 30;
4805
4806        /* store the hashes for later use */
4807        for (i = 0; i < n; i++)
4808                vf_data->vf_mc_hashes[i] = hash_list[i];
4809
4810        /* Flush and reset the mta with the new values */
4811        igb_set_rx_mode(adapter->netdev);
4812
4813        return 0;
4814}
4815
4816static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4817{
4818        struct e1000_hw *hw = &adapter->hw;
4819        struct vf_data_storage *vf_data;
4820        int i, j;
4821
4822        for (i = 0; i < adapter->vfs_allocated_count; i++) {
4823                u32 vmolr = rd32(E1000_VMOLR(i));
4824                vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4825
4826                vf_data = &adapter->vf_data[i];
4827
4828                if ((vf_data->num_vf_mc_hashes > 30) ||
4829                    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4830                        vmolr |= E1000_VMOLR_MPME;
4831                } else if (vf_data->num_vf_mc_hashes) {
4832                        vmolr |= E1000_VMOLR_ROMPE;
4833                        for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4834                                igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4835                }
4836                wr32(E1000_VMOLR(i), vmolr);
4837        }
4838}
4839
4840static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4841{
4842        struct e1000_hw *hw = &adapter->hw;
4843        u32 pool_mask, reg, vid;
4844        int i;
4845
4846        pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4847
4848        /* Find the vlan filter for this id */
4849        for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4850                reg = rd32(E1000_VLVF(i));
4851
4852                /* remove the vf from the pool */
4853                reg &= ~pool_mask;
4854
4855                /* if pool is empty then remove entry from vfta */
4856                if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4857                    (reg & E1000_VLVF_VLANID_ENABLE)) {
4858                        vid = reg & E1000_VLVF_VLANID_MASK;
4859                        igb_vfta_set(hw, vid, false);
4860                        reg = 0;
4861                }
4862
4863                wr32(E1000_VLVF(i), reg);
4864        }
4865
4866        adapter->vf_data[vf].vlans_enabled = 0;
4867}
4868
4869static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4870{
4871        struct e1000_hw *hw = &adapter->hw;
4872        u32 reg, i;
4873
4874        /* The vlvf table only exists on 82576 hardware and newer */
4875        if (hw->mac.type < e1000_82576)
4876                return -1;
4877
4878        /* we only need to do this if VMDq is enabled */
4879        if (!adapter->vfs_allocated_count)
4880                return -1;
4881
4882        /* Find the vlan filter for this id */
4883        for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4884                reg = rd32(E1000_VLVF(i));
4885                if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4886                    vid == (reg & E1000_VLVF_VLANID_MASK))
4887                        break;
4888        }
4889
4890        if (add) {
4891                if (i == E1000_VLVF_ARRAY_SIZE) {
4892                        /* Did not find a matching VLAN ID entry that was
4893                         * enabled.  Search for a free filter entry, i.e.
4894                         * one without the enable bit set
4895                         */
4896                        for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4897                                reg = rd32(E1000_VLVF(i));
4898                                if (!(reg & E1000_VLVF_VLANID_ENABLE))
4899                                        break;
4900                        }
4901                }
4902                if (i < E1000_VLVF_ARRAY_SIZE) {
4903                        /* Found an enabled/available entry */
4904                        reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4905
4906                        /* if !enabled we need to set this up in vfta */
4907                        if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4908                                /* add VID to filter table */
4909                                igb_vfta_set(hw, vid, true);
4910                                reg |= E1000_VLVF_VLANID_ENABLE;
4911                        }
4912                        reg &= ~E1000_VLVF_VLANID_MASK;
4913                        reg |= vid;
4914                        wr32(E1000_VLVF(i), reg);
4915
4916                        /* do not modify RLPML for PF devices */
4917                        if (vf >= adapter->vfs_allocated_count)
4918                                return 0;
4919
4920                        if (!adapter->vf_data[vf].vlans_enabled) {
4921                                u32 size;
4922                                reg = rd32(E1000_VMOLR(vf));
4923                                size = reg & E1000_VMOLR_RLPML_MASK;
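                                    /* grow the VF's max packet size to make
                                     * room for a 4-byte VLAN tag */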
4924                                size += 4;
4925                                reg &= ~E1000_VMOLR_RLPML_MASK;
4926                                reg |= size;
4927                                wr32(E1000_VMOLR(vf), reg);
4928                        }
4929
4930                        adapter->vf_data[vf].vlans_enabled++;
4931                        return 0;
4932                }
4933        } else {
4934                if (i < E1000_VLVF_ARRAY_SIZE) {
4935                        /* remove vf from the pool */
4936                        reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4937                        /* if pool is empty then remove entry from vfta */
4938                        if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4939                                reg = 0;
4940                                igb_vfta_set(hw, vid, false);
4941                        }
4942                        wr32(E1000_VLVF(i), reg);
4943
4944                        /* do not modify RLPML for PF devices */
4945                        if (vf >= adapter->vfs_allocated_count)
4946                                return 0;
4947
4948                        adapter->vf_data[vf].vlans_enabled--;
4949                        if (!adapter->vf_data[vf].vlans_enabled) {
4950                                u32 size;
4951                                reg = rd32(E1000_VMOLR(vf));
4952                                size = reg & E1000_VMOLR_RLPML_MASK;
4953                                size -= 4;
4954                                reg &= ~E1000_VMOLR_RLPML_MASK;
4955                                reg |= size;
4956                                wr32(E1000_VMOLR(vf), reg);
4957                        }
4958                }
4959        }
4960        return 0;
4961}
4962
4963static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4964{
4965        struct e1000_hw *hw = &adapter->hw;
4966
4967        if (vid)
4968                wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4969        else
4970                wr32(E1000_VMVIR(vf), 0);
4971}
4972
4973static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4974                               int vf, u16 vlan, u8 qos)
4975{
4976        int err = 0;
4977        struct igb_adapter *adapter = netdev_priv(netdev);
4978
4979        if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4980                return -EINVAL;
4981        if (vlan || qos) {
4982                err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4983                if (err)
4984                        goto out;
4985                igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4986                igb_set_vmolr(adapter, vf, !vlan);
4987                adapter->vf_data[vf].pf_vlan = vlan;
4988                adapter->vf_data[vf].pf_qos = qos;
4989                dev_info(&adapter->pdev->dev,
4990                         "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4991                if (test_bit(__IGB_DOWN, &adapter->state)) {
4992                        dev_warn(&adapter->pdev->dev,
4993                                 "The VF VLAN has been set,"
4994                                 " but the PF device is not up.\n");
4995                        dev_warn(&adapter->pdev->dev,
4996                                 "Bring the PF device up before"
4997                                 " attempting to use the VF device.\n");
4998                }
4999        } else {
5000                igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5001                                   false, vf);
5002                igb_set_vmvir(adapter, vlan, vf);
5003                igb_set_vmolr(adapter, vf, true);
5004                adapter->vf_data[vf].pf_vlan = 0;
5005                adapter->vf_data[vf].pf_qos = 0;
5006        }
5007out:
5008        return err;
5009}
5010
5011static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5012{
5013        int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5014        int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5015
5016        return igb_vlvf_set(adapter, vid, add, vf);
5017}
5018
5019static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5020{
5021        /* clear flags - except the flag that indicates PF has set the MAC */
5022        adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5023        adapter->vf_data[vf].last_nack = jiffies;
5024
5025        /* reset offloads to defaults */
5026        igb_set_vmolr(adapter, vf, true);
5027
5028        /* reset vlans for device */
5029        igb_clear_vf_vfta(adapter, vf);
5030        if (adapter->vf_data[vf].pf_vlan)
5031                igb_ndo_set_vf_vlan(adapter->netdev, vf,
5032                                    adapter->vf_data[vf].pf_vlan,
5033                                    adapter->vf_data[vf].pf_qos);
5034        else
5035                igb_clear_vf_vfta(adapter, vf);
5036
5037        /* reset multicast table array for vf */
5038        adapter->vf_data[vf].num_vf_mc_hashes = 0;
5039
5040        /* Flush and reset the mta with the new values */
5041        igb_set_rx_mode(adapter->netdev);
5042}
5043
5044static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5045{
5046        unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5047
5048        /* generate a new mac address as we were hotplug removed/added */
5049        if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5050                random_ether_addr(vf_mac);
5051
5052        /* process remaining reset events */
5053        igb_vf_reset(adapter, vf);
5054}
5055
5056static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5057{
5058        struct e1000_hw *hw = &adapter->hw;
5059        unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5060        int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5061        u32 reg, msgbuf[3];
5062        u8 *addr = (u8 *)(&msgbuf[1]);
5063
5064        /* process all the same items cleared in a function level reset */
5065        igb_vf_reset(adapter, vf);
5066
5067        /* set vf mac address */
5068        igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5069
5070        /* enable transmit and receive for vf */
5071        reg = rd32(E1000_VFTE);
5072        wr32(E1000_VFTE, reg | (1 << vf));
5073        reg = rd32(E1000_VFRE);
5074        wr32(E1000_VFRE, reg | (1 << vf));
5075
5076        adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5077
5078        /* reply to reset with ack and vf mac address */
5079        msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
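            /* copy the 6-byte (ETH_ALEN) MAC address into words 1 and 2 of the reply */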
5080        memcpy(addr, vf_mac, 6);
5081        igb_write_mbx(hw, msgbuf, 3, vf);
5082}
5083
5084static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5085{
5086        /*
5087         * The VF MAC Address is stored in a packed array of bytes
5088         * starting at the second 32 bit word of the msg array
5089         */
5090        unsigned char *addr = (unsigned char *)&msg[1];
5091        int err = -1;
5092
5093        if (is_valid_ether_addr(addr))
5094                err = igb_set_vf_mac(adapter, vf, addr);
5095
5096        return err;
5097}
5098
5099static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5100{
5101        struct e1000_hw *hw = &adapter->hw;
5102        struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5103        u32 msg = E1000_VT_MSGTYPE_NACK;
5104
5105        /* if the device isn't clear to send, it shouldn't be reading either */
5106        if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5107            time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5108                igb_write_mbx(hw, &msg, 1, vf);
5109                vf_data->last_nack = jiffies;
5110        }
5111}
5112
5113static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5114{
5115        struct pci_dev *pdev = adapter->pdev;
5116        u32 msgbuf[E1000_VFMAILBOX_SIZE];
5117        struct e1000_hw *hw = &adapter->hw;
5118        struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5119        s32 retval;
5120
5121        retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5122
5123        if (retval) {
5124                /* if receive failed, revoke the VF's CTS status and restart init */
5125                dev_err(&pdev->dev, "Error receiving message from VF\n");
5126                vf_data->flags &= ~IGB_VF_FLAG_CTS;
5127                if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5128                        return;
5129                goto out;
5130        }
5131
5132        /* this is a message we already processed, do nothing */
5133        if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5134                return;
5135
5136        /*
5137         * until the vf completes a reset it should not be
5138         * allowed to start any configuration.
5139         */
5140
5141        if (msgbuf[0] == E1000_VF_RESET) {
5142                igb_vf_reset_msg(adapter, vf);
5143                return;
5144        }
5145
5146        if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5147                if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5148                        return;
5149                retval = -1;
5150                goto out;
5151        }
5152
5153        switch ((msgbuf[0] & 0xFFFF)) {
5154        case E1000_VF_SET_MAC_ADDR:
5155                retval = -EINVAL;
5156                if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5157                        retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5158                else
5159                        dev_warn(&pdev->dev,
5160                                 "VF %d attempted to override administratively "
5161                                 "set MAC address\nReload the VF driver to "
5162                                 "resume operations\n", vf);
5163                break;
5164        case E1000_VF_SET_PROMISC:
5165                retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5166                break;
5167        case E1000_VF_SET_MULTICAST:
5168                retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5169                break;
5170        case E1000_VF_SET_LPE:
5171                retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5172                break;
5173        case E1000_VF_SET_VLAN:
5174                retval = -1;
5175                if (vf_data->pf_vlan)
5176                        dev_warn(&pdev->dev,
5177                                 "VF %d attempted to override administratively "
5178                                 "set VLAN tag\nReload the VF driver to "
5179                                 "resume operations\n", vf);
5180                else
5181                        retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5182                break;
5183        default:
5184                dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5185                retval = -1;
5186                break;
5187        }
5188
5189        msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5190out:
5191        /* notify the VF of the results of what it sent us */
5192        if (retval)
5193                msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5194        else
5195                msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5196
5197        igb_write_mbx(hw, msgbuf, 1, vf);
5198}
5199
5200static void igb_msg_task(struct igb_adapter *adapter)
5201{
5202        struct e1000_hw *hw = &adapter->hw;
5203        u32 vf;
5204
5205        for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5206                /* process any reset requests */
5207                if (!igb_check_for_rst(hw, vf))
5208                        igb_vf_reset_event(adapter, vf);
5209
5210                /* process any messages pending */
5211                if (!igb_check_for_msg(hw, vf))
5212                        igb_rcv_msg_from_vf(adapter, vf);
5213
5214                /* process any acks */
5215                if (!igb_check_for_ack(hw, vf))
5216                        igb_rcv_ack_from_vf(adapter, vf);
5217        }
5218}
5219
5220/**
5221 *  igb_set_uta - Set unicast filter table address
5222 *  @adapter: board private structure
5223 *
5224 *  The unicast table address is a register array of 32-bit registers.
5225 *  The table is meant to be used in a way similar to how the MTA is used;
5226 *  however, due to certain limitations in the hardware, it is necessary to
5227 *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5228 *  enable bit to allow VLAN tag stripping when promiscuous mode is enabled
5229 **/
5230static void igb_set_uta(struct igb_adapter *adapter)
5231{
5232        struct e1000_hw *hw = &adapter->hw;
5233        int i;
5234
5235        /* The UTA table only exists on 82576 hardware and newer */
5236        if (hw->mac.type < e1000_82576)
5237                return;
5238
5239        /* we only need to do this if VMDq is enabled */
5240        if (!adapter->vfs_allocated_count)
5241                return;
5242
5243        for (i = 0; i < hw->mac.uta_reg_count; i++)
5244                array_wr32(E1000_UTA, i, ~0);
5245}
5246
5247/**
5248 * igb_intr_msi - Interrupt Handler
5249 * @irq: interrupt number
5250 * @data: pointer to a network interface device structure
5251 **/
5252static irqreturn_t igb_intr_msi(int irq, void *data)
5253{
5254        struct igb_adapter *adapter = data;
5255        struct igb_q_vector *q_vector = adapter->q_vector[0];
5256        struct e1000_hw *hw = &adapter->hw;
5257        /* reading ICR disables interrupts using IAM */
5258        u32 icr = rd32(E1000_ICR);
5259
5260        igb_write_itr(q_vector);
5261
5262        if (icr & E1000_ICR_DRSTA)
5263                schedule_work(&adapter->reset_task);
5264
5265        if (icr & E1000_ICR_DOUTSYNC) {
5266                /* HW is reporting DMA is out of sync */
5267                adapter->stats.doosync++;
5268        }
5269
5270        if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5271                hw->mac.get_link_status = 1;
5272                if (!test_bit(__IGB_DOWN, &adapter->state))
5273                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
5274        }
5275
5276        napi_schedule(&q_vector->napi);
5277
5278        return IRQ_HANDLED;
5279}
5280
5281/**
5282 * igb_intr - Legacy Interrupt Handler
5283 * @irq: interrupt number
5284 * @data: pointer to a network interface device structure
5285 **/
5286static irqreturn_t igb_intr(int irq, void *data)
5287{
5288        struct igb_adapter *adapter = data;
5289        struct igb_q_vector *q_vector = adapter->q_vector[0];
5290        struct e1000_hw *hw = &adapter->hw;
5291        /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5292         * need for the IMC write */
5293        u32 icr = rd32(E1000_ICR);
5294        if (!icr)
5295                return IRQ_NONE;  /* Not our interrupt */
5296
5297        igb_write_itr(q_vector);
5298
5299        /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5300         * not set, then the adapter didn't send an interrupt */
5301        if (!(icr & E1000_ICR_INT_ASSERTED))
5302                return IRQ_NONE;
5303
5304        if (icr & E1000_ICR_DRSTA)
5305                schedule_work(&adapter->reset_task);
5306
5307        if (icr & E1000_ICR_DOUTSYNC) {
5308                /* HW is reporting DMA is out of sync */
5309                adapter->stats.doosync++;
5310        }
5311
5312        if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5313                hw->mac.get_link_status = 1;
5314                /* guard against interrupt when we're going down */
5315                if (!test_bit(__IGB_DOWN, &adapter->state))
5316                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
5317        }
5318
5319        napi_schedule(&q_vector->napi);
5320
5321        return IRQ_HANDLED;
5322}
5323
5324static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5325{
5326        struct igb_adapter *adapter = q_vector->adapter;
5327        struct e1000_hw *hw = &adapter->hw;
5328
5329        if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5330            (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5331                if (!adapter->msix_entries)
5332                        igb_set_itr(adapter);
5333                else
5334                        igb_update_ring_itr(q_vector);
5335        }
5336
5337        if (!test_bit(__IGB_DOWN, &adapter->state)) {
5338                if (adapter->msix_entries)
5339                        wr32(E1000_EIMS, q_vector->eims_value);
5340                else
5341                        igb_irq_enable(adapter);
5342        }
5343}
5344
5345/**
5346 * igb_poll - NAPI Rx polling callback
5347 * @napi: napi polling structure
5348 * @budget: count of how many packets we should handle
5349 **/
5350static int igb_poll(struct napi_struct *napi, int budget)
5351{
5352        struct igb_q_vector *q_vector = container_of(napi,
5353                                                     struct igb_q_vector,
5354                                                     napi);
5355        int tx_clean_complete = 1, work_done = 0;
5356
5357#ifdef CONFIG_IGB_DCA
5358        if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5359                igb_update_dca(q_vector);
5360#endif
5361        if (q_vector->tx_ring)
5362                tx_clean_complete = igb_clean_tx_irq(q_vector);
5363
5364        if (q_vector->rx_ring)
5365                igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5366
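            /* Tx cleanup didn't finish: claim the full budget so NAPI keeps
             * polling rather than re-enabling the interrupt */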
5367        if (!tx_clean_complete)
5368                work_done = budget;
5369
5370        /* If not enough Rx work done, exit the polling mode */
5371        if (work_done < budget) {
5372                napi_complete(napi);
5373                igb_ring_irq_enable(q_vector);
5374        }
5375
5376        return work_done;
5377}
5378
5379/**
5380 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5381 * @adapter: board private structure
5382 * @shhwtstamps: timestamp structure to update
5383 * @regval: unsigned 64bit system time value.
5384 *
5385 * We need to convert the system time value stored in the RX/TXSTMP registers
5386 * into a hwtstamp which can be used by the upper level timestamping functions
5387 */
5388static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5389                                   struct skb_shared_hwtstamps *shhwtstamps,
5390                                   u64 regval)
5391{
5392        u64 ns;
5393
5394        /*
5395         * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL; shift this up by
5396         * 24 to match the clock shift we set up earlier.
5397         */
5398        if (adapter->hw.mac.type == e1000_82580)
5399                regval <<= IGB_82580_TSYNC_SHIFT;
5400
5401        ns = timecounter_cyc2time(&adapter->clock, regval);
5402        timecompare_update(&adapter->compare, ns);
5403        memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5404        shhwtstamps->hwtstamp = ns_to_ktime(ns);
5405        shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5406}
5407
5408/**
5409 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5410 * @q_vector: pointer to q_vector containing needed info
5411 * @buffer: pointer to igb_buffer structure
5412 *
5413 * If we were asked to do hardware stamping and such a time stamp is
5414 * available, then it must have been for this skb here because we
5415 * allow only one such packet into the queue.
5416 */
5417static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5418{
5419        struct igb_adapter *adapter = q_vector->adapter;
5420        struct e1000_hw *hw = &adapter->hw;
5421        struct skb_shared_hwtstamps shhwtstamps;
5422        u64 regval;
5423
5424        /* exit if the skb was not flagged for hw timestamping or the TX stamp is not valid */
5425        if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5426            !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5427                return;
5428
5429        regval = rd32(E1000_TXSTMPL);
5430        regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5431
5432        igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5433        skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5434}
5435
5436/**
5437 * igb_clean_tx_irq - Reclaim resources after transmit completes
5438 * @q_vector: pointer to q_vector containing needed info
5439 * returns true if ring is completely cleaned
5440 **/
5441static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5442{
5443        struct igb_adapter *adapter = q_vector->adapter;
5444        struct igb_ring *tx_ring = q_vector->tx_ring;
5445        struct net_device *netdev = tx_ring->netdev;
5446        struct e1000_hw *hw = &adapter->hw;
5447        struct igb_buffer *buffer_info;
5448        union e1000_adv_tx_desc *tx_desc, *eop_desc;
5449        unsigned int total_bytes = 0, total_packets = 0;
5450        unsigned int i, eop, count = 0;
5451        bool cleaned = false;
5452
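            /* next_to_watch records the index of each packet's end-of-packet
             * descriptor; once hardware sets DD there, the whole packet can be
             * reclaimed */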
5453        i = tx_ring->next_to_clean;
5454        eop = tx_ring->buffer_info[i].next_to_watch;
5455        eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5456
5457        while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5458               (count < tx_ring->count)) {
5459                rmb();  /* read buffer_info after eop_desc status */
5460                for (cleaned = false; !cleaned; count++) {
5461                        tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5462                        buffer_info = &tx_ring->buffer_info[i];
5463                        cleaned = (i == eop);
5464
5465                        if (buffer_info->skb) {
5466                                total_bytes += buffer_info->bytecount;
5467                                /* gso_segs is currently only valid for tcp */
5468                                total_packets += buffer_info->gso_segs;
5469                                igb_tx_hwtstamp(q_vector, buffer_info);
5470                        }
5471
5472                        igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5473                        tx_desc->wb.status = 0;
5474
5475                        i++;
5476                        if (i == tx_ring->count)
5477                                i = 0;
5478                }
5479                eop = tx_ring->buffer_info[i].next_to_watch;
5480                eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5481        }
5482
5483        tx_ring->next_to_clean = i;
5484
5485        if (unlikely(count &&
5486                     netif_carrier_ok(netdev) &&
5487                     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5488                /* Make sure that anybody stopping the queue after this
5489                 * sees the new next_to_clean.
5490                 */
5491                smp_mb();
5492                if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5493                    !(test_bit(__IGB_DOWN, &adapter->state))) {
5494                        netif_wake_subqueue(netdev, tx_ring->queue_index);
5495
5496                        u64_stats_update_begin(&tx_ring->tx_syncp);
5497                        tx_ring->tx_stats.restart_queue++;
5498                        u64_stats_update_end(&tx_ring->tx_syncp);
5499                }
5500        }
5501
5502        if (tx_ring->detect_tx_hung) {
5503                /* Detect a transmit hang in hardware; this serializes the
5504                 * check with the clearing of time_stamp and movement of i */
5505                tx_ring->detect_tx_hung = false;
5506                if (tx_ring->buffer_info[i].time_stamp &&
5507                    time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5508                               (adapter->tx_timeout_factor * HZ)) &&
5509                    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5510
5511                        /* detected Tx unit hang */
5512                        dev_err(tx_ring->dev,
5513                                "Detected Tx Unit Hang\n"
5514                                "  Tx Queue             <%d>\n"
5515                                "  TDH                  <%x>\n"
5516                                "  TDT                  <%x>\n"
5517                                "  next_to_use          <%x>\n"
5518                                "  next_to_clean        <%x>\n"
5519                                "buffer_info[next_to_clean]\n"
5520                                "  time_stamp           <%lx>\n"
5521                                "  next_to_watch        <%x>\n"
5522                                "  jiffies              <%lx>\n"
5523                                "  desc.status          <%x>\n",
5524                                tx_ring->queue_index,
5525                                readl(tx_ring->head),
5526                                readl(tx_ring->tail),
5527                                tx_ring->next_to_use,
5528                                tx_ring->next_to_clean,
5529                                tx_ring->buffer_info[eop].time_stamp,
5530                                eop,
5531                                jiffies,
5532                                eop_desc->wb.status);
5533                        netif_stop_subqueue(netdev, tx_ring->queue_index);
5534                }
5535        }
5536        tx_ring->total_bytes += total_bytes;
5537        tx_ring->total_packets += total_packets;
5538        u64_stats_update_begin(&tx_ring->tx_syncp);
5539        tx_ring->tx_stats.bytes += total_bytes;
5540        tx_ring->tx_stats.packets += total_packets;
5541        u64_stats_update_end(&tx_ring->tx_syncp);
5542        return count < tx_ring->count;
5543}
5544
5545/**
5546 * igb_receive_skb - helper function to handle rx indications
5547 * @q_vector: structure containing interrupt and ring information
5548 * @skb: packet to send up
5549 * @vlan_tag: vlan tag for packet
5550 **/
5551static void igb_receive_skb(struct igb_q_vector *q_vector,
5552                            struct sk_buff *skb,
5553                            u16 vlan_tag)
5554{
5555        struct igb_adapter *adapter = q_vector->adapter;
5556
5557        if (vlan_tag && adapter->vlgrp)
5558                vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5559                                 vlan_tag, skb);
5560        else
5561                napi_gro_receive(&q_vector->napi, skb);
5562}
5563
5564static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5565                                       u32 status_err, struct sk_buff *skb)
5566{
5567        skb_checksum_none_assert(skb);
5568
5569        /* the Ignore Checksum bit is set, or checksum is disabled through ethtool */
5570        if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5571             (status_err & E1000_RXD_STAT_IXSM))
5572                return;
5573
5574        /* TCP/UDP checksum error bit is set */
5575        if (status_err &
5576            (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5577                /*
5578                 * work around an erratum with SCTP packets where the TCPE (aka
5579                 * L4E) bit is set incorrectly on 64-byte (60 bytes w/o CRC)
5580                 * packets; let the stack verify the crc32c instead
5581                 */
5582                if ((skb->len == 60) &&
5583                    (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5584                        u64_stats_update_begin(&ring->rx_syncp);
5585                        ring->rx_stats.csum_err++;
5586                        u64_stats_update_end(&ring->rx_syncp);
5587                }
5588                /* let the stack verify checksum errors */
5589                return;
5590        }
5591        /* It must be a TCP or UDP packet with a valid checksum */
5592        if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5593                skb->ip_summed = CHECKSUM_UNNECESSARY;
5594
5595        dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5596}
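
/*
 * Editor's note: the IGB_RING_FLAG_RX_CSUM check above is the hook for
 * disabling receive checksum offload from ethtool (e.g. "ethtool -K eth0
 * rx off" on this kernel generation); with the flag cleared the function
 * returns early and the stack verifies checksums in software.  The
 * interface name is a placeholder.
 */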
5597
5598static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5599                                   struct sk_buff *skb)
5600{
5601        struct igb_adapter *adapter = q_vector->adapter;
5602        struct e1000_hw *hw = &adapter->hw;
5603        u64 regval;
5604
5605        /*
5606         * If this bit is set, then the RX registers contain the time stamp. No
5607         * other packet will be time stamped until we read these registers, so
5608         * read the registers to make them available again. Because only one
5609         * packet can be time stamped at a time, we know that the register
5610         * values must belong to this one here and therefore we don't need to
5611         * compare any of the additional attributes stored for it.
5612         *
5613         * If nothing went wrong, then it should have a shared tx_flags that we
5614         * can turn into a skb_shared_hwtstamps.
5615         */
5616        if (staterr & E1000_RXDADV_STAT_TSIP) {
5617                u32 *stamp = (u32 *)skb->data;
5618                regval = le32_to_cpu(*(stamp + 2));
5619                regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5620                skb_pull(skb, IGB_TS_HDR_LEN);
5621        } else {
5622                if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5623                        return;
5624
5625                regval = rd32(E1000_RXSTMPL);
5626                regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5627        }
5628
5629        igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5630}
5631static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5632                               union e1000_adv_rx_desc *rx_desc)
5633{
5634        /* HW will not DMA in data larger than the given buffer, even if it
5635         * parses the header (an NFS header, for example) to be larger.  In
5636         * that case, it fills the header buffer and spills the rest into the page.
5637         */
5638        u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5639                   E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5640        if (hlen > rx_ring->rx_buffer_len)
5641                hlen = rx_ring->rx_buffer_len;
5642        return hlen;
5643}
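
/*
 * Editor's note: a minimal worked example of the header-length extraction
 * above, assuming the usual hardware definitions (not restated in this file)
 * E1000_RXDADV_HDRBUFLEN_MASK == 0x7FE0 and E1000_RXDADV_HDRBUFLEN_SHIFT == 5:
 *
 *   hdr_info = 0x0c40  (value already converted by le16_to_cpu)
 *   hlen     = (0x0c40 & 0x7fe0) >> 5 = 0x62 = 98 bytes
 *
 * If rx_ring->rx_buffer_len were, say, 64, igb_get_hlen() would clamp the
 * result to 64.
 */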
5644
5645static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5646                                 int *work_done, int budget)
5647{
5648        struct igb_ring *rx_ring = q_vector->rx_ring;
5649        struct net_device *netdev = rx_ring->netdev;
5650        struct device *dev = rx_ring->dev;
5651        union e1000_adv_rx_desc *rx_desc, *next_rxd;
5652        struct igb_buffer *buffer_info, *next_buffer;
5653        struct sk_buff *skb;
5654        bool cleaned = false;
5655        int cleaned_count = 0;
5656        int current_node = numa_node_id();
5657        unsigned int total_bytes = 0, total_packets = 0;
5658        unsigned int i;
5659        u32 staterr;
5660        u16 length;
5661        u16 vlan_tag;
5662
5663        i = rx_ring->next_to_clean;
5664        buffer_info = &rx_ring->buffer_info[i];
5665        rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5666        staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5667
5668        while (staterr & E1000_RXD_STAT_DD) {
5669                if (*work_done >= budget)
5670                        break;
5671                (*work_done)++;
5672                rmb(); /* read descriptor and rx_buffer_info after status DD */
5673
5674                skb = buffer_info->skb;
5675                prefetch(skb->data - NET_IP_ALIGN);
5676                buffer_info->skb = NULL;
5677
5678                i++;
5679                if (i == rx_ring->count)
5680                        i = 0;
5681
5682                next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5683                prefetch(next_rxd);
5684                next_buffer = &rx_ring->buffer_info[i];
5685
5686                length = le16_to_cpu(rx_desc->wb.upper.length);
5687                cleaned = true;
5688                cleaned_count++;
5689
5690                if (buffer_info->dma) {
5691                        dma_unmap_single(dev, buffer_info->dma,
5692                                         rx_ring->rx_buffer_len,
5693                                         DMA_FROM_DEVICE);
5694                        buffer_info->dma = 0;
5695                        if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5696                                skb_put(skb, length);
5697                                goto send_up;
5698                        }
5699                        skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5700                }
5701
5702                if (length) {
5703                        dma_unmap_page(dev, buffer_info->page_dma,
5704                                       PAGE_SIZE / 2, DMA_FROM_DEVICE);
5705                        buffer_info->page_dma = 0;
5706
5707                        skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5708                                                buffer_info->page,
5709                                                buffer_info->page_offset,
5710                                                length);
5711
5712                        if ((page_count(buffer_info->page) != 1) ||
5713                            (page_to_nid(buffer_info->page) != current_node))
5714                                buffer_info->page = NULL;
5715                        else
5716                                get_page(buffer_info->page);
5717
5718                        skb->len += length;
5719                        skb->data_len += length;
5720                        skb->truesize += length;
5721                }
5722
5723                if (!(staterr & E1000_RXD_STAT_EOP)) {
5724                        buffer_info->skb = next_buffer->skb;
5725                        buffer_info->dma = next_buffer->dma;
5726                        next_buffer->skb = skb;
5727                        next_buffer->dma = 0;
5728                        goto next_desc;
5729                }
5730send_up:
5731                if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5732                        dev_kfree_skb_irq(skb);
5733                        goto next_desc;
5734                }
5735
5736                if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5737                        igb_rx_hwtstamp(q_vector, staterr, skb);
5738                total_bytes += skb->len;
5739                total_packets++;
5740
5741                igb_rx_checksum_adv(rx_ring, staterr, skb);
5742
5743                skb->protocol = eth_type_trans(skb, netdev);
5744                skb_record_rx_queue(skb, rx_ring->queue_index);
5745
5746                vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5747                            le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5748
5749                igb_receive_skb(q_vector, skb, vlan_tag);
5750
5751next_desc:
5752                rx_desc->wb.upper.status_error = 0;
5753
5754                /* return some buffers to hardware, one at a time is too slow */
5755                if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5756                        igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5757                        cleaned_count = 0;
5758                }
5759
5760                /* use prefetched values */
5761                rx_desc = next_rxd;
5762                buffer_info = next_buffer;
5763                staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5764        }
5765
5766        rx_ring->next_to_clean = i;
5767        cleaned_count = igb_desc_unused(rx_ring);
5768
5769        if (cleaned_count)
5770                igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5771
5772        rx_ring->total_packets += total_packets;
5773        rx_ring->total_bytes += total_bytes;
5774        u64_stats_update_begin(&rx_ring->rx_syncp);
5775        rx_ring->rx_stats.packets += total_packets;
5776        rx_ring->rx_stats.bytes += total_bytes;
5777        u64_stats_update_end(&rx_ring->rx_syncp);
5778        return cleaned;
5779}
5780
5781/**
5782 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5783 * @rx_ring: pointer to the rx descriptor ring to refill
5784 **/
5785void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5786{
5787        struct net_device *netdev = rx_ring->netdev;
5788        union e1000_adv_rx_desc *rx_desc;
5789        struct igb_buffer *buffer_info;
5790        struct sk_buff *skb;
5791        unsigned int i;
5792        int bufsz;
5793
5794        i = rx_ring->next_to_use;
5795        buffer_info = &rx_ring->buffer_info[i];
5796
5797        bufsz = rx_ring->rx_buffer_len;
5798
5799        while (cleaned_count--) {
5800                rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5801
5802                if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5803                        if (!buffer_info->page) {
5804                                buffer_info->page = netdev_alloc_page(netdev);
5805                                if (unlikely(!buffer_info->page)) {
5806                                        u64_stats_update_begin(&rx_ring->rx_syncp);
5807                                        rx_ring->rx_stats.alloc_failed++;
5808                                        u64_stats_update_end(&rx_ring->rx_syncp);
5809                                        goto no_buffers;
5810                                }
5811                                buffer_info->page_offset = 0;
5812                        } else {
5813                                buffer_info->page_offset ^= PAGE_SIZE / 2;
5814                        }
5815                        buffer_info->page_dma =
5816                                dma_map_page(rx_ring->dev, buffer_info->page,
5817                                             buffer_info->page_offset,
5818                                             PAGE_SIZE / 2,
5819                                             DMA_FROM_DEVICE);
5820                        if (dma_mapping_error(rx_ring->dev,
5821                                              buffer_info->page_dma)) {
5822                                buffer_info->page_dma = 0;
5823                                u64_stats_update_begin(&rx_ring->rx_syncp);
5824                                rx_ring->rx_stats.alloc_failed++;
5825                                u64_stats_update_end(&rx_ring->rx_syncp);
5826                                goto no_buffers;
5827                        }
5828                }
5829
5830                skb = buffer_info->skb;
5831                if (!skb) {
5832                        skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5833                        if (unlikely(!skb)) {
5834                                u64_stats_update_begin(&rx_ring->rx_syncp);
5835                                rx_ring->rx_stats.alloc_failed++;
5836                                u64_stats_update_end(&rx_ring->rx_syncp);
5837                                goto no_buffers;
5838                        }
5839
5840                        buffer_info->skb = skb;
5841                }
5842                if (!buffer_info->dma) {
5843                        buffer_info->dma = dma_map_single(rx_ring->dev,
5844                                                          skb->data,
5845                                                          bufsz,
5846                                                          DMA_FROM_DEVICE);
5847                        if (dma_mapping_error(rx_ring->dev,
5848                                              buffer_info->dma)) {
5849                                buffer_info->dma = 0;
5850                                u64_stats_update_begin(&rx_ring->rx_syncp);
5851                                rx_ring->rx_stats.alloc_failed++;
5852                                u64_stats_update_end(&rx_ring->rx_syncp);
5853                                goto no_buffers;
5854                        }
5855                }
5856                /* Refresh the desc even if buffer_addrs didn't change because
5857                 * each write-back erases this info. */
5858                if (bufsz < IGB_RXBUFFER_1024) {
5859                        rx_desc->read.pkt_addr =
5860                             cpu_to_le64(buffer_info->page_dma);
5861                        rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5862                } else {
5863                        rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5864                        rx_desc->read.hdr_addr = 0;
5865                }
5866
5867                i++;
5868                if (i == rx_ring->count)
5869                        i = 0;
5870                buffer_info = &rx_ring->buffer_info[i];
5871        }
5872
5873no_buffers:
5874        if (rx_ring->next_to_use != i) {
5875                rx_ring->next_to_use = i;
5876                if (i == 0)
5877                        i = (rx_ring->count - 1);
5878                else
5879                        i--;
5880
5881                /* Force memory writes to complete before letting h/w
5882                 * know there are new descriptors to fetch.  (Only
5883                 * applicable for weak-ordered memory model archs,
5884                 * such as IA-64). */
5885                wmb();
5886                writel(i, rx_ring->tail);
5887        }
5888}
5889
5890/**
5891 * igb_mii_ioctl - handle MII register access ioctls
5892 * @netdev: network interface device structure
5893 * @ifr: pointer to interface request structure carrying the MII data
5894 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
5895 **/
5896static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5897{
5898        struct igb_adapter *adapter = netdev_priv(netdev);
5899        struct mii_ioctl_data *data = if_mii(ifr);
5900
5901        if (adapter->hw.phy.media_type != e1000_media_type_copper)
5902                return -EOPNOTSUPP;
5903
5904        switch (cmd) {
5905        case SIOCGMIIPHY:
5906                data->phy_id = adapter->hw.phy.addr;
5907                break;
5908        case SIOCGMIIREG:
5909                if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5910                                     &data->val_out))
5911                        return -EIO;
5912                break;
5913        case SIOCSMIIREG:
5914        default:
5915                return -EOPNOTSUPP;
5916        }
5917        return 0;
5918}
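
/*
 * Editor's note: an illustrative userspace sketch (not part of the driver) of
 * how the MII ioctls handled above are typically issued.  The interface name
 * "eth0" and the register choice are placeholders.
 *
 *   #include <string.h>
 *   #include <sys/ioctl.h>
 *   #include <sys/socket.h>
 *   #include <net/if.h>
 *   #include <linux/mii.h>
 *   #include <linux/sockios.h>
 *
 *   struct ifreq ifr = { 0 };
 *   struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 *   int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *   strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *   ioctl(fd, SIOCGMIIPHY, &ifr);        // fills mii->phy_id
 *   mii->reg_num = MII_BMSR;             // basic mode status register
 *   ioctl(fd, SIOCGMIIREG, &ifr);        // result returned in mii->val_out
 */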
5919
5920/**
5921 * igb_hwtstamp_ioctl - control hardware time stamping
5922 * @netdev: network interface device structure
5923 * @ifr: interface request carrying a struct hwtstamp_config
5924 * @cmd: ioctl command (SIOCSHWTSTAMP)
5925 *
5926 * Outgoing time stamping can be enabled and disabled. Play nice and
5927 * disable it when requested, although it shouldn't cause any overhead
5928 * when no packet needs it. At most one packet in the queue may be
5929 * marked for time stamping, otherwise it would be impossible to tell
5930 * for sure to which packet the hardware time stamp belongs.
5931 *
5932 * Incoming time stamping has to be configured via the hardware
5933 * filters. Not all combinations are supported, in particular event
5934 * type has to be specified. Matching the kind of event packet is
5935 * not supported, with the exception of "all V2 events regardless of
5936 * layer 2 or 4".
5937 *
5938 **/
5939static int igb_hwtstamp_ioctl(struct net_device *netdev,
5940                              struct ifreq *ifr, int cmd)
5941{
5942        struct igb_adapter *adapter = netdev_priv(netdev);
5943        struct e1000_hw *hw = &adapter->hw;
5944        struct hwtstamp_config config;
5945        u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5946        u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5947        u32 tsync_rx_cfg = 0;
5948        bool is_l4 = false;
5949        bool is_l2 = false;
5950        u32 regval;
5951
5952        if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5953                return -EFAULT;
5954
5955        /* reserved for future extensions */
5956        if (config.flags)
5957                return -EINVAL;
5958
5959        switch (config.tx_type) {
5960        case HWTSTAMP_TX_OFF:
5961                tsync_tx_ctl = 0; /* fall through */
5962        case HWTSTAMP_TX_ON:
5963                break;
5964        default:
5965                return -ERANGE;
5966        }
5967
5968        switch (config.rx_filter) {
5969        case HWTSTAMP_FILTER_NONE:
5970                tsync_rx_ctl = 0;
5971                break;
5972        case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5973        case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5974        case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5975        case HWTSTAMP_FILTER_ALL:
5976                /*
5977                 * register TSYNCRXCFG must be set, therefore it is not
5978                 * possible to time stamp both Sync and Delay_Req messages
5979                 * => fall back to time stamping all packets
5980                 */
5981                tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5982                config.rx_filter = HWTSTAMP_FILTER_ALL;
5983                break;
5984        case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5985                tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5986                tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5987                is_l4 = true;
5988                break;
5989        case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5990                tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5991                tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5992                is_l4 = true;
5993                break;
5994        case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5995        case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5996                tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5997                tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5998                is_l2 = true;
5999                is_l4 = true;
6000                config.rx_filter = HWTSTAMP_FILTER_SOME;
6001                break;
6002        case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6003        case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6004                tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6005                tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6006                is_l2 = true;
6007                is_l4 = true;
6008                config.rx_filter = HWTSTAMP_FILTER_SOME;
6009                break;
6010        case HWTSTAMP_FILTER_PTP_V2_EVENT:
6011        case HWTSTAMP_FILTER_PTP_V2_SYNC:
6012        case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6013                tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6014                config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6015                is_l2 = true;
6016                break;
6017        default:
6018                return -ERANGE;
6019        }
6020
6021        if (hw->mac.type == e1000_82575) {
6022                if (tsync_rx_ctl | tsync_tx_ctl)
6023                        return -EINVAL;
6024                return 0;
6025        }
6026
6027        /*
6028         * Per-packet timestamping only works if all packets are
6029         * timestamped, so enable timestamping in all packets as
6030         * long as one rx filter was configured.
6031         */
6032        if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6033                tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6034                tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6035        }
6036
6037        /* enable/disable TX */
6038        regval = rd32(E1000_TSYNCTXCTL);
6039        regval &= ~E1000_TSYNCTXCTL_ENABLED;
6040        regval |= tsync_tx_ctl;
6041        wr32(E1000_TSYNCTXCTL, regval);
6042
6043        /* enable/disable RX */
6044        regval = rd32(E1000_TSYNCRXCTL);
6045        regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6046        regval |= tsync_rx_ctl;
6047        wr32(E1000_TSYNCRXCTL, regval);
6048
6049        /* define which PTP packets are time stamped */
6050        wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6051
6052        /* define ethertype filter for timestamped packets */
6053        if (is_l2)
6054                wr32(E1000_ETQF(3),
6055                                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6056                                 E1000_ETQF_1588 | /* enable timestamping */
6057                                 ETH_P_1588));     /* 1588 eth protocol type */
6058        else
6059                wr32(E1000_ETQF(3), 0);
6060
6061#define PTP_PORT 319
6062        /* L4 Queue Filter[3]: filter by destination port and protocol */
6063        if (is_l4) {
6064                u32 ftqf = (IPPROTO_UDP /* UDP */
6065                        | E1000_FTQF_VF_BP /* VF not compared */
6066                        | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6067                        | E1000_FTQF_MASK); /* mask all inputs */
6068                ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6069
6070                wr32(E1000_IMIR(3), htons(PTP_PORT));
6071                wr32(E1000_IMIREXT(3),
6072                     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6073                if (hw->mac.type == e1000_82576) {
6074                        /* enable source port check */
6075                        wr32(E1000_SPQF(3), htons(PTP_PORT));
6076                        ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6077                }
6078                wr32(E1000_FTQF(3), ftqf);
6079        } else {
6080                wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6081        }
6082        wrfl();
6083
6084        adapter->hwtstamp_config = config;
6085
6086        /* clear TX/RX time stamp registers, just to be sure */
6087        regval = rd32(E1000_TXSTMPH);
6088        regval = rd32(E1000_RXSTMPH);
6089
6090        return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6091                -EFAULT : 0;
6092}
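
/*
 * Editor's note: an illustrative userspace sketch (not part of the driver)
 * showing how the SIOCSHWTSTAMP request handled above is usually issued.
 * "eth0" is a placeholder interface name.
 *
 *   #include <string.h>
 *   #include <sys/ioctl.h>
 *   #include <sys/socket.h>
 *   #include <net/if.h>
 *   #include <linux/net_tstamp.h>
 *   #include <linux/sockios.h>
 *
 *   struct hwtstamp_config cfg = { 0 };
 *   struct ifreq ifr = { 0 };
 *   int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *   cfg.tx_type   = HWTSTAMP_TX_ON;
 *   cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *   strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *   ifr.ifr_data = (void *)&cfg;
 *   ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return, cfg.rx_filter holds the filter actually programmed; as the code
 * above shows, the driver may widen it (e.g. to HWTSTAMP_FILTER_ALL).
 */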
6093
6094/**
6095 * igb_ioctl - dispatch device-specific ioctls
6096 * @netdev: network interface device structure
6097 * @ifr: pointer to interface request structure
6098 * @cmd: ioctl command
6099 **/
6100static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6101{
6102        switch (cmd) {
6103        case SIOCGMIIPHY:
6104        case SIOCGMIIREG:
6105        case SIOCSMIIREG:
6106                return igb_mii_ioctl(netdev, ifr, cmd);
6107        case SIOCSHWTSTAMP:
6108                return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6109        default:
6110                return -EOPNOTSUPP;
6111        }
6112}
6113
6114s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6115{
6116        struct igb_adapter *adapter = hw->back;
6117        u16 cap_offset;
6118
6119        cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6120        if (!cap_offset)
6121                return -E1000_ERR_CONFIG;
6122
6123        pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6124
6125        return 0;
6126}
6127
6128s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6129{
6130        struct igb_adapter *adapter = hw->back;
6131        u16 cap_offset;
6132
6133        cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6134        if (!cap_offset)
6135                return -E1000_ERR_CONFIG;
6136
6137        pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6138
6139        return 0;
6140}
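
/*
 * Editor's note: a short usage sketch for the two helpers above.  The reg
 * argument is an offset relative to the start of the PCI Express capability,
 * so the generic PCI_EXP_* offsets from <linux/pci_regs.h> can be passed
 * directly (names below are the standard definitions, not restated here):
 *
 *   u16 lnksta;
 *
 *   if (!igb_read_pcie_cap_reg(hw, PCI_EXP_LNKSTA, &lnksta))
 *           link_width = (lnksta & PCI_EXP_LNKSTA_NLW) >> 4;
 */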
6141
6142static void igb_vlan_rx_register(struct net_device *netdev,
6143                                 struct vlan_group *grp)
6144{
6145        struct igb_adapter *adapter = netdev_priv(netdev);
6146        struct e1000_hw *hw = &adapter->hw;
6147        u32 ctrl, rctl;
6148
6149        igb_irq_disable(adapter);
6150        adapter->vlgrp = grp;
6151
6152        if (grp) {
6153                /* enable VLAN tag insert/strip */
6154                ctrl = rd32(E1000_CTRL);
6155                ctrl |= E1000_CTRL_VME;
6156                wr32(E1000_CTRL, ctrl);
6157
6158                /* Disable CFI check */
6159                rctl = rd32(E1000_RCTL);
6160                rctl &= ~E1000_RCTL_CFIEN;
6161                wr32(E1000_RCTL, rctl);
6162        } else {
6163                /* disable VLAN tag insert/strip */
6164                ctrl = rd32(E1000_CTRL);
6165                ctrl &= ~E1000_CTRL_VME;
6166                wr32(E1000_CTRL, ctrl);
6167        }
6168
6169        igb_rlpml_set(adapter);
6170
6171        if (!test_bit(__IGB_DOWN, &adapter->state))
6172                igb_irq_enable(adapter);
6173}
6174
6175static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6176{
6177        struct igb_adapter *adapter = netdev_priv(netdev);
6178        struct e1000_hw *hw = &adapter->hw;
6179        int pf_id = adapter->vfs_allocated_count;
6180
6181        /* attempt to add filter to vlvf array */
6182        igb_vlvf_set(adapter, vid, true, pf_id);
6183
6184        /* add the filter since PF can receive vlans w/o entry in vlvf */
6185        igb_vfta_set(hw, vid, true);
6186}
6187
6188static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6189{
6190        struct igb_adapter *adapter = netdev_priv(netdev);
6191        struct e1000_hw *hw = &adapter->hw;
6192        int pf_id = adapter->vfs_allocated_count;
6193        s32 err;
6194
6195        igb_irq_disable(adapter);
6196        vlan_group_set_device(adapter->vlgrp, vid, NULL);
6197
6198        if (!test_bit(__IGB_DOWN, &adapter->state))
6199                igb_irq_enable(adapter);
6200
6201        /* remove vlan from VLVF table array */
6202        err = igb_vlvf_set(adapter, vid, false, pf_id);
6203
6204        /* if vid was not present in VLVF just remove it from table */
6205        if (err)
6206                igb_vfta_set(hw, vid, false);
6207}
6208
6209static void igb_restore_vlan(struct igb_adapter *adapter)
6210{
6211        igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6212
6213        if (adapter->vlgrp) {
6214                u16 vid;
6215                for (vid = 0; vid < VLAN_N_VID; vid++) {
6216                        if (!vlan_group_get_device(adapter->vlgrp, vid))
6217                                continue;
6218                        igb_vlan_rx_add_vid(adapter->netdev, vid);
6219                }
6220        }
6221}
6222
6223int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6224{
6225        struct pci_dev *pdev = adapter->pdev;
6226        struct e1000_mac_info *mac = &adapter->hw.mac;
6227
6228        mac->autoneg = 0;
6229
6230        /* Fiber NICs only allow 1 Gbps full duplex */
6231        if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6232                spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6233                dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6234                return -EINVAL;
6235        }
6236
6237        switch (spddplx) {
6238        case SPEED_10 + DUPLEX_HALF:
6239                mac->forced_speed_duplex = ADVERTISE_10_HALF;
6240                break;
6241        case SPEED_10 + DUPLEX_FULL:
6242                mac->forced_speed_duplex = ADVERTISE_10_FULL;
6243                break;
6244        case SPEED_100 + DUPLEX_HALF:
6245                mac->forced_speed_duplex = ADVERTISE_100_HALF;
6246                break;
6247        case SPEED_100 + DUPLEX_FULL:
6248                mac->forced_speed_duplex = ADVERTISE_100_FULL;
6249                break;
6250        case SPEED_1000 + DUPLEX_FULL:
6251                mac->autoneg = 1;
6252                adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6253                break;
6254        case SPEED_1000 + DUPLEX_HALF: /* not supported */
6255        default:
6256                dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6257                return -EINVAL;
6258        }
6259        return 0;
6260}
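
/*
 * Editor's note: the spddplx argument above is built by the caller by adding
 * the standard ethtool speed and duplex values together (SPEED_10/100/1000
 * are 10, 100 and 1000; DUPLEX_HALF/FULL are 0 and 1), for example:
 *
 *   igb_set_spd_dplx(adapter, SPEED_100 + DUPLEX_FULL);   // spddplx == 101
 *
 * which forces 100 Mbps full duplex via ADVERTISE_100_FULL.
 */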
6261
6262static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6263{
6264        struct net_device *netdev = pci_get_drvdata(pdev);
6265        struct igb_adapter *adapter = netdev_priv(netdev);
6266        struct e1000_hw *hw = &adapter->hw;
6267        u32 ctrl, rctl, status;
6268        u32 wufc = adapter->wol;
6269#ifdef CONFIG_PM
6270        int retval = 0;
6271#endif
6272
6273        netif_device_detach(netdev);
6274
6275        if (netif_running(netdev))
6276                igb_close(netdev);
6277
6278        igb_clear_interrupt_scheme(adapter);
6279
6280#ifdef CONFIG_PM
6281        retval = pci_save_state(pdev);
6282        if (retval)
6283                return retval;
6284#endif
6285
6286        status = rd32(E1000_STATUS);
6287        if (status & E1000_STATUS_LU)
6288                wufc &= ~E1000_WUFC_LNKC;
6289
6290        if (wufc) {
6291                igb_setup_rctl(adapter);
6292                igb_set_rx_mode(netdev);
6293
6294                /* turn on all-multi mode if wake on multicast is enabled */
6295                if (wufc & E1000_WUFC_MC) {
6296                        rctl = rd32(E1000_RCTL);
6297                        rctl |= E1000_RCTL_MPE;
6298                        wr32(E1000_RCTL, rctl);
6299                }
6300
6301                ctrl = rd32(E1000_CTRL);
6302                /* advertise wake from D3Cold */
6303                #define E1000_CTRL_ADVD3WUC 0x00100000
6304                /* phy power management enable */
6305                #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6306                ctrl |= E1000_CTRL_ADVD3WUC;
6307                wr32(E1000_CTRL, ctrl);
6308
6309                /* Allow time for pending master requests to run */
6310                igb_disable_pcie_master(hw);
6311
6312                wr32(E1000_WUC, E1000_WUC_PME_EN);
6313                wr32(E1000_WUFC, wufc);
6314        } else {
6315                wr32(E1000_WUC, 0);
6316                wr32(E1000_WUFC, 0);
6317        }
6318
6319        *enable_wake = wufc || adapter->en_mng_pt;
6320        if (!*enable_wake)
6321                igb_power_down_link(adapter);
6322        else
6323                igb_power_up_link(adapter);
6324
6325        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6326         * would have already happened in close and is redundant. */
6327        igb_release_hw_control(adapter);
6328
6329        pci_disable_device(pdev);
6330
6331        return 0;
6332}
6333
6334#ifdef CONFIG_PM
6335static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6336{
6337        int retval;
6338        bool wake;
6339
6340        retval = __igb_shutdown(pdev, &wake);
6341        if (retval)
6342                return retval;
6343
6344        if (wake) {
6345                pci_prepare_to_sleep(pdev);
6346        } else {
6347                pci_wake_from_d3(pdev, false);
6348                pci_set_power_state(pdev, PCI_D3hot);
6349        }
6350
6351        return 0;
6352}
6353
6354static int igb_resume(struct pci_dev *pdev)
6355{
6356        struct net_device *netdev = pci_get_drvdata(pdev);
6357        struct igb_adapter *adapter = netdev_priv(netdev);
6358        struct e1000_hw *hw = &adapter->hw;
6359        int err;
6360
6361        pci_set_power_state(pdev, PCI_D0);
6362        pci_restore_state(pdev);
6363        pci_save_state(pdev);
6364
6365        err = pci_enable_device_mem(pdev);
6366        if (err) {
6367                dev_err(&pdev->dev,
6368                        "igb: Cannot enable PCI device from suspend\n");
6369                return err;
6370        }
6371        pci_set_master(pdev);
6372
6373        pci_enable_wake(pdev, PCI_D3hot, 0);
6374        pci_enable_wake(pdev, PCI_D3cold, 0);
6375
6376        if (igb_init_interrupt_scheme(adapter)) {
6377                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6378                return -ENOMEM;
6379        }
6380
6381        igb_reset(adapter);
6382
6383        /* let the f/w know that the h/w is now under the control of the
6384         * driver. */
6385        igb_get_hw_control(adapter);
6386
6387        wr32(E1000_WUS, ~0);
6388
6389        if (netif_running(netdev)) {
6390                err = igb_open(netdev);
6391                if (err)
6392                        return err;
6393        }
6394
6395        netif_device_attach(netdev);
6396
6397        return 0;
6398}
6399#endif
6400
6401static void igb_shutdown(struct pci_dev *pdev)
6402{
6403        bool wake;
6404
6405        __igb_shutdown(pdev, &wake);
6406
6407        if (system_state == SYSTEM_POWER_OFF) {
6408                pci_wake_from_d3(pdev, wake);
6409                pci_set_power_state(pdev, PCI_D3hot);
6410        }
6411}
6412
6413#ifdef CONFIG_NET_POLL_CONTROLLER
6414/*
6415 * Polling 'interrupt' - used by things like netconsole to send skbs
6416 * without having to re-enable interrupts. It's not called while
6417 * the interrupt routine is executing.
6418 */
6419static void igb_netpoll(struct net_device *netdev)
6420{
6421        struct igb_adapter *adapter = netdev_priv(netdev);
6422        struct e1000_hw *hw = &adapter->hw;
6423        int i;
6424
6425        if (!adapter->msix_entries) {
6426                struct igb_q_vector *q_vector = adapter->q_vector[0];
6427                igb_irq_disable(adapter);
6428                napi_schedule(&q_vector->napi);
6429                return;
6430        }
6431
6432        for (i = 0; i < adapter->num_q_vectors; i++) {
6433                struct igb_q_vector *q_vector = adapter->q_vector[i];
6434                wr32(E1000_EIMC, q_vector->eims_value);
6435                napi_schedule(&q_vector->napi);
6436        }
6437}
6438#endif /* CONFIG_NET_POLL_CONTROLLER */
6439
6440/**
6441 * igb_io_error_detected - called when PCI error is detected
6442 * @pdev: Pointer to PCI device
6443 * @state: The current pci connection state
6444 *
6445 * This function is called after a PCI bus error affecting
6446 * this device has been detected.
6447 */
6448static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6449                                              pci_channel_state_t state)
6450{
6451        struct net_device *netdev = pci_get_drvdata(pdev);
6452        struct igb_adapter *adapter = netdev_priv(netdev);
6453
6454        netif_device_detach(netdev);
6455
6456        if (state == pci_channel_io_perm_failure)
6457                return PCI_ERS_RESULT_DISCONNECT;
6458
6459        if (netif_running(netdev))
6460                igb_down(adapter);
6461        pci_disable_device(pdev);
6462
6463        /* Request a slot reset. */
6464        return PCI_ERS_RESULT_NEED_RESET;
6465}
6466
6467/**
6468 * igb_io_slot_reset - called after the pci bus has been reset.
6469 * @pdev: Pointer to PCI device
6470 *
6471 * Restart the card from scratch, as if from a cold-boot. Implementation
6472 * resembles the first-half of the igb_resume routine.
6473 */
6474static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6475{
6476        struct net_device *netdev = pci_get_drvdata(pdev);
6477        struct igb_adapter *adapter = netdev_priv(netdev);
6478        struct e1000_hw *hw = &adapter->hw;
6479        pci_ers_result_t result;
6480        int err;
6481
6482        if (pci_enable_device_mem(pdev)) {
6483                dev_err(&pdev->dev,
6484                        "Cannot re-enable PCI device after reset.\n");
6485                result = PCI_ERS_RESULT_DISCONNECT;
6486        } else {
6487                pci_set_master(pdev);
6488                pci_restore_state(pdev);
6489                pci_save_state(pdev);
6490
6491                pci_enable_wake(pdev, PCI_D3hot, 0);
6492                pci_enable_wake(pdev, PCI_D3cold, 0);
6493
6494                igb_reset(adapter);
6495                wr32(E1000_WUS, ~0);
6496                result = PCI_ERS_RESULT_RECOVERED;
6497        }
6498
6499        err = pci_cleanup_aer_uncorrect_error_status(pdev);
6500        if (err) {
6501                dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6502                        "failed 0x%0x\n", err);
6503                /* non-fatal, continue */
6504        }
6505
6506        return result;
6507}
6508
6509/**
6510 * igb_io_resume - called when traffic can start flowing again.
6511 * @pdev: Pointer to PCI device
6512 *
6513 * This callback is called when the error recovery driver tells us that
6514 * it's OK to resume normal operation. Implementation resembles the
6515 * second-half of the igb_resume routine.
6516 */
6517static void igb_io_resume(struct pci_dev *pdev)
6518{
6519        struct net_device *netdev = pci_get_drvdata(pdev);
6520        struct igb_adapter *adapter = netdev_priv(netdev);
6521
6522        if (netif_running(netdev)) {
6523                if (igb_up(adapter)) {
6524                        dev_err(&pdev->dev, "igb_up failed after reset\n");
6525                        return;
6526                }
6527        }
6528
6529        netif_device_attach(netdev);
6530
6531        /* let the f/w know that the h/w is now under the control of the
6532         * driver. */
6533        igb_get_hw_control(adapter);
6534}
6535
6536static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6537                             u8 qsel)
6538{
6539        u32 rar_low, rar_high;
6540        struct e1000_hw *hw = &adapter->hw;
6541
6542        /* HW expects these in little endian so we reverse the byte order
6543         * from network order (big endian) to little endian
6544         */
6545        rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6546                  ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6547        rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6548
6549        /* Indicate to hardware the Address is Valid. */
6550        rar_high |= E1000_RAH_AV;
6551
6552        if (hw->mac.type == e1000_82575)
6553                rar_high |= E1000_RAH_POOL_1 * qsel;
6554        else
6555                rar_high |= E1000_RAH_POOL_1 << qsel;
6556
6557        wr32(E1000_RAL(index), rar_low);
6558        wrfl();
6559        wr32(E1000_RAH(index), rar_high);
6560        wrfl();
6561}
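
/*
 * Editor's note: a worked example of the byte packing above for the (made-up)
 * address 00:1b:21:aa:bb:cc:
 *
 *   rar_low   = 0x00 | 0x1b << 8 | 0x21 << 16 | 0xaa << 24 = 0xaa211b00
 *   rar_high  = 0xbb | 0xcc << 8                           = 0x0000ccbb
 *   rar_high |= E1000_RAH_AV (bit 31)                      = 0x8000ccbb
 *
 * i.e. RAL holds the first four octets least-significant-byte first and RAH
 * holds the last two, with the Address Valid bit set.
 */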
6562
6563static int igb_set_vf_mac(struct igb_adapter *adapter,
6564                          int vf, unsigned char *mac_addr)
6565{
6566        struct e1000_hw *hw = &adapter->hw;
6567        /* VF MAC addresses start at the end of the receive address registers
6568         * and move towards the first; as a result a collision should not be possible */
6569        int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6570
6571        memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6572
6573        igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6574
6575        return 0;
6576}
6577
6578static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6579{
6580        struct igb_adapter *adapter = netdev_priv(netdev);
6581        if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6582                return -EINVAL;
6583        adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6584        dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6585        dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6586                                      " change effective.");
6587        if (test_bit(__IGB_DOWN, &adapter->state)) {
6588                dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6589                         " but the PF device is not up.\n");
6590                dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6591                         " attempting to use the VF device.\n");
6592        }
6593        return igb_set_vf_mac(adapter, vf, mac);
6594}
6595
6596static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6597{
6598        return -EOPNOTSUPP;
6599}
6600
6601static int igb_ndo_get_vf_config(struct net_device *netdev,
6602                                 int vf, struct ifla_vf_info *ivi)
6603{
6604        struct igb_adapter *adapter = netdev_priv(netdev);
6605        if (vf >= adapter->vfs_allocated_count)
6606                return -EINVAL;
6607        ivi->vf = vf;
6608        memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6609        ivi->tx_rate = 0;
6610        ivi->vlan = adapter->vf_data[vf].pf_vlan;
6611        ivi->qos = adapter->vf_data[vf].pf_qos;
6612        return 0;
6613}
6614
6615static void igb_vmm_control(struct igb_adapter *adapter)
6616{
6617        struct e1000_hw *hw = &adapter->hw;
6618        u32 reg;
6619
6620        switch (hw->mac.type) {
6621        case e1000_82575:
6622        default:
6623                /* replication is not supported for 82575 */
6624                return;
6625        case e1000_82576:
6626                /* notify HW that the MAC is adding vlan tags */
6627                reg = rd32(E1000_DTXCTL);
6628                reg |= E1000_DTXCTL_VLAN_ADDED;
6629                wr32(E1000_DTXCTL, reg); /* fall through */
6630        case e1000_82580:
6631                /* enable replication vlan tag stripping */
6632                reg = rd32(E1000_RPLOLR);
6633                reg |= E1000_RPLOLR_STRVLAN;
6634                wr32(E1000_RPLOLR, reg); /* fall through */
6635        case e1000_i350:
6636                /* none of the above registers are supported by i350 */
6637                break;
6638        }
6639
6640        if (adapter->vfs_allocated_count) {
6641                igb_vmdq_set_loopback_pf(hw, true);
6642                igb_vmdq_set_replication_pf(hw, true);
6643                igb_vmdq_set_anti_spoofing_pf(hw, true,
6644                                                adapter->vfs_allocated_count);
6645        } else {
6646                igb_vmdq_set_loopback_pf(hw, false);
6647                igb_vmdq_set_replication_pf(hw, false);
6648        }
6649}
6650
6651/* igb_main.c */
6652