linux/drivers/net/ethernet/emulex/benet/be_main.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2005 - 2016 Broadcom
   4 * All rights reserved.
   5 *
   6 * Contact Information:
   7 * linux-drivers@emulex.com
   8 *
   9 * Emulex
  10 * 3333 Susan Street
  11 * Costa Mesa, CA 92626
  12 */
  13
  14#include <linux/prefetch.h>
  15#include <linux/module.h>
  16#include "be.h"
  17#include "be_cmds.h"
  18#include <asm/div64.h>
  19#include <linux/aer.h>
  20#include <linux/if_bridge.h>
  21#include <net/busy_poll.h>
  22#include <net/vxlan.h>
  23
  24MODULE_DESCRIPTION(DRV_DESC);
  25MODULE_AUTHOR("Emulex Corporation");
  26MODULE_LICENSE("GPL");
  27
  28/* num_vfs module param is obsolete.
  29 * Use sysfs method to enable/disable VFs.
  30 */
  31static unsigned int num_vfs;
  32module_param(num_vfs, uint, 0444);
  33MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
  34
  35static ushort rx_frag_size = 2048;
  36module_param(rx_frag_size, ushort, 0444);
  37MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
  38
  39/* Per-module error detection/recovery workq shared across all functions.
  40 * Each function schedules its own work request on this shared workq.
  41 */
  42static struct workqueue_struct *be_err_recovery_workq;
  43
  44static const struct pci_device_id be_dev_ids[] = {
  45#ifdef CONFIG_BE2NET_BE2
  46        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
  47        { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
  48#endif /* CONFIG_BE2NET_BE2 */
  49#ifdef CONFIG_BE2NET_BE3
  50        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
  51        { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
  52#endif /* CONFIG_BE2NET_BE3 */
  53#ifdef CONFIG_BE2NET_LANCER
  54        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
  55        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
  56#endif /* CONFIG_BE2NET_LANCER */
  57#ifdef CONFIG_BE2NET_SKYHAWK
  58        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
  59        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
  60#endif /* CONFIG_BE2NET_SKYHAWK */
  61        { 0 }
  62};
  63MODULE_DEVICE_TABLE(pci, be_dev_ids);
  64
   65/* Workqueue used by all functions for deferring cmd calls to the adapter */
  66static struct workqueue_struct *be_wq;
  67
  68/* UE Status Low CSR */
  69static const char * const ue_status_low_desc[] = {
  70        "CEV",
  71        "CTX",
  72        "DBUF",
  73        "ERX",
  74        "Host",
  75        "MPU",
  76        "NDMA",
  77        "PTC ",
  78        "RDMA ",
  79        "RXF ",
  80        "RXIPS ",
  81        "RXULP0 ",
  82        "RXULP1 ",
  83        "RXULP2 ",
  84        "TIM ",
  85        "TPOST ",
  86        "TPRE ",
  87        "TXIPS ",
  88        "TXULP0 ",
  89        "TXULP1 ",
  90        "UC ",
  91        "WDMA ",
  92        "TXULP2 ",
  93        "HOST1 ",
  94        "P0_OB_LINK ",
  95        "P1_OB_LINK ",
  96        "HOST_GPIO ",
  97        "MBOX ",
  98        "ERX2 ",
  99        "SPARE ",
 100        "JTAG ",
 101        "MPU_INTPEND "
 102};
 103
 104/* UE Status High CSR */
 105static const char * const ue_status_hi_desc[] = {
 106        "LPCMEMHOST",
 107        "MGMT_MAC",
 108        "PCS0ONLINE",
 109        "MPU_IRAM",
 110        "PCS1ONLINE",
 111        "PCTL0",
 112        "PCTL1",
 113        "PMEM",
 114        "RR",
 115        "TXPB",
 116        "RXPP",
 117        "XAUI",
 118        "TXP",
 119        "ARM",
 120        "IPC",
 121        "HOST2",
 122        "HOST3",
 123        "HOST4",
 124        "HOST5",
 125        "HOST6",
 126        "HOST7",
 127        "ECRC",
 128        "Poison TLP",
 129        "NETC",
 130        "PERIPH",
 131        "LLTXULP",
 132        "D2P",
 133        "RCON",
 134        "LDMA",
 135        "LLTXP",
 136        "LLTXPB",
 137        "Unknown"
 138};
 139
 140#define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
 141                                 BE_IF_FLAGS_BROADCAST | \
 142                                 BE_IF_FLAGS_MULTICAST | \
 143                                 BE_IF_FLAGS_PASS_L3L4_ERRORS)
 144
 145static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
 146{
 147        struct be_dma_mem *mem = &q->dma_mem;
 148
 149        if (mem->va) {
 150                dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
 151                                  mem->dma);
 152                mem->va = NULL;
 153        }
 154}
 155
 156static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
 157                          u16 len, u16 entry_size)
 158{
 159        struct be_dma_mem *mem = &q->dma_mem;
 160
 161        memset(q, 0, sizeof(*q));
 162        q->len = len;
 163        q->entry_size = entry_size;
 164        mem->size = len * entry_size;
 165        mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
 166                                     &mem->dma, GFP_KERNEL);
 167        if (!mem->va)
 168                return -ENOMEM;
 169        return 0;
 170}
 171
 172static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
 173{
 174        u32 reg, enabled;
 175
 176        pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
 177                              &reg);
 178        enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 179
 180        if (!enabled && enable)
 181                reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 182        else if (enabled && !enable)
 183                reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 184        else
 185                return;
 186
 187        pci_write_config_dword(adapter->pdev,
 188                               PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
 189}
 190
 191static void be_intr_set(struct be_adapter *adapter, bool enable)
 192{
 193        int status = 0;
 194
 195        /* On lancer interrupts can't be controlled via this register */
 196        if (lancer_chip(adapter))
 197                return;
 198
 199        if (be_check_error(adapter, BE_ERROR_EEH))
 200                return;
 201
 202        status = be_cmd_intr_set(adapter, enable);
 203        if (status)
 204                be_reg_intr_set(adapter, enable);
 205}
 206
 207static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
 208{
 209        u32 val = 0;
 210
 211        if (be_check_error(adapter, BE_ERROR_HW))
 212                return;
 213
 214        val |= qid & DB_RQ_RING_ID_MASK;
 215        val |= posted << DB_RQ_NUM_POSTED_SHIFT;
 216
 217        wmb();
 218        iowrite32(val, adapter->db + DB_RQ_OFFSET);
 219}
 220
 221static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
 222                          u16 posted)
 223{
 224        u32 val = 0;
 225
 226        if (be_check_error(adapter, BE_ERROR_HW))
 227                return;
 228
 229        val |= txo->q.id & DB_TXULP_RING_ID_MASK;
 230        val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
 231
 232        wmb();
 233        iowrite32(val, adapter->db + txo->db_offset);
 234}
 235
 236static void be_eq_notify(struct be_adapter *adapter, u16 qid,
 237                         bool arm, bool clear_int, u16 num_popped,
 238                         u32 eq_delay_mult_enc)
 239{
 240        u32 val = 0;
 241
 242        val |= qid & DB_EQ_RING_ID_MASK;
 243        val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
 244
 245        if (be_check_error(adapter, BE_ERROR_HW))
 246                return;
 247
 248        if (arm)
 249                val |= 1 << DB_EQ_REARM_SHIFT;
 250        if (clear_int)
 251                val |= 1 << DB_EQ_CLR_SHIFT;
 252        val |= 1 << DB_EQ_EVNT_SHIFT;
 253        val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
 254        val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
 255        iowrite32(val, adapter->db + DB_EQ_OFFSET);
 256}
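
/* Example of how the EQ doorbell value above is composed (illustrative
 * values): re-arming EQ id 5 after popping 3 events ORs the ring-id bits
 * (5), the re-arm bit, the "event" bit and a num_popped of 3 into one
 * 32-bit value, which is then written with a single iowrite32().
 */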
 257
 258void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
 259{
 260        u32 val = 0;
 261
 262        val |= qid & DB_CQ_RING_ID_MASK;
 263        val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
 264                        DB_CQ_RING_ID_EXT_MASK_SHIFT);
 265
 266        if (be_check_error(adapter, BE_ERROR_HW))
 267                return;
 268
 269        if (arm)
 270                val |= 1 << DB_CQ_REARM_SHIFT;
 271        val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
 272        iowrite32(val, adapter->db + DB_CQ_OFFSET);
 273}
 274
 275static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
 276{
 277        int i;
 278
 279        /* Check if mac has already been added as part of uc-list */
 280        for (i = 0; i < adapter->uc_macs; i++) {
 281                if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
 282                        /* mac already added, skip addition */
 283                        adapter->pmac_id[0] = adapter->pmac_id[i + 1];
 284                        return 0;
 285                }
 286        }
 287
 288        return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
 289                               &adapter->pmac_id[0], 0);
 290}
 291
 292static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
 293{
 294        int i;
 295
 296        /* Skip deletion if the programmed mac is
 297         * being used in uc-list
 298         */
 299        for (i = 0; i < adapter->uc_macs; i++) {
 300                if (adapter->pmac_id[i + 1] == pmac_id)
 301                        return;
 302        }
 303        be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
 304}
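
/* Note on the pmac_id bookkeeping used above: pmac_id[0] tracks the filter
 * programmed for the primary MAC, while pmac_id[i + 1] tracks the i-th
 * uc-list entry. be_dev_mac_add() therefore reuses an existing uc-list
 * filter id instead of programming a duplicate, and be_dev_mac_del() skips
 * deletion while that filter id is still referenced by the uc-list.
 */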
 305
 306static int be_mac_addr_set(struct net_device *netdev, void *p)
 307{
 308        struct be_adapter *adapter = netdev_priv(netdev);
 309        struct device *dev = &adapter->pdev->dev;
 310        struct sockaddr *addr = p;
 311        int status;
 312        u8 mac[ETH_ALEN];
 313        u32 old_pmac_id = adapter->pmac_id[0];
 314
 315        if (!is_valid_ether_addr(addr->sa_data))
 316                return -EADDRNOTAVAIL;
 317
  318        /* Proceed further only if the user-provided MAC is different
  319         * from the active MAC
  320         */
 321        if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
 322                return 0;
 323
  324        /* BE3 VFs without FILTMGMT privilege are not allowed to set their
  325         * MAC address
  326         */
 327        if (BEx_chip(adapter) && be_virtfn(adapter) &&
 328            !check_privilege(adapter, BE_PRIV_FILTMGMT))
 329                return -EPERM;
 330
 331        /* if device is not running, copy MAC to netdev->dev_addr */
 332        if (!netif_running(netdev))
 333                goto done;
 334
  335        /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
  336         * privilege or if the PF did not provision the new MAC address.
  337         * On BE3, this cmd will always fail if the VF doesn't have the
  338         * FILTMGMT privilege. This failure is OK only if the PF programmed
  339         * the MAC for the VF.
  340         */
 341        mutex_lock(&adapter->rx_filter_lock);
 342        status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
 343        if (!status) {
 344
 345                /* Delete the old programmed MAC. This call may fail if the
 346                 * old MAC was already deleted by the PF driver.
 347                 */
 348                if (adapter->pmac_id[0] != old_pmac_id)
 349                        be_dev_mac_del(adapter, old_pmac_id);
 350        }
 351
 352        mutex_unlock(&adapter->rx_filter_lock);
 353        /* Decide if the new MAC is successfully activated only after
 354         * querying the FW
 355         */
 356        status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
 357                                       adapter->if_handle, true, 0);
 358        if (status)
 359                goto err;
 360
  361        /* The MAC change did not happen, either due to lack of privilege
  362         * or because the PF did not pre-provision the new MAC.
  363         */
 364        if (!ether_addr_equal(addr->sa_data, mac)) {
 365                status = -EPERM;
 366                goto err;
 367        }
 368
 369        /* Remember currently programmed MAC */
 370        ether_addr_copy(adapter->dev_mac, addr->sa_data);
 371done:
 372        ether_addr_copy(netdev->dev_addr, addr->sa_data);
 373        dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
 374        return 0;
 375err:
 376        dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
 377        return status;
 378}
 379
 380/* BE2 supports only v0 cmd */
 381static void *hw_stats_from_cmd(struct be_adapter *adapter)
 382{
 383        if (BE2_chip(adapter)) {
 384                struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
 385
 386                return &cmd->hw_stats;
 387        } else if (BE3_chip(adapter)) {
 388                struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
 389
 390                return &cmd->hw_stats;
 391        } else {
 392                struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
 393
 394                return &cmd->hw_stats;
 395        }
 396}
 397
 398/* BE2 supports only v0 cmd */
 399static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
 400{
 401        if (BE2_chip(adapter)) {
 402                struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 403
 404                return &hw_stats->erx;
 405        } else if (BE3_chip(adapter)) {
 406                struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 407
 408                return &hw_stats->erx;
 409        } else {
 410                struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 411
 412                return &hw_stats->erx;
 413        }
 414}
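
/* Stats layout selection at a glance: BE2 uses the v0 GET_STATS response,
 * BE3 uses v1 and later chips use v2. The v2 erx struct is a superset of
 * the older layouts, which is why be_parse_stats() below can safely index
 * rx_drops_no_fragments[] through the v2 view on all chips.
 */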
 415
 416static void populate_be_v0_stats(struct be_adapter *adapter)
 417{
 418        struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 419        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 420        struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
 421        struct be_port_rxf_stats_v0 *port_stats =
 422                                        &rxf_stats->port[adapter->port_num];
 423        struct be_drv_stats *drvs = &adapter->drv_stats;
 424
 425        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 426        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 427        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 428        drvs->rx_control_frames = port_stats->rx_control_frames;
 429        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 430        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 431        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 432        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 433        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 434        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 435        drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
 436        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 437        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 438        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 439        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 440        drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
 441        drvs->rx_dropped_header_too_small =
 442                port_stats->rx_dropped_header_too_small;
 443        drvs->rx_address_filtered =
 444                                        port_stats->rx_address_filtered +
 445                                        port_stats->rx_vlan_filtered;
 446        drvs->rx_alignment_symbol_errors =
 447                port_stats->rx_alignment_symbol_errors;
 448
 449        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 450        drvs->tx_controlframes = port_stats->tx_controlframes;
 451
 452        if (adapter->port_num)
 453                drvs->jabber_events = rxf_stats->port1_jabber_events;
 454        else
 455                drvs->jabber_events = rxf_stats->port0_jabber_events;
 456        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 457        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 458        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 459        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 460        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 461        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 462        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 463}
 464
 465static void populate_be_v1_stats(struct be_adapter *adapter)
 466{
 467        struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 468        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 469        struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
 470        struct be_port_rxf_stats_v1 *port_stats =
 471                                        &rxf_stats->port[adapter->port_num];
 472        struct be_drv_stats *drvs = &adapter->drv_stats;
 473
 474        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 475        drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 476        drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 477        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 478        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 479        drvs->rx_control_frames = port_stats->rx_control_frames;
 480        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 481        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 482        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 483        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 484        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 485        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 486        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 487        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 488        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 489        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 490        drvs->rx_dropped_header_too_small =
 491                port_stats->rx_dropped_header_too_small;
 492        drvs->rx_input_fifo_overflow_drop =
 493                port_stats->rx_input_fifo_overflow_drop;
 494        drvs->rx_address_filtered = port_stats->rx_address_filtered;
 495        drvs->rx_alignment_symbol_errors =
 496                port_stats->rx_alignment_symbol_errors;
 497        drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 498        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 499        drvs->tx_controlframes = port_stats->tx_controlframes;
 500        drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 501        drvs->jabber_events = port_stats->jabber_events;
 502        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 503        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 504        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 505        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 506        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 507        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 508        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 509}
 510
 511static void populate_be_v2_stats(struct be_adapter *adapter)
 512{
 513        struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 514        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 515        struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
 516        struct be_port_rxf_stats_v2 *port_stats =
 517                                        &rxf_stats->port[adapter->port_num];
 518        struct be_drv_stats *drvs = &adapter->drv_stats;
 519
 520        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 521        drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 522        drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 523        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 524        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 525        drvs->rx_control_frames = port_stats->rx_control_frames;
 526        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 527        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 528        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 529        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 530        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 531        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 532        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 533        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 534        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 535        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 536        drvs->rx_dropped_header_too_small =
 537                port_stats->rx_dropped_header_too_small;
 538        drvs->rx_input_fifo_overflow_drop =
 539                port_stats->rx_input_fifo_overflow_drop;
 540        drvs->rx_address_filtered = port_stats->rx_address_filtered;
 541        drvs->rx_alignment_symbol_errors =
 542                port_stats->rx_alignment_symbol_errors;
 543        drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 544        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 545        drvs->tx_controlframes = port_stats->tx_controlframes;
 546        drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 547        drvs->jabber_events = port_stats->jabber_events;
 548        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 549        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 550        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 551        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 552        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 553        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 554        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 555        if (be_roce_supported(adapter)) {
 556                drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
 557                drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
 558                drvs->rx_roce_frames = port_stats->roce_frames_received;
 559                drvs->roce_drops_crc = port_stats->roce_drops_crc;
 560                drvs->roce_drops_payload_len =
 561                        port_stats->roce_drops_payload_len;
 562        }
 563}
 564
 565static void populate_lancer_stats(struct be_adapter *adapter)
 566{
 567        struct be_drv_stats *drvs = &adapter->drv_stats;
 568        struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
 569
 570        be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
 571        drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
 572        drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
 573        drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
 574        drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
 575        drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
 576        drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
 577        drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
 578        drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
 579        drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
 580        drvs->rx_dropped_tcp_length =
 581                                pport_stats->rx_dropped_invalid_tcp_length;
 582        drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
 583        drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
 584        drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
 585        drvs->rx_dropped_header_too_small =
 586                                pport_stats->rx_dropped_header_too_small;
 587        drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 588        drvs->rx_address_filtered =
 589                                        pport_stats->rx_address_filtered +
 590                                        pport_stats->rx_vlan_filtered;
 591        drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
 592        drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 593        drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
 594        drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
 595        drvs->jabber_events = pport_stats->rx_jabbers;
 596        drvs->forwarded_packets = pport_stats->num_forwards_lo;
 597        drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
 598        drvs->rx_drops_too_many_frags =
 599                                pport_stats->rx_drops_too_many_frags_lo;
 600}
 601
 602static void accumulate_16bit_val(u32 *acc, u16 val)
 603{
 604#define lo(x)                   (x & 0xFFFF)
 605#define hi(x)                   (x & 0xFFFF0000)
 606        bool wrapped = val < lo(*acc);
 607        u32 newacc = hi(*acc) + val;
 608
 609        if (wrapped)
 610                newacc += 65536;
 611        WRITE_ONCE(*acc, newacc);
 612}
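
/* Worked example with illustrative values: if *acc is 0x0001fff0 (high half
 * 0x0001, last HW reading 0xfff0) and the new 16-bit HW reading is 0x0005,
 * the reading is smaller than lo(*acc), so the counter wrapped:
 * newacc = 0x00010000 + 0x0005 + 65536 = 0x00020005.
 */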
 613
 614static void populate_erx_stats(struct be_adapter *adapter,
 615                               struct be_rx_obj *rxo, u32 erx_stat)
 616{
 617        if (!BEx_chip(adapter))
 618                rx_stats(rxo)->rx_drops_no_frags = erx_stat;
 619        else
  620                /* the erx HW counter below can wrap around after 65535;
  621                 * the driver accumulates it into a 32-bit value
  622                 */
 623                accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
 624                                     (u16)erx_stat);
 625}
 626
 627void be_parse_stats(struct be_adapter *adapter)
 628{
 629        struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
 630        struct be_rx_obj *rxo;
 631        int i;
 632        u32 erx_stat;
 633
 634        if (lancer_chip(adapter)) {
 635                populate_lancer_stats(adapter);
 636        } else {
 637                if (BE2_chip(adapter))
 638                        populate_be_v0_stats(adapter);
 639                else if (BE3_chip(adapter))
 640                        /* for BE3 */
 641                        populate_be_v1_stats(adapter);
 642                else
 643                        populate_be_v2_stats(adapter);
 644
 645                /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
 646                for_all_rx_queues(adapter, rxo, i) {
 647                        erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
 648                        populate_erx_stats(adapter, rxo, erx_stat);
 649                }
 650        }
 651}
 652
 653static void be_get_stats64(struct net_device *netdev,
 654                           struct rtnl_link_stats64 *stats)
 655{
 656        struct be_adapter *adapter = netdev_priv(netdev);
 657        struct be_drv_stats *drvs = &adapter->drv_stats;
 658        struct be_rx_obj *rxo;
 659        struct be_tx_obj *txo;
 660        u64 pkts, bytes;
 661        unsigned int start;
 662        int i;
 663
 664        for_all_rx_queues(adapter, rxo, i) {
 665                const struct be_rx_stats *rx_stats = rx_stats(rxo);
 666
 667                do {
 668                        start = u64_stats_fetch_begin_irq(&rx_stats->sync);
 669                        pkts = rx_stats(rxo)->rx_pkts;
 670                        bytes = rx_stats(rxo)->rx_bytes;
 671                } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
 672                stats->rx_packets += pkts;
 673                stats->rx_bytes += bytes;
 674                stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
 675                stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
 676                                        rx_stats(rxo)->rx_drops_no_frags;
 677        }
 678
 679        for_all_tx_queues(adapter, txo, i) {
 680                const struct be_tx_stats *tx_stats = tx_stats(txo);
 681
 682                do {
 683                        start = u64_stats_fetch_begin_irq(&tx_stats->sync);
 684                        pkts = tx_stats(txo)->tx_pkts;
 685                        bytes = tx_stats(txo)->tx_bytes;
 686                } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
 687                stats->tx_packets += pkts;
 688                stats->tx_bytes += bytes;
 689        }
 690
 691        /* bad pkts received */
 692        stats->rx_errors = drvs->rx_crc_errors +
 693                drvs->rx_alignment_symbol_errors +
 694                drvs->rx_in_range_errors +
 695                drvs->rx_out_range_errors +
 696                drvs->rx_frame_too_long +
 697                drvs->rx_dropped_too_small +
 698                drvs->rx_dropped_too_short +
 699                drvs->rx_dropped_header_too_small +
 700                drvs->rx_dropped_tcp_length +
 701                drvs->rx_dropped_runt;
 702
 703        /* detailed rx errors */
 704        stats->rx_length_errors = drvs->rx_in_range_errors +
 705                drvs->rx_out_range_errors +
 706                drvs->rx_frame_too_long;
 707
 708        stats->rx_crc_errors = drvs->rx_crc_errors;
 709
 710        /* frame alignment errors */
 711        stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
 712
 713        /* receiver fifo overrun */
  714        /* drops_no_pbuf is not per i/f, it's per BE card */
 715        stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
 716                                drvs->rx_input_fifo_overflow_drop +
 717                                drvs->rx_drops_no_pbuf;
 718}
 719
 720void be_link_status_update(struct be_adapter *adapter, u8 link_status)
 721{
 722        struct net_device *netdev = adapter->netdev;
 723
 724        if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
 725                netif_carrier_off(netdev);
 726                adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
 727        }
 728
 729        if (link_status)
 730                netif_carrier_on(netdev);
 731        else
 732                netif_carrier_off(netdev);
 733
 734        netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
 735}
 736
 737static int be_gso_hdr_len(struct sk_buff *skb)
 738{
 739        if (skb->encapsulation)
 740                return skb_inner_transport_offset(skb) +
 741                       inner_tcp_hdrlen(skb);
 742        return skb_transport_offset(skb) + tcp_hdrlen(skb);
 743}
 744
 745static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
 746{
 747        struct be_tx_stats *stats = tx_stats(txo);
 748        u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
 749        /* Account for headers which get duplicated in TSO pkt */
 750        u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
 751
 752        u64_stats_update_begin(&stats->sync);
 753        stats->tx_reqs++;
 754        stats->tx_bytes += skb->len + dup_hdr_len;
 755        stats->tx_pkts += tx_pkts;
 756        if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
 757                stats->tx_vxlan_offload_pkts += tx_pkts;
 758        u64_stats_update_end(&stats->sync);
 759}
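
/* Illustrative example of the TSO accounting above: a GSO skb with
 * gso_segs = 4 and a 66-byte header (as computed by be_gso_hdr_len()) is
 * counted as 4 tx_pkts and skb->len + 3 * 66 tx_bytes, since the header is
 * replicated in each of the three extra segments.
 */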
 760
 761/* Returns number of WRBs needed for the skb */
 762static u32 skb_wrb_cnt(struct sk_buff *skb)
 763{
 764        /* +1 for the header wrb */
 765        return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
 766}
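
/* For example, an skb with linear data and two page frags needs four WRBs:
 * one header WRB, one for the linear part and one per frag.
 */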
 767
 768static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
 769{
 770        wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
 771        wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
 772        wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
 773        wrb->rsvd0 = 0;
 774}
 775
 776/* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
 777 * to avoid the swap and shift/mask operations in wrb_fill().
 778 */
 779static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
 780{
 781        wrb->frag_pa_hi = 0;
 782        wrb->frag_pa_lo = 0;
 783        wrb->frag_len = 0;
 784        wrb->rsvd0 = 0;
 785}
 786
 787static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
 788                                     struct sk_buff *skb)
 789{
 790        u8 vlan_prio;
 791        u16 vlan_tag;
 792
 793        vlan_tag = skb_vlan_tag_get(skb);
 794        vlan_prio = skb_vlan_tag_get_prio(skb);
 795        /* If vlan priority provided by OS is NOT in available bmap */
 796        if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
 797                vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
 798                                adapter->recommended_prio_bits;
 799
 800        return vlan_tag;
 801}
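
/* Illustrative example: for an skb tagged 0x2005 (priority 1, VID 5), if
 * bit 1 is clear in vlan_prio_bmap the priority bits are replaced with
 * adapter->recommended_prio_bits while the VID (5) is preserved.
 */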
 802
 803/* Used only for IP tunnel packets */
 804static u16 skb_inner_ip_proto(struct sk_buff *skb)
 805{
 806        return (inner_ip_hdr(skb)->version == 4) ?
 807                inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
 808}
 809
 810static u16 skb_ip_proto(struct sk_buff *skb)
 811{
 812        return (ip_hdr(skb)->version == 4) ?
 813                ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
 814}
 815
 816static inline bool be_is_txq_full(struct be_tx_obj *txo)
 817{
 818        return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
 819}
 820
 821static inline bool be_can_txq_wake(struct be_tx_obj *txo)
 822{
 823        return atomic_read(&txo->q.used) < txo->q.len / 2;
 824}
 825
 826static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
 827{
 828        return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
 829}
 830
 831static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
 832                                       struct sk_buff *skb,
 833                                       struct be_wrb_params *wrb_params)
 834{
 835        u16 proto;
 836
 837        if (skb_is_gso(skb)) {
 838                BE_WRB_F_SET(wrb_params->features, LSO, 1);
 839                wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
 840                if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
 841                        BE_WRB_F_SET(wrb_params->features, LSO6, 1);
 842        } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 843                if (skb->encapsulation) {
 844                        BE_WRB_F_SET(wrb_params->features, IPCS, 1);
 845                        proto = skb_inner_ip_proto(skb);
 846                } else {
 847                        proto = skb_ip_proto(skb);
 848                }
 849                if (proto == IPPROTO_TCP)
 850                        BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
 851                else if (proto == IPPROTO_UDP)
 852                        BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
 853        }
 854
 855        if (skb_vlan_tag_present(skb)) {
 856                BE_WRB_F_SET(wrb_params->features, VLAN, 1);
 857                wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
 858        }
 859
 860        BE_WRB_F_SET(wrb_params->features, CRC, 1);
 861}
 862
 863static void wrb_fill_hdr(struct be_adapter *adapter,
 864                         struct be_eth_hdr_wrb *hdr,
 865                         struct be_wrb_params *wrb_params,
 866                         struct sk_buff *skb)
 867{
 868        memset(hdr, 0, sizeof(*hdr));
 869
 870        SET_TX_WRB_HDR_BITS(crc, hdr,
 871                            BE_WRB_F_GET(wrb_params->features, CRC));
 872        SET_TX_WRB_HDR_BITS(ipcs, hdr,
 873                            BE_WRB_F_GET(wrb_params->features, IPCS));
 874        SET_TX_WRB_HDR_BITS(tcpcs, hdr,
 875                            BE_WRB_F_GET(wrb_params->features, TCPCS));
 876        SET_TX_WRB_HDR_BITS(udpcs, hdr,
 877                            BE_WRB_F_GET(wrb_params->features, UDPCS));
 878
 879        SET_TX_WRB_HDR_BITS(lso, hdr,
 880                            BE_WRB_F_GET(wrb_params->features, LSO));
 881        SET_TX_WRB_HDR_BITS(lso6, hdr,
 882                            BE_WRB_F_GET(wrb_params->features, LSO6));
 883        SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
 884
 885        /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
 886         * hack is not needed, the evt bit is set while ringing DB.
 887         */
 888        SET_TX_WRB_HDR_BITS(event, hdr,
 889                            BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
 890        SET_TX_WRB_HDR_BITS(vlan, hdr,
 891                            BE_WRB_F_GET(wrb_params->features, VLAN));
 892        SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
 893
 894        SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
 895        SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
 896        SET_TX_WRB_HDR_BITS(mgmt, hdr,
 897                            BE_WRB_F_GET(wrb_params->features, OS2BMC));
 898}
 899
 900static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
 901                          bool unmap_single)
 902{
 903        dma_addr_t dma;
 904        u32 frag_len = le32_to_cpu(wrb->frag_len);
  905
 907        dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
 908                (u64)le32_to_cpu(wrb->frag_pa_lo);
 909        if (frag_len) {
 910                if (unmap_single)
 911                        dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
 912                else
 913                        dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
 914        }
 915}
 916
 917/* Grab a WRB header for xmit */
 918static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
 919{
 920        u32 head = txo->q.head;
 921
 922        queue_head_inc(&txo->q);
 923        return head;
 924}
 925
 926/* Set up the WRB header for xmit */
 927static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
 928                                struct be_tx_obj *txo,
 929                                struct be_wrb_params *wrb_params,
 930                                struct sk_buff *skb, u16 head)
 931{
 932        u32 num_frags = skb_wrb_cnt(skb);
 933        struct be_queue_info *txq = &txo->q;
 934        struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
 935
 936        wrb_fill_hdr(adapter, hdr, wrb_params, skb);
 937        be_dws_cpu_to_le(hdr, sizeof(*hdr));
 938
 939        BUG_ON(txo->sent_skb_list[head]);
 940        txo->sent_skb_list[head] = skb;
 941        txo->last_req_hdr = head;
 942        atomic_add(num_frags, &txq->used);
 943        txo->last_req_wrb_cnt = num_frags;
 944        txo->pend_wrb_cnt += num_frags;
 945}
 946
 947/* Setup a WRB fragment (buffer descriptor) for xmit */
 948static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
 949                                 int len)
 950{
 951        struct be_eth_wrb *wrb;
 952        struct be_queue_info *txq = &txo->q;
 953
 954        wrb = queue_head_node(txq);
 955        wrb_fill(wrb, busaddr, len);
 956        queue_head_inc(txq);
 957}
 958
 959/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
 960 * was invoked. The producer index is restored to the previous packet and the
 961 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
 962 */
 963static void be_xmit_restore(struct be_adapter *adapter,
 964                            struct be_tx_obj *txo, u32 head, bool map_single,
 965                            u32 copied)
 966{
 967        struct device *dev;
 968        struct be_eth_wrb *wrb;
 969        struct be_queue_info *txq = &txo->q;
 970
 971        dev = &adapter->pdev->dev;
 972        txq->head = head;
 973
 974        /* skip the first wrb (hdr); it's not mapped */
 975        queue_head_inc(txq);
 976        while (copied) {
 977                wrb = queue_head_node(txq);
 978                unmap_tx_frag(dev, wrb, map_single);
 979                map_single = false;
 980                copied -= le32_to_cpu(wrb->frag_len);
 981                queue_head_inc(txq);
 982        }
 983
 984        txq->head = head;
 985}
 986
 987/* Enqueue the given packet for transmit. This routine allocates WRBs for the
 988 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
 989 * of WRBs used up by the packet.
 990 */
 991static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
 992                           struct sk_buff *skb,
 993                           struct be_wrb_params *wrb_params)
 994{
 995        u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
 996        struct device *dev = &adapter->pdev->dev;
 997        bool map_single = false;
 998        u32 head;
 999        dma_addr_t busaddr;
1000        int len;
1001
1002        head = be_tx_get_wrb_hdr(txo);
1003
1004        if (skb->len > skb->data_len) {
1005                len = skb_headlen(skb);
1006
1007                busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1008                if (dma_mapping_error(dev, busaddr))
1009                        goto dma_err;
1010                map_single = true;
1011                be_tx_setup_wrb_frag(txo, busaddr, len);
1012                copied += len;
1013        }
1014
1015        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1016                const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1017                len = skb_frag_size(frag);
1018
1019                busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1020                if (dma_mapping_error(dev, busaddr))
1021                        goto dma_err;
1022                be_tx_setup_wrb_frag(txo, busaddr, len);
1023                copied += len;
1024        }
1025
1026        be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1027
1028        be_tx_stats_update(txo, skb);
1029        return wrb_cnt;
1030
1031dma_err:
1032        adapter->drv_stats.dma_map_errors++;
1033        be_xmit_restore(adapter, txo, head, map_single, copied);
1034        return 0;
1035}
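
/* Mapping/unwind summary for be_xmit_enqueue(): the linear part of the skb
 * is mapped with dma_map_single() and each frag with skb_frag_dma_map();
 * if any mapping fails, be_xmit_restore() rewinds the producer index and
 * unmaps what was already mapped, and 0 is returned so the caller drops
 * the skb.
 */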
1036
1037static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1038{
1039        return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1040}
1041
1042static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1043                                             struct sk_buff *skb,
1044                                             struct be_wrb_params
1045                                             *wrb_params)
1046{
1047        bool insert_vlan = false;
1048        u16 vlan_tag = 0;
1049
1050        skb = skb_share_check(skb, GFP_ATOMIC);
1051        if (unlikely(!skb))
1052                return skb;
1053
1054        if (skb_vlan_tag_present(skb)) {
1055                vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1056                insert_vlan = true;
1057        }
1058
1059        if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1060                if (!insert_vlan) {
1061                        vlan_tag = adapter->pvid;
1062                        insert_vlan = true;
1063                }
 1064                /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W to
 1065                 * skip VLAN insertion
 1066                 */
1067                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1068        }
1069
1070        if (insert_vlan) {
1071                skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1072                                                vlan_tag);
1073                if (unlikely(!skb))
1074                        return skb;
1075                __vlan_hwaccel_clear_tag(skb);
1076        }
1077
1078        /* Insert the outer VLAN, if any */
1079        if (adapter->qnq_vid) {
1080                vlan_tag = adapter->qnq_vid;
1081                skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1082                                                vlan_tag);
1083                if (unlikely(!skb))
1084                        return skb;
1085                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086        }
1087
1088        return skb;
1089}
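
/* QnQ example: with pvid set and a QnQ async event received, an untagged
 * skb first gets the inner (pvid) tag inserted into the frame; if qnq_vid
 * is also configured, the outer tag is inserted as well and VLAN_SKIP_HW
 * is set so the HW does not add a third tag.
 */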
1090
1091static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1092{
1093        struct ethhdr *eh = (struct ethhdr *)skb->data;
1094        u16 offset = ETH_HLEN;
1095
1096        if (eh->h_proto == htons(ETH_P_IPV6)) {
1097                struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1098
1099                offset += sizeof(struct ipv6hdr);
1100                if (ip6h->nexthdr != NEXTHDR_TCP &&
1101                    ip6h->nexthdr != NEXTHDR_UDP) {
1102                        struct ipv6_opt_hdr *ehdr =
1103                                (struct ipv6_opt_hdr *)(skb->data + offset);
1104
1105                        /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1106                        if (ehdr->hdrlen == 0xff)
1107                                return true;
1108                }
1109        }
1110        return false;
1111}
1112
1113static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1114{
1115        return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1116}
1117
1118static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119{
1120        return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1121}
1122
1123static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1124                                                  struct sk_buff *skb,
1125                                                  struct be_wrb_params
1126                                                  *wrb_params)
1127{
1128        struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1129        unsigned int eth_hdr_len;
1130        struct iphdr *ip;
1131
 1132        /* For padded packets, BE HW modifies tot_len field in IP header
 1133         * incorrectly when VLAN tag is inserted by HW.
 1134         * For padded packets, Lancer computes incorrect checksum.
 1135         */
1136        eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1137                                                VLAN_ETH_HLEN : ETH_HLEN;
1138        if (skb->len <= 60 &&
1139            (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1140            is_ipv4_pkt(skb)) {
1141                ip = (struct iphdr *)ip_hdr(skb);
1142                pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1143        }
1144
1145        /* If vlan tag is already inlined in the packet, skip HW VLAN
1146         * tagging in pvid-tagging mode
1147         */
1148        if (be_pvid_tagging_enabled(adapter) &&
1149            veh->h_vlan_proto == htons(ETH_P_8021Q))
1150                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1151
 1152        /* HW has a bug wherein it will calculate CSUM for VLAN
 1153         * pkts even though CSUM offload is disabled.
 1154         * Manually insert the VLAN tag in such pkts.
 1155         */
1156        if (skb->ip_summed != CHECKSUM_PARTIAL &&
1157            skb_vlan_tag_present(skb)) {
1158                skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1159                if (unlikely(!skb))
1160                        goto err;
1161        }
1162
1163        /* HW may lockup when VLAN HW tagging is requested on
1164         * certain ipv6 packets. Drop such pkts if the HW workaround to
1165         * skip HW tagging is not enabled by FW.
1166         */
1167        if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1168                     (adapter->pvid || adapter->qnq_vid) &&
1169                     !qnq_async_evt_rcvd(adapter)))
1170                goto tx_drop;
1171
1172        /* Manual VLAN tag insertion to prevent:
1173         * ASIC lockup when the ASIC inserts VLAN tag into
1174         * certain ipv6 packets. Insert VLAN tags in driver,
1175         * and set event, completion, vlan bits accordingly
1176         * in the Tx WRB.
1177         */
1178        if (be_ipv6_tx_stall_chk(adapter, skb) &&
1179            be_vlan_tag_tx_chk(adapter, skb)) {
1180                skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1181                if (unlikely(!skb))
1182                        goto err;
1183        }
1184
1185        return skb;
1186tx_drop:
1187        dev_kfree_skb_any(skb);
1188err:
1189        return NULL;
1190}
1191
1192static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1193                                           struct sk_buff *skb,
1194                                           struct be_wrb_params *wrb_params)
1195{
1196        int err;
1197
 1198        /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
 1199         * packets that are 32 bytes or less may cause a transmit stall
 1200         * on that port. The workaround is to pad such packets
 1201         * (len <= 32 bytes) to a minimum length of 36 bytes.
 1202         */
1203        if (skb->len <= 32) {
1204                if (skb_put_padto(skb, 36))
1205                        return NULL;
1206        }
1207
1208        if (BEx_chip(adapter) || lancer_chip(adapter)) {
1209                skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1210                if (!skb)
1211                        return NULL;
1212        }
1213
1214        /* The stack can send us skbs with length greater than
1215         * what the HW can handle. Trim the extra bytes.
1216         */
1217        WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1218        err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1219        WARN_ON(err);
1220
1221        return skb;
1222}
1223
1224static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1225{
1226        struct be_queue_info *txq = &txo->q;
1227        struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1228
1229        /* Mark the last request eventable if it hasn't been marked already */
1230        if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1231                hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1232
1233        /* compose a dummy wrb if there are odd set of wrbs to notify */
1234        if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1235                wrb_fill_dummy(queue_head_node(txq));
1236                queue_head_inc(txq);
1237                atomic_inc(&txq->used);
1238                txo->pend_wrb_cnt++;
1239                hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1240                                           TX_HDR_WRB_NUM_SHIFT);
1241                hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1242                                          TX_HDR_WRB_NUM_SHIFT);
1243        }
1244        be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1245        txo->pend_wrb_cnt = 0;
1246}
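
/* Example of the odd-WRB padding above: if pend_wrb_cnt is 3 when flushing
 * on a BE chip, one all-zero dummy WRB is appended so that 4 WRBs are
 * notified to HW, and num_wrb in the last header WRB is bumped from
 * last_req_wrb_cnt to last_req_wrb_cnt + 1.
 */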
1247
1248/* OS2BMC related */
1249
1250#define DHCP_CLIENT_PORT        68
1251#define DHCP_SERVER_PORT        67
1252#define NET_BIOS_PORT1          137
1253#define NET_BIOS_PORT2          138
1254#define DHCPV6_RAS_PORT         547
1255
1256#define is_mc_allowed_on_bmc(adapter, eh)       \
1257        (!is_multicast_filt_enabled(adapter) && \
1258         is_multicast_ether_addr(eh->h_dest) && \
1259         !is_broadcast_ether_addr(eh->h_dest))
1260
1261#define is_bc_allowed_on_bmc(adapter, eh)       \
1262        (!is_broadcast_filt_enabled(adapter) && \
1263         is_broadcast_ether_addr(eh->h_dest))
1264
1265#define is_arp_allowed_on_bmc(adapter, skb)     \
1266        (is_arp(skb) && is_arp_filt_enabled(adapter))
1267
1268#define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1269
1270#define is_arp_filt_enabled(adapter)    \
1271                (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1272
1273#define is_dhcp_client_filt_enabled(adapter)    \
1274                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1275
1276#define is_dhcp_srvr_filt_enabled(adapter)      \
1277                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1278
1279#define is_nbios_filt_enabled(adapter)  \
1280                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1281
1282#define is_ipv6_na_filt_enabled(adapter)        \
1283                (adapter->bmc_filt_mask &       \
1284                        BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1285
1286#define is_ipv6_ra_filt_enabled(adapter)        \
1287                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1288
1289#define is_ipv6_ras_filt_enabled(adapter)       \
1290                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1291
1292#define is_broadcast_filt_enabled(adapter)      \
1293                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1294
1295#define is_multicast_filt_enabled(adapter)      \
1296                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1297
1298static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1299                               struct sk_buff **skb)
1300{
1301        struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1302        bool os2bmc = false;
1303
1304        if (!be_is_os2bmc_enabled(adapter))
1305                goto done;
1306
1307        if (!is_multicast_ether_addr(eh->h_dest))
1308                goto done;
1309
1310        if (is_mc_allowed_on_bmc(adapter, eh) ||
1311            is_bc_allowed_on_bmc(adapter, eh) ||
1312            is_arp_allowed_on_bmc(adapter, (*skb))) {
1313                os2bmc = true;
1314                goto done;
1315        }
1316
1317        if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1318                struct ipv6hdr *hdr = ipv6_hdr((*skb));
1319                u8 nexthdr = hdr->nexthdr;
1320
1321                if (nexthdr == IPPROTO_ICMPV6) {
1322                        struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1323
1324                        switch (icmp6->icmp6_type) {
1325                        case NDISC_ROUTER_ADVERTISEMENT:
1326                                os2bmc = is_ipv6_ra_filt_enabled(adapter);
1327                                goto done;
1328                        case NDISC_NEIGHBOUR_ADVERTISEMENT:
1329                                os2bmc = is_ipv6_na_filt_enabled(adapter);
1330                                goto done;
1331                        default:
1332                                break;
1333                        }
1334                }
1335        }
1336
1337        if (is_udp_pkt((*skb))) {
1338                struct udphdr *udp = udp_hdr((*skb));
1339
1340                switch (ntohs(udp->dest)) {
1341                case DHCP_CLIENT_PORT:
1342                        os2bmc = is_dhcp_client_filt_enabled(adapter);
1343                        goto done;
1344                case DHCP_SERVER_PORT:
1345                        os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1346                        goto done;
1347                case NET_BIOS_PORT1:
1348                case NET_BIOS_PORT2:
1349                        os2bmc = is_nbios_filt_enabled(adapter);
1350                        goto done;
1351                case DHCPV6_RAS_PORT:
1352                        os2bmc = is_ipv6_ras_filt_enabled(adapter);
1353                        goto done;
1354                default:
1355                        break;
1356                }
1357        }
1358done:
 1359        /* For VLAN packets destined to the BMC, the ASIC expects the
 1360         * VLAN tag to be inline in the packet.
 1361         */
1362        if (os2bmc)
1363                *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1364
1365        return os2bmc;
1366}
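
/* Example of the OS2BMC classification above: a broadcast DHCP-client
 * packet (UDP dest port 68) is mirrored to the BMC only when the
 * BMC_FILT_BROADCAST_DHCP_CLIENT bit is set in bmc_filt_mask; if it is,
 * be_xmit() enqueues the skb a second time with the mgmt bit set.
 */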
1367
1368static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1369{
1370        struct be_adapter *adapter = netdev_priv(netdev);
1371        u16 q_idx = skb_get_queue_mapping(skb);
1372        struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1373        struct be_wrb_params wrb_params = { 0 };
1374        bool flush = !netdev_xmit_more();
1375        u16 wrb_cnt;
1376
1377        skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1378        if (unlikely(!skb))
1379                goto drop;
1380
1381        be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1382
1383        wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384        if (unlikely(!wrb_cnt)) {
1385                dev_kfree_skb_any(skb);
1386                goto drop;
1387        }
1388
 1389        /* if os2bmc is enabled and the pkt is destined to the bmc,
 1390         * enqueue the pkt a 2nd time with the mgmt bit set.
 1391         */
1392        if (be_send_pkt_to_bmc(adapter, &skb)) {
1393                BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1394                wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1395                if (unlikely(!wrb_cnt))
1396                        goto drop;
1397                else
1398                        skb_get(skb);
1399        }
1400
1401        if (be_is_txq_full(txo)) {
1402                netif_stop_subqueue(netdev, q_idx);
1403                tx_stats(txo)->tx_stops++;
1404        }
1405
1406        if (flush || __netif_subqueue_stopped(netdev, q_idx))
1407                be_xmit_flush(adapter, txo);
1408
1409        return NETDEV_TX_OK;
1410drop:
1411        tx_stats(txo)->tx_drv_drops++;
1412        /* Flush the already enqueued tx requests */
1413        if (flush && txo->pend_wrb_cnt)
1414                be_xmit_flush(adapter, txo);
1415
1416        return NETDEV_TX_OK;
1417}
1418
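    /* ndo_tx_timeout handler: dump the TX queue and TX completion queue
     * descriptors along with the pending skbs to aid debugging; on Lancer
     * chips, also request a firmware-level reset of the adapter.
     */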
1419static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1420{
1421        struct be_adapter *adapter = netdev_priv(netdev);
1422        struct device *dev = &adapter->pdev->dev;
1423        struct be_tx_obj *txo;
1424        struct sk_buff *skb;
1425        struct tcphdr *tcphdr;
1426        struct udphdr *udphdr;
1427        u32 *entry;
1428        int status;
1429        int i, j;
1430
1431        for_all_tx_queues(adapter, txo, i) {
1432                dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1433                         i, txo->q.head, txo->q.tail,
1434                         atomic_read(&txo->q.used), txo->q.id);
1435
1436                entry = txo->q.dma_mem.va;
1437                for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1438                        if (entry[j] != 0 || entry[j + 1] != 0 ||
1439                            entry[j + 2] != 0 || entry[j + 3] != 0) {
1440                                dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1441                                         j, entry[j], entry[j + 1],
1442                                         entry[j + 2], entry[j + 3]);
1443                        }
1444                }
1445
1446                entry = txo->cq.dma_mem.va;
1447                dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1448                         i, txo->cq.head, txo->cq.tail,
1449                         atomic_read(&txo->cq.used));
1450                for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1451                        if (entry[j] != 0 || entry[j + 1] != 0 ||
1452                            entry[j + 2] != 0 || entry[j + 3] != 0) {
1453                                dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1454                                         j, entry[j], entry[j + 1],
1455                                         entry[j + 2], entry[j + 3]);
1456                        }
1457                }
1458
1459                for (j = 0; j < TX_Q_LEN; j++) {
1460                        if (txo->sent_skb_list[j]) {
1461                                skb = txo->sent_skb_list[j];
1462                                if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1463                                        tcphdr = tcp_hdr(skb);
1464                                        dev_info(dev, "TCP source port %d\n",
1465                                                 ntohs(tcphdr->source));
1466                                        dev_info(dev, "TCP dest port %d\n",
1467                                                 ntohs(tcphdr->dest));
1468                                        dev_info(dev, "TCP sequence num %u\n",
1469                                                 ntohl(tcphdr->seq));
1470                                        dev_info(dev, "TCP ack_seq %u\n",
1471                                                 ntohl(tcphdr->ack_seq));
1472                                } else if (ip_hdr(skb)->protocol ==
1473                                           IPPROTO_UDP) {
1474                                        udphdr = udp_hdr(skb);
1475                                        dev_info(dev, "UDP source port %d\n",
1476                                                 ntohs(udphdr->source));
1477                                        dev_info(dev, "UDP dest port %d\n",
1478                                                 ntohs(udphdr->dest));
1479                                }
1480                                dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1481                                         j, skb, skb->len, ntohs(skb->protocol));
1482                        }
1483                }
1484        }
1485
1486        if (lancer_chip(adapter)) {
1487                dev_info(dev, "Initiating reset due to tx timeout\n");
1488                dev_info(dev, "Resetting adapter\n");
1489                status = lancer_physdev_ctrl(adapter,
1490                                             PHYSDEV_CONTROL_FW_RESET_MASK);
1491                if (status)
1492                        dev_err(dev, "Reset failed .. Reboot server\n");
1493        }
1494}
1495
1496static inline bool be_in_all_promisc(struct be_adapter *adapter)
1497{
1498        return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1499                        BE_IF_FLAGS_ALL_PROMISCUOUS;
1500}
1501
1502static int be_set_vlan_promisc(struct be_adapter *adapter)
1503{
1504        struct device *dev = &adapter->pdev->dev;
1505        int status;
1506
1507        if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1508                return 0;
1509
1510        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1511        if (!status) {
1512                dev_info(dev, "Enabled VLAN promiscuous mode\n");
1513                adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1514        } else {
1515                dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1516        }
1517        return status;
1518}
1519
1520static int be_clear_vlan_promisc(struct be_adapter *adapter)
1521{
1522        struct device *dev = &adapter->pdev->dev;
1523        int status;
1524
1525        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1526        if (!status) {
1527                dev_info(dev, "Disabled VLAN promiscuous mode\n");
1528                adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1529        }
1530        return status;
1531}
1532
1533/*
1534 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1535 * If the user configures more, place BE in vlan promiscuous mode.
1536 */
1537static int be_vid_config(struct be_adapter *adapter)
1538{
1539        struct device *dev = &adapter->pdev->dev;
1540        u16 vids[BE_NUM_VLANS_SUPPORTED];
1541        u16 num = 0, i = 0;
1542        int status = 0;
1543
1544        /* No need to change the VLAN state if the I/F is in promiscuous mode */
1545        if (adapter->netdev->flags & IFF_PROMISC)
1546                return 0;
1547
1548        if (adapter->vlans_added > be_max_vlans(adapter))
1549                return be_set_vlan_promisc(adapter);
1550
1551        if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1552                status = be_clear_vlan_promisc(adapter);
1553                if (status)
1554                        return status;
1555        }
1556        /* Construct VLAN Table to give to HW */
1557        for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1558                vids[num++] = cpu_to_le16(i);
1559
1560        status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1561        if (status) {
1562                dev_err(dev, "Setting HW VLAN filtering failed\n");
1563                /* Set to VLAN promisc mode as setting VLAN filter failed */
1564                if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1565                    addl_status(status) ==
1566                                MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1567                        return be_set_vlan_promisc(adapter);
1568        }
1569        return status;
1570}
1571
1572static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1573{
1574        struct be_adapter *adapter = netdev_priv(netdev);
1575        int status = 0;
1576
1577        mutex_lock(&adapter->rx_filter_lock);
1578
1579        /* Packets with VID 0 are always received by Lancer by default */
1580        if (lancer_chip(adapter) && vid == 0)
1581                goto done;
1582
1583        if (test_bit(vid, adapter->vids))
1584                goto done;
1585
1586        set_bit(vid, adapter->vids);
1587        adapter->vlans_added++;
1588
1589        status = be_vid_config(adapter);
1590done:
1591        mutex_unlock(&adapter->rx_filter_lock);
1592        return status;
1593}
1594
1595static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1596{
1597        struct be_adapter *adapter = netdev_priv(netdev);
1598        int status = 0;
1599
1600        mutex_lock(&adapter->rx_filter_lock);
1601
1602        /* Packets with VID 0 are always received by Lancer by default */
1603        if (lancer_chip(adapter) && vid == 0)
1604                goto done;
1605
1606        if (!test_bit(vid, adapter->vids))
1607                goto done;
1608
1609        clear_bit(vid, adapter->vids);
1610        adapter->vlans_added--;
1611
1612        status = be_vid_config(adapter);
1613done:
1614        mutex_unlock(&adapter->rx_filter_lock);
1615        return status;
1616}
1617
1618static void be_set_all_promisc(struct be_adapter *adapter)
1619{
1620        be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1621        adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1622}
1623
1624static void be_set_mc_promisc(struct be_adapter *adapter)
1625{
1626        int status;
1627
1628        if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1629                return;
1630
1631        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1632        if (!status)
1633                adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1634}
1635
1636static void be_set_uc_promisc(struct be_adapter *adapter)
1637{
1638        int status;
1639
1640        if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1641                return;
1642
1643        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1644        if (!status)
1645                adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1646}
1647
1648static void be_clear_uc_promisc(struct be_adapter *adapter)
1649{
1650        int status;
1651
1652        if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1653                return;
1654
1655        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1656        if (!status)
1657                adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1658}
1659
1660/* The two functions below are the callback args for __dev_mc_sync() and
1661 * __dev_uc_sync(). We use a single callback function for both sync and unsync
1662 * and don't really add/remove addresses through it; it is only used to detect
1663 * changes to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1664 */
1665static int be_uc_list_update(struct net_device *netdev,
1666                             const unsigned char *addr)
1667{
1668        struct be_adapter *adapter = netdev_priv(netdev);
1669
1670        adapter->update_uc_list = true;
1671        return 0;
1672}
1673
1674static int be_mc_list_update(struct net_device *netdev,
1675                             const unsigned char *addr)
1676{
1677        struct be_adapter *adapter = netdev_priv(netdev);
1678
1679        adapter->update_mc_list = true;
1680        return 0;
1681}
1682
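    /* Program the multicast filter: cache the netdev mc-list under the addr
     * lock, then either fall back to mc-promisc (IFF_ALLMULTI or more entries
     * than be_max_mc()) or re-program the MULTICAST filter on the adapter.
     */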
1683static void be_set_mc_list(struct be_adapter *adapter)
1684{
1685        struct net_device *netdev = adapter->netdev;
1686        struct netdev_hw_addr *ha;
1687        bool mc_promisc = false;
1688        int status;
1689
1690        netif_addr_lock_bh(netdev);
1691        __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1692
1693        if (netdev->flags & IFF_PROMISC) {
1694                adapter->update_mc_list = false;
1695        } else if (netdev->flags & IFF_ALLMULTI ||
1696                   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1697                /* Enable multicast promisc if num configured exceeds
1698                 * what we support
1699                 */
1700                mc_promisc = true;
1701                adapter->update_mc_list = false;
1702        } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1703                /* Update mc-list unconditionally if the iface was previously
1704                 * in mc-promisc mode and now is out of that mode.
1705                 */
1706                adapter->update_mc_list = true;
1707        }
1708
1709        if (adapter->update_mc_list) {
1710                int i = 0;
1711
1712                /* cache the mc-list in adapter */
1713                netdev_for_each_mc_addr(ha, netdev) {
1714                        ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1715                        i++;
1716                }
1717                adapter->mc_count = netdev_mc_count(netdev);
1718        }
1719        netif_addr_unlock_bh(netdev);
1720
1721        if (mc_promisc) {
1722                be_set_mc_promisc(adapter);
1723        } else if (adapter->update_mc_list) {
1724                status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1725                if (!status)
1726                        adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1727                else
1728                        be_set_mc_promisc(adapter);
1729
1730                adapter->update_mc_list = false;
1731        }
1732}
1733
1734static void be_clear_mc_list(struct be_adapter *adapter)
1735{
1736        struct net_device *netdev = adapter->netdev;
1737
1738        __dev_mc_unsync(netdev, NULL);
1739        be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1740        adapter->mc_count = 0;
1741}
1742
1743static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1744{
1745        if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1746                adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1747                return 0;
1748        }
1749
1750        return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1751                               adapter->if_handle,
1752                               &adapter->pmac_id[uc_idx + 1], 0);
1753}
1754
1755static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1756{
1757        if (pmac_id == adapter->pmac_id[0])
1758                return;
1759
1760        be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1761}
1762
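    /* Program the unicast filter: cache the netdev uc-list under the addr
     * lock; fall back to uc-promisc when the list exceeds be_max_uc() - 1
     * entries, otherwise delete the stale pmac entries and add the new ones.
     */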
1763static void be_set_uc_list(struct be_adapter *adapter)
1764{
1765        struct net_device *netdev = adapter->netdev;
1766        struct netdev_hw_addr *ha;
1767        bool uc_promisc = false;
1768        int curr_uc_macs = 0, i;
1769
1770        netif_addr_lock_bh(netdev);
1771        __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1772
1773        if (netdev->flags & IFF_PROMISC) {
1774                adapter->update_uc_list = false;
1775        } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1776                uc_promisc = true;
1777                adapter->update_uc_list = false;
1778        }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1779                /* Update uc-list unconditionally if the iface was previously
1780                 * in uc-promisc mode and now is out of that mode.
1781                 */
1782                adapter->update_uc_list = true;
1783        }
1784
1785        if (adapter->update_uc_list) {
1786                /* cache the uc-list in adapter array */
1787                i = 0;
1788                netdev_for_each_uc_addr(ha, netdev) {
1789                        ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1790                        i++;
1791                }
1792                curr_uc_macs = netdev_uc_count(netdev);
1793        }
1794        netif_addr_unlock_bh(netdev);
1795
1796        if (uc_promisc) {
1797                be_set_uc_promisc(adapter);
1798        } else if (adapter->update_uc_list) {
1799                be_clear_uc_promisc(adapter);
1800
1801                for (i = 0; i < adapter->uc_macs; i++)
1802                        be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1803
1804                for (i = 0; i < curr_uc_macs; i++)
1805                        be_uc_mac_add(adapter, i);
1806                adapter->uc_macs = curr_uc_macs;
1807                adapter->update_uc_list = false;
1808        }
1809}
1810
1811static void be_clear_uc_list(struct be_adapter *adapter)
1812{
1813        struct net_device *netdev = adapter->netdev;
1814        int i;
1815
1816        __dev_uc_unsync(netdev, NULL);
1817        for (i = 0; i < adapter->uc_macs; i++)
1818                be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1819
1820        adapter->uc_macs = 0;
1821}
1822
1823static void __be_set_rx_mode(struct be_adapter *adapter)
1824{
1825        struct net_device *netdev = adapter->netdev;
1826
1827        mutex_lock(&adapter->rx_filter_lock);
1828
1829        if (netdev->flags & IFF_PROMISC) {
1830                if (!be_in_all_promisc(adapter))
1831                        be_set_all_promisc(adapter);
1832        } else if (be_in_all_promisc(adapter)) {
1833                /* We need to re-program the vlan-list or clear
1834                 * vlan-promisc mode (if needed) when the interface
1835                 * comes out of promisc mode.
1836                 */
1837                be_vid_config(adapter);
1838        }
1839
1840        be_set_uc_list(adapter);
1841        be_set_mc_list(adapter);
1842
1843        mutex_unlock(&adapter->rx_filter_lock);
1844}
1845
1846static void be_work_set_rx_mode(struct work_struct *work)
1847{
1848        struct be_cmd_work *cmd_work =
1849                                container_of(work, struct be_cmd_work, work);
1850
1851        __be_set_rx_mode(cmd_work->adapter);
1852        kfree(cmd_work);
1853}
1854
1855static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1856{
1857        struct be_adapter *adapter = netdev_priv(netdev);
1858        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1859        int status;
1860
1861        if (!sriov_enabled(adapter))
1862                return -EPERM;
1863
1864        if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1865                return -EINVAL;
1866
1867        /* Proceed further only if the user-provided MAC is different
1868         * from the active MAC
1869         */
1870        if (ether_addr_equal(mac, vf_cfg->mac_addr))
1871                return 0;
1872
1873        if (BEx_chip(adapter)) {
1874                be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1875                                vf + 1);
1876
1877                status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1878                                         &vf_cfg->pmac_id, vf + 1);
1879        } else {
1880                status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1881                                        vf + 1);
1882        }
1883
1884        if (status) {
1885                dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1886                        mac, vf, status);
1887                return be_cmd_status(status);
1888        }
1889
1890        ether_addr_copy(vf_cfg->mac_addr, mac);
1891
1892        return 0;
1893}
1894
1895static int be_get_vf_config(struct net_device *netdev, int vf,
1896                            struct ifla_vf_info *vi)
1897{
1898        struct be_adapter *adapter = netdev_priv(netdev);
1899        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900
1901        if (!sriov_enabled(adapter))
1902                return -EPERM;
1903
1904        if (vf >= adapter->num_vfs)
1905                return -EINVAL;
1906
1907        vi->vf = vf;
1908        vi->max_tx_rate = vf_cfg->tx_rate;
1909        vi->min_tx_rate = 0;
1910        vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1911        vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1912        memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1913        vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1914        vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1915
1916        return 0;
1917}
1918
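    /* Enable Transparent VLAN Tagging (TVT) on a VF: program the hsw config
     * with the given vlan, clear any guest VLAN filters and revoke the VF's
     * FILTMGMT privilege so it can no longer program VLAN filters itself.
     */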
1919static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1920{
1921        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1922        u16 vids[BE_NUM_VLANS_SUPPORTED];
1923        int vf_if_id = vf_cfg->if_handle;
1924        int status;
1925
1926        /* Enable Transparent VLAN Tagging */
1927        status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1928        if (status)
1929                return status;
1930
1931        /* Clear any pre-programmed VLAN filters on the VF once TVT is enabled */
1932        vids[0] = 0;
1933        status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1934        if (!status)
1935                dev_info(&adapter->pdev->dev,
1936                         "Cleared guest VLANs on VF%d", vf);
1937
1938        /* After TVT is enabled, disallow VFs to program VLAN filters */
1939        if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1940                status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1941                                                  ~BE_PRIV_FILTMGMT, vf + 1);
1942                if (!status)
1943                        vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1944        }
1945        return 0;
1946}
1947
1948static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1949{
1950        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1951        struct device *dev = &adapter->pdev->dev;
1952        int status;
1953
1954        /* Reset Transparent VLAN Tagging. */
1955        status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1956                                       vf_cfg->if_handle, 0, 0);
1957        if (status)
1958                return status;
1959
1960        /* Allow VFs to program VLAN filtering */
1961        if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1962                status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1963                                                  BE_PRIV_FILTMGMT, vf + 1);
1964                if (!status) {
1965                        vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1966                        dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1967                }
1968        }
1969
1970        dev_info(dev,
1971                 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1972        return 0;
1973}
1974
1975static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1976                          __be16 vlan_proto)
1977{
1978        struct be_adapter *adapter = netdev_priv(netdev);
1979        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1980        int status;
1981
1982        if (!sriov_enabled(adapter))
1983                return -EPERM;
1984
1985        if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1986                return -EINVAL;
1987
1988        if (vlan_proto != htons(ETH_P_8021Q))
1989                return -EPROTONOSUPPORT;
1990
1991        if (vlan || qos) {
1992                vlan |= qos << VLAN_PRIO_SHIFT;
1993                status = be_set_vf_tvt(adapter, vf, vlan);
1994        } else {
1995                status = be_clear_vf_tvt(adapter, vf);
1996        }
1997
1998        if (status) {
1999                dev_err(&adapter->pdev->dev,
2000                        "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2001                        status);
2002                return be_cmd_status(status);
2003        }
2004
2005        vf_cfg->vlan_tag = vlan;
2006        return 0;
2007}
2008
2009static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2010                             int min_tx_rate, int max_tx_rate)
2011{
2012        struct be_adapter *adapter = netdev_priv(netdev);
2013        struct device *dev = &adapter->pdev->dev;
2014        int percent_rate, status = 0;
2015        u16 link_speed = 0;
2016        u8 link_status;
2017
2018        if (!sriov_enabled(adapter))
2019                return -EPERM;
2020
2021        if (vf >= adapter->num_vfs)
2022                return -EINVAL;
2023
2024        if (min_tx_rate)
2025                return -EINVAL;
2026
2027        if (!max_tx_rate)
2028                goto config_qos;
2029
2030        status = be_cmd_link_status_query(adapter, &link_speed,
2031                                          &link_status, 0);
2032        if (status)
2033                goto err;
2034
2035        if (!link_status) {
2036                dev_err(dev, "TX-rate setting not allowed when link is down\n");
2037                status = -ENETDOWN;
2038                goto err;
2039        }
2040
2041        if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2042                dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2043                        link_speed);
2044                status = -EINVAL;
2045                goto err;
2046        }
2047
2048        /* On Skyhawk the QOS setting must be done only as a % value */
2049        percent_rate = link_speed / 100;
2050        if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2051                dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2052                        percent_rate);
2053                status = -EINVAL;
2054                goto err;
2055        }
2056
2057config_qos:
2058        status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2059        if (status)
2060                goto err;
2061
2062        adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2063        return 0;
2064
2065err:
2066        dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2067                max_tx_rate, vf);
2068        return be_cmd_status(status);
2069}
2070
2071static int be_set_vf_link_state(struct net_device *netdev, int vf,
2072                                int link_state)
2073{
2074        struct be_adapter *adapter = netdev_priv(netdev);
2075        int status;
2076
2077        if (!sriov_enabled(adapter))
2078                return -EPERM;
2079
2080        if (vf >= adapter->num_vfs)
2081                return -EINVAL;
2082
2083        status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2084        if (status) {
2085                dev_err(&adapter->pdev->dev,
2086                        "Link state change on VF %d failed: %#x\n", vf, status);
2087                return be_cmd_status(status);
2088        }
2089
2090        adapter->vf_cfg[vf].plink_tracking = link_state;
2091
2092        return 0;
2093}
2094
2095static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2096{
2097        struct be_adapter *adapter = netdev_priv(netdev);
2098        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2099        u8 spoofchk;
2100        int status;
2101
2102        if (!sriov_enabled(adapter))
2103                return -EPERM;
2104
2105        if (vf >= adapter->num_vfs)
2106                return -EINVAL;
2107
2108        if (BEx_chip(adapter))
2109                return -EOPNOTSUPP;
2110
2111        if (enable == vf_cfg->spoofchk)
2112                return 0;
2113
2114        spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2115
2116        status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2117                                       0, spoofchk);
2118        if (status) {
2119                dev_err(&adapter->pdev->dev,
2120                        "Spoofchk change on VF %d failed: %#x\n", vf, status);
2121                return be_cmd_status(status);
2122        }
2123
2124        vf_cfg->spoofchk = enable;
2125        return 0;
2126}
2127
2128static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2129                          ulong now)
2130{
2131        aic->rx_pkts_prev = rx_pkts;
2132        aic->tx_reqs_prev = tx_pkts;
2133        aic->jiffies = now;
2134}
2135
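    /* Adaptive interrupt coalescing: sample the rx/tx packet counts since the
     * last invocation, convert them to a packets-per-second rate and derive a
     * new EQ delay as eqd = (pps / 15000) << 2, zeroing values below 8 and
     * clamping the result to [min_eqd, max_eqd]. For example, ~600K pps gives
     * (600000 / 15000) << 2 = 160 before clamping.
     */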
2136static int be_get_new_eqd(struct be_eq_obj *eqo)
2137{
2138        struct be_adapter *adapter = eqo->adapter;
2139        int eqd, start;
2140        struct be_aic_obj *aic;
2141        struct be_rx_obj *rxo;
2142        struct be_tx_obj *txo;
2143        u64 rx_pkts = 0, tx_pkts = 0;
2144        ulong now;
2145        u32 pps, delta;
2146        int i;
2147
2148        aic = &adapter->aic_obj[eqo->idx];
2149        if (!adapter->aic_enabled) {
2150                if (aic->jiffies)
2151                        aic->jiffies = 0;
2152                eqd = aic->et_eqd;
2153                return eqd;
2154        }
2155
2156        for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2157                do {
2158                        start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2159                        rx_pkts += rxo->stats.rx_pkts;
2160                } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2161        }
2162
2163        for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2164                do {
2165                        start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2166                        tx_pkts += txo->stats.tx_reqs;
2167                } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2168        }
2169
2170        /* Skip if counters wrapped around or this is the first calculation */
2171        now = jiffies;
2172        if (!aic->jiffies || time_before(now, aic->jiffies) ||
2173            rx_pkts < aic->rx_pkts_prev ||
2174            tx_pkts < aic->tx_reqs_prev) {
2175                be_aic_update(aic, rx_pkts, tx_pkts, now);
2176                return aic->prev_eqd;
2177        }
2178
2179        delta = jiffies_to_msecs(now - aic->jiffies);
2180        if (delta == 0)
2181                return aic->prev_eqd;
2182
2183        pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2184                (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2185        eqd = (pps / 15000) << 2;
2186
2187        if (eqd < 8)
2188                eqd = 0;
2189        eqd = min_t(u32, eqd, aic->max_eqd);
2190        eqd = max_t(u32, eqd, aic->min_eqd);
2191
2192        be_aic_update(aic, rx_pkts, tx_pkts, now);
2193
2194        return eqd;
2195}
2196
2197/* For Skyhawk-R only */
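    /* Maps the current EQ delay to an R2I delay-multiplier encoding:
     * eqd > 100 -> ENC_1, > 60 -> ENC_2, > 20 -> ENC_3, else ENC_0.
     */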
2198static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2199{
2200        struct be_adapter *adapter = eqo->adapter;
2201        struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2202        ulong now = jiffies;
2203        int eqd;
2204        u32 mult_enc;
2205
2206        if (!adapter->aic_enabled)
2207                return 0;
2208
2209        if (jiffies_to_msecs(now - aic->jiffies) < 1)
2210                eqd = aic->prev_eqd;
2211        else
2212                eqd = be_get_new_eqd(eqo);
2213
2214        if (eqd > 100)
2215                mult_enc = R2I_DLY_ENC_1;
2216        else if (eqd > 60)
2217                mult_enc = R2I_DLY_ENC_2;
2218        else if (eqd > 20)
2219                mult_enc = R2I_DLY_ENC_3;
2220        else
2221                mult_enc = R2I_DLY_ENC_0;
2222
2223        aic->prev_eqd = eqd;
2224
2225        return mult_enc;
2226}
2227
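    /* Push updated EQ delay values to the FW for every EQ whose computed delay
     * changed (or for all EQs when force_update is set). The FW expects the
     * value as a delay multiplier of (eqd * 65) / 100, e.g. eqd = 96 maps to 62.
     */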
2228void be_eqd_update(struct be_adapter *adapter, bool force_update)
2229{
2230        struct be_set_eqd set_eqd[MAX_EVT_QS];
2231        struct be_aic_obj *aic;
2232        struct be_eq_obj *eqo;
2233        int i, num = 0, eqd;
2234
2235        for_all_evt_queues(adapter, eqo, i) {
2236                aic = &adapter->aic_obj[eqo->idx];
2237                eqd = be_get_new_eqd(eqo);
2238                if (force_update || eqd != aic->prev_eqd) {
2239                        set_eqd[num].delay_multiplier = (eqd * 65) / 100;
2240                        set_eqd[num].eq_id = eqo->q.id;
2241                        aic->prev_eqd = eqd;
2242                        num++;
2243                }
2244        }
2245
2246        if (num)
2247                be_cmd_modify_eqd(adapter, set_eqd, num);
2248}
2249
2250static void be_rx_stats_update(struct be_rx_obj *rxo,
2251                               struct be_rx_compl_info *rxcp)
2252{
2253        struct be_rx_stats *stats = rx_stats(rxo);
2254
2255        u64_stats_update_begin(&stats->sync);
2256        stats->rx_compl++;
2257        stats->rx_bytes += rxcp->pkt_size;
2258        stats->rx_pkts++;
2259        if (rxcp->tunneled)
2260                stats->rx_vxlan_offload_pkts++;
2261        if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2262                stats->rx_mcast_pkts++;
2263        if (rxcp->err)
2264                stats->rx_compl_err++;
2265        u64_stats_update_end(&stats->sync);
2266}
2267
2268static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2269{
2270        /* L4 checksum is not reliable for non-TCP/UDP packets.
2271         * Also ignore ipcksm for ipv6 pkts.
2272         */
2273        return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2274                (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2275}
2276
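    /* Return the page_info for the frag at the RXQ tail: unmap the whole big
     * page when this is its last frag, otherwise just sync the single frag for
     * CPU access, then advance the queue tail.
     */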
2277static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2278{
2279        struct be_adapter *adapter = rxo->adapter;
2280        struct be_rx_page_info *rx_page_info;
2281        struct be_queue_info *rxq = &rxo->q;
2282        u32 frag_idx = rxq->tail;
2283
2284        rx_page_info = &rxo->page_info_tbl[frag_idx];
2285        BUG_ON(!rx_page_info->page);
2286
2287        if (rx_page_info->last_frag) {
2288                dma_unmap_page(&adapter->pdev->dev,
2289                               dma_unmap_addr(rx_page_info, bus),
2290                               adapter->big_page_size, DMA_FROM_DEVICE);
2291                rx_page_info->last_frag = false;
2292        } else {
2293                dma_sync_single_for_cpu(&adapter->pdev->dev,
2294                                        dma_unmap_addr(rx_page_info, bus),
2295                                        rx_frag_size, DMA_FROM_DEVICE);
2296        }
2297
2298        queue_tail_inc(rxq);
2299        atomic_dec(&rxq->used);
2300        return rx_page_info;
2301}
2302
2303/* Throw away the data in the Rx completion */
2304static void be_rx_compl_discard(struct be_rx_obj *rxo,
2305                                struct be_rx_compl_info *rxcp)
2306{
2307        struct be_rx_page_info *page_info;
2308        u16 i, num_rcvd = rxcp->num_rcvd;
2309
2310        for (i = 0; i < num_rcvd; i++) {
2311                page_info = get_rx_page_info(rxo);
2312                put_page(page_info->page);
2313                memset(page_info, 0, sizeof(*page_info));
2314        }
2315}
2316
2317/*
2318 * skb_fill_rx_data forms a complete skb for an ether frame
2319 * indicated by rxcp.
2320 */
2321static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2322                             struct be_rx_compl_info *rxcp)
2323{
2324        struct be_rx_page_info *page_info;
2325        u16 i, j;
2326        u16 hdr_len, curr_frag_len, remaining;
2327        u8 *start;
2328
2329        page_info = get_rx_page_info(rxo);
2330        start = page_address(page_info->page) + page_info->page_offset;
2331        prefetch(start);
2332
2333        /* Copy data in the first descriptor of this completion */
2334        curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2335
2336        skb->len = curr_frag_len;
2337        if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2338                memcpy(skb->data, start, curr_frag_len);
2339                /* Complete packet has now been moved to data */
2340                put_page(page_info->page);
2341                skb->data_len = 0;
2342                skb->tail += curr_frag_len;
2343        } else {
2344                hdr_len = ETH_HLEN;
2345                memcpy(skb->data, start, hdr_len);
2346                skb_shinfo(skb)->nr_frags = 1;
2347                skb_frag_set_page(skb, 0, page_info->page);
2348                skb_frag_off_set(&skb_shinfo(skb)->frags[0],
2349                                 page_info->page_offset + hdr_len);
2350                skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2351                                  curr_frag_len - hdr_len);
2352                skb->data_len = curr_frag_len - hdr_len;
2353                skb->truesize += rx_frag_size;
2354                skb->tail += hdr_len;
2355        }
2356        page_info->page = NULL;
2357
2358        if (rxcp->pkt_size <= rx_frag_size) {
2359                BUG_ON(rxcp->num_rcvd != 1);
2360                return;
2361        }
2362
2363        /* More frags present for this completion */
2364        remaining = rxcp->pkt_size - curr_frag_len;
2365        for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2366                page_info = get_rx_page_info(rxo);
2367                curr_frag_len = min(remaining, rx_frag_size);
2368
2369                /* Coalesce all frags from the same physical page in one slot */
2370                if (page_info->page_offset == 0) {
2371                        /* Fresh page */
2372                        j++;
2373                        skb_frag_set_page(skb, j, page_info->page);
2374                        skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2375                                         page_info->page_offset);
2376                        skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2377                        skb_shinfo(skb)->nr_frags++;
2378                } else {
2379                        put_page(page_info->page);
2380                }
2381
2382                skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2383                skb->len += curr_frag_len;
2384                skb->data_len += curr_frag_len;
2385                skb->truesize += rx_frag_size;
2386                remaining -= curr_frag_len;
2387                page_info->page = NULL;
2388        }
2389        BUG_ON(j > MAX_SKB_FRAGS);
2390}
2391
2392/* Process the RX completion indicated by rxcp when GRO is disabled */
2393static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2394                                struct be_rx_compl_info *rxcp)
2395{
2396        struct be_adapter *adapter = rxo->adapter;
2397        struct net_device *netdev = adapter->netdev;
2398        struct sk_buff *skb;
2399
2400        skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2401        if (unlikely(!skb)) {
2402                rx_stats(rxo)->rx_drops_no_skbs++;
2403                be_rx_compl_discard(rxo, rxcp);
2404                return;
2405        }
2406
2407        skb_fill_rx_data(rxo, skb, rxcp);
2408
2409        if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2410                skb->ip_summed = CHECKSUM_UNNECESSARY;
2411        else
2412                skb_checksum_none_assert(skb);
2413
2414        skb->protocol = eth_type_trans(skb, netdev);
2415        skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2416        if (netdev->features & NETIF_F_RXHASH)
2417                skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2418
2419        skb->csum_level = rxcp->tunneled;
2420        skb_mark_napi_id(skb, napi);
2421
2422        if (rxcp->vlanf)
2423                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2424
2425        netif_receive_skb(skb);
2426}
2427
2428/* Process the RX completion indicated by rxcp when GRO is enabled */
2429static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2430                                    struct napi_struct *napi,
2431                                    struct be_rx_compl_info *rxcp)
2432{
2433        struct be_adapter *adapter = rxo->adapter;
2434        struct be_rx_page_info *page_info;
2435        struct sk_buff *skb = NULL;
2436        u16 remaining, curr_frag_len;
2437        u16 i, j;
2438
2439        skb = napi_get_frags(napi);
2440        if (!skb) {
2441                be_rx_compl_discard(rxo, rxcp);
2442                return;
2443        }
2444
2445        remaining = rxcp->pkt_size;
2446        for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2447                page_info = get_rx_page_info(rxo);
2448
2449                curr_frag_len = min(remaining, rx_frag_size);
2450
2451                /* Coalesce all frags from the same physical page in one slot */
2452                if (i == 0 || page_info->page_offset == 0) {
2453                        /* First frag or Fresh page */
2454                        j++;
2455                        skb_frag_set_page(skb, j, page_info->page);
2456                        skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2457                                         page_info->page_offset);
2458                        skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2459                } else {
2460                        put_page(page_info->page);
2461                }
2462                skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2463                skb->truesize += rx_frag_size;
2464                remaining -= curr_frag_len;
2465                memset(page_info, 0, sizeof(*page_info));
2466        }
2467        BUG_ON(j > MAX_SKB_FRAGS);
2468
2469        skb_shinfo(skb)->nr_frags = j + 1;
2470        skb->len = rxcp->pkt_size;
2471        skb->data_len = rxcp->pkt_size;
2472        skb->ip_summed = CHECKSUM_UNNECESSARY;
2473        skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2474        if (adapter->netdev->features & NETIF_F_RXHASH)
2475                skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2476
2477        skb->csum_level = rxcp->tunneled;
2478
2479        if (rxcp->vlanf)
2480                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2481
2482        napi_gro_frags(napi);
2483}
2484
2485static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2486                                 struct be_rx_compl_info *rxcp)
2487{
2488        rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2489        rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2490        rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2491        rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2492        rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2493        rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2494        rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2495        rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2496        rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2497        rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2498        rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2499        if (rxcp->vlanf) {
2500                rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2501                rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2502        }
2503        rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2504        rxcp->tunneled =
2505                GET_RX_COMPL_V1_BITS(tunneled, compl);
2506}
2507
2508static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2509                                 struct be_rx_compl_info *rxcp)
2510{
2511        rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2512        rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2513        rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2514        rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2515        rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2516        rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2517        rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2518        rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2519        rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2520        rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2521        rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2522        if (rxcp->vlanf) {
2523                rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2524                rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2525        }
2526        rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2527        rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2528}
2529
2530static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2531{
2532        struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2533        struct be_rx_compl_info *rxcp = &rxo->rxcp;
2534        struct be_adapter *adapter = rxo->adapter;
2535
2536        /* For checking the valid bit it is OK to use either definition, as the
2537         * valid bit is at the same position in both v0 and v1 Rx compl */
2538        if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2539                return NULL;
2540
2541        rmb();
2542        be_dws_le_to_cpu(compl, sizeof(*compl));
2543
2544        if (adapter->be3_native)
2545                be_parse_rx_compl_v1(compl, rxcp);
2546        else
2547                be_parse_rx_compl_v0(compl, rxcp);
2548
2549        if (rxcp->ip_frag)
2550                rxcp->l4_csum = 0;
2551
2552        if (rxcp->vlanf) {
2553                /* In QNQ modes, if qnq bit is not set, then the packet was
2554                 * tagged only with the transparent outer vlan-tag and must
2555                 * not be treated as a vlan packet by host
2556                 */
2557                if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2558                        rxcp->vlanf = 0;
2559
2560                if (!lancer_chip(adapter))
2561                        rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2562
2563                if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2564                    !test_bit(rxcp->vlan_tag, adapter->vids))
2565                        rxcp->vlanf = 0;
2566        }
2567
2568        /* As the compl has been parsed, reset it; we won't touch it again */
2569        compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2570
2571        queue_tail_inc(&rxo->cq);
2572        return rxcp;
2573}
2574
2575static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2576{
2577        u32 order = get_order(size);
2578
2579        if (order > 0)
2580                gfp |= __GFP_COMP;
2581        return  alloc_pages(gfp, order);
2582}
2583
2584/*
2585 * Allocate a page, split it into fragments of size rx_frag_size and post them
2586 * as receive buffers to BE
2587 */
2588static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2589{
2590        struct be_adapter *adapter = rxo->adapter;
2591        struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2592        struct be_queue_info *rxq = &rxo->q;
2593        struct page *pagep = NULL;
2594        struct device *dev = &adapter->pdev->dev;
2595        struct be_eth_rx_d *rxd;
2596        u64 page_dmaaddr = 0, frag_dmaaddr;
2597        u32 posted, page_offset = 0, notify = 0;
2598
2599        page_info = &rxo->page_info_tbl[rxq->head];
2600        for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2601                if (!pagep) {
2602                        pagep = be_alloc_pages(adapter->big_page_size, gfp);
2603                        if (unlikely(!pagep)) {
2604                                rx_stats(rxo)->rx_post_fail++;
2605                                break;
2606                        }
2607                        page_dmaaddr = dma_map_page(dev, pagep, 0,
2608                                                    adapter->big_page_size,
2609                                                    DMA_FROM_DEVICE);
2610                        if (dma_mapping_error(dev, page_dmaaddr)) {
2611                                put_page(pagep);
2612                                pagep = NULL;
2613                                adapter->drv_stats.dma_map_errors++;
2614                                break;
2615                        }
2616                        page_offset = 0;
2617                } else {
2618                        get_page(pagep);
2619                        page_offset += rx_frag_size;
2620                }
2621                page_info->page_offset = page_offset;
2622                page_info->page = pagep;
2623
2624                rxd = queue_head_node(rxq);
2625                frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2626                rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2627                rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2628
2629                /* Any space left in the current big page for another frag? */
2630                if ((page_offset + rx_frag_size + rx_frag_size) >
2631                                        adapter->big_page_size) {
2632                        pagep = NULL;
2633                        page_info->last_frag = true;
2634                        dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2635                } else {
2636                        dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2637                }
2638
2639                prev_page_info = page_info;
2640                queue_head_inc(rxq);
2641                page_info = &rxo->page_info_tbl[rxq->head];
2642        }
2643
2644        /* Mark the last frag of a page when we break out of the above loop
2645         * with no more slots available in the RXQ
2646         */
2647        if (pagep) {
2648                prev_page_info->last_frag = true;
2649                dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2650        }
2651
2652        if (posted) {
2653                atomic_add(posted, &rxq->used);
2654                if (rxo->rx_post_starved)
2655                        rxo->rx_post_starved = false;
2656                do {
2657                        notify = min(MAX_NUM_POST_ERX_DB, posted);
2658                        be_rxq_notify(adapter, rxq->id, notify);
2659                        posted -= notify;
2660                } while (posted);
2661        } else if (atomic_read(&rxq->used) == 0) {
2662                /* Let be_worker replenish when memory is available */
2663                rxo->rx_post_starved = true;
2664        }
2665}
2666
2667static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2668{
2669        switch (status) {
2670        case BE_TX_COMP_HDR_PARSE_ERR:
2671                tx_stats(txo)->tx_hdr_parse_err++;
2672                break;
2673        case BE_TX_COMP_NDMA_ERR:
2674                tx_stats(txo)->tx_dma_err++;
2675                break;
2676        case BE_TX_COMP_ACL_ERR:
2677                tx_stats(txo)->tx_spoof_check_err++;
2678                break;
2679        }
2680}
2681
2682static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2683{
2684        switch (status) {
2685        case LANCER_TX_COMP_LSO_ERR:
2686                tx_stats(txo)->tx_tso_err++;
2687                break;
2688        case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2689        case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2690                tx_stats(txo)->tx_spoof_check_err++;
2691                break;
2692        case LANCER_TX_COMP_QINQ_ERR:
2693                tx_stats(txo)->tx_qinq_err++;
2694                break;
2695        case LANCER_TX_COMP_PARITY_ERR:
2696                tx_stats(txo)->tx_internal_parity_err++;
2697                break;
2698        case LANCER_TX_COMP_DMA_ERR:
2699                tx_stats(txo)->tx_dma_err++;
2700                break;
2701        case LANCER_TX_COMP_SGE_ERR:
2702                tx_stats(txo)->tx_sge_err++;
2703                break;
2704        }
2705}
2706
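    /* Fetch the next valid TX completion from the CQ, or NULL if none is
     * pending. Per-queue error stats are updated from the completion status;
     * on Lancer, LSO/parity/SGE errors also flag the adapter with BE_ERROR_TX.
     */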
2707static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2708                                                struct be_tx_obj *txo)
2709{
2710        struct be_queue_info *tx_cq = &txo->cq;
2711        struct be_tx_compl_info *txcp = &txo->txcp;
2712        struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2713
2714        if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2715                return NULL;
2716
2717        /* Ensure load ordering of valid bit dword and other dwords below */
2718        rmb();
2719        be_dws_le_to_cpu(compl, sizeof(*compl));
2720
2721        txcp->status = GET_TX_COMPL_BITS(status, compl);
2722        txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2723
2724        if (txcp->status) {
2725                if (lancer_chip(adapter)) {
2726                        lancer_update_tx_err(txo, txcp->status);
2727                        /* Reset the adapter in case of TSO,
2728                         * SGE or parity errors
2729                         */
2730                        if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2731                            txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2732                            txcp->status == LANCER_TX_COMP_SGE_ERR)
2733                                be_set_error(adapter, BE_ERROR_TX);
2734                } else {
2735                        be_update_tx_err(txo, txcp->status);
2736                }
2737        }
2738
2739        if (be_check_error(adapter, BE_ERROR_TX))
2740                return NULL;
2741
2742        compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2743        queue_tail_inc(tx_cq);
2744        return txcp;
2745}
2746
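    /* Unmap and free the WRBs of a completed TX request, walking the TXQ tail
     * forward up to last_index; the completed skbs are freed and the number of
     * WRBs processed is returned so the caller can adjust txq->used.
     */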
2747static u16 be_tx_compl_process(struct be_adapter *adapter,
2748                               struct be_tx_obj *txo, u16 last_index)
2749{
2750        struct sk_buff **sent_skbs = txo->sent_skb_list;
2751        struct be_queue_info *txq = &txo->q;
2752        struct sk_buff *skb = NULL;
2753        bool unmap_skb_hdr = false;
2754        struct be_eth_wrb *wrb;
2755        u16 num_wrbs = 0;
2756        u32 frag_index;
2757
2758        do {
2759                if (sent_skbs[txq->tail]) {
2760                        /* Free skb from prev req */
2761                        if (skb)
2762                                dev_consume_skb_any(skb);
2763                        skb = sent_skbs[txq->tail];
2764                        sent_skbs[txq->tail] = NULL;
2765                        queue_tail_inc(txq);  /* skip hdr wrb */
2766                        num_wrbs++;
2767                        unmap_skb_hdr = true;
2768                }
2769                wrb = queue_tail_node(txq);
2770                frag_index = txq->tail;
2771                unmap_tx_frag(&adapter->pdev->dev, wrb,
2772                              (unmap_skb_hdr && skb_headlen(skb)));
2773                unmap_skb_hdr = false;
2774                queue_tail_inc(txq);
2775                num_wrbs++;
2776        } while (frag_index != last_index);
2777        dev_consume_skb_any(skb);
2778
2779        return num_wrbs;
2780}
2781
2782/* Return the number of events in the event queue */
2783static inline int events_get(struct be_eq_obj *eqo)
2784{
2785        struct be_eq_entry *eqe;
2786        int num = 0;
2787
2788        do {
2789                eqe = queue_tail_node(&eqo->q);
2790                if (eqe->evt == 0)
2791                        break;
2792
2793                rmb();
2794                eqe->evt = 0;
2795                num++;
2796                queue_tail_inc(&eqo->q);
2797        } while (true);
2798
2799        return num;
2800}
2801
2802/* Leaves the EQ in disarmed state */
2803static void be_eq_clean(struct be_eq_obj *eqo)
2804{
2805        int num = events_get(eqo);
2806
2807        be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2808}
2809
2810/* Free posted rx buffers that were not used */
2811static void be_rxq_clean(struct be_rx_obj *rxo)
2812{
2813        struct be_queue_info *rxq = &rxo->q;
2814        struct be_rx_page_info *page_info;
2815
2816        while (atomic_read(&rxq->used) > 0) {
2817                page_info = get_rx_page_info(rxo);
2818                put_page(page_info->page);
2819                memset(page_info, 0, sizeof(*page_info));
2820        }
2821        BUG_ON(atomic_read(&rxq->used));
2822        rxq->tail = 0;
2823        rxq->head = 0;
2824}
2825
2826static void be_rx_cq_clean(struct be_rx_obj *rxo)
2827{
2828        struct be_queue_info *rx_cq = &rxo->cq;
2829        struct be_rx_compl_info *rxcp;
2830        struct be_adapter *adapter = rxo->adapter;
2831        int flush_wait = 0;
2832
2833        /* Consume pending rx completions.
2834         * Wait for the flush completion (identified by zero num_rcvd)
2835         * to arrive. Notify CQ even when there are no more CQ entries
2836         * for HW to flush partially coalesced CQ entries.
2837         * In Lancer, there is no need to wait for flush compl.
2838         */
2839        for (;;) {
2840                rxcp = be_rx_compl_get(rxo);
2841                if (!rxcp) {
2842                        if (lancer_chip(adapter))
2843                                break;
2844
2845                        if (flush_wait++ > 50 ||
2846                            be_check_error(adapter,
2847                                           BE_ERROR_HW)) {
2848                                dev_warn(&adapter->pdev->dev,
2849                                         "did not receive flush compl\n");
2850                                break;
2851                        }
2852                        be_cq_notify(adapter, rx_cq->id, true, 0);
2853                        mdelay(1);
2854                } else {
2855                        be_rx_compl_discard(rxo, rxcp);
2856                        be_cq_notify(adapter, rx_cq->id, false, 1);
2857                        if (rxcp->num_rcvd == 0)
2858                                break;
2859                }
2860        }
2861
2862        /* After cleanup, leave the CQ in unarmed state */
2863        be_cq_notify(adapter, rx_cq->id, false, 0);
2864}
2865
2866static void be_tx_compl_clean(struct be_adapter *adapter)
2867{
2868        struct device *dev = &adapter->pdev->dev;
2869        u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2870        struct be_tx_compl_info *txcp;
2871        struct be_queue_info *txq;
2872        u32 end_idx, notified_idx;
2873        struct be_tx_obj *txo;
2874        int i, pending_txqs;
2875
2876        /* Stop polling for compls when HW has been silent for 10ms */
2877        do {
2878                pending_txqs = adapter->num_tx_qs;
2879
2880                for_all_tx_queues(adapter, txo, i) {
2881                        cmpl = 0;
2882                        num_wrbs = 0;
2883                        txq = &txo->q;
2884                        while ((txcp = be_tx_compl_get(adapter, txo))) {
2885                                num_wrbs +=
2886                                        be_tx_compl_process(adapter, txo,
2887                                                            txcp->end_index);
2888                                cmpl++;
2889                        }
2890                        if (cmpl) {
2891                                be_cq_notify(adapter, txo->cq.id, false, cmpl);
2892                                atomic_sub(num_wrbs, &txq->used);
2893                                timeo = 0;
2894                        }
2895                        if (!be_is_tx_compl_pending(txo))
2896                                pending_txqs--;
2897                }
2898
2899                if (pending_txqs == 0 || ++timeo > 10 ||
2900                    be_check_error(adapter, BE_ERROR_HW))
2901                        break;
2902
2903                mdelay(1);
2904        } while (true);
2905
2906        /* Free enqueued TX that was never notified to HW */
2907        for_all_tx_queues(adapter, txo, i) {
2908                txq = &txo->q;
2909
2910                if (atomic_read(&txq->used)) {
2911                        dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2912                                 i, atomic_read(&txq->used));
2913                        notified_idx = txq->tail;
2914                        end_idx = txq->tail;
2915                        index_adv(&end_idx, atomic_read(&txq->used) - 1,
2916                                  txq->len);
2917                        /* Use the tx-compl process logic to handle requests
2918                         * that were not sent to the HW.
2919                         */
2920                        num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2921                        atomic_sub(num_wrbs, &txq->used);
2922                        BUG_ON(atomic_read(&txq->used));
2923                        txo->pend_wrb_cnt = 0;
2924                        /* Since hw was never notified of these requests,
2925                         * reset TXQ indices
2926                         */
2927                        txq->head = notified_idx;
2928                        txq->tail = notified_idx;
2929                }
2930        }
2931}
2932
2933static void be_evt_queues_destroy(struct be_adapter *adapter)
2934{
2935        struct be_eq_obj *eqo;
2936        int i;
2937
2938        for_all_evt_queues(adapter, eqo, i) {
2939                if (eqo->q.created) {
2940                        be_eq_clean(eqo);
2941                        be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2942                        netif_napi_del(&eqo->napi);
2943                        free_cpumask_var(eqo->affinity_mask);
2944                }
2945                be_queue_free(adapter, &eqo->q);
2946        }
2947}
2948
2949static int be_evt_queues_create(struct be_adapter *adapter)
2950{
2951        struct be_queue_info *eq;
2952        struct be_eq_obj *eqo;
2953        struct be_aic_obj *aic;
2954        int i, rc;
2955
2956        /* need enough EQs to service both RX and TX queues */
2957        adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2958                                    max(adapter->cfg_num_rx_irqs,
2959                                        adapter->cfg_num_tx_irqs));
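            /* Worked example with assumed values: if num_irqs() is 8,
             * cfg_num_rx_irqs is 6 and cfg_num_tx_irqs is 4, then
             * num_evt_qs = min(8, max(6, 4)) = 6.
             */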
2960
2961        adapter->aic_enabled = true;
2962
2963        for_all_evt_queues(adapter, eqo, i) {
2964                int numa_node = dev_to_node(&adapter->pdev->dev);
2965
2966                aic = &adapter->aic_obj[i];
2967                eqo->adapter = adapter;
2968                eqo->idx = i;
2969                aic->max_eqd = BE_MAX_EQD;
2970
2971                eq = &eqo->q;
2972                rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2973                                    sizeof(struct be_eq_entry));
2974                if (rc)
2975                        return rc;
2976
2977                rc = be_cmd_eq_create(adapter, eqo);
2978                if (rc)
2979                        return rc;
2980
2981                if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2982                        return -ENOMEM;
2983                cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2984                                eqo->affinity_mask);
2985                netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2986                               BE_NAPI_WEIGHT);
2987        }
2988        return 0;
2989}
2990
2991static void be_mcc_queues_destroy(struct be_adapter *adapter)
2992{
2993        struct be_queue_info *q;
2994
2995        q = &adapter->mcc_obj.q;
2996        if (q->created)
2997                be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2998        be_queue_free(adapter, q);
2999
3000        q = &adapter->mcc_obj.cq;
3001        if (q->created)
3002                be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3003        be_queue_free(adapter, q);
3004}
3005
3006/* Must be called only after TX qs are created as MCC shares TX EQ */
3007static int be_mcc_queues_create(struct be_adapter *adapter)
3008{
3009        struct be_queue_info *q, *cq;
3010
3011        cq = &adapter->mcc_obj.cq;
3012        if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3013                           sizeof(struct be_mcc_compl)))
3014                goto err;
3015
3016        /* Use the default EQ for MCC completions */
3017        if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3018                goto mcc_cq_free;
3019
3020        q = &adapter->mcc_obj.q;
3021        if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3022                goto mcc_cq_destroy;
3023
3024        if (be_cmd_mccq_create(adapter, q, cq))
3025                goto mcc_q_free;
3026
3027        return 0;
3028
3029mcc_q_free:
3030        be_queue_free(adapter, q);
3031mcc_cq_destroy:
3032        be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3033mcc_cq_free:
3034        be_queue_free(adapter, cq);
3035err:
3036        return -1;
3037}
3038
3039static void be_tx_queues_destroy(struct be_adapter *adapter)
3040{
3041        struct be_queue_info *q;
3042        struct be_tx_obj *txo;
3043        u8 i;
3044
3045        for_all_tx_queues(adapter, txo, i) {
3046                q = &txo->q;
3047                if (q->created)
3048                        be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3049                be_queue_free(adapter, q);
3050
3051                q = &txo->cq;
3052                if (q->created)
3053                        be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3054                be_queue_free(adapter, q);
3055        }
3056}
3057
3058static int be_tx_qs_create(struct be_adapter *adapter)
3059{
3060        struct be_queue_info *cq;
3061        struct be_tx_obj *txo;
3062        struct be_eq_obj *eqo;
3063        int status, i;
3064
3065        adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3066
3067        for_all_tx_queues(adapter, txo, i) {
3068                cq = &txo->cq;
3069                status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3070                                        sizeof(struct be_eth_tx_compl));
3071                if (status)
3072                        return status;
3073
3074                u64_stats_init(&txo->stats.sync);
3075                u64_stats_init(&txo->stats.sync_compl);
3076
3077                /* If num_evt_qs is less than num_tx_qs, then more than
3078                 * one txq shares an eq
3079                 */
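                    /* E.g. (hypothetical counts): with num_evt_qs == 2 and
                     * num_tx_qs == 4, txq0/txq2 map to eq0 and txq1/txq3 map
                     * to eq1 via the modulo below.
                     */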
3080                eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3081                status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3082                if (status)
3083                        return status;
3084
3085                status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3086                                        sizeof(struct be_eth_wrb));
3087                if (status)
3088                        return status;
3089
3090                status = be_cmd_txq_create(adapter, txo);
3091                if (status)
3092                        return status;
3093
3094                netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3095                                    eqo->idx);
3096        }
3097
3098        dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3099                 adapter->num_tx_qs);
3100        return 0;
3101}
3102
3103static void be_rx_cqs_destroy(struct be_adapter *adapter)
3104{
3105        struct be_queue_info *q;
3106        struct be_rx_obj *rxo;
3107        int i;
3108
3109        for_all_rx_queues(adapter, rxo, i) {
3110                q = &rxo->cq;
3111                if (q->created)
3112                        be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3113                be_queue_free(adapter, q);
3114        }
3115}
3116
3117static int be_rx_cqs_create(struct be_adapter *adapter)
3118{
3119        struct be_queue_info *eq, *cq;
3120        struct be_rx_obj *rxo;
3121        int rc, i;
3122
3123        adapter->num_rss_qs =
3124                        min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3125
3126        /* We'll use RSS only if at least 2 RSS rings are supported. */
3127        if (adapter->num_rss_qs < 2)
3128                adapter->num_rss_qs = 0;
3129
3130        adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3131
3132        /* When the interface is not capable of RSS rings (and there is no
3133         * need to create a default RXQ) we'll still need one RXQ
3134         */
3135        if (adapter->num_rx_qs == 0)
3136                adapter->num_rx_qs = 1;
3137
3138        adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3139        for_all_rx_queues(adapter, rxo, i) {
3140                rxo->adapter = adapter;
3141                cq = &rxo->cq;
3142                rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3143                                    sizeof(struct be_eth_rx_compl));
3144                if (rc)
3145                        return rc;
3146
3147                u64_stats_init(&rxo->stats.sync);
3148                eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3149                rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3150                if (rc)
3151                        return rc;
3152        }
3153
3154        dev_info(&adapter->pdev->dev,
3155                 "created %d RX queue(s)\n", adapter->num_rx_qs);
3156        return 0;
3157}
3158
3159static irqreturn_t be_intx(int irq, void *dev)
3160{
3161        struct be_eq_obj *eqo = dev;
3162        struct be_adapter *adapter = eqo->adapter;
3163        int num_evts = 0;
3164
3165        /* IRQ is not expected when NAPI is scheduled as the EQ
3166         * will not be armed.
3167         * But, this can happen on Lancer INTx where it takes
3168         * a while to de-assert INTx or in BE2 where occasionally
3169         * an interrupt may be raised even when EQ is unarmed.
3170         * If NAPI is already scheduled, then counting & notifying
3171         * events will orphan them.
3172         */
3173        if (napi_schedule_prep(&eqo->napi)) {
3174                num_evts = events_get(eqo);
3175                __napi_schedule(&eqo->napi);
3176                if (num_evts)
3177                        eqo->spurious_intr = 0;
3178        }
3179        be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3180
3181        /* Return IRQ_HANDLED only for the first spurious intr
3182         * after a valid intr to stop the kernel from branding
3183         * this irq as a bad one!
3184         */
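            /* Illustration of the check below: once a valid interrupt has
             * reset spurious_intr to 0, the first event-less interrupt is
             * still reported as IRQ_HANDLED; only further consecutive
             * event-less interrupts return IRQ_NONE.
             */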
3185        if (num_evts || eqo->spurious_intr++ == 0)
3186                return IRQ_HANDLED;
3187        else
3188                return IRQ_NONE;
3189}
3190
3191static irqreturn_t be_msix(int irq, void *dev)
3192{
3193        struct be_eq_obj *eqo = dev;
3194
3195        be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3196        napi_schedule(&eqo->napi);
3197        return IRQ_HANDLED;
3198}
3199
3200static inline bool do_gro(struct be_rx_compl_info *rxcp)
3201{
3202        return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3203}
3204
3205static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3206                         int budget)
3207{
3208        struct be_adapter *adapter = rxo->adapter;
3209        struct be_queue_info *rx_cq = &rxo->cq;
3210        struct be_rx_compl_info *rxcp;
3211        u32 work_done;
3212        u32 frags_consumed = 0;
3213
3214        for (work_done = 0; work_done < budget; work_done++) {
3215                rxcp = be_rx_compl_get(rxo);
3216                if (!rxcp)
3217                        break;
3218
3219                /* Is it a flush compl that has no data */
3220                if (unlikely(rxcp->num_rcvd == 0))
3221                        goto loop_continue;
3222
3223                /* Discard compl with partial DMA Lancer B0 */
3224                if (unlikely(!rxcp->pkt_size)) {
3225                        be_rx_compl_discard(rxo, rxcp);
3226                        goto loop_continue;
3227                }
3228
3229                /* On BE drop pkts that arrive due to imperfect filtering in
3230                 * promiscuous mode on some SKUs
3231                 */
3232                if (unlikely(rxcp->port != adapter->port_num &&
3233                             !lancer_chip(adapter))) {
3234                        be_rx_compl_discard(rxo, rxcp);
3235                        goto loop_continue;
3236                }
3237
3238                if (do_gro(rxcp))
3239                        be_rx_compl_process_gro(rxo, napi, rxcp);
3240                else
3241                        be_rx_compl_process(rxo, napi, rxcp);
3242
3243loop_continue:
3244                frags_consumed += rxcp->num_rcvd;
3245                be_rx_stats_update(rxo, rxcp);
3246        }
3247
3248        if (work_done) {
3249                be_cq_notify(adapter, rx_cq->id, true, work_done);
3250
3251                /* When an rx-obj gets into post_starved state, just
3252                 * let be_worker do the posting.
3253                 */
3254                if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3255                    !rxo->rx_post_starved)
3256                        be_post_rx_frags(rxo, GFP_ATOMIC,
3257                                         max_t(u32, MAX_RX_POST,
3258                                               frags_consumed));
3259        }
3260
3261        return work_done;
3262}
3263
3264
3265static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3266                          int idx)
3267{
3268        int num_wrbs = 0, work_done = 0;
3269        struct be_tx_compl_info *txcp;
3270
3271        while ((txcp = be_tx_compl_get(adapter, txo))) {
3272                num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3273                work_done++;
3274        }
3275
3276        if (work_done) {
3277                be_cq_notify(adapter, txo->cq.id, true, work_done);
3278                atomic_sub(num_wrbs, &txo->q.used);
3279
3280                /* As Tx wrbs have been freed up, wake up netdev queue
3281                 * if it was stopped due to lack of tx wrbs. */
3282                if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3283                    be_can_txq_wake(txo)) {
3284                        netif_wake_subqueue(adapter->netdev, idx);
3285                }
3286
3287                u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3288                tx_stats(txo)->tx_compl += work_done;
3289                u64_stats_update_end(&tx_stats(txo)->sync_compl);
3290        }
3291}
3292
3293int be_poll(struct napi_struct *napi, int budget)
3294{
3295        struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3296        struct be_adapter *adapter = eqo->adapter;
3297        int max_work = 0, work, i, num_evts;
3298        struct be_rx_obj *rxo;
3299        struct be_tx_obj *txo;
3300        u32 mult_enc = 0;
3301
3302        num_evts = events_get(eqo);
3303
3304        for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3305                be_process_tx(adapter, txo, i);
3306
3307        /* This loop will iterate twice for EQ0 in which
3308         * completions of the last RXQ (default one) are also processed.
3309         * For other EQs, the loop iterates only once.
3310         */
3311        for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3312                work = be_process_rx(rxo, napi, budget);
3313                max_work = max(work, max_work);
3314        }
3315
3316        if (is_mcc_eqo(eqo))
3317                be_process_mcc(adapter);
3318
3319        if (max_work < budget) {
3320                napi_complete_done(napi, max_work);
3321
3322                /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3323                 * delay via a delay multiplier encoding value
3324                 */
3325                if (skyhawk_chip(adapter))
3326                        mult_enc = be_get_eq_delay_mult_enc(eqo);
3327
3328                be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3329                             mult_enc);
3330        } else {
3331                /* As we'll continue in polling mode, count and clear events */
3332                be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3333        }
3334        return max_work;
3335}
3336
3337void be_detect_error(struct be_adapter *adapter)
3338{
3339        u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3340        u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3341        struct device *dev = &adapter->pdev->dev;
3342        u16 val;
3343        u32 i;
3344
3345        if (be_check_error(adapter, BE_ERROR_HW))
3346                return;
3347
3348        if (lancer_chip(adapter)) {
3349                sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3350                if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3351                        be_set_error(adapter, BE_ERROR_UE);
3352                        sliport_err1 = ioread32(adapter->db +
3353                                                SLIPORT_ERROR1_OFFSET);
3354                        sliport_err2 = ioread32(adapter->db +
3355                                                SLIPORT_ERROR2_OFFSET);
3356                        /* Do not log error messages if it's a FW reset */
3357                        if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3358                            sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3359                                dev_info(dev, "Reset is in progress\n");
3360                        } else {
3361                                dev_err(dev, "Error detected in the card\n");
3362                                dev_err(dev, "ERR: sliport status 0x%x\n",
3363                                        sliport_status);
3364                                dev_err(dev, "ERR: sliport error1 0x%x\n",
3365                                        sliport_err1);
3366                                dev_err(dev, "ERR: sliport error2 0x%x\n",
3367                                        sliport_err2);
3368                        }
3369                }
3370        } else {
3371                ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3372                ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3373                ue_lo_mask = ioread32(adapter->pcicfg +
3374                                      PCICFG_UE_STATUS_LOW_MASK);
3375                ue_hi_mask = ioread32(adapter->pcicfg +
3376                                      PCICFG_UE_STATUS_HI_MASK);
3377
3378                ue_lo = (ue_lo & ~ue_lo_mask);
3379                ue_hi = (ue_hi & ~ue_hi_mask);
3380
3381                if (ue_lo || ue_hi) {
3382                        /* On certain platforms BE3 hardware can indicate
3383                         * spurious UEs. In case of a UE in the chip,
3384                         * the POST register correctly reports either a
3385                         * FAT_LOG_START state (FW is currently dumping
3386                         * FAT log data) or an ARMFW_UE state. Check for the
3387                         * above states to ascertain if the UE is valid or not.
3388                         */
3389                        if (BE3_chip(adapter)) {
3390                                val = be_POST_stage_get(adapter);
3391                                if ((val & POST_STAGE_FAT_LOG_START)
3392                                     != POST_STAGE_FAT_LOG_START &&
3393                                    (val & POST_STAGE_ARMFW_UE)
3394                                     != POST_STAGE_ARMFW_UE &&
3395                                    (val & POST_STAGE_RECOVERABLE_ERR)
3396                                     != POST_STAGE_RECOVERABLE_ERR)
3397                                        return;
3398                        }
3399
3400                        dev_err(dev, "Error detected in the adapter");
3401                        be_set_error(adapter, BE_ERROR_UE);
3402
3403                        for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3404                                if (ue_lo & 1)
3405                                        dev_err(dev, "UE: %s bit set\n",
3406                                                ue_status_low_desc[i]);
3407                        }
3408                        for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3409                                if (ue_hi & 1)
3410                                        dev_err(dev, "UE: %s bit set\n",
3411                                                ue_status_hi_desc[i]);
3412                        }
3413                }
3414        }
3415}
3416
3417static void be_msix_disable(struct be_adapter *adapter)
3418{
3419        if (msix_enabled(adapter)) {
3420                pci_disable_msix(adapter->pdev);
3421                adapter->num_msix_vec = 0;
3422                adapter->num_msix_roce_vec = 0;
3423        }
3424}
3425
3426static int be_msix_enable(struct be_adapter *adapter)
3427{
3428        unsigned int i, max_roce_eqs;
3429        struct device *dev = &adapter->pdev->dev;
3430        int num_vec;
3431
3432        /* If RoCE is supported, program the max number of vectors that
3433         * could be used for NIC and RoCE; otherwise, just program the number
3434         * we'll use initially.
3435         */
3436        if (be_roce_supported(adapter)) {
3437                max_roce_eqs =
3438                        be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3439                max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3440                num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3441        } else {
3442                num_vec = max(adapter->cfg_num_rx_irqs,
3443                              adapter->cfg_num_tx_irqs);
3444        }
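            /* Hypothetical numbers for illustration: with RoCE supported,
             * be_max_func_eqs() == 32, be_max_nic_eqs() == 8 and 16 online
             * CPUs, max_roce_eqs = min(24, 16) = 16 and num_vec becomes
             * be_max_any_irqs() + 16. Without RoCE, num_vec is simply the
             * larger of the configured RX and TX IRQ counts.
             */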
3445
3446        for (i = 0; i < num_vec; i++)
3447                adapter->msix_entries[i].entry = i;
3448
3449        num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3450                                        MIN_MSIX_VECTORS, num_vec);
3451        if (num_vec < 0)
3452                goto fail;
3453
3454        if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3455                adapter->num_msix_roce_vec = num_vec / 2;
3456                dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3457                         adapter->num_msix_roce_vec);
3458        }
3459
3460        adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3461
3462        dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3463                 adapter->num_msix_vec);
3464        return 0;
3465
3466fail:
3467        dev_warn(dev, "MSIx enable failed\n");
3468
3469        /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3470        if (be_virtfn(adapter))
3471                return num_vec;
3472        return 0;
3473}
3474
3475static inline int be_msix_vec_get(struct be_adapter *adapter,
3476                                  struct be_eq_obj *eqo)
3477{
3478        return adapter->msix_entries[eqo->msix_idx].vector;
3479}
3480
3481static int be_msix_register(struct be_adapter *adapter)
3482{
3483        struct net_device *netdev = adapter->netdev;
3484        struct be_eq_obj *eqo;
3485        int status, i, vec;
3486
3487        for_all_evt_queues(adapter, eqo, i) {
3488                sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3489                vec = be_msix_vec_get(adapter, eqo);
3490                status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3491                if (status)
3492                        goto err_msix;
3493
3494                irq_set_affinity_hint(vec, eqo->affinity_mask);
3495        }
3496
3497        return 0;
3498err_msix:
3499        for (i--; i >= 0; i--) {
3500                eqo = &adapter->eq_obj[i];
3501                free_irq(be_msix_vec_get(adapter, eqo), eqo);
3502        }
3503        dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3504                 status);
3505        be_msix_disable(adapter);
3506        return status;
3507}
3508
3509static int be_irq_register(struct be_adapter *adapter)
3510{
3511        struct net_device *netdev = adapter->netdev;
3512        int status;
3513
3514        if (msix_enabled(adapter)) {
3515                status = be_msix_register(adapter);
3516                if (status == 0)
3517                        goto done;
3518                /* INTx is not supported for VF */
3519                if (be_virtfn(adapter))
3520                        return status;
3521        }
3522
3523        /* INTx: only the first EQ is used */
3524        netdev->irq = adapter->pdev->irq;
3525        status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3526                             &adapter->eq_obj[0]);
3527        if (status) {
3528                dev_err(&adapter->pdev->dev,
3529                        "INTx request IRQ failed - err %d\n", status);
3530                return status;
3531        }
3532done:
3533        adapter->isr_registered = true;
3534        return 0;
3535}
3536
3537static void be_irq_unregister(struct be_adapter *adapter)
3538{
3539        struct net_device *netdev = adapter->netdev;
3540        struct be_eq_obj *eqo;
3541        int i, vec;
3542
3543        if (!adapter->isr_registered)
3544                return;
3545
3546        /* INTx */
3547        if (!msix_enabled(adapter)) {
3548                free_irq(netdev->irq, &adapter->eq_obj[0]);
3549                goto done;
3550        }
3551
3552        /* MSIx */
3553        for_all_evt_queues(adapter, eqo, i) {
3554                vec = be_msix_vec_get(adapter, eqo);
3555                irq_set_affinity_hint(vec, NULL);
3556                free_irq(vec, eqo);
3557        }
3558
3559done:
3560        adapter->isr_registered = false;
3561}
3562
3563static void be_rx_qs_destroy(struct be_adapter *adapter)
3564{
3565        struct rss_info *rss = &adapter->rss_info;
3566        struct be_queue_info *q;
3567        struct be_rx_obj *rxo;
3568        int i;
3569
3570        for_all_rx_queues(adapter, rxo, i) {
3571                q = &rxo->q;
3572                if (q->created) {
3573                        /* If RXQs are destroyed while in an "out of buffer"
3574                         * state, there is a possibility of an HW stall on
3575                         * Lancer. So, post 64 buffers to each queue to relieve
3576                         * the "out of buffer" condition.
3577                         * Make sure there's space in the RXQ before posting.
3578                         */
3579                        if (lancer_chip(adapter)) {
3580                                be_rx_cq_clean(rxo);
3581                                if (atomic_read(&q->used) == 0)
3582                                        be_post_rx_frags(rxo, GFP_KERNEL,
3583                                                         MAX_RX_POST);
3584                        }
3585
3586                        be_cmd_rxq_destroy(adapter, q);
3587                        be_rx_cq_clean(rxo);
3588                        be_rxq_clean(rxo);
3589                }
3590                be_queue_free(adapter, q);
3591        }
3592
3593        if (rss->rss_flags) {
3594                rss->rss_flags = RSS_ENABLE_NONE;
3595                be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3596                                  128, rss->rss_hkey);
3597        }
3598}
3599
3600static void be_disable_if_filters(struct be_adapter *adapter)
3601{
3602        /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3603        if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3604            check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3605                be_dev_mac_del(adapter, adapter->pmac_id[0]);
3606                eth_zero_addr(adapter->dev_mac);
3607        }
3608
3609        be_clear_uc_list(adapter);
3610        be_clear_mc_list(adapter);
3611
3612        /* The IFACE flags are enabled in the open path and cleared
3613         * in the close path. When a VF gets detached from the host and
3614         * assigned to a VM the following happens:
3615         *      - VF's IFACE flags get cleared in the detach path
3616         *      - IFACE create is issued by the VF in the attach path
3617         * Due to a bug in the BE3/Skyhawk-R FW
3618         * (Lancer FW doesn't have the bug), the IFACE capability flags
3619         * specified along with the IFACE create cmd issued by a VF are not
3620         * honoured by FW.  As a consequence, if a *new* driver
3621         * (that enables/disables IFACE flags in open/close)
3622         * is loaded in the host and an *old* driver is used by a VM/VF,
3623         * the IFACE gets created *without* the needed flags.
3624         * To avoid this, disable RX-filter flags only for Lancer.
3625         */
3626        if (lancer_chip(adapter)) {
3627                be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3628                adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3629        }
3630}
3631
3632static int be_close(struct net_device *netdev)
3633{
3634        struct be_adapter *adapter = netdev_priv(netdev);
3635        struct be_eq_obj *eqo;
3636        int i;
3637
3638        /* This protection is needed as be_close() may be called even when the
3639         * adapter is in cleared state (after eeh perm failure)
3640         */
3641        if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3642                return 0;
3643
3644        /* Before attempting cleanup ensure all the pending cmds in the
3645         * config_wq have finished execution
3646         */
3647        flush_workqueue(be_wq);
3648
3649        be_disable_if_filters(adapter);
3650
3651        if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3652                for_all_evt_queues(adapter, eqo, i) {
3653                        napi_disable(&eqo->napi);
3654                }
3655                adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3656        }
3657
3658        be_async_mcc_disable(adapter);
3659
3660        /* Wait for all pending tx completions to arrive so that
3661         * all tx skbs are freed.
3662         */
3663        netif_tx_disable(netdev);
3664        be_tx_compl_clean(adapter);
3665
3666        be_rx_qs_destroy(adapter);
3667
3668        for_all_evt_queues(adapter, eqo, i) {
3669                if (msix_enabled(adapter))
3670                        synchronize_irq(be_msix_vec_get(adapter, eqo));
3671                else
3672                        synchronize_irq(netdev->irq);
3673                be_eq_clean(eqo);
3674        }
3675
3676        be_irq_unregister(adapter);
3677
3678        return 0;
3679}
3680
3681static int be_rx_qs_create(struct be_adapter *adapter)
3682{
3683        struct rss_info *rss = &adapter->rss_info;
3684        u8 rss_key[RSS_HASH_KEY_LEN];
3685        struct be_rx_obj *rxo;
3686        int rc, i, j;
3687
3688        for_all_rx_queues(adapter, rxo, i) {
3689                rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3690                                    sizeof(struct be_eth_rx_d));
3691                if (rc)
3692                        return rc;
3693        }
3694
3695        if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3696                rxo = default_rxo(adapter);
3697                rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3698                                       rx_frag_size, adapter->if_handle,
3699                                       false, &rxo->rss_id);
3700                if (rc)
3701                        return rc;
3702        }
3703
3704        for_all_rss_queues(adapter, rxo, i) {
3705                rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3706                                       rx_frag_size, adapter->if_handle,
3707                                       true, &rxo->rss_id);
3708                if (rc)
3709                        return rc;
3710        }
3711
3712        if (be_multi_rxq(adapter)) {
3713                for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3714                        for_all_rss_queues(adapter, rxo, i) {
3715                                if ((j + i) >= RSS_INDIR_TABLE_LEN)
3716                                        break;
3717                                rss->rsstable[j + i] = rxo->rss_id;
3718                                rss->rss_queue[j + i] = i;
3719                        }
3720                }
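                    /* For example (assumed ring count): with num_rss_qs == 4,
                     * the 128-entry rsstable ends up repeating the pattern
                     * rss_id(q0), rss_id(q1), rss_id(q2), rss_id(q3) 32 times,
                     * spreading flows evenly across the RSS queues.
                     */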
3721                rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3722                        RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3723
3724                if (!BEx_chip(adapter))
3725                        rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3726                                RSS_ENABLE_UDP_IPV6;
3727
3728                netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3729                rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3730                                       RSS_INDIR_TABLE_LEN, rss_key);
3731                if (rc) {
3732                        rss->rss_flags = RSS_ENABLE_NONE;
3733                        return rc;
3734                }
3735
3736                memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3737        } else {
3738                /* Disable RSS, if only default RX Q is created */
3739                rss->rss_flags = RSS_ENABLE_NONE;
3740        }
3741
3742
3743        /* Post 1 less than RXQ-len to avoid head being equal to tail,
3744         * which is a queue empty condition
3745         */
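            /* E.g. if RX_Q_LEN were 1024, only 1023 buffers get posted, so a
             * completely full ring remains distinguishable from an empty one.
             */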
3746        for_all_rx_queues(adapter, rxo, i)
3747                be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3748
3749        return 0;
3750}
3751
3752static int be_enable_if_filters(struct be_adapter *adapter)
3753{
3754        int status;
3755
3756        status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3757        if (status)
3758                return status;
3759
3760        /* Normally this condition is true as the ->dev_mac is zeroed.
3761         * But on BE3 VFs the initial MAC is pre-programmed by PF and
3762         * subsequent be_dev_mac_add() can fail (after fresh boot)
3763         */
3764        if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3765                int old_pmac_id = -1;
3766
3767                /* Remember old programmed MAC if any - can happen on BE3 VF */
3768                if (!is_zero_ether_addr(adapter->dev_mac))
3769                        old_pmac_id = adapter->pmac_id[0];
3770
3771                status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3772                if (status)
3773                        return status;
3774
3775                /* Delete the old programmed MAC as we successfully programmed
3776                 * a new MAC
3777                 */
3778                if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3779                        be_dev_mac_del(adapter, old_pmac_id);
3780
3781                ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3782        }
3783
3784        if (adapter->vlans_added)
3785                be_vid_config(adapter);
3786
3787        __be_set_rx_mode(adapter);
3788
3789        return 0;
3790}
3791
3792static int be_open(struct net_device *netdev)
3793{
3794        struct be_adapter *adapter = netdev_priv(netdev);
3795        struct be_eq_obj *eqo;
3796        struct be_rx_obj *rxo;
3797        struct be_tx_obj *txo;
3798        u8 link_status;
3799        int status, i;
3800
3801        status = be_rx_qs_create(adapter);
3802        if (status)
3803                goto err;
3804
3805        status = be_enable_if_filters(adapter);
3806        if (status)
3807                goto err;
3808
3809        status = be_irq_register(adapter);
3810        if (status)
3811                goto err;
3812
3813        for_all_rx_queues(adapter, rxo, i)
3814                be_cq_notify(adapter, rxo->cq.id, true, 0);
3815
3816        for_all_tx_queues(adapter, txo, i)
3817                be_cq_notify(adapter, txo->cq.id, true, 0);
3818
3819        be_async_mcc_enable(adapter);
3820
3821        for_all_evt_queues(adapter, eqo, i) {
3822                napi_enable(&eqo->napi);
3823                be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3824        }
3825        adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3826
3827        status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3828        if (!status)
3829                be_link_status_update(adapter, link_status);
3830
3831        netif_tx_start_all_queues(netdev);
3832        if (skyhawk_chip(adapter))
3833                udp_tunnel_get_rx_info(netdev);
3834
3835        return 0;
3836err:
3837        be_close(adapter->netdev);
3838        return -EIO;
3839}
3840
3841static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3842{
3843        u32 addr;
3844
3845        addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3846
3847        mac[5] = (u8)(addr & 0xFF);
3848        mac[4] = (u8)((addr >> 8) & 0xFF);
3849        mac[3] = (u8)((addr >> 16) & 0xFF);
3850        /* Use the OUI from the current MAC address */
3851        memcpy(mac, adapter->netdev->dev_addr, 3);
3852}
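    /* Purely illustrative (made-up addresses): if the PF MAC were
     * 00:00:c9:aa:bb:cc and jhash() of it returned 0x12345678, the seed VF MAC
     * would be 00:00:c9:34:56:78; be_vf_eth_addr_config() below then hands out
     * ...:78, ...:79, ...:7a and so on by bumping mac[5] for each VF.
     */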
3853
3854/*
3855 * Generate a seed MAC address from the PF MAC Address using jhash.
3856 * MAC addresses for VFs are assigned incrementally, starting from the seed.
3857 * These addresses are programmed in the ASIC by the PF and the VF driver
3858 * queries for the MAC address during its probe.
3859 */
3860static int be_vf_eth_addr_config(struct be_adapter *adapter)
3861{
3862        u32 vf;
3863        int status = 0;
3864        u8 mac[ETH_ALEN];
3865        struct be_vf_cfg *vf_cfg;
3866
3867        be_vf_eth_addr_generate(adapter, mac);
3868
3869        for_all_vfs(adapter, vf_cfg, vf) {
3870                if (BEx_chip(adapter))
3871                        status = be_cmd_pmac_add(adapter, mac,
3872                                                 vf_cfg->if_handle,
3873                                                 &vf_cfg->pmac_id, vf + 1);
3874                else
3875                        status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3876                                                vf + 1);
3877
3878                if (status)
3879                        dev_err(&adapter->pdev->dev,
3880                                "Mac address assignment failed for VF %d\n",
3881                                vf);
3882                else
3883                        memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3884
3885                mac[5] += 1;
3886        }
3887        return status;
3888}
3889
3890static int be_vfs_mac_query(struct be_adapter *adapter)
3891{
3892        int status, vf;
3893        u8 mac[ETH_ALEN];
3894        struct be_vf_cfg *vf_cfg;
3895
3896        for_all_vfs(adapter, vf_cfg, vf) {
3897                status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3898                                               mac, vf_cfg->if_handle,
3899                                               false, vf+1);
3900                if (status)
3901                        return status;
3902                memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3903        }
3904        return 0;
3905}
3906
3907static void be_vf_clear(struct be_adapter *adapter)
3908{
3909        struct be_vf_cfg *vf_cfg;
3910        u32 vf;
3911
3912        if (pci_vfs_assigned(adapter->pdev)) {
3913                dev_warn(&adapter->pdev->dev,
3914                         "VFs are assigned to VMs: not disabling VFs\n");
3915                goto done;
3916        }
3917
3918        pci_disable_sriov(adapter->pdev);
3919
3920        for_all_vfs(adapter, vf_cfg, vf) {
3921                if (BEx_chip(adapter))
3922                        be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3923                                        vf_cfg->pmac_id, vf + 1);
3924                else
3925                        be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3926                                       vf + 1);
3927
3928                be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3929        }
3930
3931        if (BE3_chip(adapter))
3932                be_cmd_set_hsw_config(adapter, 0, 0,
3933                                      adapter->if_handle,
3934                                      PORT_FWD_TYPE_PASSTHRU, 0);
3935done:
3936        kfree(adapter->vf_cfg);
3937        adapter->num_vfs = 0;
3938        adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3939}
3940
3941static void be_clear_queues(struct be_adapter *adapter)
3942{
3943        be_mcc_queues_destroy(adapter);
3944        be_rx_cqs_destroy(adapter);
3945        be_tx_queues_destroy(adapter);
3946        be_evt_queues_destroy(adapter);
3947}
3948
3949static void be_cancel_worker(struct be_adapter *adapter)
3950{
3951        if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3952                cancel_delayed_work_sync(&adapter->work);
3953                adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3954        }
3955}
3956
3957static void be_cancel_err_detection(struct be_adapter *adapter)
3958{
3959        struct be_error_recovery *err_rec = &adapter->error_recovery;
3960
3961        if (!be_err_recovery_workq)
3962                return;
3963
3964        if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3965                cancel_delayed_work_sync(&err_rec->err_detection_work);
3966                adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3967        }
3968}
3969
3970static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3971{
3972        struct net_device *netdev = adapter->netdev;
3973        struct device *dev = &adapter->pdev->dev;
3974        struct be_vxlan_port *vxlan_port;
3975        __be16 port;
3976        int status;
3977
3978        vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3979                                      struct be_vxlan_port, list);
3980        port = vxlan_port->port;
3981
3982        status = be_cmd_manage_iface(adapter, adapter->if_handle,
3983                                     OP_CONVERT_NORMAL_TO_TUNNEL);
3984        if (status) {
3985                dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3986                return status;
3987        }
3988        adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3989
3990        status = be_cmd_set_vxlan_port(adapter, port);
3991        if (status) {
3992                dev_warn(dev, "Failed to add VxLAN port\n");
3993                return status;
3994        }
3995        adapter->vxlan_port = port;
3996
3997        netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3998                                   NETIF_F_TSO | NETIF_F_TSO6 |
3999                                   NETIF_F_GSO_UDP_TUNNEL;
4000
4001        dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4002                 be16_to_cpu(port));
4003        return 0;
4004}
4005
4006static void be_disable_vxlan_offloads(struct be_adapter *adapter)
4007{
4008        struct net_device *netdev = adapter->netdev;
4009
4010        if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4011                be_cmd_manage_iface(adapter, adapter->if_handle,
4012                                    OP_CONVERT_TUNNEL_TO_NORMAL);
4013
4014        if (adapter->vxlan_port)
4015                be_cmd_set_vxlan_port(adapter, 0);
4016
4017        adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4018        adapter->vxlan_port = 0;
4019
4020        netdev->hw_enc_features = 0;
4021}
4022
4023static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4024                                struct be_resources *vft_res)
4025{
4026        struct be_resources res = adapter->pool_res;
4027        u32 vf_if_cap_flags = res.vf_if_cap_flags;
4028        struct be_resources res_mod = {0};
4029        u16 num_vf_qs = 1;
4030
4031        /* Distribute the queue resources among the PF and its VFs */
4032        if (num_vfs) {
4033                /* Divide the rx queues evenly among the VFs and the PF, capped
4034                 * at VF-EQ-count. Any remainder queues belong to the PF.
4035                 */
4036                num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4037                                res.max_rss_qs / (num_vfs + 1));
4038
4039                /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4040                 * RSS Tables per port. Provide RSS on VFs only if the number of
4041                 * VFs requested is less than its PF pool's RSS Tables limit.
4042                 */
4043                if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4044                        num_vf_qs = 1;
4045        }
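            /* Worked example with assumed resources: num_vfs == 7 and
             * res.max_rss_qs == 16 give num_vf_qs = min(SH_VF_MAX_NIC_EQS,
             * 16 / 8); and if those 7 VFs reach the PF pool's RSS-table limit,
             * num_vf_qs is forced back to 1 by the check above.
             */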
4046
4047        /* The GET_PROFILE_CONFIG cmd sets to all '1's those resource fields
4048         * that are modifiable using the SET_PROFILE_CONFIG cmd.
4049         */
4050        be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4051                                  RESOURCE_MODIFIABLE, 0);
4052
4053        /* If RSS IFACE capability flags are modifiable for a VF, set the
4054         * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4055         * more than 1 RSSQ is available for a VF.
4056         * Otherwise, provision only 1 queue pair for VF.
4057         */
4058        if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4059                vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4060                if (num_vf_qs > 1) {
4061                        vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4062                        if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4063                                vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4064                } else {
4065                        vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4066                                             BE_IF_FLAGS_DEFQ_RSS);
4067                }
4068        } else {
4069                num_vf_qs = 1;
4070        }
4071
4072        if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4073                vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4074                vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4075        }
4076
4077        vft_res->vf_if_cap_flags = vf_if_cap_flags;
4078        vft_res->max_rx_qs = num_vf_qs;
4079        vft_res->max_rss_qs = num_vf_qs;
4080        vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4081        vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4082
4083        /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4084         * among the PF and its VFs, if the fields are changeable
4085         */
4086        if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4087                vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4088
4089        if (res_mod.max_vlans == FIELD_MODIFIABLE)
4090                vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4091
4092        if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4093                vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4094
4095        if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4096                vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4097}
4098
4099static void be_if_destroy(struct be_adapter *adapter)
4100{
4101        be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4102
4103        kfree(adapter->pmac_id);
4104        adapter->pmac_id = NULL;
4105
4106        kfree(adapter->mc_list);
4107        adapter->mc_list = NULL;
4108
4109        kfree(adapter->uc_list);
4110        adapter->uc_list = NULL;
4111}
4112
4113static int be_clear(struct be_adapter *adapter)
4114{
4115        struct pci_dev *pdev = adapter->pdev;
4116        struct  be_resources vft_res = {0};
4117
4118        be_cancel_worker(adapter);
4119
4120        flush_workqueue(be_wq);
4121
4122        if (sriov_enabled(adapter))
4123                be_vf_clear(adapter);
4124
4125        /* Re-configure FW to distribute resources evenly across max-supported
4126         * number of VFs, only when VFs are not already enabled.
4127         */
4128        if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4129            !pci_vfs_assigned(pdev)) {
4130                be_calculate_vf_res(adapter,
4131                                    pci_sriov_get_totalvfs(pdev),
4132                                    &vft_res);
4133                be_cmd_set_sriov_config(adapter, adapter->pool_res,
4134                                        pci_sriov_get_totalvfs(pdev),
4135                                        &vft_res);
4136        }
4137
4138        be_disable_vxlan_offloads(adapter);
4139
4140        be_if_destroy(adapter);
4141
4142        be_clear_queues(adapter);
4143
4144        be_msix_disable(adapter);
4145        adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4146        return 0;
4147}
4148
4149static int be_vfs_if_create(struct be_adapter *adapter)
4150{
4151        struct be_resources res = {0};
4152        u32 cap_flags, en_flags, vf;
4153        struct be_vf_cfg *vf_cfg;
4154        int status;
4155
4156        /* If a FW profile exists, then cap_flags are updated */
4157        cap_flags = BE_VF_IF_EN_FLAGS;
4158
4159        for_all_vfs(adapter, vf_cfg, vf) {
4160                if (!BE3_chip(adapter)) {
4161                        status = be_cmd_get_profile_config(adapter, &res, NULL,
4162                                                           ACTIVE_PROFILE_TYPE,
4163                                                           RESOURCE_LIMITS,
4164                                                           vf + 1);
4165                        if (!status) {
4166                                cap_flags = res.if_cap_flags;
4167                                /* Prevent VFs from enabling VLAN promiscuous
4168                                 * mode
4169                                 */
4170                                cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4171                        }
4172                }
4173
4174                /* PF should enable IF flags during proxy if_create call */
4175                en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4176                status = be_cmd_if_create(adapter, cap_flags, en_flags,
4177                                          &vf_cfg->if_handle, vf + 1);
4178                if (status)
4179                        return status;
4180        }
4181
4182        return 0;
4183}
4184
4185static int be_vf_setup_init(struct be_adapter *adapter)
4186{
4187        struct be_vf_cfg *vf_cfg;
4188        int vf;
4189
4190        adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4191                                  GFP_KERNEL);
4192        if (!adapter->vf_cfg)
4193                return -ENOMEM;
4194
4195        for_all_vfs(adapter, vf_cfg, vf) {
4196                vf_cfg->if_handle = -1;
4197                vf_cfg->pmac_id = -1;
4198        }
4199        return 0;
4200}
4201
4202static int be_vf_setup(struct be_adapter *adapter)
4203{
4204        struct device *dev = &adapter->pdev->dev;
4205        struct be_vf_cfg *vf_cfg;
4206        int status, old_vfs, vf;
4207        bool spoofchk;
4208
4209        old_vfs = pci_num_vf(adapter->pdev);
4210
4211        status = be_vf_setup_init(adapter);
4212        if (status)
4213                goto err;
4214
4215        if (old_vfs) {
4216                for_all_vfs(adapter, vf_cfg, vf) {
4217                        status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4218                        if (status)
4219                                goto err;
4220                }
4221
4222                status = be_vfs_mac_query(adapter);
4223                if (status)
4224                        goto err;
4225        } else {
4226                status = be_vfs_if_create(adapter);
4227                if (status)
4228                        goto err;
4229
4230                status = be_vf_eth_addr_config(adapter);
4231                if (status)
4232                        goto err;
4233        }
4234
4235        for_all_vfs(adapter, vf_cfg, vf) {
4236                /* Allow VFs to program MAC/VLAN filters */
4237                status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4238                                                  vf + 1);
4239                if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4240                        status = be_cmd_set_fn_privileges(adapter,
4241                                                          vf_cfg->privileges |
4242                                                          BE_PRIV_FILTMGMT,
4243                                                          vf + 1);
4244                        if (!status) {
4245                                vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4246                                dev_info(dev, "VF%d has FILTMGMT privilege\n",
4247                                         vf);
4248                        }
4249                }
4250
4251                /* Allow full available bandwidth */
4252                if (!old_vfs)
4253                        be_cmd_config_qos(adapter, 0, 0, vf + 1);
4254
4255                status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4256                                               vf_cfg->if_handle, NULL,
4257                                               &spoofchk);
4258                if (!status)
4259                        vf_cfg->spoofchk = spoofchk;
4260
4261                if (!old_vfs) {
4262                        be_cmd_enable_vf(adapter, vf + 1);
4263                        be_cmd_set_logical_link_config(adapter,
4264                                                       IFLA_VF_LINK_STATE_AUTO,
4265                                                       vf+1);
4266                }
4267        }
4268
4269        if (!old_vfs) {
4270                status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4271                if (status) {
4272                        dev_err(dev, "SRIOV enable failed\n");
4273                        adapter->num_vfs = 0;
4274                        goto err;
4275                }
4276        }
4277
4278        if (BE3_chip(adapter)) {
4279                /* On BE3, enable VEB only when SRIOV is enabled */
4280                status = be_cmd_set_hsw_config(adapter, 0, 0,
4281                                               adapter->if_handle,
4282                                               PORT_FWD_TYPE_VEB, 0);
4283                if (status)
4284                        goto err;
4285        }
4286
4287        adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4288        return 0;
4289err:
4290        dev_err(dev, "VF setup failed\n");
4291        be_vf_clear(adapter);
4292        return status;
4293}
4294
4295/* Converting function_mode bits on BE3 to SH mc_type enums */
4296
4297static u8 be_convert_mc_type(u32 function_mode)
4298{
4299        if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4300                return vNIC1;
4301        else if (function_mode & QNQ_MODE)
4302                return FLEX10;
4303        else if (function_mode & VNIC_MODE)
4304                return vNIC2;
4305        else if (function_mode & UMC_ENABLED)
4306                return UMC;
4307        else
4308                return MC_NONE;
4309}
4310
4311/* On BE2/BE3 FW does not suggest the supported limits */
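    /* Illustration with hypothetical values: for a single-port, RSS-capable,
     * non-multi-channel BE3-R PF with no SR-IOV in use, the logic below yields
     * max_tx_qs = BE3_MAX_TX_QS, max_rss_qs = BE3_MAX_RSS_QS (when be3_native),
     * max_rx_qs = max_rss_qs + 1 and max_evt_qs = BE3_MAX_EVT_QS (or
     * BE3_SRIOV_MAX_EVT_QS if the profile supports VFs).
     */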
4312static void BEx_get_resources(struct be_adapter *adapter,
4313                              struct be_resources *res)
4314{
4315        bool use_sriov = adapter->num_vfs ? 1 : 0;
4316
4317        if (be_physfn(adapter))
4318                res->max_uc_mac = BE_UC_PMAC_COUNT;
4319        else
4320                res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4321
4322        adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4323
4324        if (be_is_mc(adapter)) {
4325                /* Assuming that there are 4 channels per port
4326                 * when multi-channel is enabled
4327                 */
4328                if (be_is_qnq_mode(adapter))
4329                        res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4330                else
4331                        /* In a non-qnq multichannel mode, the pvid
4332                         * takes up one vlan entry
4333                         */
4334                        res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4335        } else {
4336                res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4337        }
4338
4339        res->max_mcast_mac = BE_MAX_MC;
4340
4341        /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4342         * 2) Create multiple TX rings on a BE3-R multi-channel interface
4343         *    *only* if it is RSS-capable.
4344         */
4345        if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4346            be_virtfn(adapter) ||
4347            (be_is_mc(adapter) &&
4348             !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4349                res->max_tx_qs = 1;
4350        } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4351                struct be_resources super_nic_res = {0};
4352
4353                /* On a SuperNIC profile, the driver needs to use the
4354                 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4355                 */
4356                be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4357                                          ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4358                                          0);
4359                /* Some old versions of BE3 FW don't report max_tx_qs value */
4360                res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4361        } else {
4362                res->max_tx_qs = BE3_MAX_TX_QS;
4363        }
4364
4365        if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4366            !use_sriov && be_physfn(adapter))
4367                res->max_rss_qs = (adapter->be3_native) ?
4368                                           BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4369        res->max_rx_qs = res->max_rss_qs + 1;
4370
4371        if (be_physfn(adapter))
4372                res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4373                                        BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4374        else
4375                res->max_evt_qs = 1;
4376
4377        res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4378        res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4379        if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4380                res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4381}
4382
4383static void be_setup_init(struct be_adapter *adapter)
4384{
4385        adapter->vlan_prio_bmap = 0xff;
4386        adapter->phy.link_speed = -1;
4387        adapter->if_handle = -1;
4388        adapter->be3_native = false;
4389        adapter->if_flags = 0;
4390        adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4391        if (be_physfn(adapter))
4392                adapter->cmd_privileges = MAX_PRIVILEGES;
4393        else
4394                adapter->cmd_privileges = MIN_PRIVILEGES;
4395}
4396
4397/* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4398 * However, this HW limitation is not exposed to the host via any SLI cmd.
4399 * As a result, in the case of SRIOV, and in particular in multi-partition
4400 * configs, the driver needs to calculate a proportional share of RSS Tables
4401 * per PF-pool for distribution between the VFs. This self-imposed limit
4402 * determines the number of VFs for which RSS can be enabled.
4403 */
4404static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4405{
4406        struct be_port_resources port_res = {0};
4407        u8 rss_tables_on_port;
4408        u16 max_vfs = be_max_vfs(adapter);
4409
4410        be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4411                                  RESOURCE_LIMITS, 0);
4412
4413        rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4414
4415        /* Each PF Pool's RSS Tables limit =
4416         * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4417         */
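            /* Worked example with hypothetical numbers: if MAX_PORT_RSS_TABLES
             * is 15, port_res.nic_pfs is 2 and port_res.max_vfs is 64, then
             * rss_tables_on_port = 13 and a PF whose max_vfs is 32 gets
             * 32 * 13 / 64 = 6 RSS Tables for its pool (integer division).
             */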
4418        adapter->pool_res.max_rss_tables =
4419                max_vfs * rss_tables_on_port / port_res.max_vfs;
4420}
4421
4422static int be_get_sriov_config(struct be_adapter *adapter)
4423{
4424        struct be_resources res = {0};
4425        int max_vfs, old_vfs;
4426
4427        be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4428                                  RESOURCE_LIMITS, 0);
4429
4430        /* Some old versions of BE3 FW don't report max_vfs value */
4431        if (BE3_chip(adapter) && !res.max_vfs) {
4432                max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4433                res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4434        }
4435
4436        adapter->pool_res = res;
4437
4438        /* If the VFs were not disabled during a previous unload of the
4439         * driver, we cannot rely on the PF-pool limits for the TotalVFs
4440         * value. Instead, use the TotalVFs value stored in the pci-dev struct.
4441         */
4442        old_vfs = pci_num_vf(adapter->pdev);
4443        if (old_vfs) {
4444                dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4445                         old_vfs);
4446
4447                adapter->pool_res.max_vfs =
4448                        pci_sriov_get_totalvfs(adapter->pdev);
4449                adapter->num_vfs = old_vfs;
4450        }
4451
4452        if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4453                be_calculate_pf_pool_rss_tables(adapter);
4454                dev_info(&adapter->pdev->dev,
4455                         "RSS can be enabled for all VFs if num_vfs <= %d\n",
4456                         be_max_pf_pool_rss_tables(adapter));
4457        }
4458        return 0;
4459}
4460
4461static void be_alloc_sriov_res(struct be_adapter *adapter)
4462{
4463        int old_vfs = pci_num_vf(adapter->pdev);
4464        struct be_resources vft_res = {0};
4465        int status;
4466
4467        be_get_sriov_config(adapter);
4468
4469        if (!old_vfs)
4470                pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4471
4472        /* When the HW is in an SRIOV-capable configuration, the PF-pool
4473         * resources are given to the PF during driver load, provided there
4474         * are no old VFs. This facility is not available in BE3 FW.
4475         * On the Lancer chip, this is done by the FW itself.
4476         */
4477        if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4478                be_calculate_vf_res(adapter, 0, &vft_res);
4479                status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4480                                                 &vft_res);
4481                if (status)
4482                        dev_err(&adapter->pdev->dev,
4483                                "Failed to optimize SRIOV resources\n");
4484        }
4485}
4486
4487static int be_get_resources(struct be_adapter *adapter)
4488{
4489        struct device *dev = &adapter->pdev->dev;
4490        struct be_resources res = {0};
4491        int status;
4492
4493        /* For Lancer, SH etc., read per-function resource limits from FW.
4494         * GET_FUNC_CONFIG returns per-function guaranteed limits.
4495         * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4496         */
4497        if (BEx_chip(adapter)) {
4498                BEx_get_resources(adapter, &res);
4499        } else {
4500                status = be_cmd_get_func_config(adapter, &res);
4501                if (status)
4502                        return status;
4503
4504                /* If a default RXQ must be created, we'll use up one RSS queue */
4505                if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4506                    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4507                        res.max_rss_qs -= 1;
4508        }
4509
4510        /* If RoCE is supported, stash away half the EQs for RoCE */
4511        res.max_nic_evt_qs = be_roce_supported(adapter) ?
4512                                res.max_evt_qs / 2 : res.max_evt_qs;
4513        adapter->res = res;
4514
4515        /* If FW supports RSS default queue, then skip creating non-RSS
4516         * queue for non-IP traffic.
4517         */
4518        adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4519                                 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4520
4521        dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4522                 be_max_txqs(adapter), be_max_rxqs(adapter),
4523                 be_max_rss(adapter), be_max_nic_eqs(adapter),
4524                 be_max_vfs(adapter));
4525        dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4526                 be_max_uc(adapter), be_max_mc(adapter),
4527                 be_max_vlans(adapter));
4528
4529        /* Ensure RX and TX queues are created in pairs at init time */
4530        adapter->cfg_num_rx_irqs =
4531                                min_t(u16, netif_get_num_default_rss_queues(),
4532                                      be_max_qp_irqs(adapter));
4533        adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4534        return 0;
4535}
4536
4537static int be_get_config(struct be_adapter *adapter)
4538{
4539        int status, level;
4540        u16 profile_id;
4541
4542        status = be_cmd_get_cntl_attributes(adapter);
4543        if (status)
4544                return status;
4545
4546        status = be_cmd_query_fw_cfg(adapter);
4547        if (status)
4548                return status;
4549
4550        if (!lancer_chip(adapter) && be_physfn(adapter))
4551                be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4552
4553        if (BEx_chip(adapter)) {
4554                level = be_cmd_get_fw_log_level(adapter);
4555                adapter->msg_enable =
4556                        level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4557        }
4558
4559        be_cmd_get_acpi_wol_cap(adapter);
4560        pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4561        pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4562
4563        be_cmd_query_port_name(adapter);
4564
4565        if (be_physfn(adapter)) {
4566                status = be_cmd_get_active_profile(adapter, &profile_id);
4567                if (!status)
4568                        dev_info(&adapter->pdev->dev,
4569                                 "Using profile 0x%x\n", profile_id);
4570        }
4571
4572        return 0;
4573}
4574
4575static int be_mac_setup(struct be_adapter *adapter)
4576{
4577        u8 mac[ETH_ALEN];
4578        int status;
4579
4580        if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4581                status = be_cmd_get_perm_mac(adapter, mac);
4582                if (status)
4583                        return status;
4584
4585                memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4586                memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4587
4588                /* Initial MAC for BE3 VFs is already programmed by PF */
4589                if (BEx_chip(adapter) && be_virtfn(adapter))
4590                        memcpy(adapter->dev_mac, mac, ETH_ALEN);
4591        }
4592
4593        return 0;
4594}
4595
4596static void be_schedule_worker(struct be_adapter *adapter)
4597{
4598        queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4599        adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4600}
4601
4602static void be_destroy_err_recovery_workq(void)
4603{
4604        if (!be_err_recovery_workq)
4605                return;
4606
4607        flush_workqueue(be_err_recovery_workq);
4608        destroy_workqueue(be_err_recovery_workq);
4609        be_err_recovery_workq = NULL;
4610}
4611
4612static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4613{
4614        struct be_error_recovery *err_rec = &adapter->error_recovery;
4615
4616        if (!be_err_recovery_workq)
4617                return;
4618
4619        queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4620                           msecs_to_jiffies(delay));
4621        adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4622}
4623
4624static int be_setup_queues(struct be_adapter *adapter)
4625{
4626        struct net_device *netdev = adapter->netdev;
4627        int status;
4628
4629        status = be_evt_queues_create(adapter);
4630        if (status)
4631                goto err;
4632
4633        status = be_tx_qs_create(adapter);
4634        if (status)
4635                goto err;
4636
4637        status = be_rx_cqs_create(adapter);
4638        if (status)
4639                goto err;
4640
4641        status = be_mcc_queues_create(adapter);
4642        if (status)
4643                goto err;
4644
4645        status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4646        if (status)
4647                goto err;
4648
4649        status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4650        if (status)
4651                goto err;
4652
4653        return 0;
4654err:
4655        dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4656        return status;
4657}
4658
4659static int be_if_create(struct be_adapter *adapter)
4660{
4661        u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4662        u32 cap_flags = be_if_cap_flags(adapter);
4663        int status;
4664
4665        /* alloc required memory for other filtering fields */
4666        adapter->pmac_id = kcalloc(be_max_uc(adapter),
4667                                   sizeof(*adapter->pmac_id), GFP_KERNEL);
4668        if (!adapter->pmac_id)
4669                return -ENOMEM;
4670
4671        adapter->mc_list = kcalloc(be_max_mc(adapter),
4672                                   sizeof(*adapter->mc_list), GFP_KERNEL);
4673        if (!adapter->mc_list)
4674                return -ENOMEM;
4675
4676        adapter->uc_list = kcalloc(be_max_uc(adapter),
4677                                   sizeof(*adapter->uc_list), GFP_KERNEL);
4678        if (!adapter->uc_list)
4679                return -ENOMEM;
4680
4681        if (adapter->cfg_num_rx_irqs == 1)
4682                cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4683
4684        en_flags &= cap_flags;
4685        /* will enable all the needed filter flags in be_open() */
4686        status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4687                                  &adapter->if_handle, 0);
4688
4689        if (status)
4690                return status;
4691
4692        return 0;
4693}
4694
4695int be_update_queues(struct be_adapter *adapter)
4696{
4697        struct net_device *netdev = adapter->netdev;
4698        int status;
4699
4700        if (netif_running(netdev)) {
4701                /* be_tx_timeout() must not run concurrently with this
4702                 * function; synchronize with an already-running dev_watchdog
4703                 */
4704                netif_tx_lock_bh(netdev);
4705                /* device cannot transmit now, avoid dev_watchdog timeouts */
4706                netif_carrier_off(netdev);
4707                netif_tx_unlock_bh(netdev);
4708
4709                be_close(netdev);
4710        }
4711
4712        be_cancel_worker(adapter);
4713
4714        /* If any vectors have been shared with RoCE we cannot re-program
4715         * the MSIx table.
4716         */
4717        if (!adapter->num_msix_roce_vec)
4718                be_msix_disable(adapter);
4719
4720        be_clear_queues(adapter);
4721        status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4722        if (status)
4723                return status;
4724
4725        if (!msix_enabled(adapter)) {
4726                status = be_msix_enable(adapter);
4727                if (status)
4728                        return status;
4729        }
4730
4731        status = be_if_create(adapter);
4732        if (status)
4733                return status;
4734
4735        status = be_setup_queues(adapter);
4736        if (status)
4737                return status;
4738
4739        be_schedule_worker(adapter);
4740
4741        /* The IF was destroyed and re-created. We need to clear
4742         * all promiscuous flags valid for the destroyed IF.
4743         * Without this, promisc mode is not restored during
4744         * be_open() because the driver thinks that it is
4745         * already enabled in HW.
4746         */
4747        adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4748
4749        if (netif_running(netdev))
4750                status = be_open(netdev);
4751
4752        return status;
4753}
4754
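    /* Parse the major number from a FW version string; for example, a version
     * string such as "4.9.311.12" (illustrative) yields 4, and 0 is returned
     * if no leading number can be parsed.
     */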
4755static inline int fw_major_num(const char *fw_ver)
4756{
4757        int fw_major = 0, i;
4758
4759        i = sscanf(fw_ver, "%d.", &fw_major);
4760        if (i != 1)
4761                return 0;
4762
4763        return fw_major;
4764}
4765
4766/* If this is error recovery, FLR the PF.
4767 * Otherwise, if any VFs are already enabled, don't FLR the PF.
4768 */
4769static bool be_reset_required(struct be_adapter *adapter)
4770{
4771        if (be_error_recovering(adapter))
4772                return true;
4773        else
4774                return pci_num_vf(adapter->pdev) == 0;
4775}
4776
4777/* Wait for the FW to be ready and perform the required initialization */
4778static int be_func_init(struct be_adapter *adapter)
4779{
4780        int status;
4781
4782        status = be_fw_wait_ready(adapter);
4783        if (status)
4784                return status;
4785
4786        /* FW is now ready; clear errors to allow cmds/doorbell */
4787        be_clear_error(adapter, BE_CLEAR_ALL);
4788
4789        if (be_reset_required(adapter)) {
4790                status = be_cmd_reset_function(adapter);
4791                if (status)
4792                        return status;
4793
4794                /* Wait for interrupts to quiesce after an FLR */
4795                msleep(100);
4796        }
4797
4798        /* Tell FW we're ready to fire cmds */
4799        status = be_cmd_fw_init(adapter);
4800        if (status)
4801                return status;
4802
4803        /* Allow interrupts for other ULPs running on NIC function */
4804        be_intr_set(adapter, true);
4805
4806        return 0;
4807}
4808
4809static int be_setup(struct be_adapter *adapter)
4810{
4811        struct device *dev = &adapter->pdev->dev;
4812        int status;
4813
4814        status = be_func_init(adapter);
4815        if (status)
4816                return status;
4817
4818        be_setup_init(adapter);
4819
4820        if (!lancer_chip(adapter))
4821                be_cmd_req_native_mode(adapter);
4822
4823        /* Invoke this cmd first to get pf_num and vf_num, which are needed
4824         * for issuing profile-related cmds
4825         */
4826        if (!BEx_chip(adapter)) {
4827                status = be_cmd_get_func_config(adapter, NULL);
4828                if (status)
4829                        return status;
4830        }
4831
4832        status = be_get_config(adapter);
4833        if (status)
4834                goto err;
4835
4836        if (!BE2_chip(adapter) && be_physfn(adapter))
4837                be_alloc_sriov_res(adapter);
4838
4839        status = be_get_resources(adapter);
4840        if (status)
4841                goto err;
4842
4843        status = be_msix_enable(adapter);
4844        if (status)
4845                goto err;
4846
4847        /* will enable all the needed filter flags in be_open() */
4848        status = be_if_create(adapter);
4849        if (status)
4850                goto err;
4851
4852        /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4853        rtnl_lock();
4854        status = be_setup_queues(adapter);
4855        rtnl_unlock();
4856        if (status)
4857                goto err;
4858
4859        be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4860
4861        status = be_mac_setup(adapter);
4862        if (status)
4863                goto err;
4864
4865        be_cmd_get_fw_ver(adapter);
4866        dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4867
4868        if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4869                dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4870                        adapter->fw_ver);
4871                dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4872        }
4873
4874        status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4875                                         adapter->rx_fc);
4876        if (status)
4877                be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4878                                        &adapter->rx_fc);
4879
4880        dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4881                 adapter->tx_fc, adapter->rx_fc);
4882
4883        if (be_physfn(adapter))
4884                be_cmd_set_logical_link_config(adapter,
4885                                               IFLA_VF_LINK_STATE_AUTO, 0);
4886
4887        /* The BE3 EVB echoes broadcast/multicast packets back to the PF's
4888         * vport, confusing a Linux bridge or OVS that it might be connected to.
4889         * Set the EVB to PASSTHRU mode, which effectively disables the EVB,
4890         * when SRIOV is not enabled.
4891         */
4892        if (BE3_chip(adapter))
4893                be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4894                                      PORT_FWD_TYPE_PASSTHRU, 0);
4895
4896        if (adapter->num_vfs)
4897                be_vf_setup(adapter);
4898
4899        status = be_cmd_get_phy_info(adapter);
4900        if (!status && be_pause_supported(adapter))
4901                adapter->phy.fc_autoneg = 1;
4902
4903        if (be_physfn(adapter) && !lancer_chip(adapter))
4904                be_cmd_set_features(adapter);
4905
4906        be_schedule_worker(adapter);
4907        adapter->flags |= BE_FLAGS_SETUP_DONE;
4908        return 0;
4909err:
4910        be_clear(adapter);
4911        return status;
4912}
4913
4914#ifdef CONFIG_NET_POLL_CONTROLLER
4915static void be_netpoll(struct net_device *netdev)
4916{
4917        struct be_adapter *adapter = netdev_priv(netdev);
4918        struct be_eq_obj *eqo;
4919        int i;
4920
4921        for_all_evt_queues(adapter, eqo, i) {
4922                be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4923                napi_schedule(&eqo->napi);
4924        }
4925}
4926#endif
4927
4928int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4929{
4930        const struct firmware *fw;
4931        int status;
4932
4933        if (!netif_running(adapter->netdev)) {
4934                dev_err(&adapter->pdev->dev,
4935                        "Firmware load not allowed (interface is down)\n");
4936                return -ENETDOWN;
4937        }
4938
4939        status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4940        if (status)
4941                goto fw_exit;
4942
4943        dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4944
4945        if (lancer_chip(adapter))
4946                status = lancer_fw_download(adapter, fw);
4947        else
4948                status = be_fw_download(adapter, fw);
4949
4950        if (!status)
4951                be_cmd_get_fw_ver(adapter);
4952
4953fw_exit:
4954        release_firmware(fw);
4955        return status;
4956}
4957
4958static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4959                                 u16 flags, struct netlink_ext_ack *extack)
4960{
4961        struct be_adapter *adapter = netdev_priv(dev);
4962        struct nlattr *attr, *br_spec;
4963        int rem;
4964        int status = 0;
4965        u16 mode = 0;
4966
4967        if (!sriov_enabled(adapter))
4968                return -EOPNOTSUPP;
4969
4970        br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4971        if (!br_spec)
4972                return -EINVAL;
4973
4974        nla_for_each_nested(attr, br_spec, rem) {
4975                if (nla_type(attr) != IFLA_BRIDGE_MODE)
4976                        continue;
4977
4978                if (nla_len(attr) < sizeof(mode))
4979                        return -EINVAL;
4980
4981                mode = nla_get_u16(attr);
4982                if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4983                        return -EOPNOTSUPP;
4984
4985                if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4986                        return -EINVAL;
4987
4988                status = be_cmd_set_hsw_config(adapter, 0, 0,
4989                                               adapter->if_handle,
4990                                               mode == BRIDGE_MODE_VEPA ?
4991                                               PORT_FWD_TYPE_VEPA :
4992                                               PORT_FWD_TYPE_VEB, 0);
4993                if (status)
4994                        goto err;
4995
4996                dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4997                         mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4998
4999                return status;
5000        }
5001err:
5002        dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5003                mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5004
5005        return status;
5006}
5007
5008static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5009                                 struct net_device *dev, u32 filter_mask,
5010                                 int nlflags)
5011{
5012        struct be_adapter *adapter = netdev_priv(dev);
5013        int status = 0;
5014        u8 hsw_mode;
5015
5016        /* BE and Lancer chips support VEB mode only */
5017        if (BEx_chip(adapter) || lancer_chip(adapter)) {
5018                /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5019                if (!pci_sriov_get_totalvfs(adapter->pdev))
5020                        return 0;
5021                hsw_mode = PORT_FWD_TYPE_VEB;
5022        } else {
5023                status = be_cmd_get_hsw_config(adapter, NULL, 0,
5024                                               adapter->if_handle, &hsw_mode,
5025                                               NULL);
5026                if (status)
5027                        return 0;
5028
5029                if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5030                        return 0;
5031        }
5032
5033        return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5034                                       hsw_mode == PORT_FWD_TYPE_VEPA ?
5035                                       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5036                                       0, 0, nlflags, filter_mask, NULL);
5037}
5038
5039static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5040                                         void (*func)(struct work_struct *))
5041{
5042        struct be_cmd_work *work;
5043
5044        work = kzalloc(sizeof(*work), GFP_ATOMIC);
5045        if (!work) {
5046                dev_err(&adapter->pdev->dev,
5047                        "be_work memory allocation failed\n");
5048                return NULL;
5049        }
5050
5051        INIT_WORK(&work->work, func);
5052        work->adapter = adapter;
5053        return work;
5054}
5055
5056/* VxLAN offload Notes:
5057 *
5058 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5059 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5060 * is expected to work across all types of IP tunnels once exported. Skyhawk
5061 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5062 * offloads in hw_enc_features only when a VxLAN port is added. If other
5063 * (non-VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5064 * those other tunnels are unexported on the fly through ndo_features_check().
5065 *
5066 * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5067 * adds more than one port, disable offloads and re-enable them again when
5068 * there's only one port left. We maintain a list of ports for this purpose.
5069 */
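    /* Illustrative sequence with hypothetical ports: adding UDP port 4789
     * enables VxLAN offloads for it; adding the same port again only bumps its
     * alias count; adding a second, different port (say 8472) disables offloads
     * entirely; deleting that second port leaves a single port on the list, so
     * offloads are re-enabled for it.
     */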
5070static void be_work_add_vxlan_port(struct work_struct *work)
5071{
5072        struct be_cmd_work *cmd_work =
5073                                container_of(work, struct be_cmd_work, work);
5074        struct be_adapter *adapter = cmd_work->adapter;
5075        struct device *dev = &adapter->pdev->dev;
5076        __be16 port = cmd_work->info.vxlan_port;
5077        struct be_vxlan_port *vxlan_port;
5078        int status;
5079
5080        /* Bump up the alias count if it is an existing port */
5081        list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5082                if (vxlan_port->port == port) {
5083                        vxlan_port->port_aliases++;
5084                        goto done;
5085                }
5086        }
5087
5088        /* Add a new port to our list. We don't need a lock here since port
5089         * add/delete are done only in the context of a single-threaded work
5090         * queue (be_wq).
5091         */
5092        vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
5093        if (!vxlan_port)
5094                goto done;
5095
5096        vxlan_port->port = port;
5097        INIT_LIST_HEAD(&vxlan_port->list);
5098        list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
5099        adapter->vxlan_port_count++;
5100
5101        if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5102                dev_info(dev,
5103                         "Only one UDP port supported for VxLAN offloads\n");
5104                dev_info(dev, "Disabling VxLAN offloads\n");
5105                goto err;
5106        }
5107
5108        if (adapter->vxlan_port_count > 1)
5109                goto done;
5110
5111        status = be_enable_vxlan_offloads(adapter);
5112        if (!status)
5113                goto done;
5114
5115err:
5116        be_disable_vxlan_offloads(adapter);
5117done:
5118        kfree(cmd_work);
5119        return;
5120}
5121
5122static void be_work_del_vxlan_port(struct work_struct *work)
5123{
5124        struct be_cmd_work *cmd_work =
5125                                container_of(work, struct be_cmd_work, work);
5126        struct be_adapter *adapter = cmd_work->adapter;
5127        __be16 port = cmd_work->info.vxlan_port;
5128        struct be_vxlan_port *vxlan_port;
5129
5130        /* Nothing to be done if a port alias is being deleted */
5131        list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5132                if (vxlan_port->port == port) {
5133                        if (vxlan_port->port_aliases) {
5134                                vxlan_port->port_aliases--;
5135                                goto done;
5136                        }
5137                        break;
5138                }
5139        }
5140
5141        /* No port aliases left; delete the port from the list */
5142        list_del(&vxlan_port->list);
5143        adapter->vxlan_port_count--;
5144
5145        /* Disable VxLAN offload if this is the offloaded port */
5146        if (adapter->vxlan_port == vxlan_port->port) {
5147                WARN_ON(adapter->vxlan_port_count);
5148                be_disable_vxlan_offloads(adapter);
5149                dev_info(&adapter->pdev->dev,
5150                         "Disabled VxLAN offloads for UDP port %d\n",
5151                         be16_to_cpu(port));
5152                goto out;
5153        }
5154
5155        /* If only 1 port is left, re-enable VxLAN offload */
5156        if (adapter->vxlan_port_count == 1)
5157                be_enable_vxlan_offloads(adapter);
5158
5159out:
5160        kfree(vxlan_port);
5161done:
5162        kfree(cmd_work);
5163}
5164
5165static void be_cfg_vxlan_port(struct net_device *netdev,
5166                              struct udp_tunnel_info *ti,
5167                              void (*func)(struct work_struct *))
5168{
5169        struct be_adapter *adapter = netdev_priv(netdev);
5170        struct be_cmd_work *cmd_work;
5171
5172        if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5173                return;
5174
5175        if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5176                return;
5177
5178        cmd_work = be_alloc_work(adapter, func);
5179        if (cmd_work) {
5180                cmd_work->info.vxlan_port = ti->port;
5181                queue_work(be_wq, &cmd_work->work);
5182        }
5183}
5184
5185static void be_del_vxlan_port(struct net_device *netdev,
5186                              struct udp_tunnel_info *ti)
5187{
5188        be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5189}
5190
5191static void be_add_vxlan_port(struct net_device *netdev,
5192                              struct udp_tunnel_info *ti)
5193{
5194        be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5195}
5196
5197static netdev_features_t be_features_check(struct sk_buff *skb,
5198                                           struct net_device *dev,
5199                                           netdev_features_t features)
5200{
5201        struct be_adapter *adapter = netdev_priv(dev);
5202        u8 l4_hdr = 0;
5203
5204        if (skb_is_gso(skb)) {
5205                /* IPv6 TSO requests with extension hdrs are a problem
5206                 * for Lancer and BE3 HW. Disable the TSO6 feature.
5207                 */
5208                if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5209                        features &= ~NETIF_F_TSO6;
5210
5211                /* Lancer cannot handle a packet with an MSS less than 256.
5212                 * It also can't handle a TSO packet with a single segment.
5213                 * Disable GSO support in such cases.
5214                 */
5215                if (lancer_chip(adapter) &&
5216                    (skb_shinfo(skb)->gso_size < 256 ||
5217                     skb_shinfo(skb)->gso_segs == 1))
5218                        features &= ~NETIF_F_GSO_MASK;
5219        }
5220
5221        /* The code below restricts offload features for some tunneled and
5222         * Q-in-Q packets.
5223         * Offload features for normal (non-tunnel) packets are unchanged.
5224         */
5225        features = vlan_features_check(skb, features);
5226        if (!skb->encapsulation ||
5227            !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5228                return features;
5229
5230        /* It's an encapsulated packet and VxLAN offloads are enabled. We
5231         * should disable tunnel offload features if it's not a VxLAN packet,
5232         * as tunnel offloads have been enabled only for VxLAN. This is done so
5233         * that other tunneled traffic such as GRE keeps working while VxLAN
5234         * offloads are configured in Skyhawk-R.
5235         */
5236        switch (vlan_get_protocol(skb)) {
5237        case htons(ETH_P_IP):
5238                l4_hdr = ip_hdr(skb)->protocol;
5239                break;
5240        case htons(ETH_P_IPV6):
5241                l4_hdr = ipv6_hdr(skb)->nexthdr;
5242                break;
5243        default:
5244                return features;
5245        }
5246
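            /* Treat the packet as an offloadable VxLAN frame only if it is UDP,
             * carries an inner Ethernet (TEB) header placed right after the
             * UDP + VxLAN headers, and is destined to the configured VxLAN port.
             */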
5247        if (l4_hdr != IPPROTO_UDP ||
5248            skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5249            skb->inner_protocol != htons(ETH_P_TEB) ||
5250            skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5251                sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5252            !adapter->vxlan_port ||
5253            udp_hdr(skb)->dest != adapter->vxlan_port)
5254                return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5255
5256        return features;
5257}
5258
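    /* Build the physical port id from the controller's serial number:
     * byte 0 is hba_port_num + 1 and the remaining bytes are the
     * serial-number words copied in reverse order.
     */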
5259static int be_get_phys_port_id(struct net_device *dev,
5260                               struct netdev_phys_item_id *ppid)
5261{
5262        int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5263        struct be_adapter *adapter = netdev_priv(dev);
5264        u8 *id;
5265
5266        if (MAX_PHYS_ITEM_ID_LEN < id_len)
5267                return -ENOSPC;
5268
5269        ppid->id[0] = adapter->hba_port_num + 1;
5270        id = &ppid->id[1];
5271        for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5272             i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5273                memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5274
5275        ppid->id_len = id_len;
5276
5277        return 0;
5278}
5279
5280static void be_set_rx_mode(struct net_device *dev)
5281{
5282        struct be_adapter *adapter = netdev_priv(dev);
5283        struct be_cmd_work *work;
5284
5285        work = be_alloc_work(adapter, be_work_set_rx_mode);
5286        if (work)
5287                queue_work(be_wq, &work->work);
5288}
5289
5290static const struct net_device_ops be_netdev_ops = {
5291        .ndo_open               = be_open,
5292        .ndo_stop               = be_close,
5293        .ndo_start_xmit         = be_xmit,
5294        .ndo_set_rx_mode        = be_set_rx_mode,
5295        .ndo_set_mac_address    = be_mac_addr_set,
5296        .ndo_get_stats64        = be_get_stats64,
5297        .ndo_validate_addr      = eth_validate_addr,
5298        .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5299        .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5300        .ndo_set_vf_mac         = be_set_vf_mac,
5301        .ndo_set_vf_vlan        = be_set_vf_vlan,
5302        .ndo_set_vf_rate        = be_set_vf_tx_rate,
5303        .ndo_get_vf_config      = be_get_vf_config,
5304        .ndo_set_vf_link_state  = be_set_vf_link_state,
5305        .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5306        .ndo_tx_timeout         = be_tx_timeout,
5307#ifdef CONFIG_NET_POLL_CONTROLLER
5308        .ndo_poll_controller    = be_netpoll,
5309#endif
5310        .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5311        .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5312        .ndo_udp_tunnel_add     = be_add_vxlan_port,
5313        .ndo_udp_tunnel_del     = be_del_vxlan_port,
5314        .ndo_features_check     = be_features_check,
5315        .ndo_get_phys_port_id   = be_get_phys_port_id,
5316};
5317
5318static void be_netdev_init(struct net_device *netdev)
5319{
5320        struct be_adapter *adapter = netdev_priv(netdev);
5321
5322        netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5323                NETIF_F_GSO_UDP_TUNNEL |
5324                NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5325                NETIF_F_HW_VLAN_CTAG_TX;
5326        if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5327                netdev->hw_features |= NETIF_F_RXHASH;
5328
5329        netdev->features |= netdev->hw_features |
5330                NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5331
5332        netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5333                NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5334
5335        netdev->priv_flags |= IFF_UNICAST_FLT;
5336
5337        netdev->flags |= IFF_MULTICAST;
5338
5339        netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5340
5341        netdev->netdev_ops = &be_netdev_ops;
5342
5343        netdev->ethtool_ops = &be_ethtool_ops;
5344
5345        /* MTU range: 256 - 9000 */
5346        netdev->min_mtu = BE_MIN_MTU;
5347        netdev->max_mtu = BE_MAX_MTU;
5348}
5349
5350static void be_cleanup(struct be_adapter *adapter)
5351{
5352        struct net_device *netdev = adapter->netdev;
5353
5354        rtnl_lock();
5355        netif_device_detach(netdev);
5356        if (netif_running(netdev))
5357                be_close(netdev);
5358        rtnl_unlock();
5359
5360        be_clear(adapter);
5361}
5362
5363static int be_resume(struct be_adapter *adapter)
5364{
5365        struct net_device *netdev = adapter->netdev;
5366        int status;
5367
5368        status = be_setup(adapter);
5369        if (status)
5370                return status;
5371
5372        rtnl_lock();
5373        if (netif_running(netdev))
5374                status = be_open(netdev);
5375        rtnl_unlock();
5376
5377        if (status)
5378                return status;
5379
5380        netif_device_attach(netdev);
5381
5382        return 0;
5383}
5384
5385static void be_soft_reset(struct be_adapter *adapter)
5386{
5387        u32 val;
5388
5389        dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5390        val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5391        val |= SLIPORT_SOFTRESET_SR_MASK;
5392        iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5393}
5394
5395static bool be_err_is_recoverable(struct be_adapter *adapter)
5396{
5397        struct be_error_recovery *err_rec = &adapter->error_recovery;
5398        unsigned long initial_idle_time =
5399                msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5400        unsigned long recovery_interval =
5401                msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5402        u16 ue_err_code;
5403        u32 val;
5404
5405        val = be_POST_stage_get(adapter);
5406        if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5407                return false;
5408        ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5409        if (ue_err_code == 0)
5410                return false;
5411
5412        dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5413                ue_err_code);
5414
5415        if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5416                dev_err(&adapter->pdev->dev,
5417                        "Cannot recover within %lu sec from driver load\n",
5418                        jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5419                return false;
5420        }
5421
5422        if (err_rec->last_recovery_time && time_before_eq(
5423                jiffies - err_rec->last_recovery_time, recovery_interval)) {
5424                dev_err(&adapter->pdev->dev,
5425                        "Cannot recover within %lu sec from last recovery\n",
5426                        jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5427                return false;
5428        }
5429
5430        if (ue_err_code == err_rec->last_err_code) {
5431                dev_err(&adapter->pdev->dev,
5432                        "Cannot recover from a consecutive TPE error\n");
5433                return false;
5434        }
5435
5436        err_rec->last_recovery_time = jiffies;
5437        err_rec->last_err_code = ue_err_code;
5438        return true;
5439}
5440
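    /* TPE recovery state machine driven by the switch below:
     * ST_NONE -> ST_DETECT -> (ST_RESET on PF0 only) -> ST_PRE_POLL -> ST_REINIT.
     * -EAGAIN is returned while recovery is still in progress, and resched_delay
     * tells the error-detection task when to run the next step; 0 is returned
     * once ST_PRE_POLL completes.
     */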
5441static int be_tpe_recover(struct be_adapter *adapter)
5442{
5443        struct be_error_recovery *err_rec = &adapter->error_recovery;
5444        int status = -EAGAIN;
5445        u32 val;
5446
5447        switch (err_rec->recovery_state) {
5448        case ERR_RECOVERY_ST_NONE:
5449                err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5450                err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5451                break;
5452
5453        case ERR_RECOVERY_ST_DETECT:
5454                val = be_POST_stage_get(adapter);
5455                if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5456                    POST_STAGE_RECOVERABLE_ERR) {
5457                        dev_err(&adapter->pdev->dev,
5458                                "Unrecoverable HW error detected: 0x%x\n", val);
5459                        status = -EINVAL;
5460                        err_rec->resched_delay = 0;
5461                        break;
5462                }
5463
5464                dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5465
5466                /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5467                 * milliseconds before it checks for final error status in
5468                 * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5469                 * If they are, PF0 initiates a Soft Reset.
5470                 */
5471                if (adapter->pf_num == 0) {
5472                        err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5473                        err_rec->resched_delay = err_rec->ue_to_reset_time -
5474                                        ERR_RECOVERY_UE_DETECT_DURATION;
5475                        break;
5476                }
5477
5478                err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5479                err_rec->resched_delay = err_rec->ue_to_poll_time -
5480                                        ERR_RECOVERY_UE_DETECT_DURATION;
5481                break;
5482
5483        case ERR_RECOVERY_ST_RESET:
5484                if (!be_err_is_recoverable(adapter)) {
5485                        dev_err(&adapter->pdev->dev,
5486                                "Failed to meet recovery criteria\n");
5487                        status = -EIO;
5488                        err_rec->resched_delay = 0;
5489                        break;
5490                }
5491                be_soft_reset(adapter);
5492                err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5493                err_rec->resched_delay = err_rec->ue_to_poll_time -
5494                                        err_rec->ue_to_reset_time;
5495                break;
5496
5497        case ERR_RECOVERY_ST_PRE_POLL:
5498                err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5499                err_rec->resched_delay = 0;
5500                status = 0;                     /* done */
5501                break;
5502
5503        default:
5504                status = -EINVAL;
5505                err_rec->resched_delay = 0;
5506                break;
5507        }
5508
5509        return status;
5510}
5511
5512static int be_err_recover(struct be_adapter *adapter)
5513{
5514        int status;
5515
5516        if (!lancer_chip(adapter)) {
5517                if (!adapter->error_recovery.recovery_supported ||
5518                    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5519                        return -EIO;
5520                status = be_tpe_recover(adapter);
5521                if (status)
5522                        goto err;
5523        }
5524
5525        /* Wait for adapter to reach quiescent state before
5526         * destroying queues
5527         */
5528        status = be_fw_wait_ready(adapter);
5529        if (status)
5530                goto err;
5531
5532        adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5533
5534        be_cleanup(adapter);
5535
5536        status = be_resume(adapter);
5537        if (status)
5538                goto err;
5539
5540        adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5541
5542err:
5543        return status;
5544}
5545
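    /* Periodic error-detection worker: looks for HW errors and, when one is
     * found, runs be_err_recover(). On failure it reschedules itself using the
     * TPE state machine's resched_delay on BEx/SH, or retries up to
     * ERR_RECOVERY_MAX_RETRY_COUNT times on Lancer (Lancer VFs keep retrying
     * every second).
     */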
5546static void be_err_detection_task(struct work_struct *work)
5547{
5548        struct be_error_recovery *err_rec =
5549                        container_of(work, struct be_error_recovery,
5550                                     err_detection_work.work);
5551        struct be_adapter *adapter =
5552                        container_of(err_rec, struct be_adapter,
5553                                     error_recovery);
5554        u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5555        struct device *dev = &adapter->pdev->dev;
5556        int recovery_status;
5557
5558        be_detect_error(adapter);
5559        if (!be_check_error(adapter, BE_ERROR_HW))
5560                goto reschedule_task;
5561
5562        recovery_status = be_err_recover(adapter);
5563        if (!recovery_status) {
5564                err_rec->recovery_retries = 0;
5565                err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5566                dev_info(dev, "Adapter recovery successful\n");
5567                goto reschedule_task;
5568        } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5569                /* BEx/SH recovery state machine */
5570                if (adapter->pf_num == 0 &&
5571                    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5572                        dev_err(&adapter->pdev->dev,
5573                                "Adapter recovery in progress\n");
5574                resched_delay = err_rec->resched_delay;
5575                goto reschedule_task;
5576        } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5577                /* For VFs, check every second whether the PF has
5578                 * allocated resources.
5579                 */
5580                dev_err(dev, "Re-trying adapter recovery\n");
5581                goto reschedule_task;
5582        } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5583                   ERR_RECOVERY_MAX_RETRY_COUNT) {
5584                /* In case of another error during recovery, it takes 30 sec for
5585                 * the adapter to come out of the error state. Retry error
5586                 * recovery after this time interval.
5587                 */
5588                dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5589                resched_delay = ERR_RECOVERY_RETRY_DELAY;
5590                goto reschedule_task;
5591        } else {
5592                dev_err(dev, "Adapter recovery failed\n");
5593                dev_err(dev, "Please reboot server to recover\n");
5594        }
5595
5596        return;
5597
5598reschedule_task:
5599        be_schedule_err_detection(adapter, resched_delay);
5600}
5601
5602static void be_log_sfp_info(struct be_adapter *adapter)
5603{
5604        int status;
5605
5606        status = be_cmd_query_sfp_info(adapter);
5607        if (!status) {
5608                dev_err(&adapter->pdev->dev,
5609                        "Port %c: %s Vendor: %s part no: %s\n",
5610                        adapter->port_name,
5611                        be_misconfig_evt_port_state[adapter->phy_state],
5612                        adapter->phy.vendor_name,
5613                        adapter->phy.vendor_pn);
5614        }
5615        adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5616}
5617
5618static void be_worker(struct work_struct *work)
5619{
5620        struct be_adapter *adapter =
5621                container_of(work, struct be_adapter, work.work);
5622        struct be_rx_obj *rxo;
5623        int i;
5624
5625        if (be_physfn(adapter) &&
5626            MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5627                be_cmd_get_die_temperature(adapter);
5628
5629        /* when interrupts are not yet enabled, just reap any pending
5630         * mcc completions
5631         */
5632        if (!netif_running(adapter->netdev)) {
5633                be_process_mcc(adapter);
5634                goto reschedule;
5635        }
5636
5637        if (!adapter->stats_cmd_sent) {
5638                if (lancer_chip(adapter))
5639                        lancer_cmd_get_pport_stats(adapter,
5640                                                   &adapter->stats_cmd);
5641                else
5642                        be_cmd_get_stats(adapter, &adapter->stats_cmd);
5643        }
5644
5645        for_all_rx_queues(adapter, rxo, i) {
5646                /* Replenish RX-queues starved due to memory
5647                 * allocation failures.
5648                 */
5649                if (rxo->rx_post_starved)
5650                        be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5651        }
5652
5653        /* EQ-delay update for Skyhawk is done while notifying EQ */
5654        if (!skyhawk_chip(adapter))
5655                be_eqd_update(adapter, false);
5656
5657        if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5658                be_log_sfp_info(adapter);
5659
5660reschedule:
5661        adapter->work_counter++;
5662        queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5663}
5664
5665static void be_unmap_pci_bars(struct be_adapter *adapter)
5666{
5667        if (adapter->csr)
5668                pci_iounmap(adapter->pdev, adapter->csr);
5669        if (adapter->db)
5670                pci_iounmap(adapter->pdev, adapter->db);
5671        if (adapter->pcicfg && adapter->pcicfg_mapped)
5672                pci_iounmap(adapter->pdev, adapter->pcicfg);
5673}
5674
5675static int db_bar(struct be_adapter *adapter)
5676{
5677        if (lancer_chip(adapter) || be_virtfn(adapter))
5678                return 0;
5679        else
5680                return 4;
5681}
5682
5683static int be_roce_map_pci_bars(struct be_adapter *adapter)
5684{
5685        if (skyhawk_chip(adapter)) {
5686                adapter->roce_db.size = 4096;
5687                adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5688                                                              db_bar(adapter));
5689                adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5690                                                               db_bar(adapter));
5691        }
5692        return 0;
5693}
5694
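    /* Map the BARs used by the driver: the CSR BAR on BE2/BE3 PFs, the doorbell
     * BAR (see db_bar()), and the PCICFG region, which is its own BAR on
     * BE2/BE3/Skyhawk PFs but lives at a fixed offset inside the doorbell BAR
     * on their VFs.
     */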
5695static int be_map_pci_bars(struct be_adapter *adapter)
5696{
5697        struct pci_dev *pdev = adapter->pdev;
5698        u8 __iomem *addr;
5699        u32 sli_intf;
5700
5701        pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5702        adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5703                                SLI_INTF_FAMILY_SHIFT;
5704        adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5705
5706        if (BEx_chip(adapter) && be_physfn(adapter)) {
5707                adapter->csr = pci_iomap(pdev, 2, 0);
5708                if (!adapter->csr)
5709                        return -ENOMEM;
5710        }
5711
5712        addr = pci_iomap(pdev, db_bar(adapter), 0);
5713        if (!addr)
5714                goto pci_map_err;
5715        adapter->db = addr;
5716
5717        if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5718                if (be_physfn(adapter)) {
5719                        /* PCICFG is the 2nd BAR in BE2 */
5720                        addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5721                        if (!addr)
5722                                goto pci_map_err;
5723                        adapter->pcicfg = addr;
5724                        adapter->pcicfg_mapped = true;
5725                } else {
5726                        adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5727                        adapter->pcicfg_mapped = false;
5728                }
5729        }
5730
5731        be_roce_map_pci_bars(adapter);
5732        return 0;
5733
5734pci_map_err:
5735        dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5736        be_unmap_pci_bars(adapter);
5737        return -ENOMEM;
5738}
5739
5740static void be_drv_cleanup(struct be_adapter *adapter)
5741{
5742        struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5743        struct device *dev = &adapter->pdev->dev;
5744
5745        if (mem->va)
5746                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5747
5748        mem = &adapter->rx_filter;
5749        if (mem->va)
5750                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5751
5752        mem = &adapter->stats_cmd;
5753        if (mem->va)
5754                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5755}
5756
5757/* Allocate and initialize various fields in be_adapter struct */
5758static int be_drv_init(struct be_adapter *adapter)
5759{
5760        struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5761        struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5762        struct be_dma_mem *rx_filter = &adapter->rx_filter;
5763        struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5764        struct device *dev = &adapter->pdev->dev;
5765        int status = 0;
5766
5767        mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5768        mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5769                                                &mbox_mem_alloc->dma,
5770                                                GFP_KERNEL);
5771        if (!mbox_mem_alloc->va)
5772                return -ENOMEM;
5773
5774        mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5775        mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5776        mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5777
5778        rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5779        rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5780                                           &rx_filter->dma, GFP_KERNEL);
5781        if (!rx_filter->va) {
5782                status = -ENOMEM;
5783                goto free_mbox;
5784        }
5785
5786        if (lancer_chip(adapter))
5787                stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5788        else if (BE2_chip(adapter))
5789                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5790        else if (BE3_chip(adapter))
5791                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5792        else
5793                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5794        stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5795                                           &stats_cmd->dma, GFP_KERNEL);
5796        if (!stats_cmd->va) {
5797                status = -ENOMEM;
5798                goto free_rx_filter;
5799        }
5800
5801        mutex_init(&adapter->mbox_lock);
5802        mutex_init(&adapter->mcc_lock);
5803        mutex_init(&adapter->rx_filter_lock);
5804        spin_lock_init(&adapter->mcc_cq_lock);
5805        init_completion(&adapter->et_cmd_compl);
5806
5807        pci_save_state(adapter->pdev);
5808
5809        INIT_DELAYED_WORK(&adapter->work, be_worker);
5810
5811        adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5812        adapter->error_recovery.resched_delay = 0;
5813        INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5814                          be_err_detection_task);
5815
5816        adapter->rx_fc = true;
5817        adapter->tx_fc = true;
5818
5819        /* Must be a power of 2 or else MODULO will BUG_ON */
5820        adapter->be_get_temp_freq = 64;
5821
5822        INIT_LIST_HEAD(&adapter->vxlan_port_list);
5823        return 0;
5824
5825free_rx_filter:
5826        dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5827free_mbox:
5828        dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5829                          mbox_mem_alloc->dma);
5830        return status;
5831}
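/* Note on the mailbox allocation in be_drv_init() above: the buffer is
 * over-allocated by 16 bytes and a 16-byte-aligned view of it is derived
 * for actual use.  A minimal sketch of the pattern (variable names are
 * illustrative, not driver fields):
 *
 *   raw_va   = dma_alloc_coherent(dev, sizeof(mbox) + 16, &raw_dma, GFP_KERNEL);
 *   mbox_va  = PTR_ALIGN(raw_va, 16);     <-- aligned virtual address
 *   mbox_dma = PTR_ALIGN(raw_dma, 16);    <-- matching aligned bus address
 *
 * Only the original, unaligned allocation (mbox_mem_alloced) is handed
 * back to dma_free_coherent() in be_drv_cleanup().
 */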
5832
5833static void be_remove(struct pci_dev *pdev)
5834{
5835        struct be_adapter *adapter = pci_get_drvdata(pdev);
5836
5837        if (!adapter)
5838                return;
5839
5840        be_roce_dev_remove(adapter);
5841        be_intr_set(adapter, false);
5842
5843        be_cancel_err_detection(adapter);
5844
5845        unregister_netdev(adapter->netdev);
5846
5847        be_clear(adapter);
5848
5849        if (!pci_vfs_assigned(adapter->pdev))
5850                be_cmd_reset_function(adapter);
5851
5852        /* Tell the FW we're done issuing cmds */
5853        be_cmd_fw_clean(adapter);
5854
5855        be_unmap_pci_bars(adapter);
5856        be_drv_cleanup(adapter);
5857
5858        pci_disable_pcie_error_reporting(pdev);
5859
5860        pci_release_regions(pdev);
5861        pci_disable_device(pdev);
5862
5863        free_netdev(adapter->netdev);
5864}
5865
5866static ssize_t be_hwmon_show_temp(struct device *dev,
5867                                  struct device_attribute *dev_attr,
5868                                  char *buf)
5869{
5870        struct be_adapter *adapter = dev_get_drvdata(dev);
5871
5872        /* Unit: millidegree Celsius */
5873        if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5874                return -EIO;
5875        else
5876                return sprintf(buf, "%u\n",
5877                               adapter->hwmon_info.be_on_die_temp * 1000);
5878}
5879
5880static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5881                          be_hwmon_show_temp, NULL, 1);
5882
5883static struct attribute *be_hwmon_attrs[] = {
5884        &sensor_dev_attr_temp1_input.dev_attr.attr,
5885        NULL
5886};
5887
5888ATTRIBUTE_GROUPS(be_hwmon);
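/* Usage sketch for the hwmon attribute above (the hwmon index varies per
 * system, so the path is illustrative): once be_probe() has registered the
 * device via devm_hwmon_device_register_with_groups(), the on-die
 * temperature reported by be_hwmon_show_temp() can be read in millidegrees
 * Celsius from sysfs:
 *
 *   cat /sys/class/hwmon/hwmonX/temp1_input
 */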
5889
5890static char *mc_name(struct be_adapter *adapter)
5891{
5892        char *str = ""; /* default */
5893
5894        switch (adapter->mc_type) {
5895        case UMC:
5896                str = "UMC";
5897                break;
5898        case FLEX10:
5899                str = "FLEX10";
5900                break;
5901        case vNIC1:
5902                str = "vNIC-1";
5903                break;
5904        case nPAR:
5905                str = "nPAR";
5906                break;
5907        case UFP:
5908                str = "UFP";
5909                break;
5910        case vNIC2:
5911                str = "vNIC-2";
5912                break;
5913        default:
5914                str = "";
5915        }
5916
5917        return str;
5918}
5919
5920static inline char *func_name(struct be_adapter *adapter)
5921{
5922        return be_physfn(adapter) ? "PF" : "VF";
5923}
5924
5925static inline char *nic_name(struct pci_dev *pdev)
5926{
5927        switch (pdev->device) {
5928        case OC_DEVICE_ID1:
5929                return OC_NAME;
5930        case OC_DEVICE_ID2:
5931                return OC_NAME_BE;
5932        case OC_DEVICE_ID3:
5933        case OC_DEVICE_ID4:
5934                return OC_NAME_LANCER;
5935        case BE_DEVICE_ID2:
5936                return BE3_NAME;
5937        case OC_DEVICE_ID5:
5938        case OC_DEVICE_ID6:
5939                return OC_NAME_SH;
5940        default:
5941                return BE_NAME;
5942        }
5943}
5944
5945static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5946{
5947        struct be_adapter *adapter;
5948        struct net_device *netdev;
5949        int status = 0;
5950
5951        status = pci_enable_device(pdev);
5952        if (status)
5953                goto do_none;
5954
5955        status = pci_request_regions(pdev, DRV_NAME);
5956        if (status)
5957                goto disable_dev;
5958        pci_set_master(pdev);
5959
5960        netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5961        if (!netdev) {
5962                status = -ENOMEM;
5963                goto rel_reg;
5964        }
5965        adapter = netdev_priv(netdev);
5966        adapter->pdev = pdev;
5967        pci_set_drvdata(pdev, adapter);
5968        adapter->netdev = netdev;
5969        SET_NETDEV_DEV(netdev, &pdev->dev);
5970
5971        status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5972        if (!status) {
5973                netdev->features |= NETIF_F_HIGHDMA;
5974        } else {
5975                status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5976                if (status) {
5977                        dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5978                        goto free_netdev;
5979                }
5980        }
5981
5982        status = pci_enable_pcie_error_reporting(pdev);
5983        if (!status)
5984                dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5985
5986        status = be_map_pci_bars(adapter);
5987        if (status)
5988                goto free_netdev;
5989
5990        status = be_drv_init(adapter);
5991        if (status)
5992                goto unmap_bars;
5993
5994        status = be_setup(adapter);
5995        if (status)
5996                goto drv_cleanup;
5997
5998        be_netdev_init(netdev);
5999        status = register_netdev(netdev);
6000        if (status != 0)
6001                goto unsetup;
6002
6003        be_roce_dev_add(adapter);
6004
6005        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6006        adapter->error_recovery.probe_time = jiffies;
6007
6008        /* On-die temperature is not supported on VFs. */
6009        if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
6010                adapter->hwmon_info.hwmon_dev =
6011                        devm_hwmon_device_register_with_groups(&pdev->dev,
6012                                                               DRV_NAME,
6013                                                               adapter,
6014                                                               be_hwmon_groups);
6015                adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
6016        }
6017
6018        dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
6019                 func_name(adapter), mc_name(adapter), adapter->port_name);
6020
6021        return 0;
6022
6023unsetup:
6024        be_clear(adapter);
6025drv_cleanup:
6026        be_drv_cleanup(adapter);
6027unmap_bars:
6028        be_unmap_pci_bars(adapter);
6029free_netdev:
6030        free_netdev(netdev);
6031rel_reg:
6032        pci_release_regions(pdev);
6033disable_dev:
6034        pci_disable_device(pdev);
6035do_none:
6036        dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
6037        return status;
6038}
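/* The error labels in be_probe() above unwind strictly in reverse order of
 * setup: be_setup() is undone by be_clear(), be_drv_init() by
 * be_drv_cleanup(), be_map_pci_bars() by be_unmap_pci_bars(),
 * alloc_etherdev_mqs() by free_netdev(), pci_request_regions() by
 * pci_release_regions() and pci_enable_device() by pci_disable_device().
 */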
6039
6040static int be_suspend(struct pci_dev *pdev, pm_message_t state)
6041{
6042        struct be_adapter *adapter = pci_get_drvdata(pdev);
6043
6044        be_intr_set(adapter, false);
6045        be_cancel_err_detection(adapter);
6046
6047        be_cleanup(adapter);
6048
6049        pci_save_state(pdev);
6050        pci_disable_device(pdev);
6051        pci_set_power_state(pdev, pci_choose_state(pdev, state));
6052        return 0;
6053}
6054
6055static int be_pci_resume(struct pci_dev *pdev)
6056{
6057        struct be_adapter *adapter = pci_get_drvdata(pdev);
6058        int status = 0;
6059
6060        status = pci_enable_device(pdev);
6061        if (status)
6062                return status;
6063
6064        pci_restore_state(pdev);
6065
6066        status = be_resume(adapter);
6067        if (status)
6068                return status;
6069
6070        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6071
6072        return 0;
6073}
6074
6075/*
6076 * A function-level reset (FLR) will stop BE from DMAing any data.
6077 */
6078static void be_shutdown(struct pci_dev *pdev)
6079{
6080        struct be_adapter *adapter = pci_get_drvdata(pdev);
6081
6082        if (!adapter)
6083                return;
6084
6085        be_roce_dev_shutdown(adapter);
6086        cancel_delayed_work_sync(&adapter->work);
6087        be_cancel_err_detection(adapter);
6088
6089        netif_device_detach(adapter->netdev);
6090
6091        be_cmd_reset_function(adapter);
6092
6093        pci_disable_device(pdev);
6094}
6095
6096static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6097                                            pci_channel_state_t state)
6098{
6099        struct be_adapter *adapter = pci_get_drvdata(pdev);
6100
6101        dev_err(&adapter->pdev->dev, "EEH error detected\n");
6102
6103        be_roce_dev_remove(adapter);
6104
6105        if (!be_check_error(adapter, BE_ERROR_EEH)) {
6106                be_set_error(adapter, BE_ERROR_EEH);
6107
6108                be_cancel_err_detection(adapter);
6109
6110                be_cleanup(adapter);
6111        }
6112
6113        if (state == pci_channel_io_perm_failure)
6114                return PCI_ERS_RESULT_DISCONNECT;
6115
6116        pci_disable_device(pdev);
6117
6118        /* The error could cause the FW to trigger a flash debug dump.
6119         * Resetting the card while the flash dump is in progress
6120         * can prevent it from recovering; wait for the dump to finish.
6121         * Only the first function needs to wait, since one wait per
6122         * adapter is sufficient.
6123         */
6124        if (pdev->devfn == 0)
6125                ssleep(30);
6126
6127        return PCI_ERS_RESULT_NEED_RESET;
6128}
6129
6130static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6131{
6132        struct be_adapter *adapter = pci_get_drvdata(pdev);
6133        int status;
6134
6135        dev_info(&adapter->pdev->dev, "EEH reset\n");
6136
6137        status = pci_enable_device(pdev);
6138        if (status)
6139                return PCI_ERS_RESULT_DISCONNECT;
6140
6141        pci_set_master(pdev);
6142        pci_restore_state(pdev);
6143
6144        /* Check if card is ok and fw is ready */
6145        dev_info(&adapter->pdev->dev,
6146                 "Waiting for FW to be ready after EEH reset\n");
6147        status = be_fw_wait_ready(adapter);
6148        if (status)
6149                return PCI_ERS_RESULT_DISCONNECT;
6150
6151        be_clear_error(adapter, BE_CLEAR_ALL);
6152        return PCI_ERS_RESULT_RECOVERED;
6153}
6154
6155static void be_eeh_resume(struct pci_dev *pdev)
6156{
6157        int status = 0;
6158        struct be_adapter *adapter = pci_get_drvdata(pdev);
6159
6160        dev_info(&adapter->pdev->dev, "EEH resume\n");
6161
6162        pci_save_state(pdev);
6163
6164        status = be_resume(adapter);
6165        if (status)
6166                goto err;
6167
6168        be_roce_dev_add(adapter);
6169
6170        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6171        return;
6172err:
6173        dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6174}
6175
6176static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6177{
6178        struct be_adapter *adapter = pci_get_drvdata(pdev);
6179        struct be_resources vft_res = {0};
6180        int status;
6181
6182        if (!num_vfs)
6183                be_vf_clear(adapter);
6184
6185        adapter->num_vfs = num_vfs;
6186
6187        if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6188                dev_warn(&pdev->dev,
6189                         "Cannot disable VFs while they are assigned\n");
6190                return -EBUSY;
6191        }
6192
6193        /* When the HW is in SRIOV capable configuration, the PF-pool resources
6194         * are equally distributed across the max-number of VFs. The user may
6195         * request only a subset of the max-vfs to be enabled.
6196         * Based on num_vfs, redistribute the resources across num_vfs so that
6197         * each VF gets a larger share of those resources.
6198         * This facility is not available in BE3 FW.
6199         * Also, this is done by FW in Lancer chip.
6200         */
6201        if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6202                be_calculate_vf_res(adapter, adapter->num_vfs,
6203                                    &vft_res);
6204                status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6205                                                 adapter->num_vfs, &vft_res);
6206                if (status)
6207                        dev_err(&pdev->dev,
6208                                "Failed to optimize SR-IOV resources\n");
6209        }
6210
6211        status = be_get_resources(adapter);
6212        if (status)
6213                return be_cmd_status(status);
6214
6215        /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6216        rtnl_lock();
6217        status = be_update_queues(adapter);
6218        rtnl_unlock();
6219        if (status)
6220                return be_cmd_status(status);
6221
6222        if (adapter->num_vfs)
6223                status = be_vf_setup(adapter);
6224
6225        if (!status)
6226                return adapter->num_vfs;
6227
6228        return 0;
6229}
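/* Usage sketch: this callback is reached through the standard PCI sysfs
 * interface for SR-IOV (the obsolete num_vfs module parameter is only
 * warned about in be_init_module() below).  The device address here is
 * illustrative:
 *
 *   echo 2 > /sys/bus/pci/devices/0000:03:00.0/sriov_numvfs    (enable 2 VFs)
 *   echo 0 > /sys/bus/pci/devices/0000:03:00.0/sriov_numvfs    (disable VFs)
 */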
6230
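/* Standard PCI error-recovery (EEH/AER) flow for the handlers below: the
 * PCI core invokes .error_detected (be_eeh_err_detected) when an error is
 * reported, .slot_reset (be_eeh_reset) after the slot/link has been reset,
 * and .resume (be_eeh_resume) once normal operation may restart.
 */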
6231static const struct pci_error_handlers be_eeh_handlers = {
6232        .error_detected = be_eeh_err_detected,
6233        .slot_reset = be_eeh_reset,
6234        .resume = be_eeh_resume,
6235};
6236
6237static struct pci_driver be_driver = {
6238        .name = DRV_NAME,
6239        .id_table = be_dev_ids,
6240        .probe = be_probe,
6241        .remove = be_remove,
6242        .suspend = be_suspend,
6243        .resume = be_pci_resume,
6244        .shutdown = be_shutdown,
6245        .sriov_configure = be_pci_sriov_configure,
6246        .err_handler = &be_eeh_handlers
6247};
6248
6249static int __init be_init_module(void)
6250{
6251        int status;
6252
6253        if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6254            rx_frag_size != 2048) {
6255                printk(KERN_WARNING DRV_NAME
6256                        " : Module param rx_frag_size must be 2048/4096/8192."
6257                        " Using 2048\n");
6258                rx_frag_size = 2048;
6259        }
6260
6261        if (num_vfs > 0) {
6262                pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6263                pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6264        }
6265
6266        be_wq = create_singlethread_workqueue("be_wq");
6267        if (!be_wq) {
6268                pr_warn(DRV_NAME " : workqueue creation failed\n");
6269                return -ENOMEM;
6270        }
6271
6272        be_err_recovery_workq =
6273                create_singlethread_workqueue("be_err_recover");
6274        if (!be_err_recovery_workq)
6275                pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6276
6277        status = pci_register_driver(&be_driver);
6278        if (status) {
6279                destroy_workqueue(be_wq);
6280                be_destroy_err_recovery_workq();
6281        }
6282        return status;
6283}
6284module_init(be_init_module);
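/* Usage sketch for the rx_frag_size check in be_init_module() above
 * (module name per DRV_NAME, assumed here to be the usual "be2net"):
 *
 *   modprobe be2net rx_frag_size=4096    (2048, 4096 and 8192 are accepted)
 */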
6285
6286static void __exit be_exit_module(void)
6287{
6288        pci_unregister_driver(&be_driver);
6289
6290        be_destroy_err_recovery_workq();
6291
6292        if (be_wq)
6293                destroy_workqueue(be_wq);
6294}
6295module_exit(be_exit_module);
6296