linux/drivers/net/ethernet/emulex/benet/be_main.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2005 - 2016 Broadcom
   4 * All rights reserved.
   5 *
   6 * Contact Information:
   7 * linux-drivers@emulex.com
   8 *
   9 * Emulex
  10 * 3333 Susan Street
  11 * Costa Mesa, CA 92626
  12 */
  13
  14#include <linux/prefetch.h>
  15#include <linux/module.h>
  16#include "be.h"
  17#include "be_cmds.h"
  18#include <asm/div64.h>
  19#include <linux/aer.h>
  20#include <linux/if_bridge.h>
  21#include <net/busy_poll.h>
  22#include <net/vxlan.h>
  23
  24MODULE_DESCRIPTION(DRV_DESC);
  25MODULE_AUTHOR("Emulex Corporation");
  26MODULE_LICENSE("GPL");
  27
  28/* num_vfs module param is obsolete.
  29 * Use sysfs method to enable/disable VFs.
  30 */
  31static unsigned int num_vfs;
  32module_param(num_vfs, uint, 0444);
  33MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
  34
  35static ushort rx_frag_size = 2048;
  36module_param(rx_frag_size, ushort, 0444);
  37MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
  38
  39/* Per-module error detection/recovery workq shared across all functions.
  40 * Each function schedules its own work request on this shared workq.
  41 */
  42static struct workqueue_struct *be_err_recovery_workq;
  43
  44static const struct pci_device_id be_dev_ids[] = {
  45#ifdef CONFIG_BE2NET_BE2
  46        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
  47        { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
  48#endif /* CONFIG_BE2NET_BE2 */
  49#ifdef CONFIG_BE2NET_BE3
  50        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
  51        { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
  52#endif /* CONFIG_BE2NET_BE3 */
  53#ifdef CONFIG_BE2NET_LANCER
  54        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
  55        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
  56#endif /* CONFIG_BE2NET_LANCER */
  57#ifdef CONFIG_BE2NET_SKYHAWK
  58        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
  59        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
  60#endif /* CONFIG_BE2NET_SKYHAWK */
  61        { 0 }
  62};
  63MODULE_DEVICE_TABLE(pci, be_dev_ids);
  64
   65/* Workqueue used by all functions for deferring cmd calls to the adapter */
  66static struct workqueue_struct *be_wq;
  67
  68/* UE Status Low CSR */
  69static const char * const ue_status_low_desc[] = {
  70        "CEV",
  71        "CTX",
  72        "DBUF",
  73        "ERX",
  74        "Host",
  75        "MPU",
  76        "NDMA",
  77        "PTC ",
  78        "RDMA ",
  79        "RXF ",
  80        "RXIPS ",
  81        "RXULP0 ",
  82        "RXULP1 ",
  83        "RXULP2 ",
  84        "TIM ",
  85        "TPOST ",
  86        "TPRE ",
  87        "TXIPS ",
  88        "TXULP0 ",
  89        "TXULP1 ",
  90        "UC ",
  91        "WDMA ",
  92        "TXULP2 ",
  93        "HOST1 ",
  94        "P0_OB_LINK ",
  95        "P1_OB_LINK ",
  96        "HOST_GPIO ",
  97        "MBOX ",
  98        "ERX2 ",
  99        "SPARE ",
 100        "JTAG ",
 101        "MPU_INTPEND "
 102};
 103
 104/* UE Status High CSR */
 105static const char * const ue_status_hi_desc[] = {
 106        "LPCMEMHOST",
 107        "MGMT_MAC",
 108        "PCS0ONLINE",
 109        "MPU_IRAM",
 110        "PCS1ONLINE",
 111        "PCTL0",
 112        "PCTL1",
 113        "PMEM",
 114        "RR",
 115        "TXPB",
 116        "RXPP",
 117        "XAUI",
 118        "TXP",
 119        "ARM",
 120        "IPC",
 121        "HOST2",
 122        "HOST3",
 123        "HOST4",
 124        "HOST5",
 125        "HOST6",
 126        "HOST7",
 127        "ECRC",
 128        "Poison TLP",
 129        "NETC",
 130        "PERIPH",
 131        "LLTXULP",
 132        "D2P",
 133        "RCON",
 134        "LDMA",
 135        "LLTXP",
 136        "LLTXPB",
 137        "Unknown"
 138};
 139
 140#define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
 141                                 BE_IF_FLAGS_BROADCAST | \
 142                                 BE_IF_FLAGS_MULTICAST | \
 143                                 BE_IF_FLAGS_PASS_L3L4_ERRORS)
 144
 145static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
 146{
 147        struct be_dma_mem *mem = &q->dma_mem;
 148
 149        if (mem->va) {
 150                dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
 151                                  mem->dma);
 152                mem->va = NULL;
 153        }
 154}
 155
 156static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
 157                          u16 len, u16 entry_size)
 158{
 159        struct be_dma_mem *mem = &q->dma_mem;
 160
 161        memset(q, 0, sizeof(*q));
 162        q->len = len;
 163        q->entry_size = entry_size;
 164        mem->size = len * entry_size;
 165        mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
 166                                     &mem->dma, GFP_KERNEL);
 167        if (!mem->va)
 168                return -ENOMEM;
 169        return 0;
 170}
 171
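/* Enable/disable host interrupts by toggling the HOSTINTR bit in the
 * MEMBAR interrupt-control register via PCI config space. The register is
 * only written when the requested state differs from the current one.
 */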
 172static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
 173{
 174        u32 reg, enabled;
 175
 176        pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
 177                              &reg);
 178        enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 179
 180        if (!enabled && enable)
 181                reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 182        else if (enabled && !enable)
 183                reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 184        else
 185                return;
 186
 187        pci_write_config_dword(adapter->pdev,
 188                               PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
 189}
 190
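/* Enable/disable interrupts via the FW INTR_SET cmd; fall back to the
 * membar register if the cmd fails. Not applicable on Lancer.
 */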
 191static void be_intr_set(struct be_adapter *adapter, bool enable)
 192{
 193        int status = 0;
 194
 195        /* On lancer interrupts can't be controlled via this register */
 196        if (lancer_chip(adapter))
 197                return;
 198
 199        if (be_check_error(adapter, BE_ERROR_EEH))
 200                return;
 201
 202        status = be_cmd_intr_set(adapter, enable);
 203        if (status)
 204                be_reg_intr_set(adapter, enable);
 205}
 206
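/* Ring the RX queue doorbell to make 'posted' newly refilled buffers
 * visible to the HW.
 */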
 207static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
 208{
 209        u32 val = 0;
 210
 211        if (be_check_error(adapter, BE_ERROR_HW))
 212                return;
 213
 214        val |= qid & DB_RQ_RING_ID_MASK;
 215        val |= posted << DB_RQ_NUM_POSTED_SHIFT;
 216
 217        wmb();
 218        iowrite32(val, adapter->db + DB_RQ_OFFSET);
 219}
 220
 221static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
 222                          u16 posted)
 223{
 224        u32 val = 0;
 225
 226        if (be_check_error(adapter, BE_ERROR_HW))
 227                return;
 228
 229        val |= txo->q.id & DB_TXULP_RING_ID_MASK;
 230        val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
 231
 232        wmb();
 233        iowrite32(val, adapter->db + txo->db_offset);
 234}
 235
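/* Ring the EQ doorbell: acknowledge 'num_popped' events and optionally
 * re-arm the EQ, clear the interrupt and program the interrupt delay
 * multiplier.
 */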
 236static void be_eq_notify(struct be_adapter *adapter, u16 qid,
 237                         bool arm, bool clear_int, u16 num_popped,
 238                         u32 eq_delay_mult_enc)
 239{
 240        u32 val = 0;
 241
 242        val |= qid & DB_EQ_RING_ID_MASK;
 243        val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
 244
 245        if (be_check_error(adapter, BE_ERROR_HW))
 246                return;
 247
 248        if (arm)
 249                val |= 1 << DB_EQ_REARM_SHIFT;
 250        if (clear_int)
 251                val |= 1 << DB_EQ_CLR_SHIFT;
 252        val |= 1 << DB_EQ_EVNT_SHIFT;
 253        val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
 254        val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
 255        iowrite32(val, adapter->db + DB_EQ_OFFSET);
 256}
 257
 258void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
 259{
 260        u32 val = 0;
 261
 262        val |= qid & DB_CQ_RING_ID_MASK;
 263        val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
 264                        DB_CQ_RING_ID_EXT_MASK_SHIFT);
 265
 266        if (be_check_error(adapter, BE_ERROR_HW))
 267                return;
 268
 269        if (arm)
 270                val |= 1 << DB_CQ_REARM_SHIFT;
 271        val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
 272        iowrite32(val, adapter->db + DB_CQ_OFFSET);
 273}
 274
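/* Program 'mac' as the primary device MAC (pmac_id[0]). If the address is
 * already present in the uc-list, reuse that entry's pmac_id instead of
 * issuing another PMAC_ADD cmd.
 */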
 275static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
 276{
 277        int i;
 278
 279        /* Check if mac has already been added as part of uc-list */
 280        for (i = 0; i < adapter->uc_macs; i++) {
 281                if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
 282                        /* mac already added, skip addition */
 283                        adapter->pmac_id[0] = adapter->pmac_id[i + 1];
 284                        return 0;
 285                }
 286        }
 287
 288        return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
 289                               &adapter->pmac_id[0], 0);
 290}
 291
 292static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
 293{
 294        int i;
 295
 296        /* Skip deletion if the programmed mac is
 297         * being used in uc-list
 298         */
 299        for (i = 0; i < adapter->uc_macs; i++) {
 300                if (adapter->pmac_id[i + 1] == pmac_id)
 301                        return;
 302        }
 303        be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
 304}
 305
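/* ndo_set_mac_address() handler. When the interface is running, the new
 * MAC is programmed in HW and verified as active with the FW before
 * netdev->dev_addr is updated.
 */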
 306static int be_mac_addr_set(struct net_device *netdev, void *p)
 307{
 308        struct be_adapter *adapter = netdev_priv(netdev);
 309        struct device *dev = &adapter->pdev->dev;
 310        struct sockaddr *addr = p;
 311        int status;
 312        u8 mac[ETH_ALEN];
 313        u32 old_pmac_id = adapter->pmac_id[0];
 314
 315        if (!is_valid_ether_addr(addr->sa_data))
 316                return -EADDRNOTAVAIL;
 317
  318        /* Proceed further only if the user-provided MAC is different
  319         * from the active MAC
 320         */
 321        if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
 322                return 0;
 323
  324        /* BE3 VFs without FILTMGMT privilege are not allowed to set their MAC
 325         * address
 326         */
 327        if (BEx_chip(adapter) && be_virtfn(adapter) &&
 328            !check_privilege(adapter, BE_PRIV_FILTMGMT))
 329                return -EPERM;
 330
 331        /* if device is not running, copy MAC to netdev->dev_addr */
 332        if (!netif_running(netdev))
 333                goto done;
 334
 335        /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
 336         * privilege or if PF did not provision the new MAC address.
 337         * On BE3, this cmd will always fail if the VF doesn't have the
  338         * FILTMGMT privilege. This failure is OK only if the PF programmed
 339         * the MAC for the VF.
 340         */
 341        mutex_lock(&adapter->rx_filter_lock);
 342        status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
 343        if (!status) {
 344
 345                /* Delete the old programmed MAC. This call may fail if the
 346                 * old MAC was already deleted by the PF driver.
 347                 */
 348                if (adapter->pmac_id[0] != old_pmac_id)
 349                        be_dev_mac_del(adapter, old_pmac_id);
 350        }
 351
 352        mutex_unlock(&adapter->rx_filter_lock);
 353        /* Decide if the new MAC is successfully activated only after
 354         * querying the FW
 355         */
 356        status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
 357                                       adapter->if_handle, true, 0);
 358        if (status)
 359                goto err;
 360
 361        /* The MAC change did not happen, either due to lack of privilege
  362         * or because the PF didn't pre-provision it.
 363         */
 364        if (!ether_addr_equal(addr->sa_data, mac)) {
 365                status = -EPERM;
 366                goto err;
 367        }
 368
 369        /* Remember currently programmed MAC */
 370        ether_addr_copy(adapter->dev_mac, addr->sa_data);
 371done:
 372        ether_addr_copy(netdev->dev_addr, addr->sa_data);
 373        dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
 374        return 0;
 375err:
 376        dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
 377        return status;
 378}
 379
  380/* BE2 supports only the v0 cmd; BE3 uses v1 and later chips use v2 */
 381static void *hw_stats_from_cmd(struct be_adapter *adapter)
 382{
 383        if (BE2_chip(adapter)) {
 384                struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
 385
 386                return &cmd->hw_stats;
 387        } else if (BE3_chip(adapter)) {
 388                struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
 389
 390                return &cmd->hw_stats;
 391        } else {
 392                struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
 393
 394                return &cmd->hw_stats;
 395        }
 396}
 397
  398/* BE2 supports only the v0 cmd; BE3 uses v1 and later chips use v2 */
 399static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
 400{
 401        if (BE2_chip(adapter)) {
 402                struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 403
 404                return &hw_stats->erx;
 405        } else if (BE3_chip(adapter)) {
 406                struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 407
 408                return &hw_stats->erx;
 409        } else {
 410                struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 411
 412                return &hw_stats->erx;
 413        }
 414}
 415
 416static void populate_be_v0_stats(struct be_adapter *adapter)
 417{
 418        struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 419        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 420        struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
 421        struct be_port_rxf_stats_v0 *port_stats =
 422                                        &rxf_stats->port[adapter->port_num];
 423        struct be_drv_stats *drvs = &adapter->drv_stats;
 424
 425        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 426        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 427        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 428        drvs->rx_control_frames = port_stats->rx_control_frames;
 429        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 430        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 431        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 432        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 433        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 434        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 435        drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
 436        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 437        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 438        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 439        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 440        drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
 441        drvs->rx_dropped_header_too_small =
 442                port_stats->rx_dropped_header_too_small;
 443        drvs->rx_address_filtered =
 444                                        port_stats->rx_address_filtered +
 445                                        port_stats->rx_vlan_filtered;
 446        drvs->rx_alignment_symbol_errors =
 447                port_stats->rx_alignment_symbol_errors;
 448
 449        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 450        drvs->tx_controlframes = port_stats->tx_controlframes;
 451
 452        if (adapter->port_num)
 453                drvs->jabber_events = rxf_stats->port1_jabber_events;
 454        else
 455                drvs->jabber_events = rxf_stats->port0_jabber_events;
 456        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 457        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 458        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 459        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 460        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 461        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 462        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 463}
 464
 465static void populate_be_v1_stats(struct be_adapter *adapter)
 466{
 467        struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 468        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 469        struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
 470        struct be_port_rxf_stats_v1 *port_stats =
 471                                        &rxf_stats->port[adapter->port_num];
 472        struct be_drv_stats *drvs = &adapter->drv_stats;
 473
 474        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 475        drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 476        drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 477        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 478        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 479        drvs->rx_control_frames = port_stats->rx_control_frames;
 480        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 481        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 482        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 483        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 484        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 485        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 486        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 487        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 488        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 489        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 490        drvs->rx_dropped_header_too_small =
 491                port_stats->rx_dropped_header_too_small;
 492        drvs->rx_input_fifo_overflow_drop =
 493                port_stats->rx_input_fifo_overflow_drop;
 494        drvs->rx_address_filtered = port_stats->rx_address_filtered;
 495        drvs->rx_alignment_symbol_errors =
 496                port_stats->rx_alignment_symbol_errors;
 497        drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 498        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 499        drvs->tx_controlframes = port_stats->tx_controlframes;
 500        drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 501        drvs->jabber_events = port_stats->jabber_events;
 502        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 503        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 504        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 505        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 506        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 507        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 508        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 509}
 510
 511static void populate_be_v2_stats(struct be_adapter *adapter)
 512{
 513        struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 514        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 515        struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
 516        struct be_port_rxf_stats_v2 *port_stats =
 517                                        &rxf_stats->port[adapter->port_num];
 518        struct be_drv_stats *drvs = &adapter->drv_stats;
 519
 520        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 521        drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 522        drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 523        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 524        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 525        drvs->rx_control_frames = port_stats->rx_control_frames;
 526        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 527        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 528        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 529        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 530        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 531        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 532        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 533        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 534        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 535        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 536        drvs->rx_dropped_header_too_small =
 537                port_stats->rx_dropped_header_too_small;
 538        drvs->rx_input_fifo_overflow_drop =
 539                port_stats->rx_input_fifo_overflow_drop;
 540        drvs->rx_address_filtered = port_stats->rx_address_filtered;
 541        drvs->rx_alignment_symbol_errors =
 542                port_stats->rx_alignment_symbol_errors;
 543        drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 544        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 545        drvs->tx_controlframes = port_stats->tx_controlframes;
 546        drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 547        drvs->jabber_events = port_stats->jabber_events;
 548        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 549        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 550        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 551        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 552        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 553        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 554        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 555        if (be_roce_supported(adapter)) {
 556                drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
 557                drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
 558                drvs->rx_roce_frames = port_stats->roce_frames_received;
 559                drvs->roce_drops_crc = port_stats->roce_drops_crc;
 560                drvs->roce_drops_payload_len =
 561                        port_stats->roce_drops_payload_len;
 562        }
 563}
 564
 565static void populate_lancer_stats(struct be_adapter *adapter)
 566{
 567        struct be_drv_stats *drvs = &adapter->drv_stats;
 568        struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
 569
 570        be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
 571        drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
 572        drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
 573        drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
 574        drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
 575        drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
 576        drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
 577        drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
 578        drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
 579        drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
 580        drvs->rx_dropped_tcp_length =
 581                                pport_stats->rx_dropped_invalid_tcp_length;
 582        drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
 583        drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
 584        drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
 585        drvs->rx_dropped_header_too_small =
 586                                pport_stats->rx_dropped_header_too_small;
 587        drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 588        drvs->rx_address_filtered =
 589                                        pport_stats->rx_address_filtered +
 590                                        pport_stats->rx_vlan_filtered;
 591        drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
 592        drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 593        drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
 594        drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
 595        drvs->jabber_events = pport_stats->rx_jabbers;
 596        drvs->forwarded_packets = pport_stats->num_forwards_lo;
 597        drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
 598        drvs->rx_drops_too_many_frags =
 599                                pport_stats->rx_drops_too_many_frags_lo;
 600}
 601
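/* Accumulate a HW counter that is only 16 bits wide (wraps at 65535) into
 * a 32-bit SW counter; a wrap is detected when the new reading is smaller
 * than the stored low 16 bits.
 */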
 602static void accumulate_16bit_val(u32 *acc, u16 val)
 603{
 604#define lo(x)                   (x & 0xFFFF)
 605#define hi(x)                   (x & 0xFFFF0000)
 606        bool wrapped = val < lo(*acc);
 607        u32 newacc = hi(*acc) + val;
 608
 609        if (wrapped)
 610                newacc += 65536;
 611        WRITE_ONCE(*acc, newacc);
 612}
 613
 614static void populate_erx_stats(struct be_adapter *adapter,
 615                               struct be_rx_obj *rxo, u32 erx_stat)
 616{
 617        if (!BEx_chip(adapter))
 618                rx_stats(rxo)->rx_drops_no_frags = erx_stat;
 619        else
  620                /* the erx HW counter below can wrap around after
  621                 * 65535; the driver accumulates it into a 32-bit value
 622                 */
 623                accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
 624                                     (u16)erx_stat);
 625}
 626
 627void be_parse_stats(struct be_adapter *adapter)
 628{
 629        struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
 630        struct be_rx_obj *rxo;
 631        int i;
 632        u32 erx_stat;
 633
 634        if (lancer_chip(adapter)) {
 635                populate_lancer_stats(adapter);
 636        } else {
 637                if (BE2_chip(adapter))
 638                        populate_be_v0_stats(adapter);
 639                else if (BE3_chip(adapter))
 640                        /* for BE3 */
 641                        populate_be_v1_stats(adapter);
 642                else
 643                        populate_be_v2_stats(adapter);
 644
 645                /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
 646                for_all_rx_queues(adapter, rxo, i) {
 647                        erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
 648                        populate_erx_stats(adapter, rxo, erx_stat);
 649                }
 650        }
 651}
 652
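/* ndo_get_stats64() handler: sums the per-queue SW counters (sampled under
 * u64_stats sync) and derives the error counters from the FW stats parsed
 * into drv_stats.
 */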
 653static void be_get_stats64(struct net_device *netdev,
 654                           struct rtnl_link_stats64 *stats)
 655{
 656        struct be_adapter *adapter = netdev_priv(netdev);
 657        struct be_drv_stats *drvs = &adapter->drv_stats;
 658        struct be_rx_obj *rxo;
 659        struct be_tx_obj *txo;
 660        u64 pkts, bytes;
 661        unsigned int start;
 662        int i;
 663
 664        for_all_rx_queues(adapter, rxo, i) {
 665                const struct be_rx_stats *rx_stats = rx_stats(rxo);
 666
 667                do {
 668                        start = u64_stats_fetch_begin_irq(&rx_stats->sync);
 669                        pkts = rx_stats(rxo)->rx_pkts;
 670                        bytes = rx_stats(rxo)->rx_bytes;
 671                } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
 672                stats->rx_packets += pkts;
 673                stats->rx_bytes += bytes;
 674                stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
 675                stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
 676                                        rx_stats(rxo)->rx_drops_no_frags;
 677        }
 678
 679        for_all_tx_queues(adapter, txo, i) {
 680                const struct be_tx_stats *tx_stats = tx_stats(txo);
 681
 682                do {
 683                        start = u64_stats_fetch_begin_irq(&tx_stats->sync);
 684                        pkts = tx_stats(txo)->tx_pkts;
 685                        bytes = tx_stats(txo)->tx_bytes;
 686                } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
 687                stats->tx_packets += pkts;
 688                stats->tx_bytes += bytes;
 689        }
 690
 691        /* bad pkts received */
 692        stats->rx_errors = drvs->rx_crc_errors +
 693                drvs->rx_alignment_symbol_errors +
 694                drvs->rx_in_range_errors +
 695                drvs->rx_out_range_errors +
 696                drvs->rx_frame_too_long +
 697                drvs->rx_dropped_too_small +
 698                drvs->rx_dropped_too_short +
 699                drvs->rx_dropped_header_too_small +
 700                drvs->rx_dropped_tcp_length +
 701                drvs->rx_dropped_runt;
 702
 703        /* detailed rx errors */
 704        stats->rx_length_errors = drvs->rx_in_range_errors +
 705                drvs->rx_out_range_errors +
 706                drvs->rx_frame_too_long;
 707
 708        stats->rx_crc_errors = drvs->rx_crc_errors;
 709
 710        /* frame alignment errors */
 711        stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
 712
 713        /* receiver fifo overrun */
  714        /* drops_no_pbuf is not per i/f, it's per BE card */
 715        stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
 716                                drvs->rx_input_fifo_overflow_drop +
 717                                drvs->rx_drops_no_pbuf;
 718}
 719
 720void be_link_status_update(struct be_adapter *adapter, u8 link_status)
 721{
 722        struct net_device *netdev = adapter->netdev;
 723
 724        if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
 725                netif_carrier_off(netdev);
 726                adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
 727        }
 728
 729        if (link_status)
 730                netif_carrier_on(netdev);
 731        else
 732                netif_carrier_off(netdev);
 733
 734        netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
 735}
 736
 737static int be_gso_hdr_len(struct sk_buff *skb)
 738{
 739        if (skb->encapsulation)
 740                return skb_inner_transport_offset(skb) +
 741                       inner_tcp_hdrlen(skb);
 742        return skb_transport_offset(skb) + tcp_hdrlen(skb);
 743}
 744
 745static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
 746{
 747        struct be_tx_stats *stats = tx_stats(txo);
 748        u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
 749        /* Account for headers which get duplicated in TSO pkt */
 750        u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
 751
 752        u64_stats_update_begin(&stats->sync);
 753        stats->tx_reqs++;
 754        stats->tx_bytes += skb->len + dup_hdr_len;
 755        stats->tx_pkts += tx_pkts;
 756        if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
 757                stats->tx_vxlan_offload_pkts += tx_pkts;
 758        u64_stats_update_end(&stats->sync);
 759}
 760
 761/* Returns number of WRBs needed for the skb */
 762static u32 skb_wrb_cnt(struct sk_buff *skb)
 763{
 764        /* +1 for the header wrb */
 765        return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
 766}
 767
 768static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
 769{
 770        wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
 771        wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
 772        wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
 773        wrb->rsvd0 = 0;
 774}
 775
 776/* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
 777 * to avoid the swap and shift/mask operations in wrb_fill().
 778 */
 779static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
 780{
 781        wrb->frag_pa_hi = 0;
 782        wrb->frag_pa_lo = 0;
 783        wrb->frag_len = 0;
 784        wrb->rsvd0 = 0;
 785}
 786
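/* Return the VLAN tag to use for a TX pkt; if the skb's priority is not in
 * the available priority bitmap, it is replaced with the FW-recommended
 * priority bits.
 */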
 787static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
 788                                     struct sk_buff *skb)
 789{
 790        u8 vlan_prio;
 791        u16 vlan_tag;
 792
 793        vlan_tag = skb_vlan_tag_get(skb);
 794        vlan_prio = skb_vlan_tag_get_prio(skb);
 795        /* If vlan priority provided by OS is NOT in available bmap */
 796        if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
 797                vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
 798                                adapter->recommended_prio_bits;
 799
 800        return vlan_tag;
 801}
 802
 803/* Used only for IP tunnel packets */
 804static u16 skb_inner_ip_proto(struct sk_buff *skb)
 805{
 806        return (inner_ip_hdr(skb)->version == 4) ?
 807                inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
 808}
 809
 810static u16 skb_ip_proto(struct sk_buff *skb)
 811{
 812        return (ip_hdr(skb)->version == 4) ?
 813                ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
 814}
 815
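/* TXQ occupancy helpers used for TX flow control: the queue is considered
 * full when a maximally fragmented skb may no longer fit.
 */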
 816static inline bool be_is_txq_full(struct be_tx_obj *txo)
 817{
 818        return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
 819}
 820
 821static inline bool be_can_txq_wake(struct be_tx_obj *txo)
 822{
 823        return atomic_read(&txo->q.used) < txo->q.len / 2;
 824}
 825
 826static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
 827{
 828        return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
 829}
 830
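/* Translate the skb's offload requests (LSO, inner/outer checksum offload,
 * VLAN tag) into WRB header feature bits.
 */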
 831static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
 832                                       struct sk_buff *skb,
 833                                       struct be_wrb_params *wrb_params)
 834{
 835        u16 proto;
 836
 837        if (skb_is_gso(skb)) {
 838                BE_WRB_F_SET(wrb_params->features, LSO, 1);
 839                wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
 840                if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
 841                        BE_WRB_F_SET(wrb_params->features, LSO6, 1);
 842        } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 843                if (skb->encapsulation) {
 844                        BE_WRB_F_SET(wrb_params->features, IPCS, 1);
 845                        proto = skb_inner_ip_proto(skb);
 846                } else {
 847                        proto = skb_ip_proto(skb);
 848                }
 849                if (proto == IPPROTO_TCP)
 850                        BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
 851                else if (proto == IPPROTO_UDP)
 852                        BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
 853        }
 854
 855        if (skb_vlan_tag_present(skb)) {
 856                BE_WRB_F_SET(wrb_params->features, VLAN, 1);
 857                wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
 858        }
 859
 860        BE_WRB_F_SET(wrb_params->features, CRC, 1);
 861}
 862
 863static void wrb_fill_hdr(struct be_adapter *adapter,
 864                         struct be_eth_hdr_wrb *hdr,
 865                         struct be_wrb_params *wrb_params,
 866                         struct sk_buff *skb)
 867{
 868        memset(hdr, 0, sizeof(*hdr));
 869
 870        SET_TX_WRB_HDR_BITS(crc, hdr,
 871                            BE_WRB_F_GET(wrb_params->features, CRC));
 872        SET_TX_WRB_HDR_BITS(ipcs, hdr,
 873                            BE_WRB_F_GET(wrb_params->features, IPCS));
 874        SET_TX_WRB_HDR_BITS(tcpcs, hdr,
 875                            BE_WRB_F_GET(wrb_params->features, TCPCS));
 876        SET_TX_WRB_HDR_BITS(udpcs, hdr,
 877                            BE_WRB_F_GET(wrb_params->features, UDPCS));
 878
 879        SET_TX_WRB_HDR_BITS(lso, hdr,
 880                            BE_WRB_F_GET(wrb_params->features, LSO));
 881        SET_TX_WRB_HDR_BITS(lso6, hdr,
 882                            BE_WRB_F_GET(wrb_params->features, LSO6));
 883        SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
 884
 885        /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
 886         * hack is not needed, the evt bit is set while ringing DB.
 887         */
 888        SET_TX_WRB_HDR_BITS(event, hdr,
 889                            BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
 890        SET_TX_WRB_HDR_BITS(vlan, hdr,
 891                            BE_WRB_F_GET(wrb_params->features, VLAN));
 892        SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
 893
 894        SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
 895        SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
 896        SET_TX_WRB_HDR_BITS(mgmt, hdr,
 897                            BE_WRB_F_GET(wrb_params->features, OS2BMC));
 898}
 899
 900static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
 901                          bool unmap_single)
 902{
 903        dma_addr_t dma;
 904        u32 frag_len = le32_to_cpu(wrb->frag_len);
 905
 906
 907        dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
 908                (u64)le32_to_cpu(wrb->frag_pa_lo);
 909        if (frag_len) {
 910                if (unmap_single)
 911                        dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
 912                else
 913                        dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
 914        }
 915}
 916
 917/* Grab a WRB header for xmit */
 918static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
 919{
 920        u32 head = txo->q.head;
 921
 922        queue_head_inc(&txo->q);
 923        return head;
 924}
 925
 926/* Set up the WRB header for xmit */
 927static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
 928                                struct be_tx_obj *txo,
 929                                struct be_wrb_params *wrb_params,
 930                                struct sk_buff *skb, u16 head)
 931{
 932        u32 num_frags = skb_wrb_cnt(skb);
 933        struct be_queue_info *txq = &txo->q;
 934        struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
 935
 936        wrb_fill_hdr(adapter, hdr, wrb_params, skb);
 937        be_dws_cpu_to_le(hdr, sizeof(*hdr));
 938
 939        BUG_ON(txo->sent_skb_list[head]);
 940        txo->sent_skb_list[head] = skb;
 941        txo->last_req_hdr = head;
 942        atomic_add(num_frags, &txq->used);
 943        txo->last_req_wrb_cnt = num_frags;
 944        txo->pend_wrb_cnt += num_frags;
 945}
 946
 947/* Setup a WRB fragment (buffer descriptor) for xmit */
 948static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
 949                                 int len)
 950{
 951        struct be_eth_wrb *wrb;
 952        struct be_queue_info *txq = &txo->q;
 953
 954        wrb = queue_head_node(txq);
 955        wrb_fill(wrb, busaddr, len);
 956        queue_head_inc(txq);
 957}
 958
 959/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
 960 * was invoked. The producer index is restored to the previous packet and the
 961 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
 962 */
 963static void be_xmit_restore(struct be_adapter *adapter,
 964                            struct be_tx_obj *txo, u32 head, bool map_single,
 965                            u32 copied)
 966{
 967        struct device *dev;
 968        struct be_eth_wrb *wrb;
 969        struct be_queue_info *txq = &txo->q;
 970
 971        dev = &adapter->pdev->dev;
 972        txq->head = head;
 973
 974        /* skip the first wrb (hdr); it's not mapped */
 975        queue_head_inc(txq);
 976        while (copied) {
 977                wrb = queue_head_node(txq);
 978                unmap_tx_frag(dev, wrb, map_single);
 979                map_single = false;
 980                copied -= le32_to_cpu(wrb->frag_len);
 981                queue_head_inc(txq);
 982        }
 983
 984        txq->head = head;
 985}
 986
 987/* Enqueue the given packet for transmit. This routine allocates WRBs for the
 988 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
 989 * of WRBs used up by the packet.
 990 */
 991static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
 992                           struct sk_buff *skb,
 993                           struct be_wrb_params *wrb_params)
 994{
 995        u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
 996        struct device *dev = &adapter->pdev->dev;
 997        bool map_single = false;
 998        u32 head;
 999        dma_addr_t busaddr;
1000        int len;
1001
1002        head = be_tx_get_wrb_hdr(txo);
1003
1004        if (skb->len > skb->data_len) {
1005                len = skb_headlen(skb);
1006
1007                busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1008                if (dma_mapping_error(dev, busaddr))
1009                        goto dma_err;
1010                map_single = true;
1011                be_tx_setup_wrb_frag(txo, busaddr, len);
1012                copied += len;
1013        }
1014
1015        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1016                const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1017                len = skb_frag_size(frag);
1018
1019                busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1020                if (dma_mapping_error(dev, busaddr))
1021                        goto dma_err;
1022                be_tx_setup_wrb_frag(txo, busaddr, len);
1023                copied += len;
1024        }
1025
1026        be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1027
1028        be_tx_stats_update(txo, skb);
1029        return wrb_cnt;
1030
1031dma_err:
1032        adapter->drv_stats.dma_map_errors++;
1033        be_xmit_restore(adapter, txo, head, map_single, copied);
1034        return 0;
1035}
1036
1037static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1038{
1039        return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1040}
1041
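/* Insert the VLAN tag(s) directly into the packet data instead of relying
 * on HW tagging; handles the pvid/qnq cases and may return NULL if the skb
 * cannot be expanded.
 */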
1042static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1043                                             struct sk_buff *skb,
1044                                             struct be_wrb_params
1045                                             *wrb_params)
1046{
1047        bool insert_vlan = false;
1048        u16 vlan_tag = 0;
1049
1050        skb = skb_share_check(skb, GFP_ATOMIC);
1051        if (unlikely(!skb))
1052                return skb;
1053
1054        if (skb_vlan_tag_present(skb)) {
1055                vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1056                insert_vlan = true;
1057        }
1058
1059        if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1060                if (!insert_vlan) {
1061                        vlan_tag = adapter->pvid;
1062                        insert_vlan = true;
1063                }
 1064                /* F/W workaround: setting skip_hw_vlan = 1 informs the F/W to
1065                 * skip VLAN insertion
1066                 */
1067                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1068        }
1069
1070        if (insert_vlan) {
1071                skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1072                                                vlan_tag);
1073                if (unlikely(!skb))
1074                        return skb;
1075                __vlan_hwaccel_clear_tag(skb);
1076        }
1077
1078        /* Insert the outer VLAN, if any */
1079        if (adapter->qnq_vid) {
1080                vlan_tag = adapter->qnq_vid;
1081                skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1082                                                vlan_tag);
1083                if (unlikely(!skb))
1084                        return skb;
1085                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086        }
1087
1088        return skb;
1089}
1090
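/* Returns true for IPv6 packets carrying an extension header with
 * hdrlen == 0xff; such packets can trigger the BE3 VLAN-tagging lockup
 * worked around in the xmit path.
 */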
1091static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1092{
1093        struct ethhdr *eh = (struct ethhdr *)skb->data;
1094        u16 offset = ETH_HLEN;
1095
1096        if (eh->h_proto == htons(ETH_P_IPV6)) {
1097                struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1098
1099                offset += sizeof(struct ipv6hdr);
1100                if (ip6h->nexthdr != NEXTHDR_TCP &&
1101                    ip6h->nexthdr != NEXTHDR_UDP) {
1102                        struct ipv6_opt_hdr *ehdr =
1103                                (struct ipv6_opt_hdr *)(skb->data + offset);
1104
1105                        /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1106                        if (ehdr->hdrlen == 0xff)
1107                                return true;
1108                }
1109        }
1110        return false;
1111}
1112
1113static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1114{
1115        return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1116}
1117
1118static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119{
1120        return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1121}
1122
1123static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1124                                                  struct sk_buff *skb,
1125                                                  struct be_wrb_params
1126                                                  *wrb_params)
1127{
1128        struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1129        unsigned int eth_hdr_len;
1130        struct iphdr *ip;
1131
1132        /* For padded packets, BE HW modifies tot_len field in IP header
 1133         * incorrectly when the VLAN tag is inserted by HW.
1134         * For padded packets, Lancer computes incorrect checksum.
1135         */
1136        eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1137                                                VLAN_ETH_HLEN : ETH_HLEN;
1138        if (skb->len <= 60 &&
1139            (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1140            is_ipv4_pkt(skb)) {
1141                ip = (struct iphdr *)ip_hdr(skb);
1142                pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1143        }
1144
1145        /* If vlan tag is already inlined in the packet, skip HW VLAN
1146         * tagging in pvid-tagging mode
1147         */
1148        if (be_pvid_tagging_enabled(adapter) &&
1149            veh->h_vlan_proto == htons(ETH_P_8021Q))
1150                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1151
1152        /* HW has a bug wherein it will calculate CSUM for VLAN
 1153         * pkts even though CSUM offload is disabled.
 1154         * Manually insert the VLAN tag in the pkt.
1155         */
1156        if (skb->ip_summed != CHECKSUM_PARTIAL &&
1157            skb_vlan_tag_present(skb)) {
1158                skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1159                if (unlikely(!skb))
1160                        goto err;
1161        }
1162
 1163        /* HW may lock up when VLAN HW tagging is requested on
1164         * certain ipv6 packets. Drop such pkts if the HW workaround to
1165         * skip HW tagging is not enabled by FW.
1166         */
1167        if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1168                     (adapter->pvid || adapter->qnq_vid) &&
1169                     !qnq_async_evt_rcvd(adapter)))
1170                goto tx_drop;
1171
1172        /* Manual VLAN tag insertion to prevent:
1173         * ASIC lockup when the ASIC inserts VLAN tag into
1174         * certain ipv6 packets. Insert VLAN tags in driver,
1175         * and set event, completion, vlan bits accordingly
1176         * in the Tx WRB.
1177         */
1178        if (be_ipv6_tx_stall_chk(adapter, skb) &&
1179            be_vlan_tag_tx_chk(adapter, skb)) {
1180                skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1181                if (unlikely(!skb))
1182                        goto err;
1183        }
1184
1185        return skb;
1186tx_drop:
1187        dev_kfree_skb_any(skb);
1188err:
1189        return NULL;
1190}
1191
1192static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1193                                           struct sk_buff *skb,
1194                                           struct be_wrb_params *wrb_params)
1195{
1196        int err;
1197
1198        /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
 1199         * packets that are 32 bytes or less may cause a transmit stall
 1200         * on that port. The workaround is to pad such packets
 1201         * (len <= 32 bytes) to a minimum length of 36 bytes.
1202         */
1203        if (skb->len <= 32) {
1204                if (skb_put_padto(skb, 36))
1205                        return NULL;
1206        }
1207
1208        if (BEx_chip(adapter) || lancer_chip(adapter)) {
1209                skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1210                if (!skb)
1211                        return NULL;
1212        }
1213
1214        /* The stack can send us skbs with length greater than
1215         * what the HW can handle. Trim the extra bytes.
1216         */
1217        WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1218        err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1219        WARN_ON(err);
1220
1221        return skb;
1222}
1223
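/* Ring the TX doorbell for all pending WRBs. The last request is marked
 * eventable, and on non-Lancer chips a dummy WRB is appended when the
 * pending WRB count is odd.
 */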
1224static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1225{
1226        struct be_queue_info *txq = &txo->q;
1227        struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1228
1229        /* Mark the last request eventable if it hasn't been marked already */
1230        if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1231                hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1232
 1233        /* compose a dummy wrb if there is an odd number of wrbs to notify */
1234        if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1235                wrb_fill_dummy(queue_head_node(txq));
1236                queue_head_inc(txq);
1237                atomic_inc(&txq->used);
1238                txo->pend_wrb_cnt++;
1239                hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1240                                           TX_HDR_WRB_NUM_SHIFT);
1241                hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1242                                          TX_HDR_WRB_NUM_SHIFT);
1243        }
1244        be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1245        txo->pend_wrb_cnt = 0;
1246}
1247
1248/* OS2BMC related */
1249
1250#define DHCP_CLIENT_PORT        68
1251#define DHCP_SERVER_PORT        67
1252#define NET_BIOS_PORT1          137
1253#define NET_BIOS_PORT2          138
1254#define DHCPV6_RAS_PORT         547
1255
1256#define is_mc_allowed_on_bmc(adapter, eh)       \
1257        (!is_multicast_filt_enabled(adapter) && \
1258         is_multicast_ether_addr(eh->h_dest) && \
1259         !is_broadcast_ether_addr(eh->h_dest))
1260
1261#define is_bc_allowed_on_bmc(adapter, eh)       \
1262        (!is_broadcast_filt_enabled(adapter) && \
1263         is_broadcast_ether_addr(eh->h_dest))
1264
1265#define is_arp_allowed_on_bmc(adapter, skb)     \
1266        (is_arp(skb) && is_arp_filt_enabled(adapter))
1267
1268#define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1269
1270#define is_arp_filt_enabled(adapter)    \
1271                (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1272
1273#define is_dhcp_client_filt_enabled(adapter)    \
1274                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1275
1276#define is_dhcp_srvr_filt_enabled(adapter)      \
1277                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1278
1279#define is_nbios_filt_enabled(adapter)  \
1280                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1281
1282#define is_ipv6_na_filt_enabled(adapter)        \
1283                (adapter->bmc_filt_mask &       \
1284                        BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1285
1286#define is_ipv6_ra_filt_enabled(adapter)        \
1287                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1288
1289#define is_ipv6_ras_filt_enabled(adapter)       \
1290                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1291
1292#define is_broadcast_filt_enabled(adapter)      \
1293                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1294
1295#define is_multicast_filt_enabled(adapter)      \
1296                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1297
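/* Returns true if a copy of this TX packet must also be delivered to the
 * BMC, based on the BMC filtering mask (broadcast/multicast, ARP, DHCP,
 * NetBIOS and IPv6 ND/RA/RAS traffic).
 */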
1298static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1299                               struct sk_buff **skb)
1300{
1301        struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1302        bool os2bmc = false;
1303
1304        if (!be_is_os2bmc_enabled(adapter))
1305                goto done;
1306
1307        if (!is_multicast_ether_addr(eh->h_dest))
1308                goto done;
1309
1310        if (is_mc_allowed_on_bmc(adapter, eh) ||
1311            is_bc_allowed_on_bmc(adapter, eh) ||
1312            is_arp_allowed_on_bmc(adapter, (*skb))) {
1313                os2bmc = true;
1314                goto done;
1315        }
1316
1317        if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1318                struct ipv6hdr *hdr = ipv6_hdr((*skb));
1319                u8 nexthdr = hdr->nexthdr;
1320
1321                if (nexthdr == IPPROTO_ICMPV6) {
1322                        struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1323
1324                        switch (icmp6->icmp6_type) {
1325                        case NDISC_ROUTER_ADVERTISEMENT:
1326                                os2bmc = is_ipv6_ra_filt_enabled(adapter);
1327                                goto done;
1328                        case NDISC_NEIGHBOUR_ADVERTISEMENT:
1329                                os2bmc = is_ipv6_na_filt_enabled(adapter);
1330                                goto done;
1331                        default:
1332                                break;
1333                        }
1334                }
1335        }
1336
1337        if (is_udp_pkt((*skb))) {
1338                struct udphdr *udp = udp_hdr((*skb));
1339
1340                switch (ntohs(udp->dest)) {
1341                case DHCP_CLIENT_PORT:
1342                        os2bmc = is_dhcp_client_filt_enabled(adapter);
1343                        goto done;
1344                case DHCP_SERVER_PORT:
1345                        os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1346                        goto done;
1347                case NET_BIOS_PORT1:
1348                case NET_BIOS_PORT2:
1349                        os2bmc = is_nbios_filt_enabled(adapter);
1350                        goto done;
1351                case DHCPV6_RAS_PORT:
1352                        os2bmc = is_ipv6_ras_filt_enabled(adapter);
1353                        goto done;
1354                default:
1355                        break;
1356                }
1357        }
1358done:
 1359        /* For packets over a VLAN that are destined to the BMC,
 1360         * the ASIC expects the VLAN tag to be inline in the packet.
1361         */
1362        if (os2bmc)
1363                *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1364
1365        return os2bmc;
1366}
1367
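/* ndo_start_xmit() handler: applies chip-specific workarounds, maps and
 * enqueues the skb's WRBs (a second time, with the mgmt bit set, when the
 * packet must also reach the BMC) and rings the doorbell unless more
 * packets are expected.
 */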
1368static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1369{
1370        struct be_adapter *adapter = netdev_priv(netdev);
1371        u16 q_idx = skb_get_queue_mapping(skb);
1372        struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1373        struct be_wrb_params wrb_params = { 0 };
1374        bool flush = !netdev_xmit_more();
1375        u16 wrb_cnt;
1376
1377        skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1378        if (unlikely(!skb))
1379                goto drop;
1380
1381        be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1382
1383        wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384        if (unlikely(!wrb_cnt)) {
1385                dev_kfree_skb_any(skb);
1386                goto drop;
1387        }
1388
1389        /* if os2bmc is enabled and if the pkt is destined to bmc,
1390         * enqueue the pkt a 2nd time with mgmt bit set.
1391         */
1392        if (be_send_pkt_to_bmc(adapter, &skb)) {
1393                BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1394                wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1395                if (unlikely(!wrb_cnt))
1396                        goto drop;
1397                else
1398                        skb_get(skb);
1399        }
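            /* Note: the skb is now enqueued twice -- once for the wire and
             * once with the OS2BMC bit set for the BMC. skb_get() above takes
             * an extra reference so that each of the two TX completions can
             * drop one.
             */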
1400
1401        if (be_is_txq_full(txo)) {
1402                netif_stop_subqueue(netdev, q_idx);
1403                tx_stats(txo)->tx_stops++;
1404        }
1405
1406        if (flush || __netif_subqueue_stopped(netdev, q_idx))
1407                be_xmit_flush(adapter, txo);
1408
1409        return NETDEV_TX_OK;
1410drop:
1411        tx_stats(txo)->tx_drv_drops++;
1412        /* Flush the already enqueued tx requests */
1413        if (flush && txo->pend_wrb_cnt)
1414                be_xmit_flush(adapter, txo);
1415
1416        return NETDEV_TX_OK;
1417}
1418
1419static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1420{
1421        struct be_adapter *adapter = netdev_priv(netdev);
1422        struct device *dev = &adapter->pdev->dev;
1423        struct be_tx_obj *txo;
1424        struct sk_buff *skb;
1425        struct tcphdr *tcphdr;
1426        struct udphdr *udphdr;
1427        u32 *entry;
1428        int status;
1429        int i, j;
1430
1431        for_all_tx_queues(adapter, txo, i) {
1432                dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1433                         i, txo->q.head, txo->q.tail,
1434                         atomic_read(&txo->q.used), txo->q.id);
1435
1436                entry = txo->q.dma_mem.va;
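                    /* Each WRB is 16 bytes; dump it as four dwords, skipping
                     * entries that are all-zero (unused).
                     */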
1437                for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1438                        if (entry[j] != 0 || entry[j + 1] != 0 ||
1439                            entry[j + 2] != 0 || entry[j + 3] != 0) {
1440                                dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1441                                         j, entry[j], entry[j + 1],
1442                                         entry[j + 2], entry[j + 3]);
1443                        }
1444                }
1445
1446                entry = txo->cq.dma_mem.va;
1447                dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1448                         i, txo->cq.head, txo->cq.tail,
1449                         atomic_read(&txo->cq.used));
1450                for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1451                        if (entry[j] != 0 || entry[j + 1] != 0 ||
1452                            entry[j + 2] != 0 || entry[j + 3] != 0) {
1453                                dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1454                                         j, entry[j], entry[j + 1],
1455                                         entry[j + 2], entry[j + 3]);
1456                        }
1457                }
1458
1459                for (j = 0; j < TX_Q_LEN; j++) {
1460                        if (txo->sent_skb_list[j]) {
1461                                skb = txo->sent_skb_list[j];
1462                                if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1463                                        tcphdr = tcp_hdr(skb);
1464                                        dev_info(dev, "TCP source port %d\n",
1465                                                 ntohs(tcphdr->source));
1466                                        dev_info(dev, "TCP dest port %d\n",
1467                                                 ntohs(tcphdr->dest));
1468                                        dev_info(dev, "TCP sequence num %u\n",
1469                                                 ntohl(tcphdr->seq));
1470                                        dev_info(dev, "TCP ack_seq %u\n",
1471                                                 ntohl(tcphdr->ack_seq));
1472                                } else if (ip_hdr(skb)->protocol ==
1473                                           IPPROTO_UDP) {
1474                                        udphdr = udp_hdr(skb);
1475                                        dev_info(dev, "UDP source port %d\n",
1476                                                 ntohs(udphdr->source));
1477                                        dev_info(dev, "UDP dest port %d\n",
1478                                                 ntohs(udphdr->dest));
1479                                }
1480                                dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1481                                         j, skb, skb->len, skb->protocol);
1482                        }
1483                }
1484        }
1485
1486        if (lancer_chip(adapter)) {
1487                dev_info(dev, "Initiating reset due to tx timeout\n");
1488                dev_info(dev, "Resetting adapter\n");
1489                status = lancer_physdev_ctrl(adapter,
1490                                             PHYSDEV_CONTROL_FW_RESET_MASK);
1491                if (status)
1492                        dev_err(dev, "Reset failed; reboot the server\n");
1493        }
1494}
1495
1496static inline bool be_in_all_promisc(struct be_adapter *adapter)
1497{
1498        return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1499                        BE_IF_FLAGS_ALL_PROMISCUOUS;
1500}
1501
1502static int be_set_vlan_promisc(struct be_adapter *adapter)
1503{
1504        struct device *dev = &adapter->pdev->dev;
1505        int status;
1506
1507        if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1508                return 0;
1509
1510        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1511        if (!status) {
1512                dev_info(dev, "Enabled VLAN promiscuous mode\n");
1513                adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1514        } else {
1515                dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1516        }
1517        return status;
1518}
1519
1520static int be_clear_vlan_promisc(struct be_adapter *adapter)
1521{
1522        struct device *dev = &adapter->pdev->dev;
1523        int status;
1524
1525        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1526        if (!status) {
1527                dev_info(dev, "Disabled VLAN promiscuous mode\n");
1528                adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1529        }
1530        return status;
1531}
1532
1533/*
1534 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1535 * If the user configures more, place BE in vlan promiscuous mode.
1536 */
1537static int be_vid_config(struct be_adapter *adapter)
1538{
1539        struct device *dev = &adapter->pdev->dev;
1540        u16 vids[BE_NUM_VLANS_SUPPORTED];
1541        u16 num = 0, i = 0;
1542        int status = 0;
1543
1544        /* No need to change the VLAN state if the I/F is in promiscuous mode */
1545        if (adapter->netdev->flags & IFF_PROMISC)
1546                return 0;
1547
1548        if (adapter->vlans_added > be_max_vlans(adapter))
1549                return be_set_vlan_promisc(adapter);
1550
1551        if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1552                status = be_clear_vlan_promisc(adapter);
1553                if (status)
1554                        return status;
1555        }
1556        /* Construct VLAN Table to give to HW */
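            /* adapter->vids is a bitmap of configured VIDs; pack its set bits
             * into a little-endian u16 array for the FW command below.
             */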
1557        for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1558                vids[num++] = cpu_to_le16(i);
1559
1560        status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1561        if (status) {
1562                dev_err(dev, "Setting HW VLAN filtering failed\n");
1563                /* Set to VLAN promisc mode as setting VLAN filter failed */
1564                if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1565                    addl_status(status) ==
1566                                MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1567                        return be_set_vlan_promisc(adapter);
1568        }
1569        return status;
1570}
1571
1572static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1573{
1574        struct be_adapter *adapter = netdev_priv(netdev);
1575        int status = 0;
1576
1577        mutex_lock(&adapter->rx_filter_lock);
1578
1579        /* Packets with VID 0 are always received by Lancer by default */
1580        if (lancer_chip(adapter) && vid == 0)
1581                goto done;
1582
1583        if (test_bit(vid, adapter->vids))
1584                goto done;
1585
1586        set_bit(vid, adapter->vids);
1587        adapter->vlans_added++;
1588
1589        status = be_vid_config(adapter);
1590done:
1591        mutex_unlock(&adapter->rx_filter_lock);
1592        return status;
1593}
1594
1595static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1596{
1597        struct be_adapter *adapter = netdev_priv(netdev);
1598        int status = 0;
1599
1600        mutex_lock(&adapter->rx_filter_lock);
1601
1602        /* Packets with VID 0 are always received by Lancer by default */
1603        if (lancer_chip(adapter) && vid == 0)
1604                goto done;
1605
1606        if (!test_bit(vid, adapter->vids))
1607                goto done;
1608
1609        clear_bit(vid, adapter->vids);
1610        adapter->vlans_added--;
1611
1612        status = be_vid_config(adapter);
1613done:
1614        mutex_unlock(&adapter->rx_filter_lock);
1615        return status;
1616}
1617
1618static void be_set_all_promisc(struct be_adapter *adapter)
1619{
1620        be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1621        adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1622}
1623
1624static void be_set_mc_promisc(struct be_adapter *adapter)
1625{
1626        int status;
1627
1628        if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1629                return;
1630
1631        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1632        if (!status)
1633                adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1634}
1635
1636static void be_set_uc_promisc(struct be_adapter *adapter)
1637{
1638        int status;
1639
1640        if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1641                return;
1642
1643        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1644        if (!status)
1645                adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1646}
1647
1648static void be_clear_uc_promisc(struct be_adapter *adapter)
1649{
1650        int status;
1651
1652        if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1653                return;
1654
1655        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1656        if (!status)
1657                adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1658}
1659
1660/* The two functions below are the sync/unsync callbacks for __dev_mc_sync()
1661 * and __dev_uc_sync(). A single callback handles both sync and unsync; it
1662 * doesn't actually add/remove addresses, it only flags that the uc/mc list
1663 * has changed. The entire uc/mc list is programmed in be_set_rx_mode().
1664 */
1665static int be_uc_list_update(struct net_device *netdev,
1666                             const unsigned char *addr)
1667{
1668        struct be_adapter *adapter = netdev_priv(netdev);
1669
1670        adapter->update_uc_list = true;
1671        return 0;
1672}
1673
1674static int be_mc_list_update(struct net_device *netdev,
1675                             const unsigned char *addr)
1676{
1677        struct be_adapter *adapter = netdev_priv(netdev);
1678
1679        adapter->update_mc_list = true;
1680        return 0;
1681}
1682
1683static void be_set_mc_list(struct be_adapter *adapter)
1684{
1685        struct net_device *netdev = adapter->netdev;
1686        struct netdev_hw_addr *ha;
1687        bool mc_promisc = false;
1688        int status;
1689
1690        netif_addr_lock_bh(netdev);
1691        __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1692
1693        if (netdev->flags & IFF_PROMISC) {
1694                adapter->update_mc_list = false;
1695        } else if (netdev->flags & IFF_ALLMULTI ||
1696                   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1697                /* Enable multicast promisc if num configured exceeds
1698                 * what we support
1699                 */
1700                mc_promisc = true;
1701                adapter->update_mc_list = false;
1702        } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1703                /* Update mc-list unconditionally if the iface was previously
1704                 * in mc-promisc mode and now is out of that mode.
1705                 */
1706                adapter->update_mc_list = true;
1707        }
1708
1709        if (adapter->update_mc_list) {
1710                int i = 0;
1711
1712                /* cache the mc-list in adapter */
1713                netdev_for_each_mc_addr(ha, netdev) {
1714                        ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1715                        i++;
1716                }
1717                adapter->mc_count = netdev_mc_count(netdev);
1718        }
1719        netif_addr_unlock_bh(netdev);
1720
1721        if (mc_promisc) {
1722                be_set_mc_promisc(adapter);
1723        } else if (adapter->update_mc_list) {
1724                status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1725                if (!status)
1726                        adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1727                else
1728                        be_set_mc_promisc(adapter);
1729
1730                adapter->update_mc_list = false;
1731        }
1732}
1733
1734static void be_clear_mc_list(struct be_adapter *adapter)
1735{
1736        struct net_device *netdev = adapter->netdev;
1737
1738        __dev_mc_unsync(netdev, NULL);
1739        be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1740        adapter->mc_count = 0;
1741}
1742
1743static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1744{
1745        if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1746                adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1747                return 0;
1748        }
1749
1750        return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1751                               adapter->if_handle,
1752                               &adapter->pmac_id[uc_idx + 1], 0);
1753}
1754
1755static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1756{
1757        if (pmac_id == adapter->pmac_id[0])
1758                return;
1759
1760        be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1761}
1762
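    /* pmac_id[0] holds the pmac-id of the interface's primary MAC (dev_mac);
     * secondary unicast MACs from the netdev uc-list occupy pmac_id[1..]. This
     * is why uc-promisc kicks in once the list exceeds be_max_uc() - 1 entries,
     * and why be_uc_mac_add()/be_uc_mac_del() index pmac_id[] at uc_idx + 1.
     */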
1763static void be_set_uc_list(struct be_adapter *adapter)
1764{
1765        struct net_device *netdev = adapter->netdev;
1766        struct netdev_hw_addr *ha;
1767        bool uc_promisc = false;
1768        int curr_uc_macs = 0, i;
1769
1770        netif_addr_lock_bh(netdev);
1771        __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1772
1773        if (netdev->flags & IFF_PROMISC) {
1774                adapter->update_uc_list = false;
1775        } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1776                uc_promisc = true;
1777                adapter->update_uc_list = false;
1778        } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1779                /* Update uc-list unconditionally if the iface was previously
1780                 * in uc-promisc mode and now is out of that mode.
1781                 */
1782                adapter->update_uc_list = true;
1783        }
1784
1785        if (adapter->update_uc_list) {
1786                /* cache the uc-list in adapter array */
1787                i = 0;
1788                netdev_for_each_uc_addr(ha, netdev) {
1789                        ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1790                        i++;
1791                }
1792                curr_uc_macs = netdev_uc_count(netdev);
1793        }
1794        netif_addr_unlock_bh(netdev);
1795
1796        if (uc_promisc) {
1797                be_set_uc_promisc(adapter);
1798        } else if (adapter->update_uc_list) {
1799                be_clear_uc_promisc(adapter);
1800
1801                for (i = 0; i < adapter->uc_macs; i++)
1802                        be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1803
1804                for (i = 0; i < curr_uc_macs; i++)
1805                        be_uc_mac_add(adapter, i);
1806                adapter->uc_macs = curr_uc_macs;
1807                adapter->update_uc_list = false;
1808        }
1809}
1810
1811static void be_clear_uc_list(struct be_adapter *adapter)
1812{
1813        struct net_device *netdev = adapter->netdev;
1814        int i;
1815
1816        __dev_uc_unsync(netdev, NULL);
1817        for (i = 0; i < adapter->uc_macs; i++)
1818                be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1819
1820        adapter->uc_macs = 0;
1821}
1822
1823static void __be_set_rx_mode(struct be_adapter *adapter)
1824{
1825        struct net_device *netdev = adapter->netdev;
1826
1827        mutex_lock(&adapter->rx_filter_lock);
1828
1829        if (netdev->flags & IFF_PROMISC) {
1830                if (!be_in_all_promisc(adapter))
1831                        be_set_all_promisc(adapter);
1832        } else if (be_in_all_promisc(adapter)) {
1833                /* We need to re-program the vlan-list or clear
1834                 * vlan-promisc mode (if needed) when the interface
1835                 * comes out of promisc mode.
1836                 */
1837                be_vid_config(adapter);
1838        }
1839
1840        be_set_uc_list(adapter);
1841        be_set_mc_list(adapter);
1842
1843        mutex_unlock(&adapter->rx_filter_lock);
1844}
1845
1846static void be_work_set_rx_mode(struct work_struct *work)
1847{
1848        struct be_cmd_work *cmd_work =
1849                                container_of(work, struct be_cmd_work, work);
1850
1851        __be_set_rx_mode(cmd_work->adapter);
1852        kfree(cmd_work);
1853}
1854
1855static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1856{
1857        struct be_adapter *adapter = netdev_priv(netdev);
1858        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1859        int status;
1860
1861        if (!sriov_enabled(adapter))
1862                return -EPERM;
1863
1864        if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1865                return -EINVAL;
1866
1867        /* Proceed further only if the user-provided MAC is different
1868         * from the active MAC
1869         */
1870        if (ether_addr_equal(mac, vf_cfg->mac_addr))
1871                return 0;
1872
1873        if (BEx_chip(adapter)) {
1874                be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1875                                vf + 1);
1876
1877                status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1878                                         &vf_cfg->pmac_id, vf + 1);
1879        } else {
1880                status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1881                                        vf + 1);
1882        }
1883
1884        if (status) {
1885                dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1886                        mac, vf, status);
1887                return be_cmd_status(status);
1888        }
1889
1890        ether_addr_copy(vf_cfg->mac_addr, mac);
1891
1892        return 0;
1893}
1894
1895static int be_get_vf_config(struct net_device *netdev, int vf,
1896                            struct ifla_vf_info *vi)
1897{
1898        struct be_adapter *adapter = netdev_priv(netdev);
1899        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900
1901        if (!sriov_enabled(adapter))
1902                return -EPERM;
1903
1904        if (vf >= adapter->num_vfs)
1905                return -EINVAL;
1906
1907        vi->vf = vf;
1908        vi->max_tx_rate = vf_cfg->tx_rate;
1909        vi->min_tx_rate = 0;
1910        vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1911        vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1912        memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1913        vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1914        vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1915
1916        return 0;
1917}
1918
1919static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1920{
1921        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1922        u16 vids[BE_NUM_VLANS_SUPPORTED];
1923        int vf_if_id = vf_cfg->if_handle;
1924        int status;
1925
1926        /* Enable Transparent VLAN Tagging */
1927        status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1928        if (status)
1929                return status;
1930
1931        /* If TVT is enabled, clear any pre-programmed VLAN filters on the VF */
1932        vids[0] = 0;
1933        status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1934        if (!status)
1935                dev_info(&adapter->pdev->dev,
1936                         "Cleared guest VLANs on VF%d", vf);
1937
1938        /* After TVT is enabled, do not allow the VF to program VLAN filters */
1939        if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1940                status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1941                                                  ~BE_PRIV_FILTMGMT, vf + 1);
1942                if (!status)
1943                        vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1944        }
1945        return 0;
1946}
1947
1948static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1949{
1950        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1951        struct device *dev = &adapter->pdev->dev;
1952        int status;
1953
1954        /* Reset Transparent VLAN Tagging. */
1955        status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1956                                       vf_cfg->if_handle, 0, 0);
1957        if (status)
1958                return status;
1959
1960        /* Allow VFs to program VLAN filtering */
1961        if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1962                status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1963                                                  BE_PRIV_FILTMGMT, vf + 1);
1964                if (!status) {
1965                        vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1966                        dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1967                }
1968        }
1969
1970        dev_info(dev,
1971                 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1972        return 0;
1973}
1974
1975static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1976                          __be16 vlan_proto)
1977{
1978        struct be_adapter *adapter = netdev_priv(netdev);
1979        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1980        int status;
1981
1982        if (!sriov_enabled(adapter))
1983                return -EPERM;
1984
1985        if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1986                return -EINVAL;
1987
1988        if (vlan_proto != htons(ETH_P_8021Q))
1989                return -EPROTONOSUPPORT;
1990
1991        if (vlan || qos) {
1992                vlan |= qos << VLAN_PRIO_SHIFT;
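                    /* e.g. vlan = 100 (0x064), qos = 5 -> VLAN TCI 0xa064
                     * (priority in bits 15:13, VID in bits 11:0)
                     */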
1993                status = be_set_vf_tvt(adapter, vf, vlan);
1994        } else {
1995                status = be_clear_vf_tvt(adapter, vf);
1996        }
1997
1998        if (status) {
1999                dev_err(&adapter->pdev->dev,
2000                        "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2001                        status);
2002                return be_cmd_status(status);
2003        }
2004
2005        vf_cfg->vlan_tag = vlan;
2006        return 0;
2007}
2008
2009static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2010                             int min_tx_rate, int max_tx_rate)
2011{
2012        struct be_adapter *adapter = netdev_priv(netdev);
2013        struct device *dev = &adapter->pdev->dev;
2014        int percent_rate, status = 0;
2015        u16 link_speed = 0;
2016        u8 link_status;
2017
2018        if (!sriov_enabled(adapter))
2019                return -EPERM;
2020
2021        if (vf >= adapter->num_vfs)
2022                return -EINVAL;
2023
2024        if (min_tx_rate)
2025                return -EINVAL;
2026
2027        if (!max_tx_rate)
2028                goto config_qos;
2029
2030        status = be_cmd_link_status_query(adapter, &link_speed,
2031                                          &link_status, 0);
2032        if (status)
2033                goto err;
2034
2035        if (!link_status) {
2036                dev_err(dev, "TX-rate setting not allowed when link is down\n");
2037                status = -ENETDOWN;
2038                goto err;
2039        }
2040
2041        if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2042                dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2043                        link_speed);
2044                status = -EINVAL;
2045                goto err;
2046        }
2047
2048        /* On Skyhawk the QOS setting must be done only as a % value */
2049        percent_rate = link_speed / 100;
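            /* e.g. on a 10Gbps link, link_speed is 10000 and percent_rate is
             * 100, so max_tx_rate must be a multiple of 100 Mbps on Skyhawk.
             */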
2050        if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2051                dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2052                        percent_rate);
2053                status = -EINVAL;
2054                goto err;
2055        }
2056
2057config_qos:
2058        status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2059        if (status)
2060                goto err;
2061
2062        adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2063        return 0;
2064
2065err:
2066        dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2067                max_tx_rate, vf);
2068        return be_cmd_status(status);
2069}
2070
2071static int be_set_vf_link_state(struct net_device *netdev, int vf,
2072                                int link_state)
2073{
2074        struct be_adapter *adapter = netdev_priv(netdev);
2075        int status;
2076
2077        if (!sriov_enabled(adapter))
2078                return -EPERM;
2079
2080        if (vf >= adapter->num_vfs)
2081                return -EINVAL;
2082
2083        status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2084        if (status) {
2085                dev_err(&adapter->pdev->dev,
2086                        "Link state change on VF %d failed: %#x\n", vf, status);
2087                return be_cmd_status(status);
2088        }
2089
2090        adapter->vf_cfg[vf].plink_tracking = link_state;
2091
2092        return 0;
2093}
2094
2095static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2096{
2097        struct be_adapter *adapter = netdev_priv(netdev);
2098        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2099        u8 spoofchk;
2100        int status;
2101
2102        if (!sriov_enabled(adapter))
2103                return -EPERM;
2104
2105        if (vf >= adapter->num_vfs)
2106                return -EINVAL;
2107
2108        if (BEx_chip(adapter))
2109                return -EOPNOTSUPP;
2110
2111        if (enable == vf_cfg->spoofchk)
2112                return 0;
2113
2114        spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2115
2116        status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2117                                       0, spoofchk);
2118        if (status) {
2119                dev_err(&adapter->pdev->dev,
2120                        "Spoofchk change on VF %d failed: %#x\n", vf, status);
2121                return be_cmd_status(status);
2122        }
2123
2124        vf_cfg->spoofchk = enable;
2125        return 0;
2126}
2127
2128static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2129                          ulong now)
2130{
2131        aic->rx_pkts_prev = rx_pkts;
2132        aic->tx_reqs_prev = tx_pkts;
2133        aic->jiffies = now;
2134}
2135
2136static int be_get_new_eqd(struct be_eq_obj *eqo)
2137{
2138        struct be_adapter *adapter = eqo->adapter;
2139        int eqd, start;
2140        struct be_aic_obj *aic;
2141        struct be_rx_obj *rxo;
2142        struct be_tx_obj *txo;
2143        u64 rx_pkts = 0, tx_pkts = 0;
2144        ulong now;
2145        u32 pps, delta;
2146        int i;
2147
2148        aic = &adapter->aic_obj[eqo->idx];
2149        if (!adapter->aic_enabled) {
2150                if (aic->jiffies)
2151                        aic->jiffies = 0;
2152                eqd = aic->et_eqd;
2153                return eqd;
2154        }
2155
2156        for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2157                do {
2158                        start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2159                        rx_pkts += rxo->stats.rx_pkts;
2160                } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2161        }
2162
2163        for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2164                do {
2165                        start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2166                        tx_pkts += txo->stats.tx_reqs;
2167                } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2168        }
2169
2170        /* Skip if we wrapped around or this is the first calculation */
2171        now = jiffies;
2172        if (!aic->jiffies || time_before(now, aic->jiffies) ||
2173            rx_pkts < aic->rx_pkts_prev ||
2174            tx_pkts < aic->tx_reqs_prev) {
2175                be_aic_update(aic, rx_pkts, tx_pkts, now);
2176                return aic->prev_eqd;
2177        }
2178
2179        delta = jiffies_to_msecs(now - aic->jiffies);
2180        if (delta == 0)
2181                return aic->prev_eqd;
2182
2183        pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2184                (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
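            /* Scale the delay with the aggregate pkt rate: roughly 4 units of
             * delay per 15K pkts/sec. E.g. ~150K pkts/sec across this EQ's
             * queues gives eqd = (150000 / 15000) << 2 = 40, which is then
             * zeroed if below 8 and clamped to [min_eqd, max_eqd] below.
             */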
2185        eqd = (pps / 15000) << 2;
2186
2187        if (eqd < 8)
2188                eqd = 0;
2189        eqd = min_t(u32, eqd, aic->max_eqd);
2190        eqd = max_t(u32, eqd, aic->min_eqd);
2191
2192        be_aic_update(aic, rx_pkts, tx_pkts, now);
2193
2194        return eqd;
2195}
2196
2197/* For Skyhawk-R only */
2198static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2199{
2200        struct be_adapter *adapter = eqo->adapter;
2201        struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2202        ulong now = jiffies;
2203        int eqd;
2204        u32 mult_enc;
2205
2206        if (!adapter->aic_enabled)
2207                return 0;
2208
2209        if (jiffies_to_msecs(now - aic->jiffies) < 1)
2210                eqd = aic->prev_eqd;
2211        else
2212                eqd = be_get_new_eqd(eqo);
2213
2214        if (eqd > 100)
2215                mult_enc = R2I_DLY_ENC_1;
2216        else if (eqd > 60)
2217                mult_enc = R2I_DLY_ENC_2;
2218        else if (eqd > 20)
2219                mult_enc = R2I_DLY_ENC_3;
2220        else
2221                mult_enc = R2I_DLY_ENC_0;
2222
2223        aic->prev_eqd = eqd;
2224
2225        return mult_enc;
2226}
2227
2228void be_eqd_update(struct be_adapter *adapter, bool force_update)
2229{
2230        struct be_set_eqd set_eqd[MAX_EVT_QS];
2231        struct be_aic_obj *aic;
2232        struct be_eq_obj *eqo;
2233        int i, num = 0, eqd;
2234
2235        for_all_evt_queues(adapter, eqo, i) {
2236                aic = &adapter->aic_obj[eqo->idx];
2237                eqd = be_get_new_eqd(eqo);
2238                if (force_update || eqd != aic->prev_eqd) {
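                                /* Convert the delay into the delay_multiplier
                                 * format consumed by be_cmd_modify_eqd(),
                                 * batching up only the EQs whose delay has
                                 * actually changed.
                                 */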
2239                        set_eqd[num].delay_multiplier = (eqd * 65)/100;
2240                        set_eqd[num].eq_id = eqo->q.id;
2241                        aic->prev_eqd = eqd;
2242                        num++;
2243                }
2244        }
2245
2246        if (num)
2247                be_cmd_modify_eqd(adapter, set_eqd, num);
2248}
2249
2250static void be_rx_stats_update(struct be_rx_obj *rxo,
2251                               struct be_rx_compl_info *rxcp)
2252{
2253        struct be_rx_stats *stats = rx_stats(rxo);
2254
2255        u64_stats_update_begin(&stats->sync);
2256        stats->rx_compl++;
2257        stats->rx_bytes += rxcp->pkt_size;
2258        stats->rx_pkts++;
2259        if (rxcp->tunneled)
2260                stats->rx_vxlan_offload_pkts++;
2261        if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2262                stats->rx_mcast_pkts++;
2263        if (rxcp->err)
2264                stats->rx_compl_err++;
2265        u64_stats_update_end(&stats->sync);
2266}
2267
2268static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2269{
2270        /* The L4 checksum is not reliable for non-TCP/UDP packets.
2271         * Also ignore ipcksm for IPv6 pkts
2272         */
2273        return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2274                (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2275}
2276
2277static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2278{
2279        struct be_adapter *adapter = rxo->adapter;
2280        struct be_rx_page_info *rx_page_info;
2281        struct be_queue_info *rxq = &rxo->q;
2282        u32 frag_idx = rxq->tail;
2283
2284        rx_page_info = &rxo->page_info_tbl[frag_idx];
2285        BUG_ON(!rx_page_info->page);
2286
2287        if (rx_page_info->last_frag) {
2288                dma_unmap_page(&adapter->pdev->dev,
2289                               dma_unmap_addr(rx_page_info, bus),
2290                               adapter->big_page_size, DMA_FROM_DEVICE);
2291                rx_page_info->last_frag = false;
2292        } else {
2293                dma_sync_single_for_cpu(&adapter->pdev->dev,
2294                                        dma_unmap_addr(rx_page_info, bus),
2295                                        rx_frag_size, DMA_FROM_DEVICE);
2296        }
2297
2298        queue_tail_inc(rxq);
2299        atomic_dec(&rxq->used);
2300        return rx_page_info;
2301}
2302
2303/* Throw away the data in the Rx completion */
2304static void be_rx_compl_discard(struct be_rx_obj *rxo,
2305                                struct be_rx_compl_info *rxcp)
2306{
2307        struct be_rx_page_info *page_info;
2308        u16 i, num_rcvd = rxcp->num_rcvd;
2309
2310        for (i = 0; i < num_rcvd; i++) {
2311                page_info = get_rx_page_info(rxo);
2312                put_page(page_info->page);
2313                memset(page_info, 0, sizeof(*page_info));
2314        }
2315}
2316
2317/*
2318 * skb_fill_rx_data forms a complete skb for an ether frame
2319 * indicated by rxcp.
2320 */
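    /* Packets no bigger than BE_HDR_LEN are copied entirely into the skb
     * linear area; for larger packets only the Ethernet header is copied and
     * the rest of the data is attached as page fragments, coalescing
     * consecutive frags that share a physical page into a single frag slot.
     */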
2321static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2322                             struct be_rx_compl_info *rxcp)
2323{
2324        struct be_rx_page_info *page_info;
2325        u16 i, j;
2326        u16 hdr_len, curr_frag_len, remaining;
2327        u8 *start;
2328
2329        page_info = get_rx_page_info(rxo);
2330        start = page_address(page_info->page) + page_info->page_offset;
2331        prefetch(start);
2332
2333        /* Copy data in the first descriptor of this completion */
2334        curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2335
2336        skb->len = curr_frag_len;
2337        if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2338                memcpy(skb->data, start, curr_frag_len);
2339                /* Complete packet has now been moved to data */
2340                put_page(page_info->page);
2341                skb->data_len = 0;
2342                skb->tail += curr_frag_len;
2343        } else {
2344                hdr_len = ETH_HLEN;
2345                memcpy(skb->data, start, hdr_len);
2346                skb_shinfo(skb)->nr_frags = 1;
2347                skb_frag_set_page(skb, 0, page_info->page);
2348                skb_frag_off_set(&skb_shinfo(skb)->frags[0],
2349                                 page_info->page_offset + hdr_len);
2350                skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2351                                  curr_frag_len - hdr_len);
2352                skb->data_len = curr_frag_len - hdr_len;
2353                skb->truesize += rx_frag_size;
2354                skb->tail += hdr_len;
2355        }
2356        page_info->page = NULL;
2357
2358        if (rxcp->pkt_size <= rx_frag_size) {
2359                BUG_ON(rxcp->num_rcvd != 1);
2360                return;
2361        }
2362
2363        /* More frags present for this completion */
2364        remaining = rxcp->pkt_size - curr_frag_len;
2365        for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2366                page_info = get_rx_page_info(rxo);
2367                curr_frag_len = min(remaining, rx_frag_size);
2368
2369                /* Coalesce all frags from the same physical page in one slot */
2370                if (page_info->page_offset == 0) {
2371                        /* Fresh page */
2372                        j++;
2373                        skb_frag_set_page(skb, j, page_info->page);
2374                        skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2375                                         page_info->page_offset);
2376                        skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2377                        skb_shinfo(skb)->nr_frags++;
2378                } else {
2379                        put_page(page_info->page);
2380                }
2381
2382                skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2383                skb->len += curr_frag_len;
2384                skb->data_len += curr_frag_len;
2385                skb->truesize += rx_frag_size;
2386                remaining -= curr_frag_len;
2387                page_info->page = NULL;
2388        }
2389        BUG_ON(j > MAX_SKB_FRAGS);
2390}
2391
2392/* Process the RX completion indicated by rxcp when GRO is disabled */
2393static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2394                                struct be_rx_compl_info *rxcp)
2395{
2396        struct be_adapter *adapter = rxo->adapter;
2397        struct net_device *netdev = adapter->netdev;
2398        struct sk_buff *skb;
2399
2400        skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2401        if (unlikely(!skb)) {
2402                rx_stats(rxo)->rx_drops_no_skbs++;
2403                be_rx_compl_discard(rxo, rxcp);
2404                return;
2405        }
2406
2407        skb_fill_rx_data(rxo, skb, rxcp);
2408
2409        if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2410                skb->ip_summed = CHECKSUM_UNNECESSARY;
2411        else
2412                skb_checksum_none_assert(skb);
2413
2414        skb->protocol = eth_type_trans(skb, netdev);
2415        skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2416        if (netdev->features & NETIF_F_RXHASH)
2417                skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2418
2419        skb->csum_level = rxcp->tunneled;
2420        skb_mark_napi_id(skb, napi);
2421
2422        if (rxcp->vlanf)
2423                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2424
2425        netif_receive_skb(skb);
2426}
2427
2428/* Process the RX completion indicated by rxcp when GRO is enabled */
2429static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2430                                    struct napi_struct *napi,
2431                                    struct be_rx_compl_info *rxcp)
2432{
2433        struct be_adapter *adapter = rxo->adapter;
2434        struct be_rx_page_info *page_info;
2435        struct sk_buff *skb = NULL;
2436        u16 remaining, curr_frag_len;
2437        u16 i, j;
2438
2439        skb = napi_get_frags(napi);
2440        if (!skb) {
2441                be_rx_compl_discard(rxo, rxcp);
2442                return;
2443        }
2444
2445        remaining = rxcp->pkt_size;
2446        for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2447                page_info = get_rx_page_info(rxo);
2448
2449                curr_frag_len = min(remaining, rx_frag_size);
2450
2451                /* Coalesce all frags from the same physical page in one slot */
2452                if (i == 0 || page_info->page_offset == 0) {
2453                        /* First frag or Fresh page */
2454                        j++;
2455                        skb_frag_set_page(skb, j, page_info->page);
2456                        skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2457                                         page_info->page_offset);
2458                        skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2459                } else {
2460                        put_page(page_info->page);
2461                }
2462                skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2463                skb->truesize += rx_frag_size;
2464                remaining -= curr_frag_len;
2465                memset(page_info, 0, sizeof(*page_info));
2466        }
2467        BUG_ON(j > MAX_SKB_FRAGS);
2468
2469        skb_shinfo(skb)->nr_frags = j + 1;
2470        skb->len = rxcp->pkt_size;
2471        skb->data_len = rxcp->pkt_size;
2472        skb->ip_summed = CHECKSUM_UNNECESSARY;
2473        skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2474        if (adapter->netdev->features & NETIF_F_RXHASH)
2475                skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2476
2477        skb->csum_level = rxcp->tunneled;
2478
2479        if (rxcp->vlanf)
2480                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2481
2482        napi_gro_frags(napi);
2483}
2484
2485static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2486                                 struct be_rx_compl_info *rxcp)
2487{
2488        rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2489        rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2490        rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2491        rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2492        rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2493        rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2494        rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2495        rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2496        rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2497        rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2498        rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2499        if (rxcp->vlanf) {
2500                rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2501                rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2502        }
2503        rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2504        rxcp->tunneled =
2505                GET_RX_COMPL_V1_BITS(tunneled, compl);
2506}
2507
2508static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2509                                 struct be_rx_compl_info *rxcp)
2510{
2511        rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2512        rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2513        rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2514        rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2515        rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2516        rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2517        rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2518        rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2519        rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2520        rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2521        rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2522        if (rxcp->vlanf) {
2523                rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2524                rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2525        }
2526        rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2527        rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2528}
2529
2530static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2531{
2532        struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2533        struct be_rx_compl_info *rxcp = &rxo->rxcp;
2534        struct be_adapter *adapter = rxo->adapter;
2535
2536        /* For checking the valid bit it is OK to use either definition, as
2537         * the valid bit is at the same position in both v0 and v1 Rx compls */
2538        if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2539                return NULL;
2540
2541        rmb();
2542        be_dws_le_to_cpu(compl, sizeof(*compl));
2543
2544        if (adapter->be3_native)
2545                be_parse_rx_compl_v1(compl, rxcp);
2546        else
2547                be_parse_rx_compl_v0(compl, rxcp);
2548
2549        if (rxcp->ip_frag)
2550                rxcp->l4_csum = 0;
2551
2552        if (rxcp->vlanf) {
2553                /* In QNQ modes, if qnq bit is not set, then the packet was
2554                 * tagged only with the transparent outer vlan-tag and must
2555                 * not be treated as a vlan packet by host
2556                 */
2557                if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2558                        rxcp->vlanf = 0;
2559
2560                if (!lancer_chip(adapter))
2561                        rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2562
2563                if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2564                    !test_bit(rxcp->vlan_tag, adapter->vids))
2565                        rxcp->vlanf = 0;
2566        }
2567
2568        /* As the compl has been parsed, reset it; we won't touch it again */
2569        compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2570
2571        queue_tail_inc(&rxo->cq);
2572        return rxcp;
2573}
2574
2575static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2576{
2577        u32 order = get_order(size);
2578
2579        if (order > 0)
2580                gfp |= __GFP_COMP;
2581        return alloc_pages(gfp, order);
2582}
2583
2584/*
2585 * Allocate a page, split it to fragments of size rx_frag_size and post as
2586 * receive buffers to BE
2587 */
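    /* Only the first frag carved out of a page does the dma_map_page(); later
     * frags in the same page just take a page reference and a larger offset.
     * The frag that uses the last chunk of a page is tagged last_frag and
     * stores the page-wide DMA address, which tells get_rx_page_info() when to
     * unmap the whole page vs. only dma_sync a single frag.
     */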
2588static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2589{
2590        struct be_adapter *adapter = rxo->adapter;
2591        struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2592        struct be_queue_info *rxq = &rxo->q;
2593        struct page *pagep = NULL;
2594        struct device *dev = &adapter->pdev->dev;
2595        struct be_eth_rx_d *rxd;
2596        u64 page_dmaaddr = 0, frag_dmaaddr;
2597        u32 posted, page_offset = 0, notify = 0;
2598
2599        page_info = &rxo->page_info_tbl[rxq->head];
2600        for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2601                if (!pagep) {
2602                        pagep = be_alloc_pages(adapter->big_page_size, gfp);
2603                        if (unlikely(!pagep)) {
2604                                rx_stats(rxo)->rx_post_fail++;
2605                                break;
2606                        }
2607                        page_dmaaddr = dma_map_page(dev, pagep, 0,
2608                                                    adapter->big_page_size,
2609                                                    DMA_FROM_DEVICE);
2610                        if (dma_mapping_error(dev, page_dmaaddr)) {
2611                                put_page(pagep);
2612                                pagep = NULL;
2613                                adapter->drv_stats.dma_map_errors++;
2614                                break;
2615                        }
2616                        page_offset = 0;
2617                } else {
2618                        get_page(pagep);
2619                        page_offset += rx_frag_size;
2620                }
2621                page_info->page_offset = page_offset;
2622                page_info->page = pagep;
2623
2624                rxd = queue_head_node(rxq);
2625                frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2626                rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2627                rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2628
2629                /* Any space left in the current big page for another frag? */
2630                if ((page_offset + rx_frag_size + rx_frag_size) >
2631                                        adapter->big_page_size) {
2632                        pagep = NULL;
2633                        page_info->last_frag = true;
2634                        dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2635                } else {
2636                        dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2637                }
2638
2639                prev_page_info = page_info;
2640                queue_head_inc(rxq);
2641                page_info = &rxo->page_info_tbl[rxq->head];
2642        }
2643
2644        /* Mark the last frag of a page when we break out of the above loop
2645         * with no more slots available in the RXQ
2646         */
2647        if (pagep) {
2648                prev_page_info->last_frag = true;
2649                dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2650        }
2651
2652        if (posted) {
2653                atomic_add(posted, &rxq->used);
2654                if (rxo->rx_post_starved)
2655                        rxo->rx_post_starved = false;
2656                do {
2657                        notify = min(MAX_NUM_POST_ERX_DB, posted);
2658                        be_rxq_notify(adapter, rxq->id, notify);
2659                        posted -= notify;
2660                } while (posted);
2661        } else if (atomic_read(&rxq->used) == 0) {
2662                /* Let be_worker replenish when memory is available */
2663                rxo->rx_post_starved = true;
2664        }
2665}
2666
2667static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2668{
2669        switch (status) {
2670        case BE_TX_COMP_HDR_PARSE_ERR:
2671                tx_stats(txo)->tx_hdr_parse_err++;
2672                break;
2673        case BE_TX_COMP_NDMA_ERR:
2674                tx_stats(txo)->tx_dma_err++;
2675                break;
2676        case BE_TX_COMP_ACL_ERR:
2677                tx_stats(txo)->tx_spoof_check_err++;
2678                break;
2679        }
2680}
2681
2682static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2683{
2684        switch (status) {
2685        case LANCER_TX_COMP_LSO_ERR:
2686                tx_stats(txo)->tx_tso_err++;
2687                break;
2688        case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2689        case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2690                tx_stats(txo)->tx_spoof_check_err++;
2691                break;
2692        case LANCER_TX_COMP_QINQ_ERR:
2693                tx_stats(txo)->tx_qinq_err++;
2694                break;
2695        case LANCER_TX_COMP_PARITY_ERR:
2696                tx_stats(txo)->tx_internal_parity_err++;
2697                break;
2698        case LANCER_TX_COMP_DMA_ERR:
2699                tx_stats(txo)->tx_dma_err++;
2700                break;
2701        case LANCER_TX_COMP_SGE_ERR:
2702                tx_stats(txo)->tx_sge_err++;
2703                break;
2704        }
2705}
2706
2707static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2708                                                struct be_tx_obj *txo)
2709{
2710        struct be_queue_info *tx_cq = &txo->cq;
2711        struct be_tx_compl_info *txcp = &txo->txcp;
2712        struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2713
2714        if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2715                return NULL;
2716
2717        /* Ensure load ordering of valid bit dword and other dwords below */
2718        rmb();
2719        be_dws_le_to_cpu(compl, sizeof(*compl));
2720
2721        txcp->status = GET_TX_COMPL_BITS(status, compl);
2722        txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2723
2724        if (txcp->status) {
2725                if (lancer_chip(adapter)) {
2726                        lancer_update_tx_err(txo, txcp->status);
2727                        /* Reset the adapter in case of TSO,
2728                         * SGE or parity errors
2729                         */
2730                        if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2731                            txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2732                            txcp->status == LANCER_TX_COMP_SGE_ERR)
2733                                be_set_error(adapter, BE_ERROR_TX);
2734                } else {
2735                        be_update_tx_err(txo, txcp->status);
2736                }
2737        }
2738
2739        if (be_check_error(adapter, BE_ERROR_TX))
2740                return NULL;
2741
2742        compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2743        queue_tail_inc(tx_cq);
2744        return txcp;
2745}
2746
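    /* Walk the TX ring from its current tail up to last_index: pick up the skb
     * stored at each request's header-WRB slot, unmap the header/frag WRBs and
     * free the skb. Returns the number of WRBs consumed so the caller can
     * credit them back to txq->used.
     */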
2747static u16 be_tx_compl_process(struct be_adapter *adapter,
2748                               struct be_tx_obj *txo, u16 last_index)
2749{
2750        struct sk_buff **sent_skbs = txo->sent_skb_list;
2751        struct be_queue_info *txq = &txo->q;
2752        struct sk_buff *skb = NULL;
2753        bool unmap_skb_hdr = false;
2754        struct be_eth_wrb *wrb;
2755        u16 num_wrbs = 0;
2756        u32 frag_index;
2757
2758        do {
2759                if (sent_skbs[txq->tail]) {
2760                        /* Free skb from prev req */
2761                        if (skb)
2762                                dev_consume_skb_any(skb);
2763                        skb = sent_skbs[txq->tail];
2764                        sent_skbs[txq->tail] = NULL;
2765                        queue_tail_inc(txq);  /* skip hdr wrb */
2766                        num_wrbs++;
2767                        unmap_skb_hdr = true;
2768                }
2769                wrb = queue_tail_node(txq);
2770                frag_index = txq->tail;
2771                unmap_tx_frag(&adapter->pdev->dev, wrb,
2772                              (unmap_skb_hdr && skb_headlen(skb)));
2773                unmap_skb_hdr = false;
2774                queue_tail_inc(txq);
2775                num_wrbs++;
2776        } while (frag_index != last_index);
2777        dev_consume_skb_any(skb);
2778
2779        return num_wrbs;
2780}
2781
2782/* Drain the event queue and return the number of events consumed */
2783static inline int events_get(struct be_eq_obj *eqo)
2784{
2785        struct be_eq_entry *eqe;
2786        int num = 0;
2787
2788        do {
2789                eqe = queue_tail_node(&eqo->q);
2790                if (eqe->evt == 0)
2791                        break;
2792
2793                rmb();
2794                eqe->evt = 0;
2795                num++;
2796                queue_tail_inc(&eqo->q);
2797        } while (true);
2798
2799        return num;
2800}
2801
2802/* Leaves the EQ in a disarmed state */
2803static void be_eq_clean(struct be_eq_obj *eqo)
2804{
2805        int num = events_get(eqo);
2806
2807        be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2808}
2809
2810/* Free posted rx buffers that were not used */
2811static void be_rxq_clean(struct be_rx_obj *rxo)
2812{
2813        struct be_queue_info *rxq = &rxo->q;
2814        struct be_rx_page_info *page_info;
2815
2816        while (atomic_read(&rxq->used) > 0) {
2817                page_info = get_rx_page_info(rxo);
2818                put_page(page_info->page);
2819                memset(page_info, 0, sizeof(*page_info));
2820        }
2821        BUG_ON(atomic_read(&rxq->used));
2822        rxq->tail = 0;
2823        rxq->head = 0;
2824}
2825
2826static void be_rx_cq_clean(struct be_rx_obj *rxo)
2827{
2828        struct be_queue_info *rx_cq = &rxo->cq;
2829        struct be_rx_compl_info *rxcp;
2830        struct be_adapter *adapter = rxo->adapter;
2831        int flush_wait = 0;
2832
2833        /* Consume pending rx completions.
2834         * Wait for the flush completion (identified by zero num_rcvd)
2835         * to arrive. Notify CQ even when there are no more CQ entries
2836         * for HW to flush partially coalesced CQ entries.
2837         * In Lancer, there is no need to wait for flush compl.
2838         */
2839        for (;;) {
2840                rxcp = be_rx_compl_get(rxo);
2841                if (!rxcp) {
2842                        if (lancer_chip(adapter))
2843                                break;
2844
2845                        if (flush_wait++ > 50 ||
2846                            be_check_error(adapter,
2847                                           BE_ERROR_HW)) {
2848                                dev_warn(&adapter->pdev->dev,
2849                                         "did not receive flush compl\n");
2850                                break;
2851                        }
2852                        be_cq_notify(adapter, rx_cq->id, true, 0);
2853                        mdelay(1);
2854                } else {
2855                        be_rx_compl_discard(rxo, rxcp);
2856                        be_cq_notify(adapter, rx_cq->id, false, 1);
2857                        if (rxcp->num_rcvd == 0)
2858                                break;
2859                }
2860        }
2861
2862        /* After cleanup, leave the CQ in unarmed state */
2863        be_cq_notify(adapter, rx_cq->id, false, 0);
2864}
2865
2866static void be_tx_compl_clean(struct be_adapter *adapter)
2867{
2868        struct device *dev = &adapter->pdev->dev;
2869        u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2870        struct be_tx_compl_info *txcp;
2871        struct be_queue_info *txq;
2872        u32 end_idx, notified_idx;
2873        struct be_tx_obj *txo;
2874        int i, pending_txqs;
2875
2876        /* Stop polling for compls when HW has been silent for 10ms */
2877        do {
2878                pending_txqs = adapter->num_tx_qs;
2879
2880                for_all_tx_queues(adapter, txo, i) {
2881                        cmpl = 0;
2882                        num_wrbs = 0;
2883                        txq = &txo->q;
2884                        while ((txcp = be_tx_compl_get(adapter, txo))) {
2885                                num_wrbs +=
2886                                        be_tx_compl_process(adapter, txo,
2887                                                            txcp->end_index);
2888                                cmpl++;
2889                        }
2890                        if (cmpl) {
2891                                be_cq_notify(adapter, txo->cq.id, false, cmpl);
2892                                atomic_sub(num_wrbs, &txq->used);
2893                                timeo = 0;
2894                        }
2895                        if (!be_is_tx_compl_pending(txo))
2896                                pending_txqs--;
2897                }
2898
2899                if (pending_txqs == 0 || ++timeo > 10 ||
2900                    be_check_error(adapter, BE_ERROR_HW))
2901                        break;
2902
2903                mdelay(1);
2904        } while (true);
2905
2906        /* Free enqueued TX that was never notified to HW */
2907        for_all_tx_queues(adapter, txo, i) {
2908                txq = &txo->q;
2909
2910                if (atomic_read(&txq->used)) {
2911                        dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2912                                 i, atomic_read(&txq->used));
2913                        notified_idx = txq->tail;
2914                        end_idx = txq->tail;
2915                        index_adv(&end_idx, atomic_read(&txq->used) - 1,
2916                                  txq->len);
2917                        /* Use the tx-compl process logic to handle requests
2918                         * that were not sent to the HW.
2919                         */
2920                        num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2921                        atomic_sub(num_wrbs, &txq->used);
2922                        BUG_ON(atomic_read(&txq->used));
2923                        txo->pend_wrb_cnt = 0;
2924                        /* Since hw was never notified of these requests,
2925                         * reset TXQ indices
2926                         */
2927                        txq->head = notified_idx;
2928                        txq->tail = notified_idx;
2929                }
2930        }
2931}
2932
2933static void be_evt_queues_destroy(struct be_adapter *adapter)
2934{
2935        struct be_eq_obj *eqo;
2936        int i;
2937
2938        for_all_evt_queues(adapter, eqo, i) {
2939                if (eqo->q.created) {
2940                        be_eq_clean(eqo);
2941                        be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2942                        netif_napi_del(&eqo->napi);
2943                        free_cpumask_var(eqo->affinity_mask);
2944                }
2945                be_queue_free(adapter, &eqo->q);
2946        }
2947}
2948
2949static int be_evt_queues_create(struct be_adapter *adapter)
2950{
2951        struct be_queue_info *eq;
2952        struct be_eq_obj *eqo;
2953        struct be_aic_obj *aic;
2954        int i, rc;
2955
2956        /* need enough EQs to service both RX and TX queues */
2957        adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2958                                    max(adapter->cfg_num_rx_irqs,
2959                                        adapter->cfg_num_tx_irqs));
2960
2961        adapter->aic_enabled = true;
2962
2963        for_all_evt_queues(adapter, eqo, i) {
2964                int numa_node = dev_to_node(&adapter->pdev->dev);
2965
2966                aic = &adapter->aic_obj[i];
2967                eqo->adapter = adapter;
2968                eqo->idx = i;
2969                aic->max_eqd = BE_MAX_EQD;
2970
2971                eq = &eqo->q;
2972                rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2973                                    sizeof(struct be_eq_entry));
2974                if (rc)
2975                        return rc;
2976
2977                rc = be_cmd_eq_create(adapter, eqo);
2978                if (rc)
2979                        return rc;
2980
2981                if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2982                        return -ENOMEM;
2983                cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2984                                eqo->affinity_mask);
2985                netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2986                               BE_NAPI_WEIGHT);
2987        }
2988        return 0;
2989}
2990
2991static void be_mcc_queues_destroy(struct be_adapter *adapter)
2992{
2993        struct be_queue_info *q;
2994
2995        q = &adapter->mcc_obj.q;
2996        if (q->created)
2997                be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2998        be_queue_free(adapter, q);
2999
3000        q = &adapter->mcc_obj.cq;
3001        if (q->created)
3002                be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3003        be_queue_free(adapter, q);
3004}
3005
3006/* Must be called only after TX qs are created as MCC shares TX EQ */
3007static int be_mcc_queues_create(struct be_adapter *adapter)
3008{
3009        struct be_queue_info *q, *cq;
3010
3011        cq = &adapter->mcc_obj.cq;
3012        if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3013                           sizeof(struct be_mcc_compl)))
3014                goto err;
3015
3016        /* Use the default EQ for MCC completions */
3017        if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3018                goto mcc_cq_free;
3019
3020        q = &adapter->mcc_obj.q;
3021        if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3022                goto mcc_cq_destroy;
3023
3024        if (be_cmd_mccq_create(adapter, q, cq))
3025                goto mcc_q_free;
3026
3027        return 0;
3028
3029mcc_q_free:
3030        be_queue_free(adapter, q);
3031mcc_cq_destroy:
3032        be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3033mcc_cq_free:
3034        be_queue_free(adapter, cq);
3035err:
3036        return -1;
3037}
3038
3039static void be_tx_queues_destroy(struct be_adapter *adapter)
3040{
3041        struct be_queue_info *q;
3042        struct be_tx_obj *txo;
3043        u8 i;
3044
3045        for_all_tx_queues(adapter, txo, i) {
3046                q = &txo->q;
3047                if (q->created)
3048                        be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3049                be_queue_free(adapter, q);
3050
3051                q = &txo->cq;
3052                if (q->created)
3053                        be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3054                be_queue_free(adapter, q);
3055        }
3056}
3057
3058static int be_tx_qs_create(struct be_adapter *adapter)
3059{
3060        struct be_queue_info *cq;
3061        struct be_tx_obj *txo;
3062        struct be_eq_obj *eqo;
3063        int status, i;
3064
3065        adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3066
3067        for_all_tx_queues(adapter, txo, i) {
3068                cq = &txo->cq;
3069                status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3070                                        sizeof(struct be_eth_tx_compl));
3071                if (status)
3072                        return status;
3073
3074                u64_stats_init(&txo->stats.sync);
3075                u64_stats_init(&txo->stats.sync_compl);
3076
3077                /* If num_evt_qs is less than num_tx_qs, then more than
3078                 * one txq shares an eq
3079                 */
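                /* For example, with 8 TX queues and 4 EQs, the CQs of txq0 and
                 * txq4 are both bound to eq0 (i % num_evt_qs below).
                 */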
3080                eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3081                status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3082                if (status)
3083                        return status;
3084
3085                status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3086                                        sizeof(struct be_eth_wrb));
3087                if (status)
3088                        return status;
3089
3090                status = be_cmd_txq_create(adapter, txo);
3091                if (status)
3092                        return status;
3093
3094                netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3095                                    eqo->idx);
3096        }
3097
3098        dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3099                 adapter->num_tx_qs);
3100        return 0;
3101}
3102
3103static void be_rx_cqs_destroy(struct be_adapter *adapter)
3104{
3105        struct be_queue_info *q;
3106        struct be_rx_obj *rxo;
3107        int i;
3108
3109        for_all_rx_queues(adapter, rxo, i) {
3110                q = &rxo->cq;
3111                if (q->created)
3112                        be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3113                be_queue_free(adapter, q);
3114        }
3115}
3116
3117static int be_rx_cqs_create(struct be_adapter *adapter)
3118{
3119        struct be_queue_info *eq, *cq;
3120        struct be_rx_obj *rxo;
3121        int rc, i;
3122
3123        adapter->num_rss_qs =
3124                        min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3125
3126        /* We'll use RSS only if at least 2 RSS rings are supported. */
3127        if (adapter->num_rss_qs < 2)
3128                adapter->num_rss_qs = 0;
3129
3130        adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3131
3132        /* When the interface is not capable of RSS rings (and there is no
3133         * need to create a default RXQ) we'll still need one RXQ
3134         */
3135        if (adapter->num_rx_qs == 0)
3136                adapter->num_rx_qs = 1;
3137
3138        adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3139        for_all_rx_queues(adapter, rxo, i) {
3140                rxo->adapter = adapter;
3141                cq = &rxo->cq;
3142                rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3143                                    sizeof(struct be_eth_rx_compl));
3144                if (rc)
3145                        return rc;
3146
3147                u64_stats_init(&rxo->stats.sync);
3148                eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3149                rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3150                if (rc)
3151                        return rc;
3152        }
3153
3154        dev_info(&adapter->pdev->dev,
3155                 "created %d RX queue(s)\n", adapter->num_rx_qs);
3156        return 0;
3157}
3158
3159static irqreturn_t be_intx(int irq, void *dev)
3160{
3161        struct be_eq_obj *eqo = dev;
3162        struct be_adapter *adapter = eqo->adapter;
3163        int num_evts = 0;
3164
3165        /* IRQ is not expected when NAPI is scheduled as the EQ
3166         * will not be armed.
3167         * But, this can happen on Lancer INTx where it takes
3168         * a while to de-assert INTx or in BE2 where occasionally
3169         * an interrupt may be raised even when EQ is unarmed.
3170         * If NAPI is already scheduled, then counting & notifying
3171         * events will orphan them.
3172         */
3173        if (napi_schedule_prep(&eqo->napi)) {
3174                num_evts = events_get(eqo);
3175                __napi_schedule(&eqo->napi);
3176                if (num_evts)
3177                        eqo->spurious_intr = 0;
3178        }
3179        be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3180
3181        /* Return IRQ_HANDLED only for the first spurious intr
3182         * after a valid intr to stop the kernel from branding
3183         * this irq as a bad one!
3184         */
3185        if (num_evts || eqo->spurious_intr++ == 0)
3186                return IRQ_HANDLED;
3187        else
3188                return IRQ_NONE;
3189}
3190
3191static irqreturn_t be_msix(int irq, void *dev)
3192{
3193        struct be_eq_obj *eqo = dev;
3194
3195        be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3196        napi_schedule(&eqo->napi);
3197        return IRQ_HANDLED;
3198}
3199
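/* GRO only error-free TCP frames whose L4 checksum was validated by HW */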
3200static inline bool do_gro(struct be_rx_compl_info *rxcp)
3201{
3202        return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3203}
3204
3205static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3206                         int budget)
3207{
3208        struct be_adapter *adapter = rxo->adapter;
3209        struct be_queue_info *rx_cq = &rxo->cq;
3210        struct be_rx_compl_info *rxcp;
3211        u32 work_done;
3212        u32 frags_consumed = 0;
3213
3214        for (work_done = 0; work_done < budget; work_done++) {
3215                rxcp = be_rx_compl_get(rxo);
3216                if (!rxcp)
3217                        break;
3218
3219                /* Is it a flush compl that has no data */
3220                if (unlikely(rxcp->num_rcvd == 0))
3221                        goto loop_continue;
3222
3223                /* Discard compl with partial DMA Lancer B0 */
3224                if (unlikely(!rxcp->pkt_size)) {
3225                        be_rx_compl_discard(rxo, rxcp);
3226                        goto loop_continue;
3227                }
3228
3229                /* On BE, drop pkts that arrive due to imperfect filtering in
3230                 * promiscuous mode on some SKUs
3231                 */
3232                if (unlikely(rxcp->port != adapter->port_num &&
3233                             !lancer_chip(adapter))) {
3234                        be_rx_compl_discard(rxo, rxcp);
3235                        goto loop_continue;
3236                }
3237
3238                if (do_gro(rxcp))
3239                        be_rx_compl_process_gro(rxo, napi, rxcp);
3240                else
3241                        be_rx_compl_process(rxo, napi, rxcp);
3242
3243loop_continue:
3244                frags_consumed += rxcp->num_rcvd;
3245                be_rx_stats_update(rxo, rxcp);
3246        }
3247
3248        if (work_done) {
3249                be_cq_notify(adapter, rx_cq->id, true, work_done);
3250
3251                /* When an rx-obj gets into post_starved state, just
3252                 * let be_worker do the posting.
3253                 */
3254                if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3255                    !rxo->rx_post_starved)
3256                        be_post_rx_frags(rxo, GFP_ATOMIC,
3257                                         max_t(u32, MAX_RX_POST,
3258                                               frags_consumed));
3259        }
3260
3261        return work_done;
3262}
3263
3264
3265static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3266                          int idx)
3267{
3268        int num_wrbs = 0, work_done = 0;
3269        struct be_tx_compl_info *txcp;
3270
3271        while ((txcp = be_tx_compl_get(adapter, txo))) {
3272                num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3273                work_done++;
3274        }
3275
3276        if (work_done) {
3277                be_cq_notify(adapter, txo->cq.id, true, work_done);
3278                atomic_sub(num_wrbs, &txo->q.used);
3279
3280                /* As Tx wrbs have been freed up, wake up netdev queue
3281                 * if it was stopped due to lack of tx wrbs.  */
3282                if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3283                    be_can_txq_wake(txo)) {
3284                        netif_wake_subqueue(adapter->netdev, idx);
3285                }
3286
3287                u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3288                tx_stats(txo)->tx_compl += work_done;
3289                u64_stats_update_end(&tx_stats(txo)->sync_compl);
3290        }
3291}
3292
3293int be_poll(struct napi_struct *napi, int budget)
3294{
3295        struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3296        struct be_adapter *adapter = eqo->adapter;
3297        int max_work = 0, work, i, num_evts;
3298        struct be_rx_obj *rxo;
3299        struct be_tx_obj *txo;
3300        u32 mult_enc = 0;
3301
3302        num_evts = events_get(eqo);
3303
3304        for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3305                be_process_tx(adapter, txo, i);
3306
3307        /* This loop will iterate twice for EQ0, in which
3308         * completions of the last RXQ (the default one) are also processed.
3309         * For other EQs the loop iterates only once.
3310         */
3311        for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3312                work = be_process_rx(rxo, napi, budget);
3313                max_work = max(work, max_work);
3314        }
3315
3316        if (is_mcc_eqo(eqo))
3317                be_process_mcc(adapter);
3318
3319        if (max_work < budget) {
3320                napi_complete_done(napi, max_work);
3321
3322                /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3323                 * delay via a delay multiplier encoding value
3324                 */
3325                if (skyhawk_chip(adapter))
3326                        mult_enc = be_get_eq_delay_mult_enc(eqo);
3327
3328                be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3329                             mult_enc);
3330        } else {
3331                /* As we'll continue in polling mode, count and clear events */
3332                be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3333        }
3334        return max_work;
3335}
3336
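/* Checks the SLIPORT status registers (Lancer) or the unmasked UE status
 * bits (BE2/BE3/Skyhawk) and latches BE_ERROR_UE when an unrecoverable
 * error is found, logging the offending bits.
 */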
3337void be_detect_error(struct be_adapter *adapter)
3338{
3339        u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3340        u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3341        struct device *dev = &adapter->pdev->dev;
3342        u16 val;
3343        u32 i;
3344
3345        if (be_check_error(adapter, BE_ERROR_HW))
3346                return;
3347
3348        if (lancer_chip(adapter)) {
3349                sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3350                if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3351                        be_set_error(adapter, BE_ERROR_UE);
3352                        sliport_err1 = ioread32(adapter->db +
3353                                                SLIPORT_ERROR1_OFFSET);
3354                        sliport_err2 = ioread32(adapter->db +
3355                                                SLIPORT_ERROR2_OFFSET);
3356                        /* Do not log error messages if it's a FW reset */
3357                        if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3358                            sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3359                                dev_info(dev, "Reset is in progress\n");
3360                        } else {
3361                                dev_err(dev, "Error detected in the card\n");
3362                                dev_err(dev, "ERR: sliport status 0x%x\n",
3363                                        sliport_status);
3364                                dev_err(dev, "ERR: sliport error1 0x%x\n",
3365                                        sliport_err1);
3366                                dev_err(dev, "ERR: sliport error2 0x%x\n",
3367                                        sliport_err2);
3368                        }
3369                }
3370        } else {
3371                ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3372                ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3373                ue_lo_mask = ioread32(adapter->pcicfg +
3374                                      PCICFG_UE_STATUS_LOW_MASK);
3375                ue_hi_mask = ioread32(adapter->pcicfg +
3376                                      PCICFG_UE_STATUS_HI_MASK);
3377
3378                ue_lo = (ue_lo & ~ue_lo_mask);
3379                ue_hi = (ue_hi & ~ue_hi_mask);
3380
3381                if (ue_lo || ue_hi) {
3382                        /* On certain platforms BE3 hardware can indicate
3383                         * spurious UEs. In case of a UE in the chip,
3384                         * the POST register correctly reports either a
3385                         * FAT_LOG_START state (FW is currently dumping
3386                         * FAT log data) or an ARMFW_UE state. Check for the
3387                         * above states to ascertain if the UE is valid or not.
3388                         */
3389                        if (BE3_chip(adapter)) {
3390                                val = be_POST_stage_get(adapter);
3391                                if ((val & POST_STAGE_FAT_LOG_START)
3392                                     != POST_STAGE_FAT_LOG_START &&
3393                                    (val & POST_STAGE_ARMFW_UE)
3394                                     != POST_STAGE_ARMFW_UE &&
3395                                    (val & POST_STAGE_RECOVERABLE_ERR)
3396                                     != POST_STAGE_RECOVERABLE_ERR)
3397                                        return;
3398                        }
3399
3400                        dev_err(dev, "Error detected in the adapter\n");
3401                        be_set_error(adapter, BE_ERROR_UE);
3402
3403                        for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3404                                if (ue_lo & 1)
3405                                        dev_err(dev, "UE: %s bit set\n",
3406                                                ue_status_low_desc[i]);
3407                        }
3408                        for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3409                                if (ue_hi & 1)
3410                                        dev_err(dev, "UE: %s bit set\n",
3411                                                ue_status_hi_desc[i]);
3412                        }
3413                }
3414        }
3415}
3416
3417static void be_msix_disable(struct be_adapter *adapter)
3418{
3419        if (msix_enabled(adapter)) {
3420                pci_disable_msix(adapter->pdev);
3421                adapter->num_msix_vec = 0;
3422                adapter->num_msix_roce_vec = 0;
3423        }
3424}
3425
3426static int be_msix_enable(struct be_adapter *adapter)
3427{
3428        unsigned int i, max_roce_eqs;
3429        struct device *dev = &adapter->pdev->dev;
3430        int num_vec;
3431
3432        /* If RoCE is supported, program the max number of vectors that
3433         * could be used for NIC and RoCE, else, just program the number
3434         * we'll use initially.
3435         */
3436        if (be_roce_supported(adapter)) {
3437                max_roce_eqs =
3438                        be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3439                max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3440                num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3441        } else {
3442                num_vec = max(adapter->cfg_num_rx_irqs,
3443                              adapter->cfg_num_tx_irqs);
3444        }
3445
3446        for (i = 0; i < num_vec; i++)
3447                adapter->msix_entries[i].entry = i;
3448
3449        num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3450                                        MIN_MSIX_VECTORS, num_vec);
3451        if (num_vec < 0)
3452                goto fail;
3453
3454        if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3455                adapter->num_msix_roce_vec = num_vec / 2;
3456                dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3457                         adapter->num_msix_roce_vec);
3458        }
3459
3460        adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3461
3462        dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3463                 adapter->num_msix_vec);
3464        return 0;
3465
3466fail:
3467        dev_warn(dev, "MSIx enable failed\n");
3468
3469        /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3470        if (be_virtfn(adapter))
3471                return num_vec;
3472        return 0;
3473}
3474
3475static inline int be_msix_vec_get(struct be_adapter *adapter,
3476                                  struct be_eq_obj *eqo)
3477{
3478        return adapter->msix_entries[eqo->msix_idx].vector;
3479}
3480
3481static int be_msix_register(struct be_adapter *adapter)
3482{
3483        struct net_device *netdev = adapter->netdev;
3484        struct be_eq_obj *eqo;
3485        int status, i, vec;
3486
3487        for_all_evt_queues(adapter, eqo, i) {
3488                sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3489                vec = be_msix_vec_get(adapter, eqo);
3490                status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3491                if (status)
3492                        goto err_msix;
3493
3494                irq_set_affinity_hint(vec, eqo->affinity_mask);
3495        }
3496
3497        return 0;
3498err_msix:
3499        for (i--; i >= 0; i--) {
3500                eqo = &adapter->eq_obj[i];
3501                free_irq(be_msix_vec_get(adapter, eqo), eqo);
3502        }
3503        dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3504                 status);
3505        be_msix_disable(adapter);
3506        return status;
3507}
3508
3509static int be_irq_register(struct be_adapter *adapter)
3510{
3511        struct net_device *netdev = adapter->netdev;
3512        int status;
3513
3514        if (msix_enabled(adapter)) {
3515                status = be_msix_register(adapter);
3516                if (status == 0)
3517                        goto done;
3518                /* INTx is not supported for VF */
3519                if (be_virtfn(adapter))
3520                        return status;
3521        }
3522
3523        /* INTx: only the first EQ is used */
3524        netdev->irq = adapter->pdev->irq;
3525        status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3526                             &adapter->eq_obj[0]);
3527        if (status) {
3528                dev_err(&adapter->pdev->dev,
3529                        "INTx request IRQ failed - err %d\n", status);
3530                return status;
3531        }
3532done:
3533        adapter->isr_registered = true;
3534        return 0;
3535}
3536
3537static void be_irq_unregister(struct be_adapter *adapter)
3538{
3539        struct net_device *netdev = adapter->netdev;
3540        struct be_eq_obj *eqo;
3541        int i, vec;
3542
3543        if (!adapter->isr_registered)
3544                return;
3545
3546        /* INTx */
3547        if (!msix_enabled(adapter)) {
3548                free_irq(netdev->irq, &adapter->eq_obj[0]);
3549                goto done;
3550        }
3551
3552        /* MSIx */
3553        for_all_evt_queues(adapter, eqo, i) {
3554                vec = be_msix_vec_get(adapter, eqo);
3555                irq_set_affinity_hint(vec, NULL);
3556                free_irq(vec, eqo);
3557        }
3558
3559done:
3560        adapter->isr_registered = false;
3561}
3562
3563static void be_rx_qs_destroy(struct be_adapter *adapter)
3564{
3565        struct rss_info *rss = &adapter->rss_info;
3566        struct be_queue_info *q;
3567        struct be_rx_obj *rxo;
3568        int i;
3569
3570        for_all_rx_queues(adapter, rxo, i) {
3571                q = &rxo->q;
3572                if (q->created) {
3573                        /* If RXQs are destroyed while in an "out of buffer"
3574                         * state, there is a possibility of an HW stall on
3575                         * Lancer. So, post 64 buffers to each queue to relieve
3576                         * the "out of buffer" condition.
3577                         * Make sure there's space in the RXQ before posting.
3578                         */
3579                        if (lancer_chip(adapter)) {
3580                                be_rx_cq_clean(rxo);
3581                                if (atomic_read(&q->used) == 0)
3582                                        be_post_rx_frags(rxo, GFP_KERNEL,
3583                                                         MAX_RX_POST);
3584                        }
3585
3586                        be_cmd_rxq_destroy(adapter, q);
3587                        be_rx_cq_clean(rxo);
3588                        be_rxq_clean(rxo);
3589                }
3590                be_queue_free(adapter, q);
3591        }
3592
3593        if (rss->rss_flags) {
3594                rss->rss_flags = RSS_ENABLE_NONE;
3595                be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3596                                  128, rss->rss_hkey);
3597        }
3598}
3599
3600static void be_disable_if_filters(struct be_adapter *adapter)
3601{
3602        /* Don't delete MAC on BE3 VFs without FILTMGMT privilege */
3603        if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3604            check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3605                be_dev_mac_del(adapter, adapter->pmac_id[0]);
3606                eth_zero_addr(adapter->dev_mac);
3607        }
3608
3609        be_clear_uc_list(adapter);
3610        be_clear_mc_list(adapter);
3611
3612        /* The IFACE flags are enabled in the open path and cleared
3613         * in the close path. When a VF gets detached from the host and
3614         * assigned to a VM the following happens:
3615         *      - VF's IFACE flags get cleared in the detach path
3616         *      - IFACE create is issued by the VF in the attach path
3617         * Due to a bug in the BE3/Skyhawk-R FW
3618         * (Lancer FW doesn't have the bug), the IFACE capability flags
3619         * specified along with the IFACE create cmd issued by a VF are not
3620         * honoured by FW.  As a consequence, if a *new* driver
3621         * (that enables/disables IFACE flags in open/close)
3622         * is loaded in the host and an *old* driver is used by a VM/VF,
3623         * the IFACE gets created *without* the needed flags.
3624         * To avoid this, disable RX-filter flags only for Lancer.
3625         */
3626        if (lancer_chip(adapter)) {
3627                be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3628                adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3629        }
3630}
3631
3632static int be_close(struct net_device *netdev)
3633{
3634        struct be_adapter *adapter = netdev_priv(netdev);
3635        struct be_eq_obj *eqo;
3636        int i;
3637
3638        /* This protection is needed as be_close() may be called even when the
3639         * adapter is in cleared state (after eeh perm failure)
3640         */
3641        if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3642                return 0;
3643
3644        /* Before attempting cleanup ensure all the pending cmds in the
3645         * config_wq have finished execution
3646         */
3647        flush_workqueue(be_wq);
3648
3649        be_disable_if_filters(adapter);
3650
3651        if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3652                for_all_evt_queues(adapter, eqo, i) {
3653                        napi_disable(&eqo->napi);
3654                }
3655                adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3656        }
3657
3658        be_async_mcc_disable(adapter);
3659
3660        /* Wait for all pending tx completions to arrive so that
3661         * all tx skbs are freed.
3662         */
3663        netif_tx_disable(netdev);
3664        be_tx_compl_clean(adapter);
3665
3666        be_rx_qs_destroy(adapter);
3667
3668        for_all_evt_queues(adapter, eqo, i) {
3669                if (msix_enabled(adapter))
3670                        synchronize_irq(be_msix_vec_get(adapter, eqo));
3671                else
3672                        synchronize_irq(netdev->irq);
3673                be_eq_clean(eqo);
3674        }
3675
3676        be_irq_unregister(adapter);
3677
3678        return 0;
3679}
3680
3681static int be_rx_qs_create(struct be_adapter *adapter)
3682{
3683        struct rss_info *rss = &adapter->rss_info;
3684        u8 rss_key[RSS_HASH_KEY_LEN];
3685        struct be_rx_obj *rxo;
3686        int rc, i, j;
3687
3688        for_all_rx_queues(adapter, rxo, i) {
3689                rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3690                                    sizeof(struct be_eth_rx_d));
3691                if (rc)
3692                        return rc;
3693        }
3694
3695        if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3696                rxo = default_rxo(adapter);
3697                rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3698                                       rx_frag_size, adapter->if_handle,
3699                                       false, &rxo->rss_id);
3700                if (rc)
3701                        return rc;
3702        }
3703
3704        for_all_rss_queues(adapter, rxo, i) {
3705                rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3706                                       rx_frag_size, adapter->if_handle,
3707                                       true, &rxo->rss_id);
3708                if (rc)
3709                        return rc;
3710        }
3711
3712        if (be_multi_rxq(adapter)) {
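                /* Fill the RSS indirection table round-robin across the RSS
                 * queues so that flows are spread evenly over all rings.
                 */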
3713                for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3714                        for_all_rss_queues(adapter, rxo, i) {
3715                                if ((j + i) >= RSS_INDIR_TABLE_LEN)
3716                                        break;
3717                                rss->rsstable[j + i] = rxo->rss_id;
3718                                rss->rss_queue[j + i] = i;
3719                        }
3720                }
3721                rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3722                        RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3723
3724                if (!BEx_chip(adapter))
3725                        rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3726                                RSS_ENABLE_UDP_IPV6;
3727
3728                netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3729                rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3730                                       RSS_INDIR_TABLE_LEN, rss_key);
3731                if (rc) {
3732                        rss->rss_flags = RSS_ENABLE_NONE;
3733                        return rc;
3734                }
3735
3736                memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3737        } else {
3738                /* Disable RSS if only the default RX Q is created */
3739                rss->rss_flags = RSS_ENABLE_NONE;
3740        }
3741
3742
3743        /* Post 1 less than RXQ-len to avoid head being equal to tail,
3744         * which is a queue empty condition
3745         */
3746        for_all_rx_queues(adapter, rxo, i)
3747                be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3748
3749        return 0;
3750}
3751
3752static int be_enable_if_filters(struct be_adapter *adapter)
3753{
3754        int status;
3755
3756        status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3757        if (status)
3758                return status;
3759
3760        /* Normally this condition is true, as ->dev_mac is zeroed.
3761         * But on BE3 VFs the initial MAC is pre-programmed by PF and
3762         * subsequent be_dev_mac_add() can fail (after fresh boot)
3763         */
3764        if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3765                int old_pmac_id = -1;
3766
3767                /* Remember old programmed MAC if any - can happen on BE3 VF */
3768                if (!is_zero_ether_addr(adapter->dev_mac))
3769                        old_pmac_id = adapter->pmac_id[0];
3770
3771                status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3772                if (status)
3773                        return status;
3774
3775                /* Delete the old programmed MAC as we successfully programmed
3776                 * a new MAC
3777                 */
3778                if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3779                        be_dev_mac_del(adapter, old_pmac_id);
3780
3781                ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3782        }
3783
3784        if (adapter->vlans_added)
3785                be_vid_config(adapter);
3786
3787        __be_set_rx_mode(adapter);
3788
3789        return 0;
3790}
3791
3792static int be_open(struct net_device *netdev)
3793{
3794        struct be_adapter *adapter = netdev_priv(netdev);
3795        struct be_eq_obj *eqo;
3796        struct be_rx_obj *rxo;
3797        struct be_tx_obj *txo;
3798        u8 link_status;
3799        int status, i;
3800
3801        status = be_rx_qs_create(adapter);
3802        if (status)
3803                goto err;
3804
3805        status = be_enable_if_filters(adapter);
3806        if (status)
3807                goto err;
3808
3809        status = be_irq_register(adapter);
3810        if (status)
3811                goto err;
3812
3813        for_all_rx_queues(adapter, rxo, i)
3814                be_cq_notify(adapter, rxo->cq.id, true, 0);
3815
3816        for_all_tx_queues(adapter, txo, i)
3817                be_cq_notify(adapter, txo->cq.id, true, 0);
3818
3819        be_async_mcc_enable(adapter);
3820
3821        for_all_evt_queues(adapter, eqo, i) {
3822                napi_enable(&eqo->napi);
3823                be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3824        }
3825        adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3826
3827        status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3828        if (!status)
3829                be_link_status_update(adapter, link_status);
3830
3831        netif_tx_start_all_queues(netdev);
3832
3833        udp_tunnel_nic_reset_ntf(netdev);
3834
3835        return 0;
3836err:
3837        be_close(adapter->netdev);
3838        return -EIO;
3839}
3840
3841static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3842{
3843        u32 addr;
3844
3845        addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3846
3847        mac[5] = (u8)(addr & 0xFF);
3848        mac[4] = (u8)((addr >> 8) & 0xFF);
3849        mac[3] = (u8)((addr >> 16) & 0xFF);
3850        /* Use the OUI from the current MAC address */
3851        memcpy(mac, adapter->netdev->dev_addr, 3);
3852}
3853
3854/*
3855 * Generate a seed MAC address from the PF MAC Address using jhash.
3856 * MAC addresses for VFs are assigned incrementally starting from the seed.
3857 * These addresses are programmed in the ASIC by the PF and the VF driver
3858 * queries for the MAC address during its probe.
3859 */
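/* Worked example (hypothetical hash value): if the PF MAC is 00:90:fa:aa:bb:cc
 * and jhash() returns 0x00112233, the seed MAC is 00:90:fa:11:22:33; VF0 is
 * assigned ...:33, VF1 gets ...:34, and so on (mac[5] is bumped per VF).
 */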
3860static int be_vf_eth_addr_config(struct be_adapter *adapter)
3861{
3862        u32 vf;
3863        int status = 0;
3864        u8 mac[ETH_ALEN];
3865        struct be_vf_cfg *vf_cfg;
3866
3867        be_vf_eth_addr_generate(adapter, mac);
3868
3869        for_all_vfs(adapter, vf_cfg, vf) {
3870                if (BEx_chip(adapter))
3871                        status = be_cmd_pmac_add(adapter, mac,
3872                                                 vf_cfg->if_handle,
3873                                                 &vf_cfg->pmac_id, vf + 1);
3874                else
3875                        status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3876                                                vf + 1);
3877
3878                if (status)
3879                        dev_err(&adapter->pdev->dev,
3880                                "Mac address assignment failed for VF %d\n",
3881                                vf);
3882                else
3883                        memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3884
3885                mac[5] += 1;
3886        }
3887        return status;
3888}
3889
3890static int be_vfs_mac_query(struct be_adapter *adapter)
3891{
3892        int status, vf;
3893        u8 mac[ETH_ALEN];
3894        struct be_vf_cfg *vf_cfg;
3895
3896        for_all_vfs(adapter, vf_cfg, vf) {
3897                status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3898                                               mac, vf_cfg->if_handle,
3899                                               false, vf+1);
3900                if (status)
3901                        return status;
3902                memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3903        }
3904        return 0;
3905}
3906
3907static void be_vf_clear(struct be_adapter *adapter)
3908{
3909        struct be_vf_cfg *vf_cfg;
3910        u32 vf;
3911
3912        if (pci_vfs_assigned(adapter->pdev)) {
3913                dev_warn(&adapter->pdev->dev,
3914                         "VFs are assigned to VMs: not disabling VFs\n");
3915                goto done;
3916        }
3917
3918        pci_disable_sriov(adapter->pdev);
3919
3920        for_all_vfs(adapter, vf_cfg, vf) {
3921                if (BEx_chip(adapter))
3922                        be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3923                                        vf_cfg->pmac_id, vf + 1);
3924                else
3925                        be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3926                                       vf + 1);
3927
3928                be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3929        }
3930
3931        if (BE3_chip(adapter))
3932                be_cmd_set_hsw_config(adapter, 0, 0,
3933                                      adapter->if_handle,
3934                                      PORT_FWD_TYPE_PASSTHRU, 0);
3935done:
3936        kfree(adapter->vf_cfg);
3937        adapter->num_vfs = 0;
3938        adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3939}
3940
3941static void be_clear_queues(struct be_adapter *adapter)
3942{
3943        be_mcc_queues_destroy(adapter);
3944        be_rx_cqs_destroy(adapter);
3945        be_tx_queues_destroy(adapter);
3946        be_evt_queues_destroy(adapter);
3947}
3948
3949static void be_cancel_worker(struct be_adapter *adapter)
3950{
3951        if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3952                cancel_delayed_work_sync(&adapter->work);
3953                adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3954        }
3955}
3956
3957static void be_cancel_err_detection(struct be_adapter *adapter)
3958{
3959        struct be_error_recovery *err_rec = &adapter->error_recovery;
3960
3961        if (!be_err_recovery_workq)
3962                return;
3963
3964        if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3965                cancel_delayed_work_sync(&err_rec->err_detection_work);
3966                adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3967        }
3968}
3969
3970/* VxLAN offload Notes:
3971 *
3972 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
3973 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
3974 * is expected to work across all types of IP tunnels once exported. Skyhawk
3975 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
3976 * offloads in hw_enc_features only when a VxLAN port is added. If other (non
3977 * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
3978 * those other tunnels are unexported on the fly through ndo_features_check().
3979 */
3980static int be_vxlan_set_port(struct net_device *netdev, unsigned int table,
3981                             unsigned int entry, struct udp_tunnel_info *ti)
3982{
3983        struct be_adapter *adapter = netdev_priv(netdev);
3984        struct device *dev = &adapter->pdev->dev;
3985        int status;
3986
3987        status = be_cmd_manage_iface(adapter, adapter->if_handle,
3988                                     OP_CONVERT_NORMAL_TO_TUNNEL);
3989        if (status) {
3990                dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3991                return status;
3992        }
3993        adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3994
3995        status = be_cmd_set_vxlan_port(adapter, ti->port);
3996        if (status) {
3997                dev_warn(dev, "Failed to add VxLAN port\n");
3998                return status;
3999        }
4000        adapter->vxlan_port = ti->port;
4001
4002        netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4003                                   NETIF_F_TSO | NETIF_F_TSO6 |
4004                                   NETIF_F_GSO_UDP_TUNNEL;
4005
4006        dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4007                 be16_to_cpu(ti->port));
4008        return 0;
4009}
4010
4011static int be_vxlan_unset_port(struct net_device *netdev, unsigned int table,
4012                               unsigned int entry, struct udp_tunnel_info *ti)
4013{
4014        struct be_adapter *adapter = netdev_priv(netdev);
4015
4016        if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4017                be_cmd_manage_iface(adapter, adapter->if_handle,
4018                                    OP_CONVERT_TUNNEL_TO_NORMAL);
4019
4020        if (adapter->vxlan_port)
4021                be_cmd_set_vxlan_port(adapter, 0);
4022
4023        adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4024        adapter->vxlan_port = 0;
4025
4026        netdev->hw_enc_features = 0;
4027        return 0;
4028}
4029
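/* Single-entry VxLAN port table: the core calls set_port/unset_port only
 * while the device is open (OPEN_ONLY) and from sleepable context (MAY_SLEEP).
 */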
4030static const struct udp_tunnel_nic_info be_udp_tunnels = {
4031        .set_port       = be_vxlan_set_port,
4032        .unset_port     = be_vxlan_unset_port,
4033        .flags          = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
4034                          UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
4035        .tables         = {
4036                { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
4037        },
4038};
4039
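/* Computes the per-VF slice of the PF pool (queues, CQs, MACs, VLANs, IFACEs,
 * MCC queues); the result is passed to be_cmd_set_sriov_config() when SR-IOV
 * resources are (re)distributed.
 */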
4040static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4041                                struct be_resources *vft_res)
4042{
4043        struct be_resources res = adapter->pool_res;
4044        u32 vf_if_cap_flags = res.vf_if_cap_flags;
4045        struct be_resources res_mod = {0};
4046        u16 num_vf_qs = 1;
4047
4048        /* Distribute the queue resources among the PF and its VFs */
4049        if (num_vfs) {
4050                /* Divide the rx queues evenly among the VFs and the PF, capped
4051                 * at VF-EQ-count. Any remainder queues belong to the PF.
4052                 */
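                /* e.g., with 32 RSS queues in the pool and 7 VFs, the PF and
                 * each VF are offered 32 / (7 + 1) = 4 queues, further capped
                 * by SH_VF_MAX_NIC_EQS below.
                 */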
4053                num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4054                                res.max_rss_qs / (num_vfs + 1));
4055
4056                /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4057                 * RSS Tables per port. Provide RSS on VFs, only if number of
4058                 * VFs requested is less than its PF Pool's RSS Tables limit.
4059                 */
4060                if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4061                        num_vf_qs = 1;
4062        }
4063
4064        /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4065         * which are modifiable using SET_PROFILE_CONFIG cmd.
4066         */
4067        be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4068                                  RESOURCE_MODIFIABLE, 0);
4069
4070        /* If RSS IFACE capability flags are modifiable for a VF, set the
4071         * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4072         * more than 1 RSSQ is available for a VF.
4073         * Otherwise, provision only 1 queue pair for VF.
4074         */
4075        if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4076                vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4077                if (num_vf_qs > 1) {
4078                        vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4079                        if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4080                                vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4081                } else {
4082                        vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4083                                             BE_IF_FLAGS_DEFQ_RSS);
4084                }
4085        } else {
4086                num_vf_qs = 1;
4087        }
4088
4089        if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4090                vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4091                vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4092        }
4093
4094        vft_res->vf_if_cap_flags = vf_if_cap_flags;
4095        vft_res->max_rx_qs = num_vf_qs;
4096        vft_res->max_rss_qs = num_vf_qs;
4097        vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4098        vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4099
4100        /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4101         * among the PF and its VFs, if the fields are changeable
4102         */
4103        if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4104                vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4105
4106        if (res_mod.max_vlans == FIELD_MODIFIABLE)
4107                vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4108
4109        if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4110                vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4111
4112        if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4113                vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4114}
4115
4116static void be_if_destroy(struct be_adapter *adapter)
4117{
4118        be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4119
4120        kfree(adapter->pmac_id);
4121        adapter->pmac_id = NULL;
4122
4123        kfree(adapter->mc_list);
4124        adapter->mc_list = NULL;
4125
4126        kfree(adapter->uc_list);
4127        adapter->uc_list = NULL;
4128}
4129
4130static int be_clear(struct be_adapter *adapter)
4131{
4132        struct pci_dev *pdev = adapter->pdev;
4133        struct  be_resources vft_res = {0};
4134
4135        be_cancel_worker(adapter);
4136
4137        flush_workqueue(be_wq);
4138
4139        if (sriov_enabled(adapter))
4140                be_vf_clear(adapter);
4141
4142        /* Re-configure FW to distribute resources evenly across max-supported
4143         * number of VFs, only when VFs are not already enabled.
4144         */
4145        if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4146            !pci_vfs_assigned(pdev)) {
4147                be_calculate_vf_res(adapter,
4148                                    pci_sriov_get_totalvfs(pdev),
4149                                    &vft_res);
4150                be_cmd_set_sriov_config(adapter, adapter->pool_res,
4151                                        pci_sriov_get_totalvfs(pdev),
4152                                        &vft_res);
4153        }
4154
4155        be_vxlan_unset_port(adapter->netdev, 0, 0, NULL);
4156
4157        be_if_destroy(adapter);
4158
4159        be_clear_queues(adapter);
4160
4161        be_msix_disable(adapter);
4162        adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4163        return 0;
4164}
4165
4166static int be_vfs_if_create(struct be_adapter *adapter)
4167{
4168        struct be_resources res = {0};
4169        u32 cap_flags, en_flags, vf;
4170        struct be_vf_cfg *vf_cfg;
4171        int status;
4172
4173        /* If a FW profile exists, then cap_flags are updated */
4174        cap_flags = BE_VF_IF_EN_FLAGS;
4175
4176        for_all_vfs(adapter, vf_cfg, vf) {
4177                if (!BE3_chip(adapter)) {
4178                        status = be_cmd_get_profile_config(adapter, &res, NULL,
4179                                                           ACTIVE_PROFILE_TYPE,
4180                                                           RESOURCE_LIMITS,
4181                                                           vf + 1);
4182                        if (!status) {
4183                                cap_flags = res.if_cap_flags;
4184                                /* Prevent VFs from enabling VLAN promiscuous
4185                                 * mode
4186                                 */
4187                                cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4188                        }
4189                }
4190
4191                /* PF should enable IF flags during proxy if_create call */
4192                en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4193                status = be_cmd_if_create(adapter, cap_flags, en_flags,
4194                                          &vf_cfg->if_handle, vf + 1);
4195                if (status)
4196                        return status;
4197        }
4198
4199        return 0;
4200}
4201
4202static int be_vf_setup_init(struct be_adapter *adapter)
4203{
4204        struct be_vf_cfg *vf_cfg;
4205        int vf;
4206
4207        adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4208                                  GFP_KERNEL);
4209        if (!adapter->vf_cfg)
4210                return -ENOMEM;
4211
4212        for_all_vfs(adapter, vf_cfg, vf) {
4213                vf_cfg->if_handle = -1;
4214                vf_cfg->pmac_id = -1;
4215        }
4216        return 0;
4217}
4218
4219static int be_vf_setup(struct be_adapter *adapter)
4220{
4221        struct device *dev = &adapter->pdev->dev;
4222        struct be_vf_cfg *vf_cfg;
4223        int status, old_vfs, vf;
4224        bool spoofchk;
4225
4226        old_vfs = pci_num_vf(adapter->pdev);
4227
4228        status = be_vf_setup_init(adapter);
4229        if (status)
4230                goto err;
4231
4232        if (old_vfs) {
4233                for_all_vfs(adapter, vf_cfg, vf) {
4234                        status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4235                        if (status)
4236                                goto err;
4237                }
4238
4239                status = be_vfs_mac_query(adapter);
4240                if (status)
4241                        goto err;
4242        } else {
4243                status = be_vfs_if_create(adapter);
4244                if (status)
4245                        goto err;
4246
4247                status = be_vf_eth_addr_config(adapter);
4248                if (status)
4249                        goto err;
4250        }
4251
4252        for_all_vfs(adapter, vf_cfg, vf) {
4253                /* Allow VFs to program MAC/VLAN filters */
4254                status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4255                                                  vf + 1);
4256                if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4257                        status = be_cmd_set_fn_privileges(adapter,
4258                                                          vf_cfg->privileges |
4259                                                          BE_PRIV_FILTMGMT,
4260                                                          vf + 1);
4261                        if (!status) {
4262                                vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4263                                dev_info(dev, "VF%d has FILTMGMT privilege\n",
4264                                         vf);
4265                        }
4266                }
4267
4268                /* Allow full available bandwidth */
4269                if (!old_vfs)
4270                        be_cmd_config_qos(adapter, 0, 0, vf + 1);
4271
4272                status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4273                                               vf_cfg->if_handle, NULL,
4274                                               &spoofchk);
4275                if (!status)
4276                        vf_cfg->spoofchk = spoofchk;
4277
4278                if (!old_vfs) {
4279                        be_cmd_enable_vf(adapter, vf + 1);
4280                        be_cmd_set_logical_link_config(adapter,
4281                                                       IFLA_VF_LINK_STATE_AUTO,
4282                                                       vf+1);
4283                }
4284        }
4285
4286        if (!old_vfs) {
4287                status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4288                if (status) {
4289                        dev_err(dev, "SRIOV enable failed\n");
4290                        adapter->num_vfs = 0;
4291                        goto err;
4292                }
4293        }
4294
4295        if (BE3_chip(adapter)) {
4296                /* On BE3, enable VEB only when SRIOV is enabled */
4297                status = be_cmd_set_hsw_config(adapter, 0, 0,
4298                                               adapter->if_handle,
4299                                               PORT_FWD_TYPE_VEB, 0);
4300                if (status)
4301                        goto err;
4302        }
4303
4304        adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4305        return 0;
4306err:
4307        dev_err(dev, "VF setup failed\n");
4308        be_vf_clear(adapter);
4309        return status;
4310}
4311
4312/* Converting function_mode bits on BE3 to SH mc_type enums */
4313
4314static u8 be_convert_mc_type(u32 function_mode)
4315{
4316        if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4317                return vNIC1;
4318        else if (function_mode & QNQ_MODE)
4319                return FLEX10;
4320        else if (function_mode & VNIC_MODE)
4321                return vNIC2;
4322        else if (function_mode & UMC_ENABLED)
4323                return UMC;
4324        else
4325                return MC_NONE;
4326}
4327
4328/* On BE2/BE3, FW does not report the supported limits */
4329static void BEx_get_resources(struct be_adapter *adapter,
4330                              struct be_resources *res)
4331{
4332        bool use_sriov = adapter->num_vfs ? 1 : 0;
4333
4334        if (be_physfn(adapter))
4335                res->max_uc_mac = BE_UC_PMAC_COUNT;
4336        else
4337                res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4338
4339        adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4340
4341        if (be_is_mc(adapter)) {
4342                /* Assuming that there are 4 channels per port
4343                 * when multi-channel is enabled
4344                 */
4345                if (be_is_qnq_mode(adapter))
4346                        res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4347                else
4348                        /* In a non-qnq multichannel mode, the pvid
4349                         * takes up one vlan entry
4350                         */
4351                        res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4352        } else {
4353                res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4354        }
4355
4356        res->max_mcast_mac = BE_MAX_MC;
4357
4358        /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4359         * 2) Create multiple TX rings on a BE3-R multi-channel interface
4360         *    *only* if it is RSS-capable.
4361         */
4362        if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4363            be_virtfn(adapter) ||
4364            (be_is_mc(adapter) &&
4365             !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4366                res->max_tx_qs = 1;
4367        } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4368                struct be_resources super_nic_res = {0};
4369
4370                /* On a SuperNIC profile, the driver needs to use the
4371                 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4372                 */
4373                be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4374                                          ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4375                                          0);
4376                /* Some old versions of BE3 FW don't report max_tx_qs value */
4377                res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4378        } else {
4379                res->max_tx_qs = BE3_MAX_TX_QS;
4380        }
4381
4382        if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4383            !use_sriov && be_physfn(adapter))
4384                res->max_rss_qs = (adapter->be3_native) ?
4385                                           BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4386        res->max_rx_qs = res->max_rss_qs + 1;
4387
4388        if (be_physfn(adapter))
4389                res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4390                                        BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4391        else
4392                res->max_evt_qs = 1;
4393
4394        res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4395        res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4396        if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4397                res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4398}
4399
4400static void be_setup_init(struct be_adapter *adapter)
4401{
4402        adapter->vlan_prio_bmap = 0xff;
4403        adapter->phy.link_speed = -1;
4404        adapter->if_handle = -1;
4405        adapter->be3_native = false;
4406        adapter->if_flags = 0;
4407        adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4408        if (be_physfn(adapter))
4409                adapter->cmd_privileges = MAX_PRIVILEGES;
4410        else
4411                adapter->cmd_privileges = MIN_PRIVILEGES;
4412}
4413
4414/* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4415 * However, this HW limitation is not exposed to the host via any SLI cmd.
4416 * As a result, in the case of SRIOV, and in particular multi-partition configs,
4417 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4418 * for distribution between the VFs. This self-imposed limit will determine the
4419 * number of VFs for which RSS can be enabled.
4420 */
4421static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4422{
4423        struct be_port_resources port_res = {0};
4424        u8 rss_tables_on_port;
4425        u16 max_vfs = be_max_vfs(adapter);
4426
4427        be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4428                                  RESOURCE_LIMITS, 0);
4429
4430        rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4431
4432        /* Each PF Pool's RSS Tables limit =
4433         * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4434         */
4435        adapter->pool_res.max_rss_tables =
4436                max_vfs * rss_tables_on_port / port_res.max_vfs;
4437}
4438
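    /* Query the PF-pool (SR-IOV) resource limits from FW. Old BE3 FW may not
     * report max_vfs, so fall back to the TotalVFs value from PCI config space.
     * If VFs are still enabled from a previous driver load, reuse that VF count
     * instead of the pool limits.
     */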
4439static int be_get_sriov_config(struct be_adapter *adapter)
4440{
4441        struct be_resources res = {0};
4442        int max_vfs, old_vfs;
4443
4444        be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4445                                  RESOURCE_LIMITS, 0);
4446
4447        /* Some old versions of BE3 FW don't report max_vfs value */
4448        if (BE3_chip(adapter) && !res.max_vfs) {
4449                max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4450                res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4451        }
4452
4453        adapter->pool_res = res;
4454
4455        /* If, during a previous unload of the driver, the VFs were not disabled,
4456         * then we cannot rely on the PF POOL limits for the TotalVFs value.
4457         * Instead, use the TotalVFs value stored in the pci-dev struct.
4458         */
4459        old_vfs = pci_num_vf(adapter->pdev);
4460        if (old_vfs) {
4461                dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4462                         old_vfs);
4463
4464                adapter->pool_res.max_vfs =
4465                        pci_sriov_get_totalvfs(adapter->pdev);
4466                adapter->num_vfs = old_vfs;
4467        }
4468
4469        if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4470                be_calculate_pf_pool_rss_tables(adapter);
4471                dev_info(&adapter->pdev->dev,
4472                         "RSS can be enabled for all VFs if num_vfs <= %d\n",
4473                         be_max_pf_pool_rss_tables(adapter));
4474        }
4475        return 0;
4476}
4477
4478static void be_alloc_sriov_res(struct be_adapter *adapter)
4479{
4480        int old_vfs = pci_num_vf(adapter->pdev);
4481        struct  be_resources vft_res = {0};
4482        int status;
4483
4484        be_get_sriov_config(adapter);
4485
4486        if (!old_vfs)
4487                pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4488
4489        /* When the HW is in an SRIOV-capable configuration, the PF-pool
4490         * resources are given to the PF during driver load, if there are no
4491         * old VFs. This facility is not available in BE3 FW.
4492         * Also, this is done by FW in the Lancer chip.
4493         */
4494        if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4495                be_calculate_vf_res(adapter, 0, &vft_res);
4496                status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4497                                                 &vft_res);
4498                if (status)
4499                        dev_err(&adapter->pdev->dev,
4500                                "Failed to optimize SRIOV resources\n");
4501        }
4502}
4503
4504static int be_get_resources(struct be_adapter *adapter)
4505{
4506        struct device *dev = &adapter->pdev->dev;
4507        struct be_resources res = {0};
4508        int status;
4509
4510        /* For Lancer, SH etc., read per-function resource limits from FW.
4511         * GET_FUNC_CONFIG returns per-function guaranteed limits.
4512         * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4513         */
4514        if (BEx_chip(adapter)) {
4515                BEx_get_resources(adapter, &res);
4516        } else {
4517                status = be_cmd_get_func_config(adapter, &res);
4518                if (status)
4519                        return status;
4520
4521                /* If a default RXQ must be created, we'll use up one RSSQ */
4522                if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4523                    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4524                        res.max_rss_qs -= 1;
4525        }
4526
4527        /* If RoCE is supported, stash away half the EQs for RoCE */
4528        res.max_nic_evt_qs = be_roce_supported(adapter) ?
4529                                res.max_evt_qs / 2 : res.max_evt_qs;
4530        adapter->res = res;
4531
4532        /* If FW supports an RSS default queue, then skip creating a non-RSS
4533         * queue for non-IP traffic.
4534         */
4535        adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4536                                 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4537
4538        dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4539                 be_max_txqs(adapter), be_max_rxqs(adapter),
4540                 be_max_rss(adapter), be_max_nic_eqs(adapter),
4541                 be_max_vfs(adapter));
4542        dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4543                 be_max_uc(adapter), be_max_mc(adapter),
4544                 be_max_vlans(adapter));
4545
4546        /* Ensure RX and TX queues are created in pairs at init time */
4547        adapter->cfg_num_rx_irqs =
4548                                min_t(u16, netif_get_num_default_rss_queues(),
4549                                      be_max_qp_irqs(adapter));
4550        adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4551        return 0;
4552}
4553
4554static int be_get_config(struct be_adapter *adapter)
4555{
4556        int status, level;
4557        u16 profile_id;
4558
4559        status = be_cmd_get_cntl_attributes(adapter);
4560        if (status)
4561                return status;
4562
4563        status = be_cmd_query_fw_cfg(adapter);
4564        if (status)
4565                return status;
4566
4567        if (!lancer_chip(adapter) && be_physfn(adapter))
4568                be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4569
4570        if (BEx_chip(adapter)) {
4571                level = be_cmd_get_fw_log_level(adapter);
4572                adapter->msg_enable =
4573                        level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4574        }
4575
4576        be_cmd_get_acpi_wol_cap(adapter);
4577        pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4578        pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4579
4580        be_cmd_query_port_name(adapter);
4581
4582        if (be_physfn(adapter)) {
4583                status = be_cmd_get_active_profile(adapter, &profile_id);
4584                if (!status)
4585                        dev_info(&adapter->pdev->dev,
4586                                 "Using profile 0x%x\n", profile_id);
4587        }
4588
4589        return 0;
4590}
4591
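    /* If the netdev does not yet have a MAC address, read the permanent MAC
     * from FW and use it as both the current and permanent netdev address.
     */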
4592static int be_mac_setup(struct be_adapter *adapter)
4593{
4594        u8 mac[ETH_ALEN];
4595        int status;
4596
4597        if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4598                status = be_cmd_get_perm_mac(adapter, mac);
4599                if (status)
4600                        return status;
4601
4602                memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4603                memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4604
4605                /* Initial MAC for BE3 VFs is already programmed by PF */
4606                if (BEx_chip(adapter) && be_virtfn(adapter))
4607                        memcpy(adapter->dev_mac, mac, ETH_ALEN);
4608        }
4609
4610        return 0;
4611}
4612
4613static void be_schedule_worker(struct be_adapter *adapter)
4614{
4615        queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4616        adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4617}
4618
4619static void be_destroy_err_recovery_workq(void)
4620{
4621        if (!be_err_recovery_workq)
4622                return;
4623
4624        flush_workqueue(be_err_recovery_workq);
4625        destroy_workqueue(be_err_recovery_workq);
4626        be_err_recovery_workq = NULL;
4627}
4628
4629static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4630{
4631        struct be_error_recovery *err_rec = &adapter->error_recovery;
4632
4633        if (!be_err_recovery_workq)
4634                return;
4635
4636        queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4637                           msecs_to_jiffies(delay));
4638        adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4639}
4640
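    /* Create the event queues, TX queues, RX completion queues and MCC queues,
     * and publish the real RX/TX queue counts to the network stack.
     */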
4641static int be_setup_queues(struct be_adapter *adapter)
4642{
4643        struct net_device *netdev = adapter->netdev;
4644        int status;
4645
4646        status = be_evt_queues_create(adapter);
4647        if (status)
4648                goto err;
4649
4650        status = be_tx_qs_create(adapter);
4651        if (status)
4652                goto err;
4653
4654        status = be_rx_cqs_create(adapter);
4655        if (status)
4656                goto err;
4657
4658        status = be_mcc_queues_create(adapter);
4659        if (status)
4660                goto err;
4661
4662        status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4663        if (status)
4664                goto err;
4665
4666        status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4667        if (status)
4668                goto err;
4669
4670        return 0;
4671err:
4672        dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4673        return status;
4674}
4675
4676static int be_if_create(struct be_adapter *adapter)
4677{
4678        u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4679        u32 cap_flags = be_if_cap_flags(adapter);
4680
4681        /* alloc required memory for other filtering fields */
4682        adapter->pmac_id = kcalloc(be_max_uc(adapter),
4683                                   sizeof(*adapter->pmac_id), GFP_KERNEL);
4684        if (!adapter->pmac_id)
4685                return -ENOMEM;
4686
4687        adapter->mc_list = kcalloc(be_max_mc(adapter),
4688                                   sizeof(*adapter->mc_list), GFP_KERNEL);
4689        if (!adapter->mc_list)
4690                return -ENOMEM;
4691
4692        adapter->uc_list = kcalloc(be_max_uc(adapter),
4693                                   sizeof(*adapter->uc_list), GFP_KERNEL);
4694        if (!adapter->uc_list)
4695                return -ENOMEM;
4696
4697        if (adapter->cfg_num_rx_irqs == 1)
4698                cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4699
4700        en_flags &= cap_flags;
4701        /* will enable all the needed filter flags in be_open() */
4702        return be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4703                                  &adapter->if_handle, 0);
4704}
4705
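    /* Re-create the interface and all queues with the currently configured
     * queue counts: close the netdev if it is running, destroy the old IF and
     * queues, re-enable MSI-X if needed, then re-create everything and re-open.
     */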
4706int be_update_queues(struct be_adapter *adapter)
4707{
4708        struct net_device *netdev = adapter->netdev;
4709        int status;
4710
4711        if (netif_running(netdev)) {
4712                /* be_tx_timeout() must not run concurrently with this
4713                 * function; synchronize with an already-running dev_watchdog
4714                 */
4715                netif_tx_lock_bh(netdev);
4716                /* device cannot transmit now, avoid dev_watchdog timeouts */
4717                netif_carrier_off(netdev);
4718                netif_tx_unlock_bh(netdev);
4719
4720                be_close(netdev);
4721        }
4722
4723        be_cancel_worker(adapter);
4724
4725        /* If any vectors have been shared with RoCE, we cannot re-program
4726         * the MSIx table.
4727         */
4728        if (!adapter->num_msix_roce_vec)
4729                be_msix_disable(adapter);
4730
4731        be_clear_queues(adapter);
4732        status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4733        if (status)
4734                return status;
4735
4736        if (!msix_enabled(adapter)) {
4737                status = be_msix_enable(adapter);
4738                if (status)
4739                        return status;
4740        }
4741
4742        status = be_if_create(adapter);
4743        if (status)
4744                return status;
4745
4746        status = be_setup_queues(adapter);
4747        if (status)
4748                return status;
4749
4750        be_schedule_worker(adapter);
4751
4752        /* The IF was destroyed and re-created. We need to clear
4753         * all promiscuous flags valid for the destroyed IF.
4754         * Without this, promisc mode is not restored during
4755         * be_open() because the driver thinks that it is
4756         * already enabled in HW.
4757         */
4758        adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4759
4760        if (netif_running(netdev))
4761                status = be_open(netdev);
4762
4763        return status;
4764}
4765
4766static inline int fw_major_num(const char *fw_ver)
4767{
4768        int fw_major = 0, i;
4769
4770        i = sscanf(fw_ver, "%d.", &fw_major);
4771        if (i != 1)
4772                return 0;
4773
4774        return fw_major;
4775}
4776
4777/* If error recovery is in progress, FLR the PF.
4778 * Otherwise, if any VFs are already enabled, don't FLR the PF.
4779 */
4780static bool be_reset_required(struct be_adapter *adapter)
4781{
4782        if (be_error_recovering(adapter))
4783                return true;
4784        else
4785                return pci_num_vf(adapter->pdev) == 0;
4786}
4787
4788/* Wait for the FW to be ready and perform the required initialization */
4789static int be_func_init(struct be_adapter *adapter)
4790{
4791        int status;
4792
4793        status = be_fw_wait_ready(adapter);
4794        if (status)
4795                return status;
4796
4797        /* FW is now ready; clear errors to allow cmds/doorbell */
4798        be_clear_error(adapter, BE_CLEAR_ALL);
4799
4800        if (be_reset_required(adapter)) {
4801                status = be_cmd_reset_function(adapter);
4802                if (status)
4803                        return status;
4804
4805                /* Wait for interrupts to quiesce after an FLR */
4806                msleep(100);
4807        }
4808
4809        /* Tell FW we're ready to fire cmds */
4810        status = be_cmd_fw_init(adapter);
4811        if (status)
4812                return status;
4813
4814        /* Allow interrupts for other ULPs running on NIC function */
4815        be_intr_set(adapter, true);
4816
4817        return 0;
4818}
4819
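    /* Main adapter setup path (also used via be_resume() during recovery):
     * wait for FW readiness, query config and resource limits, enable MSI-X,
     * create the interface and queues, program the MAC and flow control, and
     * optionally bring up SR-IOV VFs.
     */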
4820static int be_setup(struct be_adapter *adapter)
4821{
4822        struct device *dev = &adapter->pdev->dev;
4823        int status;
4824
4825        status = be_func_init(adapter);
4826        if (status)
4827                return status;
4828
4829        be_setup_init(adapter);
4830
4831        if (!lancer_chip(adapter))
4832                be_cmd_req_native_mode(adapter);
4833
4834        /* Invoke this cmd first to get pf_num and vf_num, which are needed
4835         * for issuing profile-related cmds
4836         */
4837        if (!BEx_chip(adapter)) {
4838                status = be_cmd_get_func_config(adapter, NULL);
4839                if (status)
4840                        return status;
4841        }
4842
4843        status = be_get_config(adapter);
4844        if (status)
4845                goto err;
4846
4847        if (!BE2_chip(adapter) && be_physfn(adapter))
4848                be_alloc_sriov_res(adapter);
4849
4850        status = be_get_resources(adapter);
4851        if (status)
4852                goto err;
4853
4854        status = be_msix_enable(adapter);
4855        if (status)
4856                goto err;
4857
4858        /* will enable all the needed filter flags in be_open() */
4859        status = be_if_create(adapter);
4860        if (status)
4861                goto err;
4862
4863        /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4864        rtnl_lock();
4865        status = be_setup_queues(adapter);
4866        rtnl_unlock();
4867        if (status)
4868                goto err;
4869
4870        be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4871
4872        status = be_mac_setup(adapter);
4873        if (status)
4874                goto err;
4875
4876        be_cmd_get_fw_ver(adapter);
4877        dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4878
4879        if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4880                dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4881                        adapter->fw_ver);
4882                dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4883        }
4884
4885        status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4886                                         adapter->rx_fc);
4887        if (status)
4888                be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4889                                        &adapter->rx_fc);
4890
4891        dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4892                 adapter->tx_fc, adapter->rx_fc);
4893
4894        if (be_physfn(adapter))
4895                be_cmd_set_logical_link_config(adapter,
4896                                               IFLA_VF_LINK_STATE_AUTO, 0);
4897
4898        /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4899         * confusing a Linux bridge or OVS switch that it might be connected to.
4900         * Set the EVB to PASSTHRU mode, which effectively disables the EVB,
4901         * when SRIOV is not enabled.
4902         */
4903        if (BE3_chip(adapter))
4904                be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4905                                      PORT_FWD_TYPE_PASSTHRU, 0);
4906
4907        if (adapter->num_vfs)
4908                be_vf_setup(adapter);
4909
4910        status = be_cmd_get_phy_info(adapter);
4911        if (!status && be_pause_supported(adapter))
4912                adapter->phy.fc_autoneg = 1;
4913
4914        if (be_physfn(adapter) && !lancer_chip(adapter))
4915                be_cmd_set_features(adapter);
4916
4917        be_schedule_worker(adapter);
4918        adapter->flags |= BE_FLAGS_SETUP_DONE;
4919        return 0;
4920err:
4921        be_clear(adapter);
4922        return status;
4923}
4924
4925#ifdef CONFIG_NET_POLL_CONTROLLER
4926static void be_netpoll(struct net_device *netdev)
4927{
4928        struct be_adapter *adapter = netdev_priv(netdev);
4929        struct be_eq_obj *eqo;
4930        int i;
4931
4932        for_all_evt_queues(adapter, eqo, i) {
4933                be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4934                napi_schedule(&eqo->napi);
4935        }
4936}
4937#endif
4938
4939int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4940{
4941        const struct firmware *fw;
4942        int status;
4943
4944        if (!netif_running(adapter->netdev)) {
4945                dev_err(&adapter->pdev->dev,
4946                        "Firmware load not allowed (interface is down)\n");
4947                return -ENETDOWN;
4948        }
4949
4950        status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4951        if (status)
4952                goto fw_exit;
4953
4954        dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4955
4956        if (lancer_chip(adapter))
4957                status = lancer_fw_download(adapter, fw);
4958        else
4959                status = be_fw_download(adapter, fw);
4960
4961        if (!status)
4962                be_cmd_get_fw_ver(adapter);
4963
4964fw_exit:
4965        release_firmware(fw);
4966        return status;
4967}
4968
4969static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4970                                 u16 flags, struct netlink_ext_ack *extack)
4971{
4972        struct be_adapter *adapter = netdev_priv(dev);
4973        struct nlattr *attr, *br_spec;
4974        int rem;
4975        int status = 0;
4976        u16 mode = 0;
4977
4978        if (!sriov_enabled(adapter))
4979                return -EOPNOTSUPP;
4980
4981        br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4982        if (!br_spec)
4983                return -EINVAL;
4984
4985        nla_for_each_nested(attr, br_spec, rem) {
4986                if (nla_type(attr) != IFLA_BRIDGE_MODE)
4987                        continue;
4988
4989                if (nla_len(attr) < sizeof(mode))
4990                        return -EINVAL;
4991
4992                mode = nla_get_u16(attr);
4993                if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4994                        return -EOPNOTSUPP;
4995
4996                if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4997                        return -EINVAL;
4998
4999                status = be_cmd_set_hsw_config(adapter, 0, 0,
5000                                               adapter->if_handle,
5001                                               mode == BRIDGE_MODE_VEPA ?
5002                                               PORT_FWD_TYPE_VEPA :
5003                                               PORT_FWD_TYPE_VEB, 0);
5004                if (status)
5005                        goto err;
5006
5007                dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
5008                         mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5009
5010                return status;
5011        }
5012err:
5013        dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5014                mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5015
5016        return status;
5017}
5018
5019static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5020                                 struct net_device *dev, u32 filter_mask,
5021                                 int nlflags)
5022{
5023        struct be_adapter *adapter = netdev_priv(dev);
5024        int status = 0;
5025        u8 hsw_mode;
5026
5027        /* BE and Lancer chips support VEB mode only */
5028        if (BEx_chip(adapter) || lancer_chip(adapter)) {
5029                /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5030                if (!pci_sriov_get_totalvfs(adapter->pdev))
5031                        return 0;
5032                hsw_mode = PORT_FWD_TYPE_VEB;
5033        } else {
5034                status = be_cmd_get_hsw_config(adapter, NULL, 0,
5035                                               adapter->if_handle, &hsw_mode,
5036                                               NULL);
5037                if (status)
5038                        return 0;
5039
5040                if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5041                        return 0;
5042        }
5043
5044        return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5045                                       hsw_mode == PORT_FWD_TYPE_VEPA ?
5046                                       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5047                                       0, 0, nlflags, filter_mask, NULL);
5048}
5049
5050static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5051                                         void (*func)(struct work_struct *))
5052{
5053        struct be_cmd_work *work;
5054
5055        work = kzalloc(sizeof(*work), GFP_ATOMIC);
5056        if (!work) {
5057                dev_err(&adapter->pdev->dev,
5058                        "be_work memory allocation failed\n");
5059                return NULL;
5060        }
5061
5062        INIT_WORK(&work->work, func);
5063        work->adapter = adapter;
5064        return work;
5065}
5066
5067static netdev_features_t be_features_check(struct sk_buff *skb,
5068                                           struct net_device *dev,
5069                                           netdev_features_t features)
5070{
5071        struct be_adapter *adapter = netdev_priv(dev);
5072        u8 l4_hdr = 0;
5073
5074        if (skb_is_gso(skb)) {
5075                /* IPv6 TSO requests with extension hdrs are a problem
5076                 * for Lancer and BE3 HW. Disable the TSO6 feature.
5077                 */
5078                if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5079                        features &= ~NETIF_F_TSO6;
5080
5081                /* Lancer cannot handle packets with an MSS of less than 256.
5082                 * It also can't handle a TSO packet with a single segment.
5083                 * Disable GSO support in such cases.
5084                 */
5085                if (lancer_chip(adapter) &&
5086                    (skb_shinfo(skb)->gso_size < 256 ||
5087                     skb_shinfo(skb)->gso_segs == 1))
5088                        features &= ~NETIF_F_GSO_MASK;
5089        }
5090
5091        /* The code below restricts offload features for some tunneled and
5092         * Q-in-Q packets.
5093         * Offload features for normal (non-tunnel) packets are unchanged.
5094         */
5095        features = vlan_features_check(skb, features);
5096        if (!skb->encapsulation ||
5097            !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5098                return features;
5099
5100        /* It's an encapsulated packet and VxLAN offloads are enabled. We
5101         * should disable tunnel offload features if it's not a VxLAN packet,
5102         * as tunnel offloads have been enabled only for VxLAN. This is done to
5103         * allow other tunneled traffic, such as GRE, to work fine while VxLAN
5104         * offloads are configured in Skyhawk-R.
5105         */
5106        switch (vlan_get_protocol(skb)) {
5107        case htons(ETH_P_IP):
5108                l4_hdr = ip_hdr(skb)->protocol;
5109                break;
5110        case htons(ETH_P_IPV6):
5111                l4_hdr = ipv6_hdr(skb)->nexthdr;
5112                break;
5113        default:
5114                return features;
5115        }
5116
5117        if (l4_hdr != IPPROTO_UDP ||
5118            skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5119            skb->inner_protocol != htons(ETH_P_TEB) ||
5120            skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5121                sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5122            !adapter->vxlan_port ||
5123            udp_hdr(skb)->dest != adapter->vxlan_port)
5124                return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5125
5126        return features;
5127}
5128
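    /* Build a physical port id that is unique across adapters by combining
     * the HBA port number with the controller's serial number words.
     */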
5129static int be_get_phys_port_id(struct net_device *dev,
5130                               struct netdev_phys_item_id *ppid)
5131{
5132        int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5133        struct be_adapter *adapter = netdev_priv(dev);
5134        u8 *id;
5135
5136        if (MAX_PHYS_ITEM_ID_LEN < id_len)
5137                return -ENOSPC;
5138
5139        ppid->id[0] = adapter->hba_port_num + 1;
5140        id = &ppid->id[1];
5141        for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5142             i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5143                memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5144
5145        ppid->id_len = id_len;
5146
5147        return 0;
5148}
5149
5150static void be_set_rx_mode(struct net_device *dev)
5151{
5152        struct be_adapter *adapter = netdev_priv(dev);
5153        struct be_cmd_work *work;
5154
5155        work = be_alloc_work(adapter, be_work_set_rx_mode);
5156        if (work)
5157                queue_work(be_wq, &work->work);
5158}
5159
5160static const struct net_device_ops be_netdev_ops = {
5161        .ndo_open               = be_open,
5162        .ndo_stop               = be_close,
5163        .ndo_start_xmit         = be_xmit,
5164        .ndo_set_rx_mode        = be_set_rx_mode,
5165        .ndo_set_mac_address    = be_mac_addr_set,
5166        .ndo_get_stats64        = be_get_stats64,
5167        .ndo_validate_addr      = eth_validate_addr,
5168        .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5169        .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5170        .ndo_set_vf_mac         = be_set_vf_mac,
5171        .ndo_set_vf_vlan        = be_set_vf_vlan,
5172        .ndo_set_vf_rate        = be_set_vf_tx_rate,
5173        .ndo_get_vf_config      = be_get_vf_config,
5174        .ndo_set_vf_link_state  = be_set_vf_link_state,
5175        .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5176        .ndo_tx_timeout         = be_tx_timeout,
5177#ifdef CONFIG_NET_POLL_CONTROLLER
5178        .ndo_poll_controller    = be_netpoll,
5179#endif
5180        .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5181        .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5182        .ndo_features_check     = be_features_check,
5183        .ndo_get_phys_port_id   = be_get_phys_port_id,
5184};
5185
5186static void be_netdev_init(struct net_device *netdev)
5187{
5188        struct be_adapter *adapter = netdev_priv(netdev);
5189
5190        netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5191                NETIF_F_GSO_UDP_TUNNEL |
5192                NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5193                NETIF_F_HW_VLAN_CTAG_TX;
5194        if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5195                netdev->hw_features |= NETIF_F_RXHASH;
5196
5197        netdev->features |= netdev->hw_features |
5198                NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5199
5200        netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5201                NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5202
5203        netdev->priv_flags |= IFF_UNICAST_FLT;
5204
5205        netdev->flags |= IFF_MULTICAST;
5206
5207        netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5208
5209        netdev->netdev_ops = &be_netdev_ops;
5210
5211        netdev->ethtool_ops = &be_ethtool_ops;
5212
5213        if (!lancer_chip(adapter) && !BEx_chip(adapter) && !be_is_mc(adapter))
5214                netdev->udp_tunnel_nic_info = &be_udp_tunnels;
5215
5216        /* MTU range: 256 - 9000 */
5217        netdev->min_mtu = BE_MIN_MTU;
5218        netdev->max_mtu = BE_MAX_MTU;
5219}
5220
5221static void be_cleanup(struct be_adapter *adapter)
5222{
5223        struct net_device *netdev = adapter->netdev;
5224
5225        rtnl_lock();
5226        netif_device_detach(netdev);
5227        if (netif_running(netdev))
5228                be_close(netdev);
5229        rtnl_unlock();
5230
5231        be_clear(adapter);
5232}
5233
5234static int be_resume(struct be_adapter *adapter)
5235{
5236        struct net_device *netdev = adapter->netdev;
5237        int status;
5238
5239        status = be_setup(adapter);
5240        if (status)
5241                return status;
5242
5243        rtnl_lock();
5244        if (netif_running(netdev))
5245                status = be_open(netdev);
5246        rtnl_unlock();
5247
5248        if (status)
5249                return status;
5250
5251        netif_device_attach(netdev);
5252
5253        return 0;
5254}
5255
5256static void be_soft_reset(struct be_adapter *adapter)
5257{
5258        u32 val;
5259
5260        dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5261        val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5262        val |= SLIPORT_SOFTRESET_SR_MASK;
5263        iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5264}
5265
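    /* Decide whether a detected UE can be recovered from: the POST stage must
     * report a recoverable error code, enough time must have passed since
     * driver load and since the last recovery, and the error code must not
     * repeat the previous one.
     */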
5266static bool be_err_is_recoverable(struct be_adapter *adapter)
5267{
5268        struct be_error_recovery *err_rec = &adapter->error_recovery;
5269        unsigned long initial_idle_time =
5270                msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5271        unsigned long recovery_interval =
5272                msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5273        u16 ue_err_code;
5274        u32 val;
5275
5276        val = be_POST_stage_get(adapter);
5277        if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5278                return false;
5279        ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5280        if (ue_err_code == 0)
5281                return false;
5282
5283        dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5284                ue_err_code);
5285
5286        if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5287                dev_err(&adapter->pdev->dev,
5288                        "Cannot recover within %lu sec from driver load\n",
5289                        jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5290                return false;
5291        }
5292
5293        if (err_rec->last_recovery_time && time_before_eq(
5294                jiffies - err_rec->last_recovery_time, recovery_interval)) {
5295                dev_err(&adapter->pdev->dev,
5296                        "Cannot recover within %lu sec from last recovery\n",
5297                        jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5298                return false;
5299        }
5300
5301        if (ue_err_code == err_rec->last_err_code) {
5302                dev_err(&adapter->pdev->dev,
5303                        "Cannot recover from a consecutive TPE error\n");
5304                return false;
5305        }
5306
5307        err_rec->last_recovery_time = jiffies;
5308        err_rec->last_err_code = ue_err_code;
5309        return true;
5310}
5311
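    /* TPE error-recovery state machine for non-Lancer chips. Each call
     * advances one state (detect -> reset/pre-poll -> re-init) and sets
     * resched_delay so the error-detection worker re-runs after the required
     * wait; only PF0 issues the chip soft reset.
     */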
5312static int be_tpe_recover(struct be_adapter *adapter)
5313{
5314        struct be_error_recovery *err_rec = &adapter->error_recovery;
5315        int status = -EAGAIN;
5316        u32 val;
5317
5318        switch (err_rec->recovery_state) {
5319        case ERR_RECOVERY_ST_NONE:
5320                err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5321                err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5322                break;
5323
5324        case ERR_RECOVERY_ST_DETECT:
5325                val = be_POST_stage_get(adapter);
5326                if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5327                    POST_STAGE_RECOVERABLE_ERR) {
5328                        dev_err(&adapter->pdev->dev,
5329                                "Unrecoverable HW error detected: 0x%x\n", val);
5330                        status = -EINVAL;
5331                        err_rec->resched_delay = 0;
5332                        break;
5333                }
5334
5335                dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5336
5337                /* Only PF0 initiates a Chip Soft Reset. But PF0 must wait UE2SR
5338                 * milliseconds before it checks the final error status in
5339                 * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5340                 * If they are, PF0 initiates a Soft Reset.
5341                 */
5342                if (adapter->pf_num == 0) {
5343                        err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5344                        err_rec->resched_delay = err_rec->ue_to_reset_time -
5345                                        ERR_RECOVERY_UE_DETECT_DURATION;
5346                        break;
5347                }
5348
5349                err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5350                err_rec->resched_delay = err_rec->ue_to_poll_time -
5351                                        ERR_RECOVERY_UE_DETECT_DURATION;
5352                break;
5353
5354        case ERR_RECOVERY_ST_RESET:
5355                if (!be_err_is_recoverable(adapter)) {
5356                        dev_err(&adapter->pdev->dev,
5357                                "Failed to meet recovery criteria\n");
5358                        status = -EIO;
5359                        err_rec->resched_delay = 0;
5360                        break;
5361                }
5362                be_soft_reset(adapter);
5363                err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5364                err_rec->resched_delay = err_rec->ue_to_poll_time -
5365                                        err_rec->ue_to_reset_time;
5366                break;
5367
5368        case ERR_RECOVERY_ST_PRE_POLL:
5369                err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5370                err_rec->resched_delay = 0;
5371                status = 0;                     /* done */
5372                break;
5373
5374        default:
5375                status = -EINVAL;
5376                err_rec->resched_delay = 0;
5377                break;
5378        }
5379
5380        return status;
5381}
5382
5383static int be_err_recover(struct be_adapter *adapter)
5384{
5385        int status;
5386
5387        if (!lancer_chip(adapter)) {
5388                if (!adapter->error_recovery.recovery_supported ||
5389                    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5390                        return -EIO;
5391                status = be_tpe_recover(adapter);
5392                if (status)
5393                        goto err;
5394        }
5395
5396        /* Wait for adapter to reach quiescent state before
5397         * destroying queues
5398         */
5399        status = be_fw_wait_ready(adapter);
5400        if (status)
5401                goto err;
5402
5403        adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5404
5405        be_cleanup(adapter);
5406
5407        status = be_resume(adapter);
5408        if (status)
5409                goto err;
5410
5411        adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5412
5413err:
5414        return status;
5415}
5416
5417static void be_err_detection_task(struct work_struct *work)
5418{
5419        struct be_error_recovery *err_rec =
5420                        container_of(work, struct be_error_recovery,
5421                                     err_detection_work.work);
5422        struct be_adapter *adapter =
5423                        container_of(err_rec, struct be_adapter,
5424                                     error_recovery);
5425        u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5426        struct device *dev = &adapter->pdev->dev;
5427        int recovery_status;
5428
5429        be_detect_error(adapter);
5430        if (!be_check_error(adapter, BE_ERROR_HW))
5431                goto reschedule_task;
5432
5433        recovery_status = be_err_recover(adapter);
5434        if (!recovery_status) {
5435                err_rec->recovery_retries = 0;
5436                err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5437                dev_info(dev, "Adapter recovery successful\n");
5438                goto reschedule_task;
5439        } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5440                /* BEx/SH recovery state machine */
5441                if (adapter->pf_num == 0 &&
5442                    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5443                        dev_err(&adapter->pdev->dev,
5444                                "Adapter recovery in progress\n");
5445                resched_delay = err_rec->resched_delay;
5446                goto reschedule_task;
5447        } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5448                /* For VFs, check every second whether the PF has allocated
5449                 * resources.
5450                 */
5451                dev_err(dev, "Re-trying adapter recovery\n");
5452                goto reschedule_task;
5453        } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5454                   ERR_RECOVERY_MAX_RETRY_COUNT) {
5455                /* In case of another error during recovery, it takes 30 sec
5456                 * for the adapter to come out of error. Retry error recovery after
5457                 * this time interval.
5458                 */
5459                dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5460                resched_delay = ERR_RECOVERY_RETRY_DELAY;
5461                goto reschedule_task;
5462        } else {
5463                dev_err(dev, "Adapter recovery failed\n");
5464                dev_err(dev, "Please reboot server to recover\n");
5465        }
5466
5467        return;
5468
5469reschedule_task:
5470        be_schedule_err_detection(adapter, resched_delay);
5471}
5472
5473static void be_log_sfp_info(struct be_adapter *adapter)
5474{
5475        int status;
5476
5477        status = be_cmd_query_sfp_info(adapter);
5478        if (!status) {
5479                dev_err(&adapter->pdev->dev,
5480                        "Port %c: %s Vendor: %s part no: %s",
5481                        adapter->port_name,
5482                        be_misconfig_evt_port_state[adapter->phy_state],
5483                        adapter->phy.vendor_name,
5484                        adapter->phy.vendor_pn);
5485        }
5486        adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5487}
5488
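    /* Periodic (1 second) housekeeping worker: reads the die temperature,
     * issues async stats commands, replenishes starved RX queues, updates EQ
     * delays and logs SFP info when a PHY misconfiguration was flagged.
     */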
5489static void be_worker(struct work_struct *work)
5490{
5491        struct be_adapter *adapter =
5492                container_of(work, struct be_adapter, work.work);
5493        struct be_rx_obj *rxo;
5494        int i;
5495
5496        if (be_physfn(adapter) &&
5497            MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5498                be_cmd_get_die_temperature(adapter);
5499
5500        /* when interrupts are not yet enabled, just reap any pending
5501         * mcc completions
5502         */
5503        if (!netif_running(adapter->netdev)) {
5504                local_bh_disable();
5505                be_process_mcc(adapter);
5506                local_bh_enable();
5507                goto reschedule;
5508        }
5509
5510        if (!adapter->stats_cmd_sent) {
5511                if (lancer_chip(adapter))
5512                        lancer_cmd_get_pport_stats(adapter,
5513                                                   &adapter->stats_cmd);
5514                else
5515                        be_cmd_get_stats(adapter, &adapter->stats_cmd);
5516        }
5517
5518        for_all_rx_queues(adapter, rxo, i) {
5519                /* Replenish RX-queues starved due to memory
5520                 * allocation failures.
5521                 */
5522                if (rxo->rx_post_starved)
5523                        be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5524        }
5525
5526        /* EQ-delay update for Skyhawk is done while notifying EQ */
5527        if (!skyhawk_chip(adapter))
5528                be_eqd_update(adapter, false);
5529
5530        if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5531                be_log_sfp_info(adapter);
5532
5533reschedule:
5534        adapter->work_counter++;
5535        queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5536}
5537
5538static void be_unmap_pci_bars(struct be_adapter *adapter)
5539{
5540        if (adapter->csr)
5541                pci_iounmap(adapter->pdev, adapter->csr);
5542        if (adapter->db)
5543                pci_iounmap(adapter->pdev, adapter->db);
5544        if (adapter->pcicfg && adapter->pcicfg_mapped)
5545                pci_iounmap(adapter->pdev, adapter->pcicfg);
5546}
5547
5548static int db_bar(struct be_adapter *adapter)
5549{
5550        if (lancer_chip(adapter) || be_virtfn(adapter))
5551                return 0;
5552        else
5553                return 4;
5554}
5555
5556static int be_roce_map_pci_bars(struct be_adapter *adapter)
5557{
5558        if (skyhawk_chip(adapter)) {
5559                adapter->roce_db.size = 4096;
5560                adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5561                                                              db_bar(adapter));
5562                adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5563                                                               db_bar(adapter));
5564        }
5565        return 0;
5566}
5567
5568static int be_map_pci_bars(struct be_adapter *adapter)
5569{
5570        struct pci_dev *pdev = adapter->pdev;
5571        u8 __iomem *addr;
5572        u32 sli_intf;
5573
5574        pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5575        adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5576                                SLI_INTF_FAMILY_SHIFT;
5577        adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5578
5579        if (BEx_chip(adapter) && be_physfn(adapter)) {
5580                adapter->csr = pci_iomap(pdev, 2, 0);
5581                if (!adapter->csr)
5582                        return -ENOMEM;
5583        }
5584
5585        addr = pci_iomap(pdev, db_bar(adapter), 0);
5586        if (!addr)
5587                goto pci_map_err;
5588        adapter->db = addr;
5589
5590        if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5591                if (be_physfn(adapter)) {
5592                        /* PCICFG is the 2nd BAR in BE2 */
5593                        addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5594                        if (!addr)
5595                                goto pci_map_err;
5596                        adapter->pcicfg = addr;
5597                        adapter->pcicfg_mapped = true;
5598                } else {
5599                        adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5600                        adapter->pcicfg_mapped = false;
5601                }
5602        }
5603
5604        be_roce_map_pci_bars(adapter);
5605        return 0;
5606
5607pci_map_err:
5608        dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5609        be_unmap_pci_bars(adapter);
5610        return -ENOMEM;
5611}
5612
5613static void be_drv_cleanup(struct be_adapter *adapter)
5614{
5615        struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5616        struct device *dev = &adapter->pdev->dev;
5617
5618        if (mem->va)
5619                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5620
5621        mem = &adapter->rx_filter;
5622        if (mem->va)
5623                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5624
5625        mem = &adapter->stats_cmd;
5626        if (mem->va)
5627                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5628}
5629
5630/* Allocate and initialize various fields in be_adapter struct */
5631static int be_drv_init(struct be_adapter *adapter)
5632{
5633        struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5634        struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5635        struct be_dma_mem *rx_filter = &adapter->rx_filter;
5636        struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5637        struct device *dev = &adapter->pdev->dev;
5638        int status = 0;
5639
5640        mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5641        mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5642                                                &mbox_mem_alloc->dma,
5643                                                GFP_KERNEL);
5644        if (!mbox_mem_alloc->va)
5645                return -ENOMEM;
5646
5647        mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5648        mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5649        mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5650
5651        rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5652        rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5653                                           &rx_filter->dma, GFP_KERNEL);
5654        if (!rx_filter->va) {
5655                status = -ENOMEM;
5656                goto free_mbox;
5657        }
5658
5659        if (lancer_chip(adapter))
5660                stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5661        else if (BE2_chip(adapter))
5662                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5663        else if (BE3_chip(adapter))
5664                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5665        else
5666                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5667        stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5668                                           &stats_cmd->dma, GFP_KERNEL);
5669        if (!stats_cmd->va) {
5670                status = -ENOMEM;
5671                goto free_rx_filter;
5672        }
5673
5674        mutex_init(&adapter->mbox_lock);
5675        mutex_init(&adapter->mcc_lock);
5676        mutex_init(&adapter->rx_filter_lock);
5677        spin_lock_init(&adapter->mcc_cq_lock);
5678        init_completion(&adapter->et_cmd_compl);
5679
5680        pci_save_state(adapter->pdev);
5681
5682        INIT_DELAYED_WORK(&adapter->work, be_worker);
5683
5684        adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5685        adapter->error_recovery.resched_delay = 0;
5686        INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5687                          be_err_detection_task);
5688
5689        adapter->rx_fc = true;
5690        adapter->tx_fc = true;
5691
5692        /* Must be a power of 2 or else MODULO will BUG_ON */
5693        adapter->be_get_temp_freq = 64;
5694
5695        return 0;
5696
5697free_rx_filter:
5698        dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5699free_mbox:
5700        dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5701                          mbox_mem_alloc->dma);
5702        return status;
5703}
5704
5705static void be_remove(struct pci_dev *pdev)
5706{
5707        struct be_adapter *adapter = pci_get_drvdata(pdev);
5708
5709        if (!adapter)
5710                return;
5711
5712        be_roce_dev_remove(adapter);
5713        be_intr_set(adapter, false);
5714
5715        be_cancel_err_detection(adapter);
5716
5717        unregister_netdev(adapter->netdev);
5718
5719        be_clear(adapter);
5720
5721        if (!pci_vfs_assigned(adapter->pdev))
5722                be_cmd_reset_function(adapter);
5723
5724        /* tell fw we're done with firing cmds */
5725        be_cmd_fw_clean(adapter);
5726
5727        be_unmap_pci_bars(adapter);
5728        be_drv_cleanup(adapter);
5729
5730        pci_disable_pcie_error_reporting(pdev);
5731
5732        pci_release_regions(pdev);
5733        pci_disable_device(pdev);
5734
5735        free_netdev(adapter->netdev);
5736}
5737
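    /* hwmon temp1_input show handler: report the cached on-die temperature
     * in millidegrees Celsius, or -EIO if no valid reading is available.
     */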
5738static ssize_t be_hwmon_show_temp(struct device *dev,
5739                                  struct device_attribute *dev_attr,
5740                                  char *buf)
5741{
5742        struct be_adapter *adapter = dev_get_drvdata(dev);
5743
5744        /* Unit: millidegree Celsius */
5745        if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5746                return -EIO;
5747        else
5748                return sprintf(buf, "%u\n",
5749                               adapter->hwmon_info.be_on_die_temp * 1000);
5750}
5751
5752static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5753                          be_hwmon_show_temp, NULL, 1);
5754
5755static struct attribute *be_hwmon_attrs[] = {
5756        &sensor_dev_attr_temp1_input.dev_attr.attr,
5757        NULL
5758};
5759
5760ATTRIBUTE_GROUPS(be_hwmon);
5761
5762static char *mc_name(struct be_adapter *adapter)
5763{
5764        char *str = ""; /* default */
5765
5766        switch (adapter->mc_type) {
5767        case UMC:
5768                str = "UMC";
5769                break;
5770        case FLEX10:
5771                str = "FLEX10";
5772                break;
5773        case vNIC1:
5774                str = "vNIC-1";
5775                break;
5776        case nPAR:
5777                str = "nPAR";
5778                break;
5779        case UFP:
5780                str = "UFP";
5781                break;
5782        case vNIC2:
5783                str = "vNIC-2";
5784                break;
5785        default:
5786                str = "";
5787        }
5788
5789        return str;
5790}
5791
5792static inline char *func_name(struct be_adapter *adapter)
5793{
5794        return be_physfn(adapter) ? "PF" : "VF";
5795}
5796
5797static inline char *nic_name(struct pci_dev *pdev)
5798{
5799        switch (pdev->device) {
5800        case OC_DEVICE_ID1:
5801                return OC_NAME;
5802        case OC_DEVICE_ID2:
5803                return OC_NAME_BE;
5804        case OC_DEVICE_ID3:
5805        case OC_DEVICE_ID4:
5806                return OC_NAME_LANCER;
5807        case BE_DEVICE_ID2:
5808                return BE3_NAME;
5809        case OC_DEVICE_ID5:
5810        case OC_DEVICE_ID6:
5811                return OC_NAME_SH;
5812        default:
5813                return BE_NAME;
5814        }
5815}
5816
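    /* PCI probe callback: enable the device, map its BARs, bring the
     * adapter up via be_setup() and register the netdev. On success,
     * error detection is scheduled and, on PFs, a hwmon device is
     * registered for the on-die temperature sensor.
     */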
5817static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5818{
5819        struct be_adapter *adapter;
5820        struct net_device *netdev;
5821        int status = 0;
5822
5823        status = pci_enable_device(pdev);
5824        if (status)
5825                goto do_none;
5826
5827        status = pci_request_regions(pdev, DRV_NAME);
5828        if (status)
5829                goto disable_dev;
5830        pci_set_master(pdev);
5831
5832        netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5833        if (!netdev) {
5834                status = -ENOMEM;
5835                goto rel_reg;
5836        }
5837        adapter = netdev_priv(netdev);
5838        adapter->pdev = pdev;
5839        pci_set_drvdata(pdev, adapter);
5840        adapter->netdev = netdev;
5841        SET_NETDEV_DEV(netdev, &pdev->dev);
5842
5843        status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5844        if (!status) {
5845                netdev->features |= NETIF_F_HIGHDMA;
5846        } else {
5847                status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5848                if (status) {
5849                        dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5850                        goto free_netdev;
5851                }
5852        }
5853
5854        status = pci_enable_pcie_error_reporting(pdev);
5855        if (!status)
5856                dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5857
5858        status = be_map_pci_bars(adapter);
5859        if (status)
5860                goto free_netdev;
5861
5862        status = be_drv_init(adapter);
5863        if (status)
5864                goto unmap_bars;
5865
5866        status = be_setup(adapter);
5867        if (status)
5868                goto drv_cleanup;
5869
5870        be_netdev_init(netdev);
5871        status = register_netdev(netdev);
5872        if (status != 0)
5873                goto unsetup;
5874
5875        be_roce_dev_add(adapter);
5876
5877        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5878        adapter->error_recovery.probe_time = jiffies;
5879
5880        /* On-die temperature is not supported on VFs. */
5881        if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5882                adapter->hwmon_info.hwmon_dev =
5883                        devm_hwmon_device_register_with_groups(&pdev->dev,
5884                                                               DRV_NAME,
5885                                                               adapter,
5886                                                               be_hwmon_groups);
5887                adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5888        }
5889
5890        dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5891                 func_name(adapter), mc_name(adapter), adapter->port_name);
5892
5893        return 0;
5894
5895unsetup:
5896        be_clear(adapter);
5897drv_cleanup:
5898        be_drv_cleanup(adapter);
5899unmap_bars:
5900        be_unmap_pci_bars(adapter);
5901free_netdev:
5902        pci_disable_pcie_error_reporting(pdev);
5903        free_netdev(netdev);
5904rel_reg:
5905        pci_release_regions(pdev);
5906disable_dev:
5907        pci_disable_device(pdev);
5908do_none:
5909        dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5910        return status;
5911}
5912
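    /* Dev PM callbacks: suspend quiesces the adapter via be_cleanup();
     * resume re-initializes it and re-arms error detection.
     */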
5913static int __maybe_unused be_suspend(struct device *dev_d)
5914{
5915        struct be_adapter *adapter = dev_get_drvdata(dev_d);
5916
5917        be_intr_set(adapter, false);
5918        be_cancel_err_detection(adapter);
5919
5920        be_cleanup(adapter);
5921
5922        return 0;
5923}
5924
5925static int __maybe_unused be_pci_resume(struct device *dev_d)
5926{
5927        struct be_adapter *adapter = dev_get_drvdata(dev_d);
5928        int status = 0;
5929
5930        status = be_resume(adapter);
5931        if (status)
5932                return status;
5933
5934        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5935
5936        return 0;
5937}
5938
5939/* An FLR (function level reset) will stop BE from DMAing any data.
5940 * Reset the function here so that no DMA continues after shutdown.
5941 */
5942static void be_shutdown(struct pci_dev *pdev)
5943{
5944        struct be_adapter *adapter = pci_get_drvdata(pdev);
5945
5946        if (!adapter)
5947                return;
5948
5949        be_roce_dev_shutdown(adapter);
5950        cancel_delayed_work_sync(&adapter->work);
5951        be_cancel_err_detection(adapter);
5952
5953        netif_device_detach(adapter->netdev);
5954
5955        be_cmd_reset_function(adapter);
5956
5957        pci_disable_device(pdev);
5958}
5959
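    /* EEH error handlers: error_detected quiesces the function and disables
     * the device, slot_reset re-enables it and waits for FW readiness, and
     * resume brings the adapter back up via be_resume().
     */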
5960static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5961                                            pci_channel_state_t state)
5962{
5963        struct be_adapter *adapter = pci_get_drvdata(pdev);
5964
5965        dev_err(&adapter->pdev->dev, "EEH error detected\n");
5966
5967        be_roce_dev_remove(adapter);
5968
5969        if (!be_check_error(adapter, BE_ERROR_EEH)) {
5970                be_set_error(adapter, BE_ERROR_EEH);
5971
5972                be_cancel_err_detection(adapter);
5973
5974                be_cleanup(adapter);
5975        }
5976
5977        if (state == pci_channel_io_perm_failure)
5978                return PCI_ERS_RESULT_DISCONNECT;
5979
5980        pci_disable_device(pdev);
5981
5982        /* The error could cause the FW to trigger a flash debug dump.
5983         * Resetting the card while the flash dump is in progress can
5984         * prevent it from recovering, so wait for the dump to finish.
5985         * Only the first function (devfn 0) needs to wait, as one wait
5986         * per adapter is sufficient.
5987         */
5988        if (pdev->devfn == 0)
5989                ssleep(30);
5990
5991        return PCI_ERS_RESULT_NEED_RESET;
5992}
5993
5994static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5995{
5996        struct be_adapter *adapter = pci_get_drvdata(pdev);
5997        int status;
5998
5999        dev_info(&adapter->pdev->dev, "EEH reset\n");
6000
6001        status = pci_enable_device(pdev);
6002        if (status)
6003                return PCI_ERS_RESULT_DISCONNECT;
6004
6005        pci_set_master(pdev);
6006        pci_restore_state(pdev);
6007
6008        /* Check if card is ok and fw is ready */
6009        dev_info(&adapter->pdev->dev,
6010                 "Waiting for FW to be ready after EEH reset\n");
6011        status = be_fw_wait_ready(adapter);
6012        if (status)
6013                return PCI_ERS_RESULT_DISCONNECT;
6014
6015        be_clear_error(adapter, BE_CLEAR_ALL);
6016        return PCI_ERS_RESULT_RECOVERED;
6017}
6018
6019static void be_eeh_resume(struct pci_dev *pdev)
6020{
6021        int status = 0;
6022        struct be_adapter *adapter = pci_get_drvdata(pdev);
6023
6024        dev_info(&adapter->pdev->dev, "EEH resume\n");
6025
6026        pci_save_state(pdev);
6027
6028        status = be_resume(adapter);
6029        if (status)
6030                goto err;
6031
6032        be_roce_dev_add(adapter);
6033
6034        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6035        return;
6036err:
6037        dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6038}
6039
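    /* sriov_configure callback (sysfs sriov_numvfs): enable or disable VFs.
     * On Skyhawk, the PF-pool resources are first redistributed across the
     * requested number of VFs before the queue configuration is updated.
     */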
6040static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6041{
6042        struct be_adapter *adapter = pci_get_drvdata(pdev);
6043        struct be_resources vft_res = {0};
6044        int status;
6045
6046        if (!num_vfs)
6047                be_vf_clear(adapter);
6048
6049        adapter->num_vfs = num_vfs;
6050
6051        if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6052                dev_warn(&pdev->dev,
6053                         "Cannot disable VFs while they are assigned\n");
6054                return -EBUSY;
6055        }
6056
6057        /* When the HW is in an SRIOV capable configuration, the PF-pool
6058         * resources are distributed equally across the maximum number of
6059         * VFs. The user may request that only a subset of the max VFs be
6060         * enabled; in that case, redistribute the resources across the
6061         * requested num_vfs so that each VF gets a larger share.
6062         * This facility is not available in BE3 FW.
6063         * On Lancer chips, the FW performs this redistribution itself.
6064         */
6065        if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6066                be_calculate_vf_res(adapter, adapter->num_vfs,
6067                                    &vft_res);
6068                status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6069                                                 adapter->num_vfs, &vft_res);
6070                if (status)
6071                        dev_err(&pdev->dev,
6072                                "Failed to optimize SR-IOV resources\n");
6073        }
6074
6075        status = be_get_resources(adapter);
6076        if (status)
6077                return be_cmd_status(status);
6078
6079        /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6080        rtnl_lock();
6081        status = be_update_queues(adapter);
6082        rtnl_unlock();
6083        if (status)
6084                return be_cmd_status(status);
6085
6086        if (adapter->num_vfs)
6087                status = be_vf_setup(adapter);
6088
6089        if (!status)
6090                return adapter->num_vfs;
6091
6092        return 0;
6093}
6094
6095static const struct pci_error_handlers be_eeh_handlers = {
6096        .error_detected = be_eeh_err_detected,
6097        .slot_reset = be_eeh_reset,
6098        .resume = be_eeh_resume,
6099};
6100
6101static SIMPLE_DEV_PM_OPS(be_pci_pm_ops, be_suspend, be_pci_resume);
6102
6103static struct pci_driver be_driver = {
6104        .name = DRV_NAME,
6105        .id_table = be_dev_ids,
6106        .probe = be_probe,
6107        .remove = be_remove,
6108        .driver.pm = &be_pci_pm_ops,
6109        .shutdown = be_shutdown,
6110        .sriov_configure = be_pci_sriov_configure,
6111        .err_handler = &be_eeh_handlers
6112};
6113
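    /* Module init: validate module parameters, create the shared cmd and
     * error-recovery workqueues, and register the PCI driver.
     */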
6114static int __init be_init_module(void)
6115{
6116        int status;
6117
6118        if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6119            rx_frag_size != 2048) {
6120                printk(KERN_WARNING DRV_NAME
6121                        " : Module param rx_frag_size must be 2048/4096/8192."
6122                        " Using 2048\n");
6123                rx_frag_size = 2048;
6124        }
6125
6126        if (num_vfs > 0) {
6127                pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6128                pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6129        }
6130
6131        be_wq = create_singlethread_workqueue("be_wq");
6132        if (!be_wq) {
6133                pr_warn(DRV_NAME " : workqueue creation failed\n");
6134                return -ENOMEM;
6135        }
6136
6137        be_err_recovery_workq =
6138                create_singlethread_workqueue("be_err_recover");
6139        if (!be_err_recovery_workq)
6140                pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6141
6142        status = pci_register_driver(&be_driver);
6143        if (status) {
6144                destroy_workqueue(be_wq);
6145                be_destroy_err_recovery_workq();
6146        }
6147        return status;
6148}
6149module_init(be_init_module);
6150
6151static void __exit be_exit_module(void)
6152{
6153        pci_unregister_driver(&be_driver);
6154
6155        be_destroy_err_recovery_workq();
6156
6157        if (be_wq)
6158                destroy_workqueue(be_wq);
6159}
6160module_exit(be_exit_module);
6161