linux/drivers/net/ethernet/emulex/benet/be_main.c
   1/*
   2 * Copyright (C) 2005 - 2016 Broadcom
   3 * All rights reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License version 2
   7 * as published by the Free Software Foundation.  The full GNU General
   8 * Public License is included in this distribution in the file called COPYING.
   9 *
  10 * Contact Information:
  11 * linux-drivers@emulex.com
  12 *
  13 * Emulex
  14 * 3333 Susan Street
  15 * Costa Mesa, CA 92626
  16 */
  17
  18#include <linux/prefetch.h>
  19#include <linux/module.h>
  20#include "be.h"
  21#include "be_cmds.h"
  22#include <asm/div64.h>
  23#include <linux/aer.h>
  24#include <linux/if_bridge.h>
  25#include <net/busy_poll.h>
  26#include <net/vxlan.h>
  27
  28MODULE_VERSION(DRV_VER);
  29MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
  30MODULE_AUTHOR("Emulex Corporation");
  31MODULE_LICENSE("GPL");
  32
  33/* num_vfs module param is obsolete.
  34 * Use sysfs method to enable/disable VFs.
  35 */
  36static unsigned int num_vfs;
  37module_param(num_vfs, uint, 0444);
  38MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
  39
  40static ushort rx_frag_size = 2048;
  41module_param(rx_frag_size, ushort, 0444);
  42MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
  43
  44/* Per-module error detection/recovery workq shared across all functions.
  45 * Each function schedules its own work request on this shared workq.
  46 */
  47static struct workqueue_struct *be_err_recovery_workq;
  48
  49static const struct pci_device_id be_dev_ids[] = {
  50#ifdef CONFIG_BE2NET_BE2
  51        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
  52        { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
  53#endif /* CONFIG_BE2NET_BE2 */
  54#ifdef CONFIG_BE2NET_BE3
  55        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
  56        { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
  57#endif /* CONFIG_BE2NET_BE3 */
  58#ifdef CONFIG_BE2NET_LANCER
  59        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
  60        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
  61#endif /* CONFIG_BE2NET_LANCER */
  62#ifdef CONFIG_BE2NET_SKYHAWK
  63        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
  64        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
  65#endif /* CONFIG_BE2NET_SKYHAWK */
  66        { 0 }
  67};
  68MODULE_DEVICE_TABLE(pci, be_dev_ids);
  69
  70/* Workqueue used by all functions for deferring cmd calls to the adapter */
  71static struct workqueue_struct *be_wq;
  72
  73/* UE Status Low CSR */
  74static const char * const ue_status_low_desc[] = {
  75        "CEV",
  76        "CTX",
  77        "DBUF",
  78        "ERX",
  79        "Host",
  80        "MPU",
  81        "NDMA",
  82        "PTC ",
  83        "RDMA ",
  84        "RXF ",
  85        "RXIPS ",
  86        "RXULP0 ",
  87        "RXULP1 ",
  88        "RXULP2 ",
  89        "TIM ",
  90        "TPOST ",
  91        "TPRE ",
  92        "TXIPS ",
  93        "TXULP0 ",
  94        "TXULP1 ",
  95        "UC ",
  96        "WDMA ",
  97        "TXULP2 ",
  98        "HOST1 ",
  99        "P0_OB_LINK ",
 100        "P1_OB_LINK ",
 101        "HOST_GPIO ",
 102        "MBOX ",
 103        "ERX2 ",
 104        "SPARE ",
 105        "JTAG ",
 106        "MPU_INTPEND "
 107};
 108
 109/* UE Status High CSR */
 110static const char * const ue_status_hi_desc[] = {
 111        "LPCMEMHOST",
 112        "MGMT_MAC",
 113        "PCS0ONLINE",
 114        "MPU_IRAM",
 115        "PCS1ONLINE",
 116        "PCTL0",
 117        "PCTL1",
 118        "PMEM",
 119        "RR",
 120        "TXPB",
 121        "RXPP",
 122        "XAUI",
 123        "TXP",
 124        "ARM",
 125        "IPC",
 126        "HOST2",
 127        "HOST3",
 128        "HOST4",
 129        "HOST5",
 130        "HOST6",
 131        "HOST7",
 132        "ECRC",
 133        "Poison TLP",
 134        "NETC",
 135        "PERIPH",
 136        "LLTXULP",
 137        "D2P",
 138        "RCON",
 139        "LDMA",
 140        "LLTXP",
 141        "LLTXPB",
 142        "Unknown"
 143};
 144
 145#define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
 146                                 BE_IF_FLAGS_BROADCAST | \
 147                                 BE_IF_FLAGS_MULTICAST | \
 148                                 BE_IF_FLAGS_PASS_L3L4_ERRORS)
 149
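    /* Free the DMA-coherent memory backing a HW queue ring */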
 150static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
 151{
 152        struct be_dma_mem *mem = &q->dma_mem;
 153
 154        if (mem->va) {
 155                dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
 156                                  mem->dma);
 157                mem->va = NULL;
 158        }
 159}
 160
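    /* Allocate and zero the DMA-coherent memory backing a HW queue ring */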
 161static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
 162                          u16 len, u16 entry_size)
 163{
 164        struct be_dma_mem *mem = &q->dma_mem;
 165
 166        memset(q, 0, sizeof(*q));
 167        q->len = len;
 168        q->entry_size = entry_size;
 169        mem->size = len * entry_size;
 170        mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
 171                                      GFP_KERNEL);
 172        if (!mem->va)
 173                return -ENOMEM;
 174        return 0;
 175}
 176
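    /* Enable/disable host interrupts by toggling the HOSTINTR bit of the
     * MEMBAR interrupt-control register in PCI config space (used as a
     * fallback when the FW INTR_SET cmd fails).
     */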
 177static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
 178{
 179        u32 reg, enabled;
 180
 181        pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
 182                              &reg);
 183        enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 184
 185        if (!enabled && enable)
 186                reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 187        else if (enabled && !enable)
 188                reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 189        else
 190                return;
 191
 192        pci_write_config_dword(adapter->pdev,
 193                               PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
 194}
 195
 196static void be_intr_set(struct be_adapter *adapter, bool enable)
 197{
 198        int status = 0;
 199
 200        /* On lancer interrupts can't be controlled via this register */
 201        if (lancer_chip(adapter))
 202                return;
 203
 204        if (be_check_error(adapter, BE_ERROR_EEH))
 205                return;
 206
 207        status = be_cmd_intr_set(adapter, enable);
 208        if (status)
 209                be_reg_intr_set(adapter, enable);
 210}
 211
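    /* Ring the RX queue doorbell to post 'posted' receive buffers for queue 'qid' */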
 212static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
 213{
 214        u32 val = 0;
 215
 216        if (be_check_error(adapter, BE_ERROR_HW))
 217                return;
 218
 219        val |= qid & DB_RQ_RING_ID_MASK;
 220        val |= posted << DB_RQ_NUM_POSTED_SHIFT;
 221
 222        wmb();
 223        iowrite32(val, adapter->db + DB_RQ_OFFSET);
 224}
 225
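    /* Ring the TX doorbell to hand 'posted' new WRBs on this TX queue to the HW */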
 226static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
 227                          u16 posted)
 228{
 229        u32 val = 0;
 230
 231        if (be_check_error(adapter, BE_ERROR_HW))
 232                return;
 233
 234        val |= txo->q.id & DB_TXULP_RING_ID_MASK;
 235        val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
 236
 237        wmb();
 238        iowrite32(val, adapter->db + txo->db_offset);
 239}
 240
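    /* Ring the event-queue doorbell: ack 'num_popped' events, optionally
     * re-arm the EQ, clear the interrupt and program the EQ-delay multiplier.
     */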
 241static void be_eq_notify(struct be_adapter *adapter, u16 qid,
 242                         bool arm, bool clear_int, u16 num_popped,
 243                         u32 eq_delay_mult_enc)
 244{
 245        u32 val = 0;
 246
 247        val |= qid & DB_EQ_RING_ID_MASK;
 248        val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
 249
 250        if (be_check_error(adapter, BE_ERROR_HW))
 251                return;
 252
 253        if (arm)
 254                val |= 1 << DB_EQ_REARM_SHIFT;
 255        if (clear_int)
 256                val |= 1 << DB_EQ_CLR_SHIFT;
 257        val |= 1 << DB_EQ_EVNT_SHIFT;
 258        val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
 259        val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
 260        iowrite32(val, adapter->db + DB_EQ_OFFSET);
 261}
 262
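    /* Ring the completion-queue doorbell: ack 'num_popped' entries and
     * optionally re-arm the CQ.
     */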
 263void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
 264{
 265        u32 val = 0;
 266
 267        val |= qid & DB_CQ_RING_ID_MASK;
 268        val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
 269                        DB_CQ_RING_ID_EXT_MASK_SHIFT);
 270
 271        if (be_check_error(adapter, BE_ERROR_HW))
 272                return;
 273
 274        if (arm)
 275                val |= 1 << DB_CQ_REARM_SHIFT;
 276        val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
 277        iowrite32(val, adapter->db + DB_CQ_OFFSET);
 278}
 279
 280static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
 281{
 282        int i;
 283
 284        /* Check if mac has already been added as part of uc-list */
 285        for (i = 0; i < adapter->uc_macs; i++) {
 286                if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
 287                        /* mac already added, skip addition */
 288                        adapter->pmac_id[0] = adapter->pmac_id[i + 1];
 289                        return 0;
 290                }
 291        }
 292
 293        return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
 294                               &adapter->pmac_id[0], 0);
 295}
 296
 297static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
 298{
 299        int i;
 300
 301        /* Skip deletion if the programmed mac is
 302         * being used in uc-list
 303         */
 304        for (i = 0; i < adapter->uc_macs; i++) {
 305                if (adapter->pmac_id[i + 1] == pmac_id)
 306                        return;
 307        }
 308        be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
 309}
 310
 311static int be_mac_addr_set(struct net_device *netdev, void *p)
 312{
 313        struct be_adapter *adapter = netdev_priv(netdev);
 314        struct device *dev = &adapter->pdev->dev;
 315        struct sockaddr *addr = p;
 316        int status;
 317        u8 mac[ETH_ALEN];
 318        u32 old_pmac_id = adapter->pmac_id[0];
 319
 320        if (!is_valid_ether_addr(addr->sa_data))
 321                return -EADDRNOTAVAIL;
 322
 323        /* Proceed further only if the user-provided MAC is different
 324         * from the active MAC
 325         */
 326        if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
 327                return 0;
 328
 329        /* BE3 VFs without FILTMGMT privilege are not allowed to set their MAC
 330         * address
 331         */
 332        if (BEx_chip(adapter) && be_virtfn(adapter) &&
 333            !check_privilege(adapter, BE_PRIV_FILTMGMT))
 334                return -EPERM;
 335
 336        /* if device is not running, copy MAC to netdev->dev_addr */
 337        if (!netif_running(netdev))
 338                goto done;
 339
 340        /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
 341         * privilege or if PF did not provision the new MAC address.
 342         * On BE3, this cmd will always fail if the VF doesn't have the
 343         * FILTMGMT privilege. This failure is OK only if the PF programmed
 344         * the MAC for the VF.
 345         */
 346        mutex_lock(&adapter->rx_filter_lock);
 347        status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
 348        if (!status) {
 349
 350                /* Delete the old programmed MAC. This call may fail if the
 351                 * old MAC was already deleted by the PF driver.
 352                 */
 353                if (adapter->pmac_id[0] != old_pmac_id)
 354                        be_dev_mac_del(adapter, old_pmac_id);
 355        }
 356
 357        mutex_unlock(&adapter->rx_filter_lock);
 358        /* Only after querying the FW can we decide whether the new MAC
 359         * was successfully activated
 360         */
 361        status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
 362                                       adapter->if_handle, true, 0);
 363        if (status)
 364                goto err;
 365
 366        /* The MAC change did not happen, either due to lack of privilege
 367         * or because the PF didn't pre-provision the new MAC.
 368         */
 369        if (!ether_addr_equal(addr->sa_data, mac)) {
 370                status = -EPERM;
 371                goto err;
 372        }
 373
 374        /* Remember currently programmed MAC */
 375        ether_addr_copy(adapter->dev_mac, addr->sa_data);
 376done:
 377        ether_addr_copy(netdev->dev_addr, addr->sa_data);
 378        dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
 379        return 0;
 380err:
 381        dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
 382        return status;
 383}
 384
 385/* BE2 supports only v0 cmd */
 386static void *hw_stats_from_cmd(struct be_adapter *adapter)
 387{
 388        if (BE2_chip(adapter)) {
 389                struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
 390
 391                return &cmd->hw_stats;
 392        } else if (BE3_chip(adapter)) {
 393                struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
 394
 395                return &cmd->hw_stats;
 396        } else {
 397                struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
 398
 399                return &cmd->hw_stats;
 400        }
 401}
 402
 403/* BE2 supports only v0 cmd */
 404static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
 405{
 406        if (BE2_chip(adapter)) {
 407                struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 408
 409                return &hw_stats->erx;
 410        } else if (BE3_chip(adapter)) {
 411                struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 412
 413                return &hw_stats->erx;
 414        } else {
 415                struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 416
 417                return &hw_stats->erx;
 418        }
 419}
 420
 421static void populate_be_v0_stats(struct be_adapter *adapter)
 422{
 423        struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 424        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 425        struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
 426        struct be_port_rxf_stats_v0 *port_stats =
 427                                        &rxf_stats->port[adapter->port_num];
 428        struct be_drv_stats *drvs = &adapter->drv_stats;
 429
 430        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 431        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 432        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 433        drvs->rx_control_frames = port_stats->rx_control_frames;
 434        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 435        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 436        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 437        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 438        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 439        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 440        drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
 441        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 442        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 443        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 444        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 445        drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
 446        drvs->rx_dropped_header_too_small =
 447                port_stats->rx_dropped_header_too_small;
 448        drvs->rx_address_filtered =
 449                                        port_stats->rx_address_filtered +
 450                                        port_stats->rx_vlan_filtered;
 451        drvs->rx_alignment_symbol_errors =
 452                port_stats->rx_alignment_symbol_errors;
 453
 454        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 455        drvs->tx_controlframes = port_stats->tx_controlframes;
 456
 457        if (adapter->port_num)
 458                drvs->jabber_events = rxf_stats->port1_jabber_events;
 459        else
 460                drvs->jabber_events = rxf_stats->port0_jabber_events;
 461        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 462        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 463        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 464        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 465        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 466        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 467        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 468}
 469
 470static void populate_be_v1_stats(struct be_adapter *adapter)
 471{
 472        struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 473        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 474        struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
 475        struct be_port_rxf_stats_v1 *port_stats =
 476                                        &rxf_stats->port[adapter->port_num];
 477        struct be_drv_stats *drvs = &adapter->drv_stats;
 478
 479        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 480        drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 481        drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 482        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 483        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 484        drvs->rx_control_frames = port_stats->rx_control_frames;
 485        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 486        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 487        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 488        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 489        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 490        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 491        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 492        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 493        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 494        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 495        drvs->rx_dropped_header_too_small =
 496                port_stats->rx_dropped_header_too_small;
 497        drvs->rx_input_fifo_overflow_drop =
 498                port_stats->rx_input_fifo_overflow_drop;
 499        drvs->rx_address_filtered = port_stats->rx_address_filtered;
 500        drvs->rx_alignment_symbol_errors =
 501                port_stats->rx_alignment_symbol_errors;
 502        drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 503        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 504        drvs->tx_controlframes = port_stats->tx_controlframes;
 505        drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 506        drvs->jabber_events = port_stats->jabber_events;
 507        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 508        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 509        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 510        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 511        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 512        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 513        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 514}
 515
 516static void populate_be_v2_stats(struct be_adapter *adapter)
 517{
 518        struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 519        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 520        struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
 521        struct be_port_rxf_stats_v2 *port_stats =
 522                                        &rxf_stats->port[adapter->port_num];
 523        struct be_drv_stats *drvs = &adapter->drv_stats;
 524
 525        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 526        drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 527        drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 528        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 529        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 530        drvs->rx_control_frames = port_stats->rx_control_frames;
 531        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 532        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 533        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 534        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 535        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 536        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 537        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 538        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 539        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 540        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 541        drvs->rx_dropped_header_too_small =
 542                port_stats->rx_dropped_header_too_small;
 543        drvs->rx_input_fifo_overflow_drop =
 544                port_stats->rx_input_fifo_overflow_drop;
 545        drvs->rx_address_filtered = port_stats->rx_address_filtered;
 546        drvs->rx_alignment_symbol_errors =
 547                port_stats->rx_alignment_symbol_errors;
 548        drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 549        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 550        drvs->tx_controlframes = port_stats->tx_controlframes;
 551        drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 552        drvs->jabber_events = port_stats->jabber_events;
 553        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 554        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 555        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 556        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 557        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 558        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 559        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 560        if (be_roce_supported(adapter)) {
 561                drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
 562                drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
 563                drvs->rx_roce_frames = port_stats->roce_frames_received;
 564                drvs->roce_drops_crc = port_stats->roce_drops_crc;
 565                drvs->roce_drops_payload_len =
 566                        port_stats->roce_drops_payload_len;
 567        }
 568}
 569
 570static void populate_lancer_stats(struct be_adapter *adapter)
 571{
 572        struct be_drv_stats *drvs = &adapter->drv_stats;
 573        struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
 574
 575        be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
 576        drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
 577        drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
 578        drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
 579        drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
 580        drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
 581        drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
 582        drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
 583        drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
 584        drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
 585        drvs->rx_dropped_tcp_length =
 586                                pport_stats->rx_dropped_invalid_tcp_length;
 587        drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
 588        drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
 589        drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
 590        drvs->rx_dropped_header_too_small =
 591                                pport_stats->rx_dropped_header_too_small;
 592        drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 593        drvs->rx_address_filtered =
 594                                        pport_stats->rx_address_filtered +
 595                                        pport_stats->rx_vlan_filtered;
 596        drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
 597        drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 598        drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
 599        drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
 600        drvs->jabber_events = pport_stats->rx_jabbers;
 601        drvs->forwarded_packets = pport_stats->num_forwards_lo;
 602        drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
 603        drvs->rx_drops_too_many_frags =
 604                                pport_stats->rx_drops_too_many_frags_lo;
 605}
 606
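    /* Fold a 16-bit HW counter that wraps at 65535 into a 32-bit software
     * accumulator, accounting for at most one wrap per sample.
     */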
 607static void accumulate_16bit_val(u32 *acc, u16 val)
 608{
 609#define lo(x)                   (x & 0xFFFF)
 610#define hi(x)                   (x & 0xFFFF0000)
 611        bool wrapped = val < lo(*acc);
 612        u32 newacc = hi(*acc) + val;
 613
 614        if (wrapped)
 615                newacc += 65536;
 616        WRITE_ONCE(*acc, newacc);
 617}
 618
 619static void populate_erx_stats(struct be_adapter *adapter,
 620                               struct be_rx_obj *rxo, u32 erx_stat)
 621{
 622        if (!BEx_chip(adapter))
 623                rx_stats(rxo)->rx_drops_no_frags = erx_stat;
 624        else
 625                /* below erx HW counter can actually wrap around after
 626                 * 65535. Driver accumulates a 32-bit value
 627                 */
 628                accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
 629                                     (u16)erx_stat);
 630}
 631
 632void be_parse_stats(struct be_adapter *adapter)
 633{
 634        struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
 635        struct be_rx_obj *rxo;
 636        int i;
 637        u32 erx_stat;
 638
 639        if (lancer_chip(adapter)) {
 640                populate_lancer_stats(adapter);
 641        } else {
 642                if (BE2_chip(adapter))
 643                        populate_be_v0_stats(adapter);
 644                else if (BE3_chip(adapter))
 645                        /* for BE3 */
 646                        populate_be_v1_stats(adapter);
 647                else
 648                        populate_be_v2_stats(adapter);
 649
 650                /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
 651                for_all_rx_queues(adapter, rxo, i) {
 652                        erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
 653                        populate_erx_stats(adapter, rxo, erx_stat);
 654                }
 655        }
 656}
 657
 658static void be_get_stats64(struct net_device *netdev,
 659                           struct rtnl_link_stats64 *stats)
 660{
 661        struct be_adapter *adapter = netdev_priv(netdev);
 662        struct be_drv_stats *drvs = &adapter->drv_stats;
 663        struct be_rx_obj *rxo;
 664        struct be_tx_obj *txo;
 665        u64 pkts, bytes;
 666        unsigned int start;
 667        int i;
 668
 669        for_all_rx_queues(adapter, rxo, i) {
 670                const struct be_rx_stats *rx_stats = rx_stats(rxo);
 671
 672                do {
 673                        start = u64_stats_fetch_begin_irq(&rx_stats->sync);
 674                        pkts = rx_stats(rxo)->rx_pkts;
 675                        bytes = rx_stats(rxo)->rx_bytes;
 676                } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
 677                stats->rx_packets += pkts;
 678                stats->rx_bytes += bytes;
 679                stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
 680                stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
 681                                        rx_stats(rxo)->rx_drops_no_frags;
 682        }
 683
 684        for_all_tx_queues(adapter, txo, i) {
 685                const struct be_tx_stats *tx_stats = tx_stats(txo);
 686
 687                do {
 688                        start = u64_stats_fetch_begin_irq(&tx_stats->sync);
 689                        pkts = tx_stats(txo)->tx_pkts;
 690                        bytes = tx_stats(txo)->tx_bytes;
 691                } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
 692                stats->tx_packets += pkts;
 693                stats->tx_bytes += bytes;
 694        }
 695
 696        /* bad pkts received */
 697        stats->rx_errors = drvs->rx_crc_errors +
 698                drvs->rx_alignment_symbol_errors +
 699                drvs->rx_in_range_errors +
 700                drvs->rx_out_range_errors +
 701                drvs->rx_frame_too_long +
 702                drvs->rx_dropped_too_small +
 703                drvs->rx_dropped_too_short +
 704                drvs->rx_dropped_header_too_small +
 705                drvs->rx_dropped_tcp_length +
 706                drvs->rx_dropped_runt;
 707
 708        /* detailed rx errors */
 709        stats->rx_length_errors = drvs->rx_in_range_errors +
 710                drvs->rx_out_range_errors +
 711                drvs->rx_frame_too_long;
 712
 713        stats->rx_crc_errors = drvs->rx_crc_errors;
 714
 715        /* frame alignment errors */
 716        stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
 717
 718        /* receiver fifo overrun */
 719        /* drops_no_pbuf is not per i/f, it's per BE card */
 720        stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
 721                                drvs->rx_input_fifo_overflow_drop +
 722                                drvs->rx_drops_no_pbuf;
 723}
 724
 725void be_link_status_update(struct be_adapter *adapter, u8 link_status)
 726{
 727        struct net_device *netdev = adapter->netdev;
 728
 729        if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
 730                netif_carrier_off(netdev);
 731                adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
 732        }
 733
 734        if (link_status)
 735                netif_carrier_on(netdev);
 736        else
 737                netif_carrier_off(netdev);
 738
 739        netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
 740}
 741
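    /* Length of the headers that get replicated in each segment of a GSO
     * packet (up to the inner TCP header for encapsulated skbs).
     */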
 742static int be_gso_hdr_len(struct sk_buff *skb)
 743{
 744        if (skb->encapsulation)
 745                return skb_inner_transport_offset(skb) +
 746                       inner_tcp_hdrlen(skb);
 747        return skb_transport_offset(skb) + tcp_hdrlen(skb);
 748}
 749
 750static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
 751{
 752        struct be_tx_stats *stats = tx_stats(txo);
 753        u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
 754        /* Account for headers which get duplicated in TSO pkt */
 755        u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
 756
 757        u64_stats_update_begin(&stats->sync);
 758        stats->tx_reqs++;
 759        stats->tx_bytes += skb->len + dup_hdr_len;
 760        stats->tx_pkts += tx_pkts;
 761        if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
 762                stats->tx_vxlan_offload_pkts += tx_pkts;
 763        u64_stats_update_end(&stats->sync);
 764}
 765
 766/* Returns number of WRBs needed for the skb */
 767static u32 skb_wrb_cnt(struct sk_buff *skb)
 768{
 769        /* +1 for the header wrb */
 770        return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
 771}
 772
 773static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
 774{
 775        wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
 776        wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
 777        wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
 778        wrb->rsvd0 = 0;
 779}
 780
 781/* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
 782 * to avoid the swap and shift/mask operations in wrb_fill().
 783 */
 784static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
 785{
 786        wrb->frag_pa_hi = 0;
 787        wrb->frag_pa_lo = 0;
 788        wrb->frag_len = 0;
 789        wrb->rsvd0 = 0;
 790}
 791
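    /* Return the VLAN tag to program in the TX WRB; if the skb's priority is
     * not in the priority bmap advertised by FW, substitute the recommended
     * priority bits.
     */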
 792static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
 793                                     struct sk_buff *skb)
 794{
 795        u8 vlan_prio;
 796        u16 vlan_tag;
 797
 798        vlan_tag = skb_vlan_tag_get(skb);
 799        vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
 800        /* If vlan priority provided by OS is NOT in available bmap */
 801        if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
 802                vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
 803                                adapter->recommended_prio_bits;
 804
 805        return vlan_tag;
 806}
 807
 808/* Used only for IP tunnel packets */
 809static u16 skb_inner_ip_proto(struct sk_buff *skb)
 810{
 811        return (inner_ip_hdr(skb)->version == 4) ?
 812                inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
 813}
 814
 815static u16 skb_ip_proto(struct sk_buff *skb)
 816{
 817        return (ip_hdr(skb)->version == 4) ?
 818                ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
 819}
 820
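    /* TXQ is full when it cannot accommodate another maximally-fragmented skb */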
 821static inline bool be_is_txq_full(struct be_tx_obj *txo)
 822{
 823        return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
 824}
 825
 826static inline bool be_can_txq_wake(struct be_tx_obj *txo)
 827{
 828        return atomic_read(&txo->q.used) < txo->q.len / 2;
 829}
 830
 831static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
 832{
 833        return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
 834}
 835
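    /* Translate the skb's offload state (GSO, checksum offload, VLAN) into
     * the WRB header feature flags.
     */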
 836static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
 837                                       struct sk_buff *skb,
 838                                       struct be_wrb_params *wrb_params)
 839{
 840        u16 proto;
 841
 842        if (skb_is_gso(skb)) {
 843                BE_WRB_F_SET(wrb_params->features, LSO, 1);
 844                wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
 845                if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
 846                        BE_WRB_F_SET(wrb_params->features, LSO6, 1);
 847        } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 848                if (skb->encapsulation) {
 849                        BE_WRB_F_SET(wrb_params->features, IPCS, 1);
 850                        proto = skb_inner_ip_proto(skb);
 851                } else {
 852                        proto = skb_ip_proto(skb);
 853                }
 854                if (proto == IPPROTO_TCP)
 855                        BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
 856                else if (proto == IPPROTO_UDP)
 857                        BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
 858        }
 859
 860        if (skb_vlan_tag_present(skb)) {
 861                BE_WRB_F_SET(wrb_params->features, VLAN, 1);
 862                wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
 863        }
 864
 865        BE_WRB_F_SET(wrb_params->features, CRC, 1);
 866}
 867
 868static void wrb_fill_hdr(struct be_adapter *adapter,
 869                         struct be_eth_hdr_wrb *hdr,
 870                         struct be_wrb_params *wrb_params,
 871                         struct sk_buff *skb)
 872{
 873        memset(hdr, 0, sizeof(*hdr));
 874
 875        SET_TX_WRB_HDR_BITS(crc, hdr,
 876                            BE_WRB_F_GET(wrb_params->features, CRC));
 877        SET_TX_WRB_HDR_BITS(ipcs, hdr,
 878                            BE_WRB_F_GET(wrb_params->features, IPCS));
 879        SET_TX_WRB_HDR_BITS(tcpcs, hdr,
 880                            BE_WRB_F_GET(wrb_params->features, TCPCS));
 881        SET_TX_WRB_HDR_BITS(udpcs, hdr,
 882                            BE_WRB_F_GET(wrb_params->features, UDPCS));
 883
 884        SET_TX_WRB_HDR_BITS(lso, hdr,
 885                            BE_WRB_F_GET(wrb_params->features, LSO));
 886        SET_TX_WRB_HDR_BITS(lso6, hdr,
 887                            BE_WRB_F_GET(wrb_params->features, LSO6));
 888        SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
 889
 890        /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
 891         * hack is not needed, the evt bit is set while ringing DB.
 892         */
 893        SET_TX_WRB_HDR_BITS(event, hdr,
 894                            BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
 895        SET_TX_WRB_HDR_BITS(vlan, hdr,
 896                            BE_WRB_F_GET(wrb_params->features, VLAN));
 897        SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
 898
 899        SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
 900        SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
 901        SET_TX_WRB_HDR_BITS(mgmt, hdr,
 902                            BE_WRB_F_GET(wrb_params->features, OS2BMC));
 903}
 904
 905static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
 906                          bool unmap_single)
 907{
 908        dma_addr_t dma;
 909        u32 frag_len = le32_to_cpu(wrb->frag_len);
 910
 911
 912        dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
 913                (u64)le32_to_cpu(wrb->frag_pa_lo);
 914        if (frag_len) {
 915                if (unmap_single)
 916                        dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
 917                else
 918                        dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
 919        }
 920}
 921
 922/* Grab a WRB header for xmit */
 923static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
 924{
 925        u32 head = txo->q.head;
 926
 927        queue_head_inc(&txo->q);
 928        return head;
 929}
 930
 931/* Set up the WRB header for xmit */
 932static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
 933                                struct be_tx_obj *txo,
 934                                struct be_wrb_params *wrb_params,
 935                                struct sk_buff *skb, u16 head)
 936{
 937        u32 num_frags = skb_wrb_cnt(skb);
 938        struct be_queue_info *txq = &txo->q;
 939        struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
 940
 941        wrb_fill_hdr(adapter, hdr, wrb_params, skb);
 942        be_dws_cpu_to_le(hdr, sizeof(*hdr));
 943
 944        BUG_ON(txo->sent_skb_list[head]);
 945        txo->sent_skb_list[head] = skb;
 946        txo->last_req_hdr = head;
 947        atomic_add(num_frags, &txq->used);
 948        txo->last_req_wrb_cnt = num_frags;
 949        txo->pend_wrb_cnt += num_frags;
 950}
 951
 952/* Setup a WRB fragment (buffer descriptor) for xmit */
 953static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
 954                                 int len)
 955{
 956        struct be_eth_wrb *wrb;
 957        struct be_queue_info *txq = &txo->q;
 958
 959        wrb = queue_head_node(txq);
 960        wrb_fill(wrb, busaddr, len);
 961        queue_head_inc(txq);
 962}
 963
 964/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
 965 * was invoked. The producer index is restored to the previous packet and the
 966 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
 967 */
 968static void be_xmit_restore(struct be_adapter *adapter,
 969                            struct be_tx_obj *txo, u32 head, bool map_single,
 970                            u32 copied)
 971{
 972        struct device *dev;
 973        struct be_eth_wrb *wrb;
 974        struct be_queue_info *txq = &txo->q;
 975
 976        dev = &adapter->pdev->dev;
 977        txq->head = head;
 978
 979        /* skip the first wrb (hdr); it's not mapped */
 980        queue_head_inc(txq);
 981        while (copied) {
 982                wrb = queue_head_node(txq);
 983                unmap_tx_frag(dev, wrb, map_single);
 984                map_single = false;
 985                copied -= le32_to_cpu(wrb->frag_len);
 986                queue_head_inc(txq);
 987        }
 988
 989        txq->head = head;
 990}
 991
 992/* Enqueue the given packet for transmit. This routine allocates WRBs for the
 993 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
 994 * of WRBs used up by the packet.
 995 */
 996static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
 997                           struct sk_buff *skb,
 998                           struct be_wrb_params *wrb_params)
 999{
1000        u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
1001        struct device *dev = &adapter->pdev->dev;
1002        bool map_single = false;
1003        u32 head;
1004        dma_addr_t busaddr;
1005        int len;
1006
1007        head = be_tx_get_wrb_hdr(txo);
1008
1009        if (skb->len > skb->data_len) {
1010                len = skb_headlen(skb);
1011
1012                busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1013                if (dma_mapping_error(dev, busaddr))
1014                        goto dma_err;
1015                map_single = true;
1016                be_tx_setup_wrb_frag(txo, busaddr, len);
1017                copied += len;
1018        }
1019
1020        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1021                const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1022                len = skb_frag_size(frag);
1023
1024                busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1025                if (dma_mapping_error(dev, busaddr))
1026                        goto dma_err;
1027                be_tx_setup_wrb_frag(txo, busaddr, len);
1028                copied += len;
1029        }
1030
1031        be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1032
1033        be_tx_stats_update(txo, skb);
1034        return wrb_cnt;
1035
1036dma_err:
1037        adapter->drv_stats.dma_map_errors++;
1038        be_xmit_restore(adapter, txo, head, map_single, copied);
1039        return 0;
1040}
1041
1042static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1043{
1044        return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1045}
1046
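    /* Insert the VLAN tag (and the outer QnQ tag, if configured) into the
     * packet data itself, for cases where HW VLAN insertion must be skipped.
     */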
1047static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1048                                             struct sk_buff *skb,
1049                                             struct be_wrb_params
1050                                             *wrb_params)
1051{
1052        u16 vlan_tag = 0;
1053
1054        skb = skb_share_check(skb, GFP_ATOMIC);
1055        if (unlikely(!skb))
1056                return skb;
1057
1058        if (skb_vlan_tag_present(skb))
1059                vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1060
1061        if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1062                if (!vlan_tag)
1063                        vlan_tag = adapter->pvid;
1064                /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W
1065                 * to skip VLAN insertion
1066                 */
1067                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1068        }
1069
1070        if (vlan_tag) {
1071                skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1072                                                vlan_tag);
1073                if (unlikely(!skb))
1074                        return skb;
1075                skb->vlan_tci = 0;
1076        }
1077
1078        /* Insert the outer VLAN, if any */
1079        if (adapter->qnq_vid) {
1080                vlan_tag = adapter->qnq_vid;
1081                skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1082                                                vlan_tag);
1083                if (unlikely(!skb))
1084                        return skb;
1085                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086        }
1087
1088        return skb;
1089}
1090
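    /* Detect IPv6 packets carrying an extension header with hdrlen 0xff;
     * HW VLAN tagging of such packets can lock up BE3 (see
     * be_ipv6_tx_stall_chk()).
     */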
1091static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1092{
1093        struct ethhdr *eh = (struct ethhdr *)skb->data;
1094        u16 offset = ETH_HLEN;
1095
1096        if (eh->h_proto == htons(ETH_P_IPV6)) {
1097                struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1098
1099                offset += sizeof(struct ipv6hdr);
1100                if (ip6h->nexthdr != NEXTHDR_TCP &&
1101                    ip6h->nexthdr != NEXTHDR_UDP) {
1102                        struct ipv6_opt_hdr *ehdr =
1103                                (struct ipv6_opt_hdr *)(skb->data + offset);
1104
1105                        /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1106                        if (ehdr->hdrlen == 0xff)
1107                                return true;
1108                }
1109        }
1110        return false;
1111}
1112
1113static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1114{
1115        return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1116}
1117
1118static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119{
1120        return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1121}
1122
1123static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1124                                                  struct sk_buff *skb,
1125                                                  struct be_wrb_params
1126                                                  *wrb_params)
1127{
1128        struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1129        unsigned int eth_hdr_len;
1130        struct iphdr *ip;
1131
1132        /* For padded packets, BE HW modifies tot_len field in IP header
1133         * incorrectly when VLAN tag is inserted by HW.
1134         * For padded packets, Lancer computes incorrect checksum.
1135         */
1136        eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1137                                                VLAN_ETH_HLEN : ETH_HLEN;
1138        if (skb->len <= 60 &&
1139            (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1140            is_ipv4_pkt(skb)) {
1141                ip = (struct iphdr *)ip_hdr(skb);
1142                pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1143        }
1144
1145        /* If vlan tag is already inlined in the packet, skip HW VLAN
1146         * tagging in pvid-tagging mode
1147         */
1148        if (be_pvid_tagging_enabled(adapter) &&
1149            veh->h_vlan_proto == htons(ETH_P_8021Q))
1150                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1151
1152        /* HW has a bug wherein it will calculate CSUM for VLAN
1153         * pkts even though it is disabled.
1154         * Manually insert VLAN in pkt.
1155         */
1156        if (skb->ip_summed != CHECKSUM_PARTIAL &&
1157            skb_vlan_tag_present(skb)) {
1158                skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1159                if (unlikely(!skb))
1160                        goto err;
1161        }
1162
1163        /* HW may lockup when VLAN HW tagging is requested on
1164         * certain ipv6 packets. Drop such pkts if the HW workaround to
1165         * skip HW tagging is not enabled by FW.
1166         */
1167        if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1168                     (adapter->pvid || adapter->qnq_vid) &&
1169                     !qnq_async_evt_rcvd(adapter)))
1170                goto tx_drop;
1171
1172        /* Manual VLAN tag insertion to prevent:
1173         * ASIC lockup when the ASIC inserts VLAN tag into
1174         * certain ipv6 packets. Insert VLAN tags in driver,
1175         * and set event, completion, vlan bits accordingly
1176         * in the Tx WRB.
1177         */
1178        if (be_ipv6_tx_stall_chk(adapter, skb) &&
1179            be_vlan_tag_tx_chk(adapter, skb)) {
1180                skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1181                if (unlikely(!skb))
1182                        goto err;
1183        }
1184
1185        return skb;
1186tx_drop:
1187        dev_kfree_skb_any(skb);
1188err:
1189        return NULL;
1190}
1191
1192static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1193                                           struct sk_buff *skb,
1194                                           struct be_wrb_params *wrb_params)
1195{
1196        int err;
1197
1198        /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1199         * packets that are 32 bytes or less may cause a transmit stall
1200         * on that port. The workaround is to pad such packets
1201         * (len <= 32 bytes) to a minimum length of 36 bytes.
1202         */
1203        if (skb->len <= 32) {
1204                if (skb_put_padto(skb, 36))
1205                        return NULL;
1206        }
1207
1208        if (BEx_chip(adapter) || lancer_chip(adapter)) {
1209                skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1210                if (!skb)
1211                        return NULL;
1212        }
1213
1214        /* The stack can send us skbs with length greater than
1215         * what the HW can handle. Trim the extra bytes.
1216         */
1217        WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1218        err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1219        WARN_ON(err);
1220
1221        return skb;
1222}
1223
1224static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1225{
1226        struct be_queue_info *txq = &txo->q;
1227        struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1228
1229        /* Mark the last request eventable if it hasn't been marked already */
1230        if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1231                hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1232
1233        /* compose a dummy wrb if there is an odd number of wrbs to notify */
1234        if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1235                wrb_fill_dummy(queue_head_node(txq));
1236                queue_head_inc(txq);
1237                atomic_inc(&txq->used);
1238                txo->pend_wrb_cnt++;
1239                hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1240                                           TX_HDR_WRB_NUM_SHIFT);
1241                hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1242                                          TX_HDR_WRB_NUM_SHIFT);
1243        }
1244        be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1245        txo->pend_wrb_cnt = 0;
1246}
1247
1248/* OS2BMC related */
1249
1250#define DHCP_CLIENT_PORT        68
1251#define DHCP_SERVER_PORT        67
1252#define NET_BIOS_PORT1          137
1253#define NET_BIOS_PORT2          138
1254#define DHCPV6_RAS_PORT         547
1255
1256#define is_mc_allowed_on_bmc(adapter, eh)       \
1257        (!is_multicast_filt_enabled(adapter) && \
1258         is_multicast_ether_addr(eh->h_dest) && \
1259         !is_broadcast_ether_addr(eh->h_dest))
1260
1261#define is_bc_allowed_on_bmc(adapter, eh)       \
1262        (!is_broadcast_filt_enabled(adapter) && \
1263         is_broadcast_ether_addr(eh->h_dest))
1264
1265#define is_arp_allowed_on_bmc(adapter, skb)     \
1266        (is_arp(skb) && is_arp_filt_enabled(adapter))
1267
1268#define is_broadcast_packet(eh, adapter)        \
1269                (is_multicast_ether_addr(eh->h_dest) && \
1270                !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1271
1272#define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1273
1274#define is_arp_filt_enabled(adapter)    \
1275                (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1276
1277#define is_dhcp_client_filt_enabled(adapter)    \
1278                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1279
1280#define is_dhcp_srvr_filt_enabled(adapter)      \
1281                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1282
1283#define is_nbios_filt_enabled(adapter)  \
1284                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1285
1286#define is_ipv6_na_filt_enabled(adapter)        \
1287                (adapter->bmc_filt_mask &       \
1288                        BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1289
1290#define is_ipv6_ra_filt_enabled(adapter)        \
1291                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1292
1293#define is_ipv6_ras_filt_enabled(adapter)       \
1294                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1295
1296#define is_broadcast_filt_enabled(adapter)      \
1297                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1298
1299#define is_multicast_filt_enabled(adapter)      \
1300                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1301
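    /* Decide whether a copy of this TX packet must also be sent to the BMC,
     * based on the packet type and the adapter's BMC filtering mask.
     */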
1302static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1303                               struct sk_buff **skb)
1304{
1305        struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1306        bool os2bmc = false;
1307
1308        if (!be_is_os2bmc_enabled(adapter))
1309                goto done;
1310
1311        if (!is_multicast_ether_addr(eh->h_dest))
1312                goto done;
1313
1314        if (is_mc_allowed_on_bmc(adapter, eh) ||
1315            is_bc_allowed_on_bmc(adapter, eh) ||
1316            is_arp_allowed_on_bmc(adapter, (*skb))) {
1317                os2bmc = true;
1318                goto done;
1319        }
1320
1321        if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1322                struct ipv6hdr *hdr = ipv6_hdr((*skb));
1323                u8 nexthdr = hdr->nexthdr;
1324
1325                if (nexthdr == IPPROTO_ICMPV6) {
1326                        struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1327
1328                        switch (icmp6->icmp6_type) {
1329                        case NDISC_ROUTER_ADVERTISEMENT:
1330                                os2bmc = is_ipv6_ra_filt_enabled(adapter);
1331                                goto done;
1332                        case NDISC_NEIGHBOUR_ADVERTISEMENT:
1333                                os2bmc = is_ipv6_na_filt_enabled(adapter);
1334                                goto done;
1335                        default:
1336                                break;
1337                        }
1338                }
1339        }
1340
1341        if (is_udp_pkt((*skb))) {
1342                struct udphdr *udp = udp_hdr((*skb));
1343
1344                switch (ntohs(udp->dest)) {
1345                case DHCP_CLIENT_PORT:
1346                        os2bmc = is_dhcp_client_filt_enabled(adapter);
1347                        goto done;
1348                case DHCP_SERVER_PORT:
1349                        os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1350                        goto done;
1351                case NET_BIOS_PORT1:
1352                case NET_BIOS_PORT2:
1353                        os2bmc = is_nbios_filt_enabled(adapter);
1354                        goto done;
1355                case DHCPV6_RAS_PORT:
1356                        os2bmc = is_ipv6_ras_filt_enabled(adapter);
1357                        goto done;
1358                default:
1359                        break;
1360                }
1361        }
1362done:
1363        /* For packets sent over a VLAN and destined to the BMC, the ASIC
1364         * expects the VLAN tag to be inline in the packet.
1365         */
1366        if (os2bmc)
1367                *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1368
1369        return os2bmc;
1370}
1371
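    /* ndo_start_xmit handler: apply HW workarounds, map and enqueue the WRBs
     * and ring the TX doorbell (immediately, or deferred while xmit_more is set).
     */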
1372static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1373{
1374        struct be_adapter *adapter = netdev_priv(netdev);
1375        u16 q_idx = skb_get_queue_mapping(skb);
1376        struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1377        struct be_wrb_params wrb_params = { 0 };
1378        bool flush = !skb->xmit_more;
1379        u16 wrb_cnt;
1380
1381        skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1382        if (unlikely(!skb))
1383                goto drop;
1384
1385        be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1386
1387        wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1388        if (unlikely(!wrb_cnt)) {
1389                dev_kfree_skb_any(skb);
1390                goto drop;
1391        }
1392
1393        /* If OS2BMC is enabled and the pkt is destined to the BMC,
1394         * enqueue the pkt a second time with the mgmt bit set.
1395         */
1396        if (be_send_pkt_to_bmc(adapter, &skb)) {
1397                BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1398                wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1399                if (unlikely(!wrb_cnt))
1400                        goto drop;
1401                else
1402                        skb_get(skb);
1403        }
1404
1405        if (be_is_txq_full(txo)) {
1406                netif_stop_subqueue(netdev, q_idx);
1407                tx_stats(txo)->tx_stops++;
1408        }
1409
1410        if (flush || __netif_subqueue_stopped(netdev, q_idx))
1411                be_xmit_flush(adapter, txo);
1412
1413        return NETDEV_TX_OK;
1414drop:
1415        tx_stats(txo)->tx_drv_drops++;
1416        /* Flush the already enqueued tx requests */
1417        if (flush && txo->pend_wrb_cnt)
1418                be_xmit_flush(adapter, txo);
1419
1420        return NETDEV_TX_OK;
1421}
1422
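/* ndo_tx_timeout handler. The TX queue/CQ dump below is compiled out in this
 * build (RHEL); on Lancer chips a firmware reset is requested to recover
 * the adapter.
 */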
1423static void be_tx_timeout(struct net_device *netdev)
1424{
1425        struct be_adapter *adapter = netdev_priv(netdev);
1426        struct device *dev = &adapter->pdev->dev;
1427#if 0 /* RHEL only: Disable queue dump in be_tx_timeout */
1428        struct be_tx_obj *txo;
1429        struct sk_buff *skb;
1430        struct tcphdr *tcphdr;
1431        struct udphdr *udphdr;
1432        u32 *entry;
1433#endif
1434        int status;
1435#if 0 /* RHEL only: Disable queue dump in be_tx_timeout */
1436        int i, j;
1437
1438        for_all_tx_queues(adapter, txo, i) {
1439                dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1440                         i, txo->q.head, txo->q.tail,
1441                         atomic_read(&txo->q.used), txo->q.id);
1442
1443                entry = txo->q.dma_mem.va;
1444                for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1445                        if (entry[j] != 0 || entry[j + 1] != 0 ||
1446                            entry[j + 2] != 0 || entry[j + 3] != 0) {
1447                                dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1448                                         j, entry[j], entry[j + 1],
1449                                         entry[j + 2], entry[j + 3]);
1450                        }
1451                }
1452
1453                entry = txo->cq.dma_mem.va;
1454                dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1455                         i, txo->cq.head, txo->cq.tail,
1456                         atomic_read(&txo->cq.used));
1457                for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1458                        if (entry[j] != 0 || entry[j + 1] != 0 ||
1459                            entry[j + 2] != 0 || entry[j + 3] != 0) {
1460                                dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1461                                         j, entry[j], entry[j + 1],
1462                                         entry[j + 2], entry[j + 3]);
1463                        }
1464                }
1465
1466                for (j = 0; j < TX_Q_LEN; j++) {
1467                        if (txo->sent_skb_list[j]) {
1468                                skb = txo->sent_skb_list[j];
1469                                if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1470                                        tcphdr = tcp_hdr(skb);
1471                                        dev_info(dev, "TCP source port %d\n",
1472                                                 ntohs(tcphdr->source));
1473                                        dev_info(dev, "TCP dest port %d\n",
1474                                                 ntohs(tcphdr->dest));
1475                                        dev_info(dev, "TCP sequence num %u\n",
1476                                                 ntohl(tcphdr->seq));
1477                                        dev_info(dev, "TCP ack_seq %u\n",
1478                                                 ntohl(tcphdr->ack_seq));
1479                                } else if (ip_hdr(skb)->protocol ==
1480                                           IPPROTO_UDP) {
1481                                        udphdr = udp_hdr(skb);
1482                                        dev_info(dev, "UDP source port %d\n",
1483                                                 ntohs(udphdr->source));
1484                                        dev_info(dev, "UDP dest port %d\n",
1485                                                 ntohs(udphdr->dest));
1486                                }
1487                                dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1488                                         j, skb, skb->len, skb->protocol);
1489                        }
1490                }
1491        }
1492
1493#endif
1494        if (lancer_chip(adapter)) {
1495                dev_info(dev, "Initiating reset due to tx timeout\n");
1496                dev_info(dev, "Resetting adapter\n");
1497                status = lancer_physdev_ctrl(adapter,
1498                                             PHYSDEV_CONTROL_FW_RESET_MASK);
1499                if (status)
1500                        dev_err(dev, "Reset failed .. Reboot server\n");
1501        }
1502}
1503
1504static inline bool be_in_all_promisc(struct be_adapter *adapter)
1505{
1506        return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1507                        BE_IF_FLAGS_ALL_PROMISCUOUS;
1508}
1509
1510static int be_set_vlan_promisc(struct be_adapter *adapter)
1511{
1512        struct device *dev = &adapter->pdev->dev;
1513        int status;
1514
1515        if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1516                return 0;
1517
1518        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1519        if (!status) {
1520                dev_info(dev, "Enabled VLAN promiscuous mode\n");
1521                adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1522        } else {
1523                dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1524        }
1525        return status;
1526}
1527
1528static int be_clear_vlan_promisc(struct be_adapter *adapter)
1529{
1530        struct device *dev = &adapter->pdev->dev;
1531        int status;
1532
1533        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1534        if (!status) {
1535                dev_info(dev, "Disabling VLAN promiscuous mode\n");
1536                adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1537        }
1538        return status;
1539}
1540
1541/*
1542 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1543 * If the user configures more, place BE in vlan promiscuous mode.
1544 */
1545static int be_vid_config(struct be_adapter *adapter)
1546{
1547        struct device *dev = &adapter->pdev->dev;
1548        u16 vids[BE_NUM_VLANS_SUPPORTED];
1549        u16 num = 0, i = 0;
1550        int status = 0;
1551
1552        /* No need to change the VLAN state if the I/F is in promiscuous */
1553        if (adapter->netdev->flags & IFF_PROMISC)
1554                return 0;
1555
1556        if (adapter->vlans_added > be_max_vlans(adapter))
1557                return be_set_vlan_promisc(adapter);
1558
1559        if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1560                status = be_clear_vlan_promisc(adapter);
1561                if (status)
1562                        return status;
1563        }
1564        /* Construct VLAN Table to give to HW */
1565        for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1566                vids[num++] = cpu_to_le16(i);
1567
1568        status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1569        if (status) {
1570                dev_err(dev, "Setting HW VLAN filtering failed\n");
1571                /* Set to VLAN promisc mode as setting VLAN filter failed */
1572                if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1573                    addl_status(status) ==
1574                                MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1575                        return be_set_vlan_promisc(adapter);
1576        }
1577        return status;
1578}
1579
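/* ndo_vlan_rx_add_vid handler: record the VID in adapter->vids under
 * rx_filter_lock and re-program the HW VLAN filter table (which may fall
 * back to VLAN promiscuous mode if the table is full).
 */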
1580static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1581{
1582        struct be_adapter *adapter = netdev_priv(netdev);
1583        int status = 0;
1584
1585        mutex_lock(&adapter->rx_filter_lock);
1586
1587        /* Packets with VID 0 are always received by Lancer by default */
1588        if (lancer_chip(adapter) && vid == 0)
1589                goto done;
1590
1591        if (test_bit(vid, adapter->vids))
1592                goto done;
1593
1594        set_bit(vid, adapter->vids);
1595        adapter->vlans_added++;
1596
1597        status = be_vid_config(adapter);
1598done:
1599        mutex_unlock(&adapter->rx_filter_lock);
1600        return status;
1601}
1602
1603static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1604{
1605        struct be_adapter *adapter = netdev_priv(netdev);
1606        int status = 0;
1607
1608        mutex_lock(&adapter->rx_filter_lock);
1609
1610        /* Packets with VID 0 are always received by Lancer by default */
1611        if (lancer_chip(adapter) && vid == 0)
1612                goto done;
1613
1614        if (!test_bit(vid, adapter->vids))
1615                goto done;
1616
1617        clear_bit(vid, adapter->vids);
1618        adapter->vlans_added--;
1619
1620        status = be_vid_config(adapter);
1621done:
1622        mutex_unlock(&adapter->rx_filter_lock);
1623        return status;
1624}
1625
1626static void be_set_all_promisc(struct be_adapter *adapter)
1627{
1628        be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1629        adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1630}
1631
1632static void be_set_mc_promisc(struct be_adapter *adapter)
1633{
1634        int status;
1635
1636        if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1637                return;
1638
1639        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1640        if (!status)
1641                adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1642}
1643
1644static void be_set_uc_promisc(struct be_adapter *adapter)
1645{
1646        int status;
1647
1648        if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1649                return;
1650
1651        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1652        if (!status)
1653                adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1654}
1655
1656static void be_clear_uc_promisc(struct be_adapter *adapter)
1657{
1658        int status;
1659
1660        if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1661                return;
1662
1663        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1664        if (!status)
1665                adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1666}
1667
1668/* The two functions below are the sync/unsync callbacks for __dev_mc_sync()
1669 * and __dev_uc_sync(). A single callback serves both sync and unsync; it does
1670 * not actually add or remove addresses, it merely flags that the uc/mc list
1671 * has changed. The entire uc/mc list is programmed in be_set_rx_mode().
1672 */
1673static int be_uc_list_update(struct net_device *netdev,
1674                             const unsigned char *addr)
1675{
1676        struct be_adapter *adapter = netdev_priv(netdev);
1677
1678        adapter->update_uc_list = true;
1679        return 0;
1680}
1681
1682static int be_mc_list_update(struct net_device *netdev,
1683                             const unsigned char *addr)
1684{
1685        struct be_adapter *adapter = netdev_priv(netdev);
1686
1687        adapter->update_mc_list = true;
1688        return 0;
1689}
1690
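/* Program the adapter multicast list. Nothing is programmed while the
 * interface is promiscuous; if ALLMULTI is set or the list is larger than
 * the adapter supports, multicast-promiscuous mode is used instead;
 * otherwise the mc-list cached from the netdev is pushed to the adapter.
 */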
1691static void be_set_mc_list(struct be_adapter *adapter)
1692{
1693        struct net_device *netdev = adapter->netdev;
1694        struct netdev_hw_addr *ha;
1695        bool mc_promisc = false;
1696        int status;
1697
1698        netif_addr_lock_bh(netdev);
1699        __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1700
1701        if (netdev->flags & IFF_PROMISC) {
1702                adapter->update_mc_list = false;
1703        } else if (netdev->flags & IFF_ALLMULTI ||
1704                   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1705                /* Enable multicast promisc if num configured exceeds
1706                 * what we support
1707                 */
1708                mc_promisc = true;
1709                adapter->update_mc_list = false;
1710        } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1711                /* Update mc-list unconditionally if the iface was previously
1712                 * in mc-promisc mode and now is out of that mode.
1713                 */
1714                adapter->update_mc_list = true;
1715        }
1716
1717        if (adapter->update_mc_list) {
1718                int i = 0;
1719
1720                /* cache the mc-list in adapter */
1721                netdev_for_each_mc_addr(ha, netdev) {
1722                        ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1723                        i++;
1724                }
1725                adapter->mc_count = netdev_mc_count(netdev);
1726        }
1727        netif_addr_unlock_bh(netdev);
1728
1729        if (mc_promisc) {
1730                be_set_mc_promisc(adapter);
1731        } else if (adapter->update_mc_list) {
1732                status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1733                if (!status)
1734                        adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1735                else
1736                        be_set_mc_promisc(adapter);
1737
1738                adapter->update_mc_list = false;
1739        }
1740}
1741
1742static void be_clear_mc_list(struct be_adapter *adapter)
1743{
1744        struct net_device *netdev = adapter->netdev;
1745
1746        __dev_mc_unsync(netdev, NULL);
1747        be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1748        adapter->mc_count = 0;
1749}
1750
1751static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1752{
1753        if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1754                adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1755                return 0;
1756        }
1757
1758        return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1759                               adapter->if_handle,
1760                               &adapter->pmac_id[uc_idx + 1], 0);
1761}
1762
1763static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1764{
1765        if (pmac_id == adapter->pmac_id[0])
1766                return;
1767
1768        be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1769}
1770
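/* Program the adapter unicast MAC list. Mirrors be_set_mc_list(): skipped
 * while the interface is promiscuous, falls back to uc-promiscuous mode when
 * the netdev has more UC MACs than the adapter supports, and otherwise
 * deletes the stale pmac entries and re-adds the current list.
 */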
1771static void be_set_uc_list(struct be_adapter *adapter)
1772{
1773        struct net_device *netdev = adapter->netdev;
1774        struct netdev_hw_addr *ha;
1775        bool uc_promisc = false;
1776        int curr_uc_macs = 0, i;
1777
1778        netif_addr_lock_bh(netdev);
1779        __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1780
1781        if (netdev->flags & IFF_PROMISC) {
1782                adapter->update_uc_list = false;
1783        } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1784                uc_promisc = true;
1785                adapter->update_uc_list = false;
1786        } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1787                /* Update uc-list unconditionally if the iface was previously
1788                 * in uc-promisc mode and now is out of that mode.
1789                 */
1790                adapter->update_uc_list = true;
1791        }
1792
1793        if (adapter->update_uc_list) {
1794                /* cache the uc-list in adapter array */
1795                i = 0;
1796                netdev_for_each_uc_addr(ha, netdev) {
1797                        ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1798                        i++;
1799                }
1800                curr_uc_macs = netdev_uc_count(netdev);
1801        }
1802        netif_addr_unlock_bh(netdev);
1803
1804        if (uc_promisc) {
1805                be_set_uc_promisc(adapter);
1806        } else if (adapter->update_uc_list) {
1807                be_clear_uc_promisc(adapter);
1808
1809                for (i = 0; i < adapter->uc_macs; i++)
1810                        be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1811
1812                for (i = 0; i < curr_uc_macs; i++)
1813                        be_uc_mac_add(adapter, i);
1814                adapter->uc_macs = curr_uc_macs;
1815                adapter->update_uc_list = false;
1816        }
1817}
1818
1819static void be_clear_uc_list(struct be_adapter *adapter)
1820{
1821        struct net_device *netdev = adapter->netdev;
1822        int i;
1823
1824        __dev_uc_unsync(netdev, NULL);
1825        for (i = 0; i < adapter->uc_macs; i++)
1826                be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1827
1828        adapter->uc_macs = 0;
1829}
1830
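/* Apply the netdev RX filter state (promiscuous mode, VLAN, UC and MC lists)
 * to the adapter. Runs under rx_filter_lock to serialize with the VLAN
 * add/del paths.
 */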
1831static void __be_set_rx_mode(struct be_adapter *adapter)
1832{
1833        struct net_device *netdev = adapter->netdev;
1834
1835        mutex_lock(&adapter->rx_filter_lock);
1836
1837        if (netdev->flags & IFF_PROMISC) {
1838                if (!be_in_all_promisc(adapter))
1839                        be_set_all_promisc(adapter);
1840        } else if (be_in_all_promisc(adapter)) {
1841                /* We need to re-program the vlan-list or clear
1842                 * vlan-promisc mode (if needed) when the interface
1843                 * comes out of promisc mode.
1844                 */
1845                be_vid_config(adapter);
1846        }
1847
1848        be_set_uc_list(adapter);
1849        be_set_mc_list(adapter);
1850
1851        mutex_unlock(&adapter->rx_filter_lock);
1852}
1853
1854static void be_work_set_rx_mode(struct work_struct *work)
1855{
1856        struct be_cmd_work *cmd_work =
1857                                container_of(work, struct be_cmd_work, work);
1858
1859        __be_set_rx_mode(cmd_work->adapter);
1860        kfree(cmd_work);
1861}
1862
1863static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1864{
1865        struct be_adapter *adapter = netdev_priv(netdev);
1866        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1867        int status;
1868
1869        if (!sriov_enabled(adapter))
1870                return -EPERM;
1871
1872        if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1873                return -EINVAL;
1874
1875        /* Proceed further only if the user-provided MAC is different
1876         * from the active MAC
1877         */
1878        if (ether_addr_equal(mac, vf_cfg->mac_addr))
1879                return 0;
1880
1881        if (BEx_chip(adapter)) {
1882                be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1883                                vf + 1);
1884
1885                status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1886                                         &vf_cfg->pmac_id, vf + 1);
1887        } else {
1888                status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1889                                        vf + 1);
1890        }
1891
1892        if (status) {
1893                dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1894                        mac, vf, status);
1895                return be_cmd_status(status);
1896        }
1897
1898        ether_addr_copy(vf_cfg->mac_addr, mac);
1899
1900        return 0;
1901}
1902
1903static int be_get_vf_config(struct net_device *netdev, int vf,
1904                            struct ifla_vf_info *vi)
1905{
1906        struct be_adapter *adapter = netdev_priv(netdev);
1907        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1908
1909        if (!sriov_enabled(adapter))
1910                return -EPERM;
1911
1912        if (vf >= adapter->num_vfs)
1913                return -EINVAL;
1914
1915        vi->vf = vf;
1916        vi->max_tx_rate = vf_cfg->tx_rate;
1917        vi->min_tx_rate = 0;
1918        vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1919        vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1920        memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1921        vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1922        vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1923
1924        return 0;
1925}
1926
1927static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1928{
1929        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1930        u16 vids[BE_NUM_VLANS_SUPPORTED];
1931        int vf_if_id = vf_cfg->if_handle;
1932        int status;
1933
1934        /* Enable Transparent VLAN Tagging */
1935        status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1936        if (status)
1937                return status;
1938
1939        /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1940        vids[0] = 0;
1941        status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1942        if (!status)
1943                dev_info(&adapter->pdev->dev,
1944                         "Cleared guest VLANs on VF%d", vf);
1945
1946        /* After TVT is enabled, disallow VFs from programming VLAN filters */
1947        if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1948                status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1949                                                  ~BE_PRIV_FILTMGMT, vf + 1);
1950                if (!status)
1951                        vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1952        }
1953        return 0;
1954}
1955
1956static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1957{
1958        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1959        struct device *dev = &adapter->pdev->dev;
1960        int status;
1961
1962        /* Reset Transparent VLAN Tagging. */
1963        status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1964                                       vf_cfg->if_handle, 0, 0);
1965        if (status)
1966                return status;
1967
1968        /* Allow VFs to program VLAN filtering */
1969        if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1970                status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1971                                                  BE_PRIV_FILTMGMT, vf + 1);
1972                if (!status) {
1973                        vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1974                        dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1975                }
1976        }
1977
1978        dev_info(dev,
1979                 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1980        return 0;
1981}
1982
1983static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1984                          __be16 vlan_proto)
1985{
1986        struct be_adapter *adapter = netdev_priv(netdev);
1987        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1988        int status;
1989
1990        if (!sriov_enabled(adapter))
1991                return -EPERM;
1992
1993        if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1994                return -EINVAL;
1995
1996        if (vlan_proto != htons(ETH_P_8021Q))
1997                return -EPROTONOSUPPORT;
1998
1999        if (vlan || qos) {
2000                vlan |= qos << VLAN_PRIO_SHIFT;
2001                status = be_set_vf_tvt(adapter, vf, vlan);
2002        } else {
2003                status = be_clear_vf_tvt(adapter, vf);
2004        }
2005
2006        if (status) {
2007                dev_err(&adapter->pdev->dev,
2008                        "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2009                        status);
2010                return be_cmd_status(status);
2011        }
2012
2013        vf_cfg->vlan_tag = vlan;
2014        return 0;
2015}
2016
2017static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2018                             int min_tx_rate, int max_tx_rate)
2019{
2020        struct be_adapter *adapter = netdev_priv(netdev);
2021        struct device *dev = &adapter->pdev->dev;
2022        int percent_rate, status = 0;
2023        u16 link_speed = 0;
2024        u8 link_status;
2025
2026        if (!sriov_enabled(adapter))
2027                return -EPERM;
2028
2029        if (vf >= adapter->num_vfs)
2030                return -EINVAL;
2031
2032        if (min_tx_rate)
2033                return -EINVAL;
2034
2035        if (!max_tx_rate)
2036                goto config_qos;
2037
2038        status = be_cmd_link_status_query(adapter, &link_speed,
2039                                          &link_status, 0);
2040        if (status)
2041                goto err;
2042
2043        if (!link_status) {
2044                dev_err(dev, "TX-rate setting not allowed when link is down\n");
2045                status = -ENETDOWN;
2046                goto err;
2047        }
2048
2049        if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2050                dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2051                        link_speed);
2052                status = -EINVAL;
2053                goto err;
2054        }
2055
2056        /* On Skyhawk the QOS setting must be done only as a % value */
2057        percent_rate = link_speed / 100;
2058        if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2059                dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2060                        percent_rate);
2061                status = -EINVAL;
2062                goto err;
2063        }
2064
2065config_qos:
2066        status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2067        if (status)
2068                goto err;
2069
2070        adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2071        return 0;
2072
2073err:
2074        dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2075                max_tx_rate, vf);
2076        return be_cmd_status(status);
2077}
2078
2079static int be_set_vf_link_state(struct net_device *netdev, int vf,
2080                                int link_state)
2081{
2082        struct be_adapter *adapter = netdev_priv(netdev);
2083        int status;
2084
2085        if (!sriov_enabled(adapter))
2086                return -EPERM;
2087
2088        if (vf >= adapter->num_vfs)
2089                return -EINVAL;
2090
2091        status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2092        if (status) {
2093                dev_err(&adapter->pdev->dev,
2094                        "Link state change on VF %d failed: %#x\n", vf, status);
2095                return be_cmd_status(status);
2096        }
2097
2098        adapter->vf_cfg[vf].plink_tracking = link_state;
2099
2100        return 0;
2101}
2102
2103static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2104{
2105        struct be_adapter *adapter = netdev_priv(netdev);
2106        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2107        u8 spoofchk;
2108        int status;
2109
2110        if (!sriov_enabled(adapter))
2111                return -EPERM;
2112
2113        if (vf >= adapter->num_vfs)
2114                return -EINVAL;
2115
2116        if (BEx_chip(adapter))
2117                return -EOPNOTSUPP;
2118
2119        if (enable == vf_cfg->spoofchk)
2120                return 0;
2121
2122        spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2123
2124        status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2125                                       0, spoofchk);
2126        if (status) {
2127                dev_err(&adapter->pdev->dev,
2128                        "Spoofchk change on VF %d failed: %#x\n", vf, status);
2129                return be_cmd_status(status);
2130        }
2131
2132        vf_cfg->spoofchk = enable;
2133        return 0;
2134}
2135
2136static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2137                          ulong now)
2138{
2139        aic->rx_pkts_prev = rx_pkts;
2140        aic->tx_reqs_prev = tx_pkts;
2141        aic->jiffies = now;
2142}
2143
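/* Compute a new EQ delay (interrupt coalescing value) for this EQ from the
 * combined RX + TX packet rate since the last sample:
 * eqd = (pps / 15000) * 4; values below 8 are treated as 0, and the result
 * is clamped to [min_eqd, max_eqd]. When adaptive coalescing is disabled,
 * the statically configured et_eqd value is returned instead.
 */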
2144static int be_get_new_eqd(struct be_eq_obj *eqo)
2145{
2146        struct be_adapter *adapter = eqo->adapter;
2147        int eqd, start;
2148        struct be_aic_obj *aic;
2149        struct be_rx_obj *rxo;
2150        struct be_tx_obj *txo;
2151        u64 rx_pkts = 0, tx_pkts = 0;
2152        ulong now;
2153        u32 pps, delta;
2154        int i;
2155
2156        aic = &adapter->aic_obj[eqo->idx];
2157        if (!aic->enable) {
2158                if (aic->jiffies)
2159                        aic->jiffies = 0;
2160                eqd = aic->et_eqd;
2161                return eqd;
2162        }
2163
2164        for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2165                do {
2166                        start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2167                        rx_pkts += rxo->stats.rx_pkts;
2168                } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2169        }
2170
2171        for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2172                do {
2173                        start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2174                        tx_pkts += txo->stats.tx_reqs;
2175                } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2176        }
2177
2178        /* Skip, if wrapped around or first calculation */
2179        now = jiffies;
2180        if (!aic->jiffies || time_before(now, aic->jiffies) ||
2181            rx_pkts < aic->rx_pkts_prev ||
2182            tx_pkts < aic->tx_reqs_prev) {
2183                be_aic_update(aic, rx_pkts, tx_pkts, now);
2184                return aic->prev_eqd;
2185        }
2186
2187        delta = jiffies_to_msecs(now - aic->jiffies);
2188        if (delta == 0)
2189                return aic->prev_eqd;
2190
2191        pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2192                (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2193        eqd = (pps / 15000) << 2;
2194
2195        if (eqd < 8)
2196                eqd = 0;
2197        eqd = min_t(u32, eqd, aic->max_eqd);
2198        eqd = max_t(u32, eqd, aic->min_eqd);
2199
2200        be_aic_update(aic, rx_pkts, tx_pkts, now);
2201
2202        return eqd;
2203}
2204
2205/* For Skyhawk-R only */
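/* Map the current EQ delay to one of the R2I delay-multiplier encodings,
 * which Skyhawk-R applies when the EQ is re-armed instead of requiring a
 * separate modify-EQD FW command.
 */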
2206static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2207{
2208        struct be_adapter *adapter = eqo->adapter;
2209        struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2210        ulong now = jiffies;
2211        int eqd;
2212        u32 mult_enc;
2213
2214        if (!aic->enable)
2215                return 0;
2216
2217        if (jiffies_to_msecs(now - aic->jiffies) < 1)
2218                eqd = aic->prev_eqd;
2219        else
2220                eqd = be_get_new_eqd(eqo);
2221
2222        if (eqd > 100)
2223                mult_enc = R2I_DLY_ENC_1;
2224        else if (eqd > 60)
2225                mult_enc = R2I_DLY_ENC_2;
2226        else if (eqd > 20)
2227                mult_enc = R2I_DLY_ENC_3;
2228        else
2229                mult_enc = R2I_DLY_ENC_0;
2230
2231        aic->prev_eqd = eqd;
2232
2233        return mult_enc;
2234}
2235
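/* Recompute the EQ delay for every EQ (typically from the periodic worker)
 * and push all changed values to the FW in a single modify-EQD command,
 * after converting each delay to the FW multiplier units ((eqd * 65) / 100).
 */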
2236void be_eqd_update(struct be_adapter *adapter, bool force_update)
2237{
2238        struct be_set_eqd set_eqd[MAX_EVT_QS];
2239        struct be_aic_obj *aic;
2240        struct be_eq_obj *eqo;
2241        int i, num = 0, eqd;
2242
2243        for_all_evt_queues(adapter, eqo, i) {
2244                aic = &adapter->aic_obj[eqo->idx];
2245                eqd = be_get_new_eqd(eqo);
2246                if (force_update || eqd != aic->prev_eqd) {
2247                        set_eqd[num].delay_multiplier = (eqd * 65)/100;
2248                        set_eqd[num].eq_id = eqo->q.id;
2249                        aic->prev_eqd = eqd;
2250                        num++;
2251                }
2252        }
2253
2254        if (num)
2255                be_cmd_modify_eqd(adapter, set_eqd, num);
2256}
2257
2258static void be_rx_stats_update(struct be_rx_obj *rxo,
2259                               struct be_rx_compl_info *rxcp)
2260{
2261        struct be_rx_stats *stats = rx_stats(rxo);
2262
2263        u64_stats_update_begin(&stats->sync);
2264        stats->rx_compl++;
2265        stats->rx_bytes += rxcp->pkt_size;
2266        stats->rx_pkts++;
2267        if (rxcp->tunneled)
2268                stats->rx_vxlan_offload_pkts++;
2269        if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2270                stats->rx_mcast_pkts++;
2271        if (rxcp->err)
2272                stats->rx_compl_err++;
2273        u64_stats_update_end(&stats->sync);
2274}
2275
2276static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2277{
2278        /* The L4 checksum is not reliable for non-TCP/UDP packets.
2279         * Also ignore ipcksm for IPv6 pkts
2280         */
2281        return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2282                (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2283}
2284
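/* Pop the RX page-info entry at the RXQ tail. The backing page is
 * DMA-unmapped only when this fragment is the last one carved from it;
 * otherwise the fragment is just synced for CPU access.
 */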
2285static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2286{
2287        struct be_adapter *adapter = rxo->adapter;
2288        struct be_rx_page_info *rx_page_info;
2289        struct be_queue_info *rxq = &rxo->q;
2290        u32 frag_idx = rxq->tail;
2291
2292        rx_page_info = &rxo->page_info_tbl[frag_idx];
2293        BUG_ON(!rx_page_info->page);
2294
2295        if (rx_page_info->last_frag) {
2296                dma_unmap_page(&adapter->pdev->dev,
2297                               dma_unmap_addr(rx_page_info, bus),
2298                               adapter->big_page_size, DMA_FROM_DEVICE);
2299                rx_page_info->last_frag = false;
2300        } else {
2301                dma_sync_single_for_cpu(&adapter->pdev->dev,
2302                                        dma_unmap_addr(rx_page_info, bus),
2303                                        rx_frag_size, DMA_FROM_DEVICE);
2304        }
2305
2306        queue_tail_inc(rxq);
2307        atomic_dec(&rxq->used);
2308        return rx_page_info;
2309}
2310
2311/* Throw away the data in the Rx completion */
2312static void be_rx_compl_discard(struct be_rx_obj *rxo,
2313                                struct be_rx_compl_info *rxcp)
2314{
2315        struct be_rx_page_info *page_info;
2316        u16 i, num_rcvd = rxcp->num_rcvd;
2317
2318        for (i = 0; i < num_rcvd; i++) {
2319                page_info = get_rx_page_info(rxo);
2320                put_page(page_info->page);
2321                memset(page_info, 0, sizeof(*page_info));
2322        }
2323}
2324
2325/*
2326 * skb_fill_rx_data forms a complete skb for an ether frame
2327 * indicated by rxcp.
2328 */
2329static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2330                             struct be_rx_compl_info *rxcp)
2331{
2332        struct be_rx_page_info *page_info;
2333        u16 i, j;
2334        u16 hdr_len, curr_frag_len, remaining;
2335        u8 *start;
2336
2337        page_info = get_rx_page_info(rxo);
2338        start = page_address(page_info->page) + page_info->page_offset;
2339        prefetch(start);
2340
2341        /* Copy data in the first descriptor of this completion */
2342        curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2343
2344        skb->len = curr_frag_len;
2345        if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2346                memcpy(skb->data, start, curr_frag_len);
2347                /* Complete packet has now been moved to data */
2348                put_page(page_info->page);
2349                skb->data_len = 0;
2350                skb->tail += curr_frag_len;
2351        } else {
2352                hdr_len = ETH_HLEN;
2353                memcpy(skb->data, start, hdr_len);
2354                skb_shinfo(skb)->nr_frags = 1;
2355                skb_frag_set_page(skb, 0, page_info->page);
2356                skb_shinfo(skb)->frags[0].page_offset =
2357                                        page_info->page_offset + hdr_len;
2358                skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2359                                  curr_frag_len - hdr_len);
2360                skb->data_len = curr_frag_len - hdr_len;
2361                skb->truesize += rx_frag_size;
2362                skb->tail += hdr_len;
2363        }
2364        page_info->page = NULL;
2365
2366        if (rxcp->pkt_size <= rx_frag_size) {
2367                BUG_ON(rxcp->num_rcvd != 1);
2368                return;
2369        }
2370
2371        /* More frags present for this completion */
2372        remaining = rxcp->pkt_size - curr_frag_len;
2373        for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2374                page_info = get_rx_page_info(rxo);
2375                curr_frag_len = min(remaining, rx_frag_size);
2376
2377                /* Coalesce all frags from the same physical page in one slot */
2378                if (page_info->page_offset == 0) {
2379                        /* Fresh page */
2380                        j++;
2381                        skb_frag_set_page(skb, j, page_info->page);
2382                        skb_shinfo(skb)->frags[j].page_offset =
2383                                                        page_info->page_offset;
2384                        skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2385                        skb_shinfo(skb)->nr_frags++;
2386                } else {
2387                        put_page(page_info->page);
2388                }
2389
2390                skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2391                skb->len += curr_frag_len;
2392                skb->data_len += curr_frag_len;
2393                skb->truesize += rx_frag_size;
2394                remaining -= curr_frag_len;
2395                page_info->page = NULL;
2396        }
2397        BUG_ON(j > MAX_SKB_FRAGS);
2398}
2399
2400/* Process the RX completion indicated by rxcp when GRO is disabled */
2401static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2402                                struct be_rx_compl_info *rxcp)
2403{
2404        struct be_adapter *adapter = rxo->adapter;
2405        struct net_device *netdev = adapter->netdev;
2406        struct sk_buff *skb;
2407
2408        skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2409        if (unlikely(!skb)) {
2410                rx_stats(rxo)->rx_drops_no_skbs++;
2411                be_rx_compl_discard(rxo, rxcp);
2412                return;
2413        }
2414
2415        skb_fill_rx_data(rxo, skb, rxcp);
2416
2417        if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2418                skb->ip_summed = CHECKSUM_UNNECESSARY;
2419        else
2420                skb_checksum_none_assert(skb);
2421
2422        skb->protocol = eth_type_trans(skb, netdev);
2423        skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2424        if (netdev->features & NETIF_F_RXHASH)
2425                skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2426
2427        skb->csum_level = rxcp->tunneled;
2428        skb_mark_napi_id(skb, napi);
2429
2430        if (rxcp->vlanf)
2431                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2432
2433        netif_receive_skb(skb);
2434}
2435
2436/* Process the RX completion indicated by rxcp when GRO is enabled */
2437static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2438                                    struct napi_struct *napi,
2439                                    struct be_rx_compl_info *rxcp)
2440{
2441        struct be_adapter *adapter = rxo->adapter;
2442        struct be_rx_page_info *page_info;
2443        struct sk_buff *skb = NULL;
2444        u16 remaining, curr_frag_len;
2445        u16 i, j;
2446
2447        skb = napi_get_frags(napi);
2448        if (!skb) {
2449                be_rx_compl_discard(rxo, rxcp);
2450                return;
2451        }
2452
2453        remaining = rxcp->pkt_size;
2454        for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2455                page_info = get_rx_page_info(rxo);
2456
2457                curr_frag_len = min(remaining, rx_frag_size);
2458
2459                /* Coalesce all frags from the same physical page in one slot */
2460                if (i == 0 || page_info->page_offset == 0) {
2461                        /* First frag or Fresh page */
2462                        j++;
2463                        skb_frag_set_page(skb, j, page_info->page);
2464                        skb_shinfo(skb)->frags[j].page_offset =
2465                                                        page_info->page_offset;
2466                        skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2467                } else {
2468                        put_page(page_info->page);
2469                }
2470                skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2471                skb->truesize += rx_frag_size;
2472                remaining -= curr_frag_len;
2473                memset(page_info, 0, sizeof(*page_info));
2474        }
2475        BUG_ON(j > MAX_SKB_FRAGS);
2476
2477        skb_shinfo(skb)->nr_frags = j + 1;
2478        skb->len = rxcp->pkt_size;
2479        skb->data_len = rxcp->pkt_size;
2480        skb->ip_summed = CHECKSUM_UNNECESSARY;
2481        skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2482        if (adapter->netdev->features & NETIF_F_RXHASH)
2483                skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2484
2485        skb->csum_level = rxcp->tunneled;
2486
2487        if (rxcp->vlanf)
2488                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2489
2490        napi_gro_frags(napi);
2491}
2492
2493static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2494                                 struct be_rx_compl_info *rxcp)
2495{
2496        rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2497        rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2498        rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2499        rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2500        rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2501        rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2502        rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2503        rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2504        rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2505        rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2506        rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2507        if (rxcp->vlanf) {
2508                rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2509                rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2510        }
2511        rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2512        rxcp->tunneled =
2513                GET_RX_COMPL_V1_BITS(tunneled, compl);
2514}
2515
2516static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2517                                 struct be_rx_compl_info *rxcp)
2518{
2519        rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2520        rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2521        rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2522        rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2523        rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2524        rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2525        rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2526        rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2527        rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2528        rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2529        rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2530        if (rxcp->vlanf) {
2531                rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2532                rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2533        }
2534        rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2535        rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2536}
2537
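/* Fetch and parse the next RX completion, or return NULL if none is valid.
 * Handles both the v0 and the BE3-native v1 completion formats and
 * normalizes the VLAN fields (QnQ mode, PVID match, byte order).
 */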
2538static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2539{
2540        struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2541        struct be_rx_compl_info *rxcp = &rxo->rxcp;
2542        struct be_adapter *adapter = rxo->adapter;
2543
2544        /* For checking the valid bit it is Ok to use either definition as the
2545         * valid bit is at the same position in both v0 and v1 Rx compl */
2546        if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2547                return NULL;
2548
2549        rmb();
2550        be_dws_le_to_cpu(compl, sizeof(*compl));
2551
2552        if (adapter->be3_native)
2553                be_parse_rx_compl_v1(compl, rxcp);
2554        else
2555                be_parse_rx_compl_v0(compl, rxcp);
2556
2557        if (rxcp->ip_frag)
2558                rxcp->l4_csum = 0;
2559
2560        if (rxcp->vlanf) {
2561                /* In QNQ modes, if qnq bit is not set, then the packet was
2562                 * tagged only with the transparent outer vlan-tag and must
2563                 * not be treated as a vlan packet by host
2564                 */
2565                if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2566                        rxcp->vlanf = 0;
2567
2568                if (!lancer_chip(adapter))
2569                        rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2570
2571                if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2572                    !test_bit(rxcp->vlan_tag, adapter->vids))
2573                        rxcp->vlanf = 0;
2574        }
2575
2576        /* As the compl has been parsed, reset it; we won't touch it again */
2577        compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2578
2579        queue_tail_inc(&rxo->cq);
2580        return rxcp;
2581}
2582
2583static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2584{
2585        u32 order = get_order(size);
2586
2587        if (order > 0)
2588                gfp |= __GFP_COMP;
2589        return  alloc_pages(gfp, order);
2590}
2591
2592/*
2593 * Allocate a page, split it to fragments of size rx_frag_size and post as
2594 * receive buffers to BE
2595 */
2596static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2597{
2598        struct be_adapter *adapter = rxo->adapter;
2599        struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2600        struct be_queue_info *rxq = &rxo->q;
2601        struct page *pagep = NULL;
2602        struct device *dev = &adapter->pdev->dev;
2603        struct be_eth_rx_d *rxd;
2604        u64 page_dmaaddr = 0, frag_dmaaddr;
2605        u32 posted, page_offset = 0, notify = 0;
2606
2607        page_info = &rxo->page_info_tbl[rxq->head];
2608        for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2609                if (!pagep) {
2610                        pagep = be_alloc_pages(adapter->big_page_size, gfp);
2611                        if (unlikely(!pagep)) {
2612                                rx_stats(rxo)->rx_post_fail++;
2613                                break;
2614                        }
2615                        page_dmaaddr = dma_map_page(dev, pagep, 0,
2616                                                    adapter->big_page_size,
2617                                                    DMA_FROM_DEVICE);
2618                        if (dma_mapping_error(dev, page_dmaaddr)) {
2619                                put_page(pagep);
2620                                pagep = NULL;
2621                                adapter->drv_stats.dma_map_errors++;
2622                                break;
2623                        }
2624                        page_offset = 0;
2625                } else {
2626                        get_page(pagep);
2627                        page_offset += rx_frag_size;
2628                }
2629                page_info->page_offset = page_offset;
2630                page_info->page = pagep;
2631
2632                rxd = queue_head_node(rxq);
2633                frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2634                rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2635                rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2636
2637                /* Any space left in the current big page for another frag? */
2638                if ((page_offset + rx_frag_size + rx_frag_size) >
2639                                        adapter->big_page_size) {
2640                        pagep = NULL;
2641                        page_info->last_frag = true;
2642                        dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2643                } else {
2644                        dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2645                }
2646
2647                prev_page_info = page_info;
2648                queue_head_inc(rxq);
2649                page_info = &rxo->page_info_tbl[rxq->head];
2650        }
2651
2652        /* Mark the last frag of a page when we break out of the above loop
2653         * with no more slots available in the RXQ
2654         */
2655        if (pagep) {
2656                prev_page_info->last_frag = true;
2657                dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2658        }
2659
2660        if (posted) {
2661                atomic_add(posted, &rxq->used);
2662                if (rxo->rx_post_starved)
2663                        rxo->rx_post_starved = false;
2664                do {
2665                        notify = min(MAX_NUM_POST_ERX_DB, posted);
2666                        be_rxq_notify(adapter, rxq->id, notify);
2667                        posted -= notify;
2668                } while (posted);
2669        } else if (atomic_read(&rxq->used) == 0) {
2670                /* Let be_worker replenish when memory is available */
2671                rxo->rx_post_starved = true;
2672        }
2673}
2674
2675static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2676{
2677        switch (status) {
2678        case BE_TX_COMP_HDR_PARSE_ERR:
2679                tx_stats(txo)->tx_hdr_parse_err++;
2680                break;
2681        case BE_TX_COMP_NDMA_ERR:
2682                tx_stats(txo)->tx_dma_err++;
2683                break;
2684        case BE_TX_COMP_ACL_ERR:
2685                tx_stats(txo)->tx_spoof_check_err++;
2686                break;
2687        }
2688}
2689
2690static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2691{
2692        switch (status) {
2693        case LANCER_TX_COMP_LSO_ERR:
2694                tx_stats(txo)->tx_tso_err++;
2695                break;
2696        case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2697        case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2698                tx_stats(txo)->tx_spoof_check_err++;
2699                break;
2700        case LANCER_TX_COMP_QINQ_ERR:
2701                tx_stats(txo)->tx_qinq_err++;
2702                break;
2703        case LANCER_TX_COMP_PARITY_ERR:
2704                tx_stats(txo)->tx_internal_parity_err++;
2705                break;
2706        case LANCER_TX_COMP_DMA_ERR:
2707                tx_stats(txo)->tx_dma_err++;
2708                break;
2709        case LANCER_TX_COMP_SGE_ERR:
2710                tx_stats(txo)->tx_sge_err++;
2711                break;
2712        }
2713}
2714
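/* Fetch and parse the next TX completion, or return NULL if none is valid.
 * Error completions update the per-queue error stats; fatal Lancer errors
 * (LSO, SGE, parity) additionally mark the adapter with BE_ERROR_TX.
 */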
2715static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2716                                                struct be_tx_obj *txo)
2717{
2718        struct be_queue_info *tx_cq = &txo->cq;
2719        struct be_tx_compl_info *txcp = &txo->txcp;
2720        struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2721
2722        if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2723                return NULL;
2724
2725        /* Ensure load ordering of valid bit dword and other dwords below */
2726        rmb();
2727        be_dws_le_to_cpu(compl, sizeof(*compl));
2728
2729        txcp->status = GET_TX_COMPL_BITS(status, compl);
2730        txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2731
2732        if (txcp->status) {
2733                if (lancer_chip(adapter)) {
2734                        lancer_update_tx_err(txo, txcp->status);
2735                        /* Reset the adapter in case of TSO,
2736                         * SGE or parity error
2737                         */
2738                        if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2739                            txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2740                            txcp->status == LANCER_TX_COMP_SGE_ERR)
2741                                be_set_error(adapter, BE_ERROR_TX);
2742                } else {
2743                        be_update_tx_err(txo, txcp->status);
2744                }
2745        }
2746
2747        if (be_check_error(adapter, BE_ERROR_TX))
2748                return NULL;
2749
2750        compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2751        queue_tail_inc(tx_cq);
2752        return txcp;
2753}
2754
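/* Unmap and free the skb(s) covered by a TX completion: walk the TXQ from
 * its tail up to last_index, unmapping the header and fragment WRBs, and
 * return the number of WRBs consumed so the caller can credit the queue.
 */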
2755static u16 be_tx_compl_process(struct be_adapter *adapter,
2756                               struct be_tx_obj *txo, u16 last_index)
2757{
2758        struct sk_buff **sent_skbs = txo->sent_skb_list;
2759        struct be_queue_info *txq = &txo->q;
2760        struct sk_buff *skb = NULL;
2761        bool unmap_skb_hdr = false;
2762        struct be_eth_wrb *wrb;
2763        u16 num_wrbs = 0;
2764        u32 frag_index;
2765
2766        do {
2767                if (sent_skbs[txq->tail]) {
2768                        /* Free skb from prev req */
2769                        if (skb)
2770                                dev_consume_skb_any(skb);
2771                        skb = sent_skbs[txq->tail];
2772                        sent_skbs[txq->tail] = NULL;
2773                        queue_tail_inc(txq);  /* skip hdr wrb */
2774                        num_wrbs++;
2775                        unmap_skb_hdr = true;
2776                }
2777                wrb = queue_tail_node(txq);
2778                frag_index = txq->tail;
2779                unmap_tx_frag(&adapter->pdev->dev, wrb,
2780                              (unmap_skb_hdr && skb_headlen(skb)));
2781                unmap_skb_hdr = false;
2782                queue_tail_inc(txq);
2783                num_wrbs++;
2784        } while (frag_index != last_index);
2785        dev_consume_skb_any(skb);
2786
2787        return num_wrbs;
2788}
2789
2790/* Return the number of events in the event queue */
2791static inline int events_get(struct be_eq_obj *eqo)
2792{
2793        struct be_eq_entry *eqe;
2794        int num = 0;
2795
2796        do {
2797                eqe = queue_tail_node(&eqo->q);
2798                if (eqe->evt == 0)
2799                        break;
2800
2801                rmb();
2802                eqe->evt = 0;
2803                num++;
2804                queue_tail_inc(&eqo->q);
2805        } while (true);
2806
2807        return num;
2808}
2809
2810/* Leaves the EQ in disarmed state */
2811static void be_eq_clean(struct be_eq_obj *eqo)
2812{
2813        int num = events_get(eqo);
2814
2815        be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2816}
2817
2818/* Free posted rx buffers that were not used */
2819static void be_rxq_clean(struct be_rx_obj *rxo)
2820{
2821        struct be_queue_info *rxq = &rxo->q;
2822        struct be_rx_page_info *page_info;
2823
2824        while (atomic_read(&rxq->used) > 0) {
2825                page_info = get_rx_page_info(rxo);
2826                put_page(page_info->page);
2827                memset(page_info, 0, sizeof(*page_info));
2828        }
2829        BUG_ON(atomic_read(&rxq->used));
2830        rxq->tail = 0;
2831        rxq->head = 0;
2832}
2833
2834static void be_rx_cq_clean(struct be_rx_obj *rxo)
2835{
2836        struct be_queue_info *rx_cq = &rxo->cq;
2837        struct be_rx_compl_info *rxcp;
2838        struct be_adapter *adapter = rxo->adapter;
2839        int flush_wait = 0;
2840
2841        /* Consume pending rx completions.
2842         * Wait for the flush completion (identified by zero num_rcvd)
2843         * to arrive. Notify CQ even when there are no more CQ entries
2844         * for HW to flush partially coalesced CQ entries.
2845         * In Lancer, there is no need to wait for flush compl.
2846         */
2847        for (;;) {
2848                rxcp = be_rx_compl_get(rxo);
2849                if (!rxcp) {
2850                        if (lancer_chip(adapter))
2851                                break;
2852
2853                        if (flush_wait++ > 50 ||
2854                            be_check_error(adapter,
2855                                           BE_ERROR_HW)) {
2856                                dev_warn(&adapter->pdev->dev,
2857                                         "did not receive flush compl\n");
2858                                break;
2859                        }
2860                        be_cq_notify(adapter, rx_cq->id, true, 0);
2861                        mdelay(1);
2862                } else {
2863                        be_rx_compl_discard(rxo, rxcp);
2864                        be_cq_notify(adapter, rx_cq->id, false, 1);
2865                        if (rxcp->num_rcvd == 0)
2866                                break;
2867                }
2868        }
2869
2870        /* After cleanup, leave the CQ in unarmed state */
2871        be_cq_notify(adapter, rx_cq->id, false, 0);
2872}
2873
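    /* Drain all TX completion queues, then reclaim any TX requests that
     * were queued but never notified to the hardware and reset the TXQ
     * indices for those entries.
     */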
2874static void be_tx_compl_clean(struct be_adapter *adapter)
2875{
2876        struct device *dev = &adapter->pdev->dev;
2877        u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2878        struct be_tx_compl_info *txcp;
2879        struct be_queue_info *txq;
2880        u32 end_idx, notified_idx;
2881        struct be_tx_obj *txo;
2882        int i, pending_txqs;
2883
2884        /* Stop polling for compls when HW has been silent for 10ms */
2885        do {
2886                pending_txqs = adapter->num_tx_qs;
2887
2888                for_all_tx_queues(adapter, txo, i) {
2889                        cmpl = 0;
2890                        num_wrbs = 0;
2891                        txq = &txo->q;
2892                        while ((txcp = be_tx_compl_get(adapter, txo))) {
2893                                num_wrbs +=
2894                                        be_tx_compl_process(adapter, txo,
2895                                                            txcp->end_index);
2896                                cmpl++;
2897                        }
2898                        if (cmpl) {
2899                                be_cq_notify(adapter, txo->cq.id, false, cmpl);
2900                                atomic_sub(num_wrbs, &txq->used);
2901                                timeo = 0;
2902                        }
2903                        if (!be_is_tx_compl_pending(txo))
2904                                pending_txqs--;
2905                }
2906
2907                if (pending_txqs == 0 || ++timeo > 10 ||
2908                    be_check_error(adapter, BE_ERROR_HW))
2909                        break;
2910
2911                mdelay(1);
2912        } while (true);
2913
2914        /* Free enqueued TX that was never notified to HW */
2915        for_all_tx_queues(adapter, txo, i) {
2916                txq = &txo->q;
2917
2918                if (atomic_read(&txq->used)) {
2919                        dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2920                                 i, atomic_read(&txq->used));
2921                        notified_idx = txq->tail;
2922                        end_idx = txq->tail;
2923                        index_adv(&end_idx, atomic_read(&txq->used) - 1,
2924                                  txq->len);
2925                        /* Use the tx-compl process logic to handle requests
2926                         * that were not sent to the HW.
2927                         */
2928                        num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2929                        atomic_sub(num_wrbs, &txq->used);
2930                        BUG_ON(atomic_read(&txq->used));
2931                        txo->pend_wrb_cnt = 0;
2932                        /* Since hw was never notified of these requests,
2933                         * reset TXQ indices
2934                         */
2935                        txq->head = notified_idx;
2936                        txq->tail = notified_idx;
2937                }
2938        }
2939}
2940
2941static void be_evt_queues_destroy(struct be_adapter *adapter)
2942{
2943        struct be_eq_obj *eqo;
2944        int i;
2945
2946        for_all_evt_queues(adapter, eqo, i) {
2947                if (eqo->q.created) {
2948                        be_eq_clean(eqo);
2949                        be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2950                        netif_napi_del(&eqo->napi);
2951                        free_cpumask_var(eqo->affinity_mask);
2952                }
2953                be_queue_free(adapter, &eqo->q);
2954        }
2955}
2956
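    /* Create one event queue per interrupt vector, along with its NAPI
     * context and a CPU affinity hint spread across the device's NUMA node.
     */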
2957static int be_evt_queues_create(struct be_adapter *adapter)
2958{
2959        struct be_queue_info *eq;
2960        struct be_eq_obj *eqo;
2961        struct be_aic_obj *aic;
2962        int i, rc;
2963
2964        /* need enough EQs to service both RX and TX queues */
2965        adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2966                                    max(adapter->cfg_num_rx_irqs,
2967                                        adapter->cfg_num_tx_irqs));
2968
2969        for_all_evt_queues(adapter, eqo, i) {
2970                int numa_node = dev_to_node(&adapter->pdev->dev);
2971
2972                aic = &adapter->aic_obj[i];
2973                eqo->adapter = adapter;
2974                eqo->idx = i;
2975                aic->max_eqd = BE_MAX_EQD;
2976                aic->enable = true;
2977
2978                eq = &eqo->q;
2979                rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2980                                    sizeof(struct be_eq_entry));
2981                if (rc)
2982                        return rc;
2983
2984                rc = be_cmd_eq_create(adapter, eqo);
2985                if (rc)
2986                        return rc;
2987
2988                if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2989                        return -ENOMEM;
2990                cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2991                                eqo->affinity_mask);
2992                netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2993                               BE_NAPI_WEIGHT);
2994        }
2995        return 0;
2996}
2997
2998static void be_mcc_queues_destroy(struct be_adapter *adapter)
2999{
3000        struct be_queue_info *q;
3001
3002        q = &adapter->mcc_obj.q;
3003        if (q->created)
3004                be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
3005        be_queue_free(adapter, q);
3006
3007        q = &adapter->mcc_obj.cq;
3008        if (q->created)
3009                be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3010        be_queue_free(adapter, q);
3011}
3012
3013/* Must be called only after TX qs are created as MCC shares TX EQ */
3014static int be_mcc_queues_create(struct be_adapter *adapter)
3015{
3016        struct be_queue_info *q, *cq;
3017
3018        cq = &adapter->mcc_obj.cq;
3019        if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3020                           sizeof(struct be_mcc_compl)))
3021                goto err;
3022
3023        /* Use the default EQ for MCC completions */
3024        if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3025                goto mcc_cq_free;
3026
3027        q = &adapter->mcc_obj.q;
3028        if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3029                goto mcc_cq_destroy;
3030
3031        if (be_cmd_mccq_create(adapter, q, cq))
3032                goto mcc_q_free;
3033
3034        return 0;
3035
3036mcc_q_free:
3037        be_queue_free(adapter, q);
3038mcc_cq_destroy:
3039        be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3040mcc_cq_free:
3041        be_queue_free(adapter, cq);
3042err:
3043        return -1;
3044}
3045
3046static void be_tx_queues_destroy(struct be_adapter *adapter)
3047{
3048        struct be_queue_info *q;
3049        struct be_tx_obj *txo;
3050        u8 i;
3051
3052        for_all_tx_queues(adapter, txo, i) {
3053                q = &txo->q;
3054                if (q->created)
3055                        be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3056                be_queue_free(adapter, q);
3057
3058                q = &txo->cq;
3059                if (q->created)
3060                        be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3061                be_queue_free(adapter, q);
3062        }
3063}
3064
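    /* Create the TX WRB queues and their completion queues; TX CQs are
     * distributed round-robin across the available event queues.
     */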
3065static int be_tx_qs_create(struct be_adapter *adapter)
3066{
3067        struct be_queue_info *cq;
3068        struct be_tx_obj *txo;
3069        struct be_eq_obj *eqo;
3070        int status, i;
3071
3072        adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3073
3074        for_all_tx_queues(adapter, txo, i) {
3075                cq = &txo->cq;
3076                status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3077                                        sizeof(struct be_eth_tx_compl));
3078                if (status)
3079                        return status;
3080
3081                u64_stats_init(&txo->stats.sync);
3082                u64_stats_init(&txo->stats.sync_compl);
3083
3084                /* If num_evt_qs is less than num_tx_qs, then more than
3085                 * one txq shares an eq
3086                 */
3087                eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3088                status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3089                if (status)
3090                        return status;
3091
3092                status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3093                                        sizeof(struct be_eth_wrb));
3094                if (status)
3095                        return status;
3096
3097                status = be_cmd_txq_create(adapter, txo);
3098                if (status)
3099                        return status;
3100
3101                netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3102                                    eqo->idx);
3103        }
3104
3105        dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3106                 adapter->num_tx_qs);
3107        return 0;
3108}
3109
3110static void be_rx_cqs_destroy(struct be_adapter *adapter)
3111{
3112        struct be_queue_info *q;
3113        struct be_rx_obj *rxo;
3114        int i;
3115
3116        for_all_rx_queues(adapter, rxo, i) {
3117                q = &rxo->cq;
3118                if (q->created)
3119                        be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3120                be_queue_free(adapter, q);
3121        }
3122}
3123
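    /* Create one completion queue per RX ring. RSS rings are used only when
     * at least two are available; otherwise a single default RXQ is used.
     */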
3124static int be_rx_cqs_create(struct be_adapter *adapter)
3125{
3126        struct be_queue_info *eq, *cq;
3127        struct be_rx_obj *rxo;
3128        int rc, i;
3129
3130        adapter->num_rss_qs =
3131                        min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3132
3133        /* We'll use RSS only if at least 2 RSS rings are supported. */
3134        if (adapter->num_rss_qs < 2)
3135                adapter->num_rss_qs = 0;
3136
3137        adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3138
3139        /* When the interface is not capable of RSS rings (and there is no
3140         * need to create a default RXQ) we'll still need one RXQ
3141         */
3142        if (adapter->num_rx_qs == 0)
3143                adapter->num_rx_qs = 1;
3144
3145        adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3146        for_all_rx_queues(adapter, rxo, i) {
3147                rxo->adapter = adapter;
3148                cq = &rxo->cq;
3149                rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3150                                    sizeof(struct be_eth_rx_compl));
3151                if (rc)
3152                        return rc;
3153
3154                u64_stats_init(&rxo->stats.sync);
3155                eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3156                rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3157                if (rc)
3158                        return rc;
3159        }
3160
3161        dev_info(&adapter->pdev->dev,
3162                 "created %d RX queue(s)\n", adapter->num_rx_qs);
3163        return 0;
3164}
3165
3166static irqreturn_t be_intx(int irq, void *dev)
3167{
3168        struct be_eq_obj *eqo = dev;
3169        struct be_adapter *adapter = eqo->adapter;
3170        int num_evts = 0;
3171
3172        /* IRQ is not expected when NAPI is scheduled as the EQ
3173         * will not be armed.
3174         * But this can happen on Lancer INTx where it takes
3175         * a while to de-assert INTx or in BE2 where occasionally
3176         * an interrupt may be raised even when EQ is unarmed.
3177         * If NAPI is already scheduled, then counting & notifying
3178         * events will orphan them.
3179         */
3180        if (napi_schedule_prep(&eqo->napi)) {
3181                num_evts = events_get(eqo);
3182                __napi_schedule(&eqo->napi);
3183                if (num_evts)
3184                        eqo->spurious_intr = 0;
3185        }
3186        be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3187
3188        /* Return IRQ_HANDLED only for the first spurious intr
3189         * after a valid intr to stop the kernel from branding
3190         * this irq as a bad one!
3191         */
3192        if (num_evts || eqo->spurious_intr++ == 0)
3193                return IRQ_HANDLED;
3194        else
3195                return IRQ_NONE;
3196}
3197
3198static irqreturn_t be_msix(int irq, void *dev)
3199{
3200        struct be_eq_obj *eqo = dev;
3201
3202        be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3203        napi_schedule(&eqo->napi);
3204        return IRQ_HANDLED;
3205}
3206
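    /* GRO is attempted only for error-free TCP frames with a valid L4 checksum */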
3207static inline bool do_gro(struct be_rx_compl_info *rxcp)
3208{
3209        return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3210}
3211
3212static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3213                         int budget)
3214{
3215        struct be_adapter *adapter = rxo->adapter;
3216        struct be_queue_info *rx_cq = &rxo->cq;
3217        struct be_rx_compl_info *rxcp;
3218        u32 work_done;
3219        u32 frags_consumed = 0;
3220
3221        for (work_done = 0; work_done < budget; work_done++) {
3222                rxcp = be_rx_compl_get(rxo);
3223                if (!rxcp)
3224                        break;
3225
3226                /* Is it a flush compl that has no data */
3227                if (unlikely(rxcp->num_rcvd == 0))
3228                        goto loop_continue;
3229
3230                /* Discard compl with partial DMA Lancer B0 */
3231                if (unlikely(!rxcp->pkt_size)) {
3232                        be_rx_compl_discard(rxo, rxcp);
3233                        goto loop_continue;
3234                }
3235
3236                /* On BE drop pkts that arrive due to imperfect filtering in
3237                 * promiscuous mode on some SKUs
3238                 */
3239                if (unlikely(rxcp->port != adapter->port_num &&
3240                             !lancer_chip(adapter))) {
3241                        be_rx_compl_discard(rxo, rxcp);
3242                        goto loop_continue;
3243                }
3244
3245                if (do_gro(rxcp))
3246                        be_rx_compl_process_gro(rxo, napi, rxcp);
3247                else
3248                        be_rx_compl_process(rxo, napi, rxcp);
3249
3250loop_continue:
3251                frags_consumed += rxcp->num_rcvd;
3252                be_rx_stats_update(rxo, rxcp);
3253        }
3254
3255        if (work_done) {
3256                be_cq_notify(adapter, rx_cq->id, true, work_done);
3257
3258                /* When an rx-obj gets into post_starved state, just
3259                 * let be_worker do the posting.
3260                 */
3261                if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3262                    !rxo->rx_post_starved)
3263                        be_post_rx_frags(rxo, GFP_ATOMIC,
3264                                         max_t(u32, MAX_RX_POST,
3265                                               frags_consumed));
3266        }
3267
3268        return work_done;
3269}
3270
3271
3272static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3273                          int idx)
3274{
3275        int num_wrbs = 0, work_done = 0;
3276        struct be_tx_compl_info *txcp;
3277
3278        while ((txcp = be_tx_compl_get(adapter, txo))) {
3279                num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3280                work_done++;
3281        }
3282
3283        if (work_done) {
3284                be_cq_notify(adapter, txo->cq.id, true, work_done);
3285                atomic_sub(num_wrbs, &txo->q.used);
3286
3287                /* As Tx wrbs have been freed up, wake up netdev queue
3288                 * if it was stopped due to lack of tx wrbs.  */
3289                if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3290                    be_can_txq_wake(txo)) {
3291                        netif_wake_subqueue(adapter->netdev, idx);
3292                }
3293
3294                u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3295                tx_stats(txo)->tx_compl += work_done;
3296                u64_stats_update_end(&tx_stats(txo)->sync_compl);
3297        }
3298}
3299
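    /* NAPI poll handler: reaps TX completions, processes RX within the given
     * budget, services MCC completions on the MCC EQ and re-arms the event
     * queue only when the budget was not exhausted.
     */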
3300int be_poll(struct napi_struct *napi, int budget)
3301{
3302        struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3303        struct be_adapter *adapter = eqo->adapter;
3304        int max_work = 0, work, i, num_evts;
3305        struct be_rx_obj *rxo;
3306        struct be_tx_obj *txo;
3307        u32 mult_enc = 0;
3308
3309        num_evts = events_get(eqo);
3310
3311        for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3312                be_process_tx(adapter, txo, i);
3313
3314        /* This loop will iterate twice for EQ0 in which
3315         * completions of the last RXQ (default one) are also processed.
3316         * For other EQs the loop iterates only once.
3317         */
3318        for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3319                work = be_process_rx(rxo, napi, budget);
3320                max_work = max(work, max_work);
3321        }
3322
3323        if (is_mcc_eqo(eqo))
3324                be_process_mcc(adapter);
3325
3326        if (max_work < budget) {
3327                napi_complete_done(napi, max_work);
3328
3329                /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3330                 * delay via a delay multiplier encoding value
3331                 */
3332                if (skyhawk_chip(adapter))
3333                        mult_enc = be_get_eq_delay_mult_enc(eqo);
3334
3335                be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3336                             mult_enc);
3337        } else {
3338                /* As we'll continue in polling mode, count and clear events */
3339                be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3340        }
3341        return max_work;
3342}
3343
3344void be_detect_error(struct be_adapter *adapter)
3345{
3346        u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3347        u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3348        struct device *dev = &adapter->pdev->dev;
3349        u16 val;
3350        u32 i;
3351
3352        if (be_check_error(adapter, BE_ERROR_HW))
3353                return;
3354
3355        if (lancer_chip(adapter)) {
3356                sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3357                if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3358                        be_set_error(adapter, BE_ERROR_UE);
3359                        sliport_err1 = ioread32(adapter->db +
3360                                                SLIPORT_ERROR1_OFFSET);
3361                        sliport_err2 = ioread32(adapter->db +
3362                                                SLIPORT_ERROR2_OFFSET);
3363                        /* Do not log error messages if it's a FW reset */
3364                        if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3365                            sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3366                                dev_info(dev, "Reset is in progress\n");
3367                        } else {
3368                                dev_err(dev, "Error detected in the card\n");
3369                                dev_err(dev, "ERR: sliport status 0x%x\n",
3370                                        sliport_status);
3371                                dev_err(dev, "ERR: sliport error1 0x%x\n",
3372                                        sliport_err1);
3373                                dev_err(dev, "ERR: sliport error2 0x%x\n",
3374                                        sliport_err2);
3375                        }
3376                }
3377        } else {
3378                ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3379                ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3380                ue_lo_mask = ioread32(adapter->pcicfg +
3381                                      PCICFG_UE_STATUS_LOW_MASK);
3382                ue_hi_mask = ioread32(adapter->pcicfg +
3383                                      PCICFG_UE_STATUS_HI_MASK);
3384
3385                ue_lo = (ue_lo & ~ue_lo_mask);
3386                ue_hi = (ue_hi & ~ue_hi_mask);
3387
3388                if (ue_lo || ue_hi) {
3389                        /* On certain platforms BE3 hardware can indicate
3390                         * spurious UEs. In case of a UE in the chip,
3391                         * the POST register correctly reports either a
3392                         * FAT_LOG_START state (FW is currently dumping
3393                         * FAT log data) or an ARMFW_UE state. Check for the
3394                         * above states to ascertain if the UE is valid or not.
3395                         */
3396                        if (BE3_chip(adapter)) {
3397                                val = be_POST_stage_get(adapter);
3398                                if ((val & POST_STAGE_FAT_LOG_START)
3399                                     != POST_STAGE_FAT_LOG_START &&
3400                                    (val & POST_STAGE_ARMFW_UE)
3401                                     != POST_STAGE_ARMFW_UE &&
3402                                    (val & POST_STAGE_RECOVERABLE_ERR)
3403                                     != POST_STAGE_RECOVERABLE_ERR)
3404                                        return;
3405                        }
3406
3407                        dev_err(dev, "Error detected in the adapter");
3408                        be_set_error(adapter, BE_ERROR_UE);
3409
3410                        for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3411                                if (ue_lo & 1)
3412                                        dev_err(dev, "UE: %s bit set\n",
3413                                                ue_status_low_desc[i]);
3414                        }
3415                        for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3416                                if (ue_hi & 1)
3417                                        dev_err(dev, "UE: %s bit set\n",
3418                                                ue_status_hi_desc[i]);
3419                        }
3420                }
3421        }
3422}
3423
3424static void be_msix_disable(struct be_adapter *adapter)
3425{
3426        if (msix_enabled(adapter)) {
3427                pci_disable_msix(adapter->pdev);
3428                adapter->num_msix_vec = 0;
3429                adapter->num_msix_roce_vec = 0;
3430        }
3431}
3432
3433static int be_msix_enable(struct be_adapter *adapter)
3434{
3435        unsigned int i, max_roce_eqs;
3436        struct device *dev = &adapter->pdev->dev;
3437        int num_vec;
3438
3439        /* If RoCE is supported, program the max number of vectors that
3440         * could be used for NIC and RoCE; otherwise, just program the number
3441         * we'll use initially.
3442         */
3443        if (be_roce_supported(adapter)) {
3444                max_roce_eqs =
3445                        be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3446                max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3447                num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3448        } else {
3449                num_vec = max(adapter->cfg_num_rx_irqs,
3450                              adapter->cfg_num_tx_irqs);
3451        }
3452
3453        for (i = 0; i < num_vec; i++)
3454                adapter->msix_entries[i].entry = i;
3455
3456        num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3457                                        MIN_MSIX_VECTORS, num_vec);
3458        if (num_vec < 0)
3459                goto fail;
3460
3461        if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3462                adapter->num_msix_roce_vec = num_vec / 2;
3463                dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3464                         adapter->num_msix_roce_vec);
3465        }
3466
3467        adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3468
3469        dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3470                 adapter->num_msix_vec);
3471        return 0;
3472
3473fail:
3474        dev_warn(dev, "MSIx enable failed\n");
3475
3476        /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3477        if (be_virtfn(adapter))
3478                return num_vec;
3479        return 0;
3480}
3481
3482static inline int be_msix_vec_get(struct be_adapter *adapter,
3483                                  struct be_eq_obj *eqo)
3484{
3485        return adapter->msix_entries[eqo->msix_idx].vector;
3486}
3487
3488static int be_msix_register(struct be_adapter *adapter)
3489{
3490        struct net_device *netdev = adapter->netdev;
3491        struct be_eq_obj *eqo;
3492        int status, i, vec;
3493
3494        for_all_evt_queues(adapter, eqo, i) {
3495                sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3496                vec = be_msix_vec_get(adapter, eqo);
3497                status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3498                if (status)
3499                        goto err_msix;
3500
3501                irq_set_affinity_hint(vec, eqo->affinity_mask);
3502        }
3503
3504        return 0;
3505err_msix:
3506        for (i--; i >= 0; i--) {
3507                eqo = &adapter->eq_obj[i];
3508                free_irq(be_msix_vec_get(adapter, eqo), eqo);
3509        }
3510        dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3511                 status);
3512        be_msix_disable(adapter);
3513        return status;
3514}
3515
3516static int be_irq_register(struct be_adapter *adapter)
3517{
3518        struct net_device *netdev = adapter->netdev;
3519        int status;
3520
3521        if (msix_enabled(adapter)) {
3522                status = be_msix_register(adapter);
3523                if (status == 0)
3524                        goto done;
3525                /* INTx is not supported for VF */
3526                if (be_virtfn(adapter))
3527                        return status;
3528        }
3529
3530        /* INTx: only the first EQ is used */
3531        netdev->irq = adapter->pdev->irq;
3532        status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3533                             &adapter->eq_obj[0]);
3534        if (status) {
3535                dev_err(&adapter->pdev->dev,
3536                        "INTx request IRQ failed - err %d\n", status);
3537                return status;
3538        }
3539done:
3540        adapter->isr_registered = true;
3541        return 0;
3542}
3543
3544static void be_irq_unregister(struct be_adapter *adapter)
3545{
3546        struct net_device *netdev = adapter->netdev;
3547        struct be_eq_obj *eqo;
3548        int i, vec;
3549
3550        if (!adapter->isr_registered)
3551                return;
3552
3553        /* INTx */
3554        if (!msix_enabled(adapter)) {
3555                free_irq(netdev->irq, &adapter->eq_obj[0]);
3556                goto done;
3557        }
3558
3559        /* MSIx */
3560        for_all_evt_queues(adapter, eqo, i) {
3561                vec = be_msix_vec_get(adapter, eqo);
3562                irq_set_affinity_hint(vec, NULL);
3563                free_irq(vec, eqo);
3564        }
3565
3566done:
3567        adapter->isr_registered = false;
3568}
3569
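    /* Destroy the RX rings after draining their completion queues and
     * releasing any posted receive buffers; RSS is disabled as well.
     */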
3570static void be_rx_qs_destroy(struct be_adapter *adapter)
3571{
3572        struct rss_info *rss = &adapter->rss_info;
3573        struct be_queue_info *q;
3574        struct be_rx_obj *rxo;
3575        int i;
3576
3577        for_all_rx_queues(adapter, rxo, i) {
3578                q = &rxo->q;
3579                if (q->created) {
3580                        /* If RXQs are destroyed while in an "out of buffer"
3581                         * state, there is a possibility of an HW stall on
3582                         * Lancer. So, post 64 buffers to each queue to relieve
3583                         * the "out of buffer" condition.
3584                         * Make sure there's space in the RXQ before posting.
3585                         */
3586                        if (lancer_chip(adapter)) {
3587                                be_rx_cq_clean(rxo);
3588                                if (atomic_read(&q->used) == 0)
3589                                        be_post_rx_frags(rxo, GFP_KERNEL,
3590                                                         MAX_RX_POST);
3591                        }
3592
3593                        be_cmd_rxq_destroy(adapter, q);
3594                        be_rx_cq_clean(rxo);
3595                        be_rxq_clean(rxo);
3596                }
3597                be_queue_free(adapter, q);
3598        }
3599
3600        if (rss->rss_flags) {
3601                rss->rss_flags = RSS_ENABLE_NONE;
3602                be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3603                                  128, rss->rss_hkey);
3604        }
3605}
3606
3607static void be_disable_if_filters(struct be_adapter *adapter)
3608{
3609        /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3610        if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3611            check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3612                be_dev_mac_del(adapter, adapter->pmac_id[0]);
3613                eth_zero_addr(adapter->dev_mac);
3614        }
3615
3616        be_clear_uc_list(adapter);
3617        be_clear_mc_list(adapter);
3618
3619        /* The IFACE flags are enabled in the open path and cleared
3620         * in the close path. When a VF gets detached from the host and
3621         * assigned to a VM the following happens:
3622         *      - VF's IFACE flags get cleared in the detach path
3623         *      - IFACE create is issued by the VF in the attach path
3624         * Due to a bug in the BE3/Skyhawk-R FW
3625         * (Lancer FW doesn't have the bug), the IFACE capability flags
3626         * specified along with the IFACE create cmd issued by a VF are not
3627         * honoured by FW.  As a consequence, if a *new* driver
3628         * (that enables/disables IFACE flags in open/close)
3629         * is loaded in the host and an *old* driver is used by a VM/VF,
3630         * the IFACE gets created *without* the needed flags.
3631         * To avoid this, disable RX-filter flags only for Lancer.
3632         */
3633        if (lancer_chip(adapter)) {
3634                be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3635                adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3636        }
3637}
3638
3639static int be_close(struct net_device *netdev)
3640{
3641        struct be_adapter *adapter = netdev_priv(netdev);
3642        struct be_eq_obj *eqo;
3643        int i;
3644
3645        /* This protection is needed as be_close() may be called even when the
3646         * adapter is in cleared state (after eeh perm failure)
3647         */
3648        if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3649                return 0;
3650
3651        /* Before attempting cleanup ensure all the pending cmds in the
3652         * config_wq have finished execution
3653         */
3654        flush_workqueue(be_wq);
3655
3656        be_disable_if_filters(adapter);
3657
3658        if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3659                for_all_evt_queues(adapter, eqo, i) {
3660                        napi_disable(&eqo->napi);
3661                }
3662                adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3663        }
3664
3665        be_async_mcc_disable(adapter);
3666
3667        /* Wait for all pending tx completions to arrive so that
3668         * all tx skbs are freed.
3669         */
3670        netif_tx_disable(netdev);
3671        be_tx_compl_clean(adapter);
3672
3673        be_rx_qs_destroy(adapter);
3674
3675        for_all_evt_queues(adapter, eqo, i) {
3676                if (msix_enabled(adapter))
3677                        synchronize_irq(be_msix_vec_get(adapter, eqo));
3678                else
3679                        synchronize_irq(netdev->irq);
3680                be_eq_clean(eqo);
3681        }
3682
3683        be_irq_unregister(adapter);
3684
3685        return 0;
3686}
3687
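    /* Create the RX rings, program the RSS indirection table and hash key
     * when multiple RSS rings exist, and post the initial receive buffers.
     */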
3688static int be_rx_qs_create(struct be_adapter *adapter)
3689{
3690        struct rss_info *rss = &adapter->rss_info;
3691        u8 rss_key[RSS_HASH_KEY_LEN];
3692        struct be_rx_obj *rxo;
3693        int rc, i, j;
3694
3695        for_all_rx_queues(adapter, rxo, i) {
3696                rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3697                                    sizeof(struct be_eth_rx_d));
3698                if (rc)
3699                        return rc;
3700        }
3701
3702        if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3703                rxo = default_rxo(adapter);
3704                rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3705                                       rx_frag_size, adapter->if_handle,
3706                                       false, &rxo->rss_id);
3707                if (rc)
3708                        return rc;
3709        }
3710
3711        for_all_rss_queues(adapter, rxo, i) {
3712                rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3713                                       rx_frag_size, adapter->if_handle,
3714                                       true, &rxo->rss_id);
3715                if (rc)
3716                        return rc;
3717        }
3718
3719        if (be_multi_rxq(adapter)) {
3720                for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3721                        for_all_rss_queues(adapter, rxo, i) {
3722                                if ((j + i) >= RSS_INDIR_TABLE_LEN)
3723                                        break;
3724                                rss->rsstable[j + i] = rxo->rss_id;
3725                                rss->rss_queue[j + i] = i;
3726                        }
3727                }
3728                rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3729                        RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3730
3731                if (!BEx_chip(adapter))
3732                        rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3733                                RSS_ENABLE_UDP_IPV6;
3734
3735                netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3736                rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3737                                       RSS_INDIR_TABLE_LEN, rss_key);
3738                if (rc) {
3739                        rss->rss_flags = RSS_ENABLE_NONE;
3740                        return rc;
3741                }
3742
3743                memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3744        } else {
3745                /* Disable RSS, if only default RX Q is created */
3746                rss->rss_flags = RSS_ENABLE_NONE;
3747        }
3748
3749
3750        /* Post 1 less than RXQ-len to avoid head being equal to tail,
3751         * which is a queue empty condition
3752         */
3753        for_all_rx_queues(adapter, rxo, i)
3754                be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3755
3756        return 0;
3757}
3758
3759static int be_enable_if_filters(struct be_adapter *adapter)
3760{
3761        int status;
3762
3763        status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3764        if (status)
3765                return status;
3766
3767        /* Normally this condition is true, as ->dev_mac is zeroed.
3768         * But on BE3 VFs the initial MAC is pre-programmed by PF and
3769         * subsequent be_dev_mac_add() can fail (after fresh boot)
3770         */
3771        if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3772                int old_pmac_id = -1;
3773
3774                /* Remember old programmed MAC if any - can happen on BE3 VF */
3775                if (!is_zero_ether_addr(adapter->dev_mac))
3776                        old_pmac_id = adapter->pmac_id[0];
3777
3778                status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3779                if (status)
3780                        return status;
3781
3782                /* Delete the old programmed MAC as we successfully programmed
3783                 * a new MAC
3784                 */
3785                if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3786                        be_dev_mac_del(adapter, old_pmac_id);
3787
3788                ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3789        }
3790
3791        if (adapter->vlans_added)
3792                be_vid_config(adapter);
3793
3794        __be_set_rx_mode(adapter);
3795
3796        return 0;
3797}
3798
3799static int be_open(struct net_device *netdev)
3800{
3801        struct be_adapter *adapter = netdev_priv(netdev);
3802        struct be_eq_obj *eqo;
3803        struct be_rx_obj *rxo;
3804        struct be_tx_obj *txo;
3805        u8 link_status;
3806        int status, i;
3807
3808        status = be_rx_qs_create(adapter);
3809        if (status)
3810                goto err;
3811
3812        status = be_enable_if_filters(adapter);
3813        if (status)
3814                goto err;
3815
3816        status = be_irq_register(adapter);
3817        if (status)
3818                goto err;
3819
3820        for_all_rx_queues(adapter, rxo, i)
3821                be_cq_notify(adapter, rxo->cq.id, true, 0);
3822
3823        for_all_tx_queues(adapter, txo, i)
3824                be_cq_notify(adapter, txo->cq.id, true, 0);
3825
3826        be_async_mcc_enable(adapter);
3827
3828        for_all_evt_queues(adapter, eqo, i) {
3829                napi_enable(&eqo->napi);
3830                be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3831        }
3832        adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3833
3834        status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3835        if (!status)
3836                be_link_status_update(adapter, link_status);
3837
3838        netif_tx_start_all_queues(netdev);
3839        if (skyhawk_chip(adapter))
3840                udp_tunnel_get_rx_info(netdev);
3841
3842        return 0;
3843err:
3844        be_close(adapter->netdev);
3845        return -EIO;
3846}
3847
3848static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3849{
3850        u32 addr;
3851
3852        addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3853
3854        mac[5] = (u8)(addr & 0xFF);
3855        mac[4] = (u8)((addr >> 8) & 0xFF);
3856        mac[3] = (u8)((addr >> 16) & 0xFF);
3857        /* Use the OUI from the current MAC address */
3858        memcpy(mac, adapter->netdev->dev_addr, 3);
3859}
3860
3861/*
3862 * Generate a seed MAC address from the PF MAC Address using jhash.
3863 * MAC addresses for VFs are assigned incrementally starting from the seed.
3864 * These addresses are programmed in the ASIC by the PF and the VF driver
3865 * queries for the MAC address during its probe.
3866 */
3867static int be_vf_eth_addr_config(struct be_adapter *adapter)
3868{
3869        u32 vf;
3870        int status = 0;
3871        u8 mac[ETH_ALEN];
3872        struct be_vf_cfg *vf_cfg;
3873
3874        be_vf_eth_addr_generate(adapter, mac);
3875
3876        for_all_vfs(adapter, vf_cfg, vf) {
3877                if (BEx_chip(adapter))
3878                        status = be_cmd_pmac_add(adapter, mac,
3879                                                 vf_cfg->if_handle,
3880                                                 &vf_cfg->pmac_id, vf + 1);
3881                else
3882                        status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3883                                                vf + 1);
3884
3885                if (status)
3886                        dev_err(&adapter->pdev->dev,
3887                                "Mac address assignment failed for VF %d\n",
3888                                vf);
3889                else
3890                        memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3891
3892                mac[5] += 1;
3893        }
3894        return status;
3895}
3896
3897static int be_vfs_mac_query(struct be_adapter *adapter)
3898{
3899        int status, vf;
3900        u8 mac[ETH_ALEN];
3901        struct be_vf_cfg *vf_cfg;
3902
3903        for_all_vfs(adapter, vf_cfg, vf) {
3904                status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3905                                               mac, vf_cfg->if_handle,
3906                                               false, vf+1);
3907                if (status)
3908                        return status;
3909                memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3910        }
3911        return 0;
3912}
3913
3914static void be_vf_clear(struct be_adapter *adapter)
3915{
3916        struct be_vf_cfg *vf_cfg;
3917        u32 vf;
3918
3919        if (pci_vfs_assigned(adapter->pdev)) {
3920                dev_warn(&adapter->pdev->dev,
3921                         "VFs are assigned to VMs: not disabling VFs\n");
3922                goto done;
3923        }
3924
3925        pci_disable_sriov(adapter->pdev);
3926
3927        for_all_vfs(adapter, vf_cfg, vf) {
3928                if (BEx_chip(adapter))
3929                        be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3930                                        vf_cfg->pmac_id, vf + 1);
3931                else
3932                        be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3933                                       vf + 1);
3934
3935                be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3936        }
3937
3938        if (BE3_chip(adapter))
3939                be_cmd_set_hsw_config(adapter, 0, 0,
3940                                      adapter->if_handle,
3941                                      PORT_FWD_TYPE_PASSTHRU, 0);
3942done:
3943        kfree(adapter->vf_cfg);
3944        adapter->num_vfs = 0;
3945        adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3946}
3947
3948static void be_clear_queues(struct be_adapter *adapter)
3949{
3950        be_mcc_queues_destroy(adapter);
3951        be_rx_cqs_destroy(adapter);
3952        be_tx_queues_destroy(adapter);
3953        be_evt_queues_destroy(adapter);
3954}
3955
3956static void be_cancel_worker(struct be_adapter *adapter)
3957{
3958        if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3959                cancel_delayed_work_sync(&adapter->work);
3960                adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3961        }
3962}
3963
3964static void be_cancel_err_detection(struct be_adapter *adapter)
3965{
3966        struct be_error_recovery *err_rec = &adapter->error_recovery;
3967
3968        if (!be_err_recovery_workq)
3969                return;
3970
3971        if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3972                cancel_delayed_work_sync(&err_rec->err_detection_work);
3973                adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3974        }
3975}
3976
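    /* Convert the interface to tunnel mode and program the first VxLAN UDP
     * port from the port list, enabling checksum/TSO offloads for
     * encapsulated traffic.
     */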
3977static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3978{
3979        struct net_device *netdev = adapter->netdev;
3980        struct device *dev = &adapter->pdev->dev;
3981        struct be_vxlan_port *vxlan_port;
3982        __be16 port;
3983        int status;
3984
3985        vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3986                                      struct be_vxlan_port, list);
3987        port = vxlan_port->port;
3988
3989        status = be_cmd_manage_iface(adapter, adapter->if_handle,
3990                                     OP_CONVERT_NORMAL_TO_TUNNEL);
3991        if (status) {
3992                dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3993                return status;
3994        }
3995        adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3996
3997        status = be_cmd_set_vxlan_port(adapter, port);
3998        if (status) {
3999                dev_warn(dev, "Failed to add VxLAN port\n");
4000                return status;
4001        }
4002        adapter->vxlan_port = port;
4003
4004        netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4005                                   NETIF_F_TSO | NETIF_F_TSO6 |
4006                                   NETIF_F_GSO_UDP_TUNNEL;
4007
4008        dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4009                 be16_to_cpu(port));
4010        return 0;
4011}
4012
4013static void be_disable_vxlan_offloads(struct be_adapter *adapter)
4014{
4015        struct net_device *netdev = adapter->netdev;
4016
4017        if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4018                be_cmd_manage_iface(adapter, adapter->if_handle,
4019                                    OP_CONVERT_TUNNEL_TO_NORMAL);
4020
4021        if (adapter->vxlan_port)
4022                be_cmd_set_vxlan_port(adapter, 0);
4023
4024        adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4025        adapter->vxlan_port = 0;
4026
4027        netdev->hw_enc_features = 0;
4028}
4029
4030static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4031                                struct be_resources *vft_res)
4032{
4033        struct be_resources res = adapter->pool_res;
4034        u32 vf_if_cap_flags = res.vf_if_cap_flags;
4035        struct be_resources res_mod = {0};
4036        u16 num_vf_qs = 1;
4037
4038        /* Distribute the queue resources among the PF and its VFs */
4039        if (num_vfs) {
4040                /* Divide the rx queues evenly among the VFs and the PF, capped
4041                 * at VF-EQ-count. Any remainder queues belong to the PF.
4042                 */
4043                num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4044                                res.max_rss_qs / (num_vfs + 1));
4045
4046                /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4047                 * RSS Tables per port. Provide RSS on VFs only if the number of
4048                 * VFs requested is less than its PF pool's RSS Tables limit.
4049                 */
4050                if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4051                        num_vf_qs = 1;
4052        }
4053
4054        /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4055         * which are modifiable using SET_PROFILE_CONFIG cmd.
4056         */
4057        be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4058                                  RESOURCE_MODIFIABLE, 0);
4059
4060        /* If RSS IFACE capability flags are modifiable for a VF, set the
4061         * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4062         * more than 1 RSSQ is available for a VF.
4063         * Otherwise, provision only 1 queue pair for VF.
4064         */
4065        if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4066                vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4067                if (num_vf_qs > 1) {
4068                        vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4069                        if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4070                                vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4071                } else {
4072                        vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4073                                             BE_IF_FLAGS_DEFQ_RSS);
4074                }
4075        } else {
4076                num_vf_qs = 1;
4077        }
4078
4079        if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4080                vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4081                vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4082        }
4083
4084        vft_res->vf_if_cap_flags = vf_if_cap_flags;
4085        vft_res->max_rx_qs = num_vf_qs;
4086        vft_res->max_rss_qs = num_vf_qs;
4087        vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4088        vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4089
4090        /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4091         * among the PF and its VFs, if the fields are changeable
4092         */
4093        if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4094                vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4095
4096        if (res_mod.max_vlans == FIELD_MODIFIABLE)
4097                vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4098
4099        if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4100                vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4101
4102        if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4103                vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4104}
4105
4106static void be_if_destroy(struct be_adapter *adapter)
4107{
4108        be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4109
4110        kfree(adapter->pmac_id);
4111        adapter->pmac_id = NULL;
4112
4113        kfree(adapter->mc_list);
4114        adapter->mc_list = NULL;
4115
4116        kfree(adapter->uc_list);
4117        adapter->uc_list = NULL;
4118}
4119
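    /* Tear down the adapter: clear VFs, redistribute SR-IOV resources across
     * the max supported VFs, disable VxLAN offloads and destroy the
     * interface, queues and MSI-X vectors.
     */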
4120static int be_clear(struct be_adapter *adapter)
4121{
4122        struct pci_dev *pdev = adapter->pdev;
4123        struct  be_resources vft_res = {0};
4124
4125        be_cancel_worker(adapter);
4126
4127        flush_workqueue(be_wq);
4128
4129        if (sriov_enabled(adapter))
4130                be_vf_clear(adapter);
4131
4132        /* Re-configure FW to distribute resources evenly across max-supported
4133         * number of VFs, only when VFs are not already enabled.
4134         */
4135        if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4136            !pci_vfs_assigned(pdev)) {
4137                be_calculate_vf_res(adapter,
4138                                    pci_sriov_get_totalvfs(pdev),
4139                                    &vft_res);
4140                be_cmd_set_sriov_config(adapter, adapter->pool_res,
4141                                        pci_sriov_get_totalvfs(pdev),
4142                                        &vft_res);
4143        }
4144
4145        be_disable_vxlan_offloads(adapter);
4146
4147        be_if_destroy(adapter);
4148
4149        be_clear_queues(adapter);
4150
4151        be_msix_disable(adapter);
4152        adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4153        return 0;
4154}
4155
4156static int be_vfs_if_create(struct be_adapter *adapter)
4157{
4158        struct be_resources res = {0};
4159        u32 cap_flags, en_flags, vf;
4160        struct be_vf_cfg *vf_cfg;
4161        int status;
4162
4163        /* If a FW profile exists, then cap_flags are updated */
4164        cap_flags = BE_VF_IF_EN_FLAGS;
4165
4166        for_all_vfs(adapter, vf_cfg, vf) {
4167                if (!BE3_chip(adapter)) {
4168                        status = be_cmd_get_profile_config(adapter, &res, NULL,
4169                                                           ACTIVE_PROFILE_TYPE,
4170                                                           RESOURCE_LIMITS,
4171                                                           vf + 1);
4172                        if (!status) {
4173                                cap_flags = res.if_cap_flags;
4174                                /* Prevent VFs from enabling VLAN promiscuous
4175                                 * mode
4176                                 */
4177                                cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4178                        }
4179                }
4180
4181                /* PF should enable IF flags during proxy if_create call */
4182                en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4183                status = be_cmd_if_create(adapter, cap_flags, en_flags,
4184                                          &vf_cfg->if_handle, vf + 1);
4185                if (status)
4186                        return status;
4187        }
4188
4189        return 0;
4190}
4191
4192static int be_vf_setup_init(struct be_adapter *adapter)
4193{
4194        struct be_vf_cfg *vf_cfg;
4195        int vf;
4196
4197        adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4198                                  GFP_KERNEL);
4199        if (!adapter->vf_cfg)
4200                return -ENOMEM;
4201
4202        for_all_vfs(adapter, vf_cfg, vf) {
4203                vf_cfg->if_handle = -1;
4204                vf_cfg->pmac_id = -1;
4205        }
4206        return 0;
4207}
4208
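    /* Provision the VFs: create (or query) their interfaces and MAC
     * addresses, grant FILTMGMT privilege where possible, and enable SR-IOV
     * if it was not already enabled.
     */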
4209static int be_vf_setup(struct be_adapter *adapter)
4210{
4211        struct device *dev = &adapter->pdev->dev;
4212        struct be_vf_cfg *vf_cfg;
4213        int status, old_vfs, vf;
4214        bool spoofchk;
4215
4216        old_vfs = pci_num_vf(adapter->pdev);
4217
4218        status = be_vf_setup_init(adapter);
4219        if (status)
4220                goto err;
4221
4222        if (old_vfs) {
4223                for_all_vfs(adapter, vf_cfg, vf) {
4224                        status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4225                        if (status)
4226                                goto err;
4227                }
4228
4229                status = be_vfs_mac_query(adapter);
4230                if (status)
4231                        goto err;
4232        } else {
4233                status = be_vfs_if_create(adapter);
4234                if (status)
4235                        goto err;
4236
4237                status = be_vf_eth_addr_config(adapter);
4238                if (status)
4239                        goto err;
4240        }
4241
4242        for_all_vfs(adapter, vf_cfg, vf) {
4243                /* Allow VFs to program MAC/VLAN filters */
4244                status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4245                                                  vf + 1);
4246                if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4247                        status = be_cmd_set_fn_privileges(adapter,
4248                                                          vf_cfg->privileges |
4249                                                          BE_PRIV_FILTMGMT,
4250                                                          vf + 1);
4251                        if (!status) {
4252                                vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4253                                dev_info(dev, "VF%d has FILTMGMT privilege\n",
4254                                         vf);
4255                        }
4256                }
4257
4258                /* Allow full available bandwidth */
4259                if (!old_vfs)
4260                        be_cmd_config_qos(adapter, 0, 0, vf + 1);
4261
4262                status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4263                                               vf_cfg->if_handle, NULL,
4264                                               &spoofchk);
4265                if (!status)
4266                        vf_cfg->spoofchk = spoofchk;
4267
4268                if (!old_vfs) {
4269                        be_cmd_enable_vf(adapter, vf + 1);
4270                        be_cmd_set_logical_link_config(adapter,
4271                                                       IFLA_VF_LINK_STATE_AUTO,
4272                                                       vf+1);
4273                }
4274        }
4275
4276        if (!old_vfs) {
4277                status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4278                if (status) {
4279                        dev_err(dev, "SRIOV enable failed\n");
4280                        adapter->num_vfs = 0;
4281                        goto err;
4282                }
4283        }
4284
4285        if (BE3_chip(adapter)) {
4286                /* On BE3, enable VEB only when SRIOV is enabled */
4287                status = be_cmd_set_hsw_config(adapter, 0, 0,
4288                                               adapter->if_handle,
4289                                               PORT_FWD_TYPE_VEB, 0);
4290                if (status)
4291                        goto err;
4292        }
4293
4294        adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4295        return 0;
4296err:
4297        dev_err(dev, "VF setup failed\n");
4298        be_vf_clear(adapter);
4299        return status;
4300}
4301
4302/* Converting function_mode bits on BE3 to SH mc_type enums */
4303
4304static u8 be_convert_mc_type(u32 function_mode)
4305{
4306        if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4307                return vNIC1;
4308        else if (function_mode & QNQ_MODE)
4309                return FLEX10;
4310        else if (function_mode & VNIC_MODE)
4311                return vNIC2;
4312        else if (function_mode & UMC_ENABLED)
4313                return UMC;
4314        else
4315                return MC_NONE;
4316}
4317
4318/* On BE2/BE3, FW does not report the supported limits */
4319static void BEx_get_resources(struct be_adapter *adapter,
4320                              struct be_resources *res)
4321{
4322        bool use_sriov = adapter->num_vfs ? 1 : 0;
4323
4324        if (be_physfn(adapter))
4325                res->max_uc_mac = BE_UC_PMAC_COUNT;
4326        else
4327                res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4328
4329        adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4330
4331        if (be_is_mc(adapter)) {
4332                /* Assuming that there are 4 channels per port
4333                 * when multi-channel is enabled
4334                 */
4335                if (be_is_qnq_mode(adapter))
4336                        res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4337                else
4338                        /* In a non-qnq multichannel mode, the pvid
4339                         * takes up one vlan entry
4340                         */
4341                        res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4342        } else {
4343                res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4344        }
4345
4346        res->max_mcast_mac = BE_MAX_MC;
4347
4348        /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4349         * 2) Create multiple TX rings on a BE3-R multi-channel interface
4350         *    *only* if it is RSS-capable.
4351         */
4352        if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4353            be_virtfn(adapter) ||
4354            (be_is_mc(adapter) &&
4355             !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4356                res->max_tx_qs = 1;
4357        } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4358                struct be_resources super_nic_res = {0};
4359
4360                /* On a SuperNIC profile, the driver needs to use the
4361                 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4362                 */
4363                be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4364                                          ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4365                                          0);
4366                /* Some old versions of BE3 FW don't report max_tx_qs value */
4367                res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4368        } else {
4369                res->max_tx_qs = BE3_MAX_TX_QS;
4370        }
4371
4372        if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4373            !use_sriov && be_physfn(adapter))
4374                res->max_rss_qs = (adapter->be3_native) ?
4375                                           BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4376        res->max_rx_qs = res->max_rss_qs + 1;
4377
4378        if (be_physfn(adapter))
4379                res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4380                                        BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4381        else
4382                res->max_evt_qs = 1;
4383
4384        res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4385        res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4386        if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4387                res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4388}
4389
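/* Reset the adapter's soft state to sane defaults before (re)configuring it */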
4390static void be_setup_init(struct be_adapter *adapter)
4391{
4392        adapter->vlan_prio_bmap = 0xff;
4393        adapter->phy.link_speed = -1;
4394        adapter->if_handle = -1;
4395        adapter->be3_native = false;
4396        adapter->if_flags = 0;
4397        adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4398        if (be_physfn(adapter))
4399                adapter->cmd_privileges = MAX_PRIVILEGES;
4400        else
4401                adapter->cmd_privileges = MIN_PRIVILEGES;
4402}
4403
4404/* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4405 * However, this HW limitation is not exposed to the host via any SLI cmd.
4406 * As a result, in the case of SRIOV, and in particular multi-partition configs,
4407 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4408 * for distribution between the VFs. This self-imposed limit will determine the
4409 * number of VFs for which RSS can be enabled.
4410 */
4411static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4412{
4413        struct be_port_resources port_res = {0};
4414        u8 rss_tables_on_port;
4415        u16 max_vfs = be_max_vfs(adapter);
4416
4417        be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4418                                  RESOURCE_LIMITS, 0);
4419
4420        rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4421
4422        /* Each PF Pool's RSS Tables limit =
4423         * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4424         */
4425        adapter->pool_res.max_rss_tables =
4426                max_vfs * rss_tables_on_port / port_res.max_vfs;
4427}
4428
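/* Read the PF-pool (SR-IOV) limits via GET_PROFILE_CONFIG. Old BE3 FW may not
 * report max_vfs; fall back to the PCI TotalVFs value in that case. If VFs
 * were left enabled by a previous driver load, honour that VF count instead
 * of the reported pool limits.
 */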
4429static int be_get_sriov_config(struct be_adapter *adapter)
4430{
4431        struct be_resources res = {0};
4432        int max_vfs, old_vfs;
4433
4434        be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4435                                  RESOURCE_LIMITS, 0);
4436
4437        /* Some old versions of BE3 FW don't report max_vfs value */
4438        if (BE3_chip(adapter) && !res.max_vfs) {
4439                max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4440                res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4441        }
4442
4443        adapter->pool_res = res;
4444
4445        /* If the VFs were not disabled during a previous unload of the driver,
4446         * then we cannot rely on the PF POOL limits for the TotalVFs value.
4447         * Instead use the TotalVFs value stored in the pci-dev struct.
4448         */
4449        old_vfs = pci_num_vf(adapter->pdev);
4450        if (old_vfs) {
4451                dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4452                         old_vfs);
4453
4454                adapter->pool_res.max_vfs =
4455                        pci_sriov_get_totalvfs(adapter->pdev);
4456                adapter->num_vfs = old_vfs;
4457        }
4458
4459        if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4460                be_calculate_pf_pool_rss_tables(adapter);
4461                dev_info(&adapter->pdev->dev,
4462                         "RSS can be enabled for all VFs if num_vfs <= %d\n",
4463                         be_max_pf_pool_rss_tables(adapter));
4464        }
4465        return 0;
4466}
4467
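/* Partition the PF-pool resources between the PF and its (future) VFs.
 * On Skyhawk the driver does this via SET_SRIOV_CONFIG; BE3 FW does not
 * support it, and Lancer FW handles it on its own.
 */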
4468static void be_alloc_sriov_res(struct be_adapter *adapter)
4469{
4470        int old_vfs = pci_num_vf(adapter->pdev);
4471        struct  be_resources vft_res = {0};
4472        int status;
4473
4474        be_get_sriov_config(adapter);
4475
4476        if (!old_vfs)
4477                pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4478
4479        /* When the HW is in an SRIOV-capable configuration, the PF-pool
4480         * resources are given to the PF during driver load if there are no
4481         * old VFs. This facility is not available in BE3 FW.
4482         * Also, this is done by FW in the Lancer chip.
4483         */
4484        if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4485                be_calculate_vf_res(adapter, 0, &vft_res);
4486                status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4487                                                 &vft_res);
4488                if (status)
4489                        dev_err(&adapter->pdev->dev,
4490                                "Failed to optimize SRIOV resources\n");
4491        }
4492}
4493
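/* Discover the per-function queue and filter limits, reserve half of the EQs
 * for RoCE when it is supported, and derive the initial RX/TX IRQ counts.
 */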
4494static int be_get_resources(struct be_adapter *adapter)
4495{
4496        struct device *dev = &adapter->pdev->dev;
4497        struct be_resources res = {0};
4498        int status;
4499
4500        /* For Lancer, SH etc. read per-function resource limits from FW.
4501         * GET_FUNC_CONFIG returns per-function guaranteed limits.
4502         * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4503         */
4504        if (BEx_chip(adapter)) {
4505                BEx_get_resources(adapter, &res);
4506        } else {
4507                status = be_cmd_get_func_config(adapter, &res);
4508                if (status)
4509                        return status;
4510
4511                /* If a default RXQ must be created, we'll use up one RSSQ */
4512                if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4513                    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4514                        res.max_rss_qs -= 1;
4515        }
4516
4517        /* If RoCE is supported stash away half the EQs for RoCE */
4518        res.max_nic_evt_qs = be_roce_supported(adapter) ?
4519                                res.max_evt_qs / 2 : res.max_evt_qs;
4520        adapter->res = res;
4521
4522        /* If FW supports RSS default queue, then skip creating non-RSS
4523         * queue for non-IP traffic.
4524         */
4525        adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4526                                 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4527
4528        dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4529                 be_max_txqs(adapter), be_max_rxqs(adapter),
4530                 be_max_rss(adapter), be_max_nic_eqs(adapter),
4531                 be_max_vfs(adapter));
4532        dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4533                 be_max_uc(adapter), be_max_mc(adapter),
4534                 be_max_vlans(adapter));
4535
4536        /* Ensure RX and TX queues are created in pairs at init time */
4537        adapter->cfg_num_rx_irqs =
4538                                min_t(u16, netif_get_num_default_rss_queues(),
4539                                      be_max_qp_irqs(adapter));
4540        adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4541        return 0;
4542}
4543
4544static int be_get_config(struct be_adapter *adapter)
4545{
4546        int status, level;
4547        u16 profile_id;
4548
4549        status = be_cmd_get_cntl_attributes(adapter);
4550        if (status)
4551                return status;
4552
4553        status = be_cmd_query_fw_cfg(adapter);
4554        if (status)
4555                return status;
4556
4557        if (!lancer_chip(adapter) && be_physfn(adapter))
4558                be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4559
4560        if (BEx_chip(adapter)) {
4561                level = be_cmd_get_fw_log_level(adapter);
4562                adapter->msg_enable =
4563                        level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4564        }
4565
4566        be_cmd_get_acpi_wol_cap(adapter);
4567        pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4568        pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4569
4570        be_cmd_query_port_name(adapter);
4571
4572        if (be_physfn(adapter)) {
4573                status = be_cmd_get_active_profile(adapter, &profile_id);
4574                if (!status)
4575                        dev_info(&adapter->pdev->dev,
4576                                 "Using profile 0x%x\n", profile_id);
4577        }
4578
4579        return 0;
4580}
4581
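/* If the netdev does not yet have a MAC address, read the permanent MAC from
 * FW and use it as both dev_addr and perm_addr.
 */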
4582static int be_mac_setup(struct be_adapter *adapter)
4583{
4584        u8 mac[ETH_ALEN];
4585        int status;
4586
4587        if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4588                status = be_cmd_get_perm_mac(adapter, mac);
4589                if (status)
4590                        return status;
4591
4592                memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4593                memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4594
4595                /* Initial MAC for BE3 VFs is already programmed by PF */
4596                if (BEx_chip(adapter) && be_virtfn(adapter))
4597                        memcpy(adapter->dev_mac, mac, ETH_ALEN);
4598        }
4599
4600        return 0;
4601}
4602
4603static void be_schedule_worker(struct be_adapter *adapter)
4604{
4605        queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4606        adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4607}
4608
4609static void be_destroy_err_recovery_workq(void)
4610{
4611        if (!be_err_recovery_workq)
4612                return;
4613
4614        flush_workqueue(be_err_recovery_workq);
4615        destroy_workqueue(be_err_recovery_workq);
4616        be_err_recovery_workq = NULL;
4617}
4618
4619static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4620{
4621        struct be_error_recovery *err_rec = &adapter->error_recovery;
4622
4623        if (!be_err_recovery_workq)
4624                return;
4625
4626        queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4627                           msecs_to_jiffies(delay));
4628        adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4629}
4630
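/* Create the event queues, TX queues, RX completion queues and MCC queues,
 * then publish the real RX/TX queue counts to the stack; updating the real
 * queue counts requires rtnl_lock, which callers hold.
 */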
4631static int be_setup_queues(struct be_adapter *adapter)
4632{
4633        struct net_device *netdev = adapter->netdev;
4634        int status;
4635
4636        status = be_evt_queues_create(adapter);
4637        if (status)
4638                goto err;
4639
4640        status = be_tx_qs_create(adapter);
4641        if (status)
4642                goto err;
4643
4644        status = be_rx_cqs_create(adapter);
4645        if (status)
4646                goto err;
4647
4648        status = be_mcc_queues_create(adapter);
4649        if (status)
4650                goto err;
4651
4652        status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4653        if (status)
4654                goto err;
4655
4656        status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4657        if (status)
4658                goto err;
4659
4660        return 0;
4661err:
4662        dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4663        return status;
4664}
4665
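/* Allocate the MAC/multicast filter tables and create the interface (IF)
 * object in FW. RSS flags are dropped when only a single RX IRQ is in use.
 */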
4666static int be_if_create(struct be_adapter *adapter)
4667{
4668        u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4669        u32 cap_flags = be_if_cap_flags(adapter);
4670        int status;
4671
4672        /* alloc required memory for other filtering fields */
4673        adapter->pmac_id = kcalloc(be_max_uc(adapter),
4674                                   sizeof(*adapter->pmac_id), GFP_KERNEL);
4675        if (!adapter->pmac_id)
4676                return -ENOMEM;
4677
4678        adapter->mc_list = kcalloc(be_max_mc(adapter),
4679                                   sizeof(*adapter->mc_list), GFP_KERNEL);
4680        if (!adapter->mc_list)
4681                return -ENOMEM;
4682
4683        adapter->uc_list = kcalloc(be_max_uc(adapter),
4684                                   sizeof(*adapter->uc_list), GFP_KERNEL);
4685        if (!adapter->uc_list)
4686                return -ENOMEM;
4687
4688        if (adapter->cfg_num_rx_irqs == 1)
4689                cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4690
4691        en_flags &= cap_flags;
4692        /* will enable all the needed filter flags in be_open() */
4693        status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4694                                  &adapter->if_handle, 0);
4695
4696        if (status)
4697                return status;
4698
4699        return 0;
4700}
4701
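/* Tear down and re-create the IF and all queues so that an updated queue/IRQ
 * configuration takes effect; stale promiscuous flags are cleared so that
 * be_open() re-programs them, and the device is re-opened if it was running.
 */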
4702int be_update_queues(struct be_adapter *adapter)
4703{
4704        struct net_device *netdev = adapter->netdev;
4705        int status;
4706
4707        if (netif_running(netdev))
4708                be_close(netdev);
4709
4710        be_cancel_worker(adapter);
4711
4712        /* If any vectors have been shared with RoCE, we cannot re-program
4713         * the MSI-X table.
4714         */
4715        if (!adapter->num_msix_roce_vec)
4716                be_msix_disable(adapter);
4717
4718        be_clear_queues(adapter);
4719        status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4720        if (status)
4721                return status;
4722
4723        if (!msix_enabled(adapter)) {
4724                status = be_msix_enable(adapter);
4725                if (status)
4726                        return status;
4727        }
4728
4729        status = be_if_create(adapter);
4730        if (status)
4731                return status;
4732
4733        status = be_setup_queues(adapter);
4734        if (status)
4735                return status;
4736
4737        be_schedule_worker(adapter);
4738
4739        /* The IF was destroyed and re-created. We need to clear
4740         * all promiscuous flags valid for the destroyed IF.
4741         * Without this, promisc mode is not restored during
4742         * be_open() because the driver thinks that it is
4743         * already enabled in HW.
4744         */
4745        adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4746
4747        if (netif_running(netdev))
4748                status = be_open(netdev);
4749
4750        return status;
4751}
4752
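/* Parse the major number out of a FW version string;
 * e.g. fw_major_num("4.6.62.0") returns 4. Returns 0 on parse failure.
 */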
4753static inline int fw_major_num(const char *fw_ver)
4754{
4755        int fw_major = 0, i;
4756
4757        i = sscanf(fw_ver, "%d.", &fw_major);
4758        if (i != 1)
4759                return 0;
4760
4761        return fw_major;
4762}
4763
4764/* If it is error recovery, FLR the PF.
4765 * Else, if any VFs are already enabled, don't FLR the PF.
4766 */
4767static bool be_reset_required(struct be_adapter *adapter)
4768{
4769        if (be_error_recovering(adapter))
4770                return true;
4771        else
4772                return pci_num_vf(adapter->pdev) == 0;
4773}
4774
4775/* Wait for the FW to be ready and perform the required initialization */
4776static int be_func_init(struct be_adapter *adapter)
4777{
4778        int status;
4779
4780        status = be_fw_wait_ready(adapter);
4781        if (status)
4782                return status;
4783
4784        /* FW is now ready; clear errors to allow cmds/doorbell */
4785        be_clear_error(adapter, BE_CLEAR_ALL);
4786
4787        if (be_reset_required(adapter)) {
4788                status = be_cmd_reset_function(adapter);
4789                if (status)
4790                        return status;
4791
4792                /* Wait for interrupts to quiesce after an FLR */
4793                msleep(100);
4794        }
4795
4796        /* Tell FW we're ready to fire cmds */
4797        status = be_cmd_fw_init(adapter);
4798        if (status)
4799                return status;
4800
4801        /* Allow interrupts for other ULPs running on NIC function */
4802        be_intr_set(adapter, true);
4803
4804        return 0;
4805}
4806
4807static int be_setup(struct be_adapter *adapter)
4808{
4809        struct device *dev = &adapter->pdev->dev;
4810        int status;
4811
4812        status = be_func_init(adapter);
4813        if (status)
4814                return status;
4815
4816        be_setup_init(adapter);
4817
4818        if (!lancer_chip(adapter))
4819                be_cmd_req_native_mode(adapter);
4820
4821        /* Invoke this cmd first to get pf_num and vf_num, which are needed
4822         * for issuing profile-related cmds
4823         */
4824        if (!BEx_chip(adapter)) {
4825                status = be_cmd_get_func_config(adapter, NULL);
4826                if (status)
4827                        return status;
4828        }
4829
4830        status = be_get_config(adapter);
4831        if (status)
4832                goto err;
4833
4834        if (!BE2_chip(adapter) && be_physfn(adapter))
4835                be_alloc_sriov_res(adapter);
4836
4837        status = be_get_resources(adapter);
4838        if (status)
4839                goto err;
4840
4841        status = be_msix_enable(adapter);
4842        if (status)
4843                goto err;
4844
4845        /* will enable all the needed filter flags in be_open() */
4846        status = be_if_create(adapter);
4847        if (status)
4848                goto err;
4849
4850        /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4851        rtnl_lock();
4852        status = be_setup_queues(adapter);
4853        rtnl_unlock();
4854        if (status)
4855                goto err;
4856
4857        be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4858
4859        status = be_mac_setup(adapter);
4860        if (status)
4861                goto err;
4862
4863        be_cmd_get_fw_ver(adapter);
4864        dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4865
4866        if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4867                dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4868                        adapter->fw_ver);
4869                dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4870        }
4871
4872        status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4873                                         adapter->rx_fc);
4874        if (status)
4875                be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4876                                        &adapter->rx_fc);
4877
4878        dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4879                 adapter->tx_fc, adapter->rx_fc);
4880
4881        if (be_physfn(adapter))
4882                be_cmd_set_logical_link_config(adapter,
4883                                               IFLA_VF_LINK_STATE_AUTO, 0);
4884
4885        /* BE3 EVB echoes broadcast/multicast packets back to PF's vport
4886         * confusing the Linux bridge or OVS that it might be connected to.
4887         * Set the EVB to PASSTHRU mode, which effectively disables the EVB
4888         * when SRIOV is not enabled.
4889         */
4890        if (BE3_chip(adapter))
4891                be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4892                                      PORT_FWD_TYPE_PASSTHRU, 0);
4893
4894        if (adapter->num_vfs)
4895                be_vf_setup(adapter);
4896
4897        status = be_cmd_get_phy_info(adapter);
4898        if (!status && be_pause_supported(adapter))
4899                adapter->phy.fc_autoneg = 1;
4900
4901        if (be_physfn(adapter) && !lancer_chip(adapter))
4902                be_cmd_set_features(adapter);
4903
4904        be_schedule_worker(adapter);
4905        adapter->flags |= BE_FLAGS_SETUP_DONE;
4906        return 0;
4907err:
4908        be_clear(adapter);
4909        return status;
4910}
4911
4912#ifdef CONFIG_NET_POLL_CONTROLLER
4913static void be_netpoll(struct net_device *netdev)
4914{
4915        struct be_adapter *adapter = netdev_priv(netdev);
4916        struct be_eq_obj *eqo;
4917        int i;
4918
4919        for_all_evt_queues(adapter, eqo, i) {
4920                be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4921                napi_schedule(&eqo->napi);
4922        }
4923}
4924#endif
4925
4926int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4927{
4928        const struct firmware *fw;
4929        int status;
4930
4931        if (!netif_running(adapter->netdev)) {
4932                dev_err(&adapter->pdev->dev,
4933                        "Firmware load not allowed (interface is down)\n");
4934                return -ENETDOWN;
4935        }
4936
4937        status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4938        if (status)
4939                goto fw_exit;
4940
4941        dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4942
4943        if (lancer_chip(adapter))
4944                status = lancer_fw_download(adapter, fw);
4945        else
4946                status = be_fw_download(adapter, fw);
4947
4948        if (!status)
4949                be_cmd_get_fw_ver(adapter);
4950
4951fw_exit:
4952        release_firmware(fw);
4953        return status;
4954}
4955
4956static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4957                                 u16 flags, struct netlink_ext_ack *extack)
4958{
4959        struct be_adapter *adapter = netdev_priv(dev);
4960        struct nlattr *attr, *br_spec;
4961        int rem;
4962        int status = 0;
4963        u16 mode = 0;
4964
4965        if (!sriov_enabled(adapter))
4966                return -EOPNOTSUPP;
4967
4968        br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4969        if (!br_spec)
4970                return -EINVAL;
4971
4972        nla_for_each_nested(attr, br_spec, rem) {
4973                if (nla_type(attr) != IFLA_BRIDGE_MODE)
4974                        continue;
4975
4976                if (nla_len(attr) < sizeof(mode))
4977                        return -EINVAL;
4978
4979                mode = nla_get_u16(attr);
4980                if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4981                        return -EOPNOTSUPP;
4982
4983                if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4984                        return -EINVAL;
4985
4986                status = be_cmd_set_hsw_config(adapter, 0, 0,
4987                                               adapter->if_handle,
4988                                               mode == BRIDGE_MODE_VEPA ?
4989                                               PORT_FWD_TYPE_VEPA :
4990                                               PORT_FWD_TYPE_VEB, 0);
4991                if (status)
4992                        goto err;
4993
4994                dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4995                         mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4996
4997                return status;
4998        }
4999err:
5000        dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5001                mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5002
5003        return status;
5004}
5005
5006static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5007                                 struct net_device *dev, u32 filter_mask,
5008                                 int nlflags)
5009{
5010        struct be_adapter *adapter = netdev_priv(dev);
5011        int status = 0;
5012        u8 hsw_mode;
5013
5014        /* BE and Lancer chips support VEB mode only */
5015        if (BEx_chip(adapter) || lancer_chip(adapter)) {
5016                /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5017                if (!pci_sriov_get_totalvfs(adapter->pdev))
5018                        return 0;
5019                hsw_mode = PORT_FWD_TYPE_VEB;
5020        } else {
5021                status = be_cmd_get_hsw_config(adapter, NULL, 0,
5022                                               adapter->if_handle, &hsw_mode,
5023                                               NULL);
5024                if (status)
5025                        return 0;
5026
5027                if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5028                        return 0;
5029        }
5030
5031        return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5032                                       hsw_mode == PORT_FWD_TYPE_VEPA ?
5033                                       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5034                                       0, 0, nlflags, filter_mask, NULL);
5035}
5036
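/* Allocate a work item used to defer a cmd to be_wq. GFP_ATOMIC is used so
 * this can be called from contexts that cannot sleep (e.g. ndo_set_rx_mode).
 */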
5037static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5038                                         void (*func)(struct work_struct *))
5039{
5040        struct be_cmd_work *work;
5041
5042        work = kzalloc(sizeof(*work), GFP_ATOMIC);
5043        if (!work) {
5044                dev_err(&adapter->pdev->dev,
5045                        "be_work memory allocation failed\n");
5046                return NULL;
5047        }
5048
5049        INIT_WORK(&work->work, func);
5050        work->adapter = adapter;
5051        return work;
5052}
5053
5054/* VxLAN offload Notes:
5055 *
5056 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5057 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5058 * is expected to work across all types of IP tunnels once exported. Skyhawk
5059 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5060 * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5061 * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5062 * those other tunnels are unexported on the fly through ndo_features_check().
5063 *
5064 * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5065 * adds more than one port, disable offloads and re-enable them again when
5066 * there's only one port left. We maintain a list of ports for this purpose.
5067 */
5068static void be_work_add_vxlan_port(struct work_struct *work)
5069{
5070        struct be_cmd_work *cmd_work =
5071                                container_of(work, struct be_cmd_work, work);
5072        struct be_adapter *adapter = cmd_work->adapter;
5073        struct device *dev = &adapter->pdev->dev;
5074        __be16 port = cmd_work->info.vxlan_port;
5075        struct be_vxlan_port *vxlan_port;
5076        int status;
5077
5078        /* Bump up the alias count if it is an existing port */
5079        list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5080                if (vxlan_port->port == port) {
5081                        vxlan_port->port_aliases++;
5082                        goto done;
5083                }
5084        }
5085
5086        /* Add a new port to our list. We don't need a lock here since port
5087         * add/delete are done only in the context of a single-threaded work
5088         * queue (be_wq).
5089         */
5090        vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
5091        if (!vxlan_port)
5092                goto done;
5093
5094        vxlan_port->port = port;
5095        INIT_LIST_HEAD(&vxlan_port->list);
5096        list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
5097        adapter->vxlan_port_count++;
5098
5099        if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5100                dev_info(dev,
5101                         "Only one UDP port supported for VxLAN offloads\n");
5102                dev_info(dev, "Disabling VxLAN offloads\n");
5103                goto err;
5104        }
5105
5106        if (adapter->vxlan_port_count > 1)
5107                goto done;
5108
5109        status = be_enable_vxlan_offloads(adapter);
5110        if (!status)
5111                goto done;
5112
5113err:
5114        be_disable_vxlan_offloads(adapter);
5115done:
5116        kfree(cmd_work);
5117        return;
5118}
5119
5120static void be_work_del_vxlan_port(struct work_struct *work)
5121{
5122        struct be_cmd_work *cmd_work =
5123                                container_of(work, struct be_cmd_work, work);
5124        struct be_adapter *adapter = cmd_work->adapter;
5125        __be16 port = cmd_work->info.vxlan_port;
5126        struct be_vxlan_port *vxlan_port;
5127
5128        /* Nothing to be done if a port alias is being deleted */
5129        list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5130                if (vxlan_port->port == port) {
5131                        if (vxlan_port->port_aliases) {
5132                                vxlan_port->port_aliases--;
5133                                goto done;
5134                        }
5135                        break;
5136                }
5137        }
5138
5139        /* No port aliases left; delete the port from the list */
5140        list_del(&vxlan_port->list);
5141        adapter->vxlan_port_count--;
5142
5143        /* Disable VxLAN offload if this is the offloaded port */
5144        if (adapter->vxlan_port == vxlan_port->port) {
5145                WARN_ON(adapter->vxlan_port_count);
5146                be_disable_vxlan_offloads(adapter);
5147                dev_info(&adapter->pdev->dev,
5148                         "Disabled VxLAN offloads for UDP port %d\n",
5149                         be16_to_cpu(port));
5150                goto out;
5151        }
5152
5153        /* If only 1 port is left, re-enable VxLAN offload */
5154        if (adapter->vxlan_port_count == 1)
5155                be_enable_vxlan_offloads(adapter);
5156
5157out:
5158        kfree(vxlan_port);
5159done:
5160        kfree(cmd_work);
5161}
5162
5163static void be_cfg_vxlan_port(struct net_device *netdev,
5164                              struct udp_tunnel_info *ti,
5165                              void (*func)(struct work_struct *))
5166{
5167        struct be_adapter *adapter = netdev_priv(netdev);
5168        struct be_cmd_work *cmd_work;
5169
5170        if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5171                return;
5172
5173        if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5174                return;
5175
5176        cmd_work = be_alloc_work(adapter, func);
5177        if (cmd_work) {
5178                cmd_work->info.vxlan_port = ti->port;
5179                queue_work(be_wq, &cmd_work->work);
5180        }
5181}
5182
5183static void be_del_vxlan_port(struct net_device *netdev,
5184                              struct udp_tunnel_info *ti)
5185{
5186        be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5187}
5188
5189static void be_add_vxlan_port(struct net_device *netdev,
5190                              struct udp_tunnel_info *ti)
5191{
5192        be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5193}
5194
5195static netdev_features_t be_features_check(struct sk_buff *skb,
5196                                           struct net_device *dev,
5197                                           netdev_features_t features)
5198{
5199        struct be_adapter *adapter = netdev_priv(dev);
5200        u8 l4_hdr = 0;
5201
5202        if (skb_is_gso(skb)) {
5203                /* IPv6 TSO requests with extension hdrs are a problem
5204                 * to Lancer and BE3 HW. Disable TSO6 feature.
5205                 */
5206                if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5207                        features &= ~NETIF_F_TSO6;
5208
5209                /* Lancer cannot handle a packet with MSS less than 256.
5210                 * Also, it can't handle a TSO packet with a single segment.
5211                 * Disable GSO support in such cases.
5212                 */
5213                if (lancer_chip(adapter) &&
5214                    (skb_shinfo(skb)->gso_size < 256 ||
5215                     skb_shinfo(skb)->gso_segs == 1))
5216                        features &= ~NETIF_F_GSO_MASK;
5217        }
5218
5219        /* The code below restricts offload features for some tunneled and
5220         * Q-in-Q packets.
5221         * Offload features for normal (non tunnel) packets are unchanged.
5222         */
5223        features = vlan_features_check(skb, features);
5224        if (!skb->encapsulation ||
5225            !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5226                return features;
5227
5228        /* It's an encapsulated packet and VxLAN offloads are enabled. We
5229         * should disable tunnel offload features if it's not a VxLAN packet,
5230         * as tunnel offloads have been enabled only for VxLAN. This is done to
5231         * allow other tunneled traffic like GRE to work fine while VxLAN
5232         * offloads are configured in Skyhawk-R.
5233         */
5234        switch (vlan_get_protocol(skb)) {
5235        case htons(ETH_P_IP):
5236                l4_hdr = ip_hdr(skb)->protocol;
5237                break;
5238        case htons(ETH_P_IPV6):
5239                l4_hdr = ipv6_hdr(skb)->nexthdr;
5240                break;
5241        default:
5242                return features;
5243        }
5244
5245        if (l4_hdr != IPPROTO_UDP ||
5246            skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5247            skb->inner_protocol != htons(ETH_P_TEB) ||
5248            skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5249                sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5250            !adapter->vxlan_port ||
5251            udp_hdr(skb)->dest != adapter->vxlan_port)
5252                return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5253
5254        return features;
5255}
5256
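/* Build a physical port id from the HBA port number (first byte) followed by
 * the controller serial-number words copied in reverse order.
 */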
5257static int be_get_phys_port_id(struct net_device *dev,
5258                               struct netdev_phys_item_id *ppid)
5259{
5260        int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5261        struct be_adapter *adapter = netdev_priv(dev);
5262        u8 *id;
5263
5264        if (MAX_PHYS_ITEM_ID_LEN < id_len)
5265                return -ENOSPC;
5266
5267        ppid->id[0] = adapter->hba_port_num + 1;
5268        id = &ppid->id[1];
5269        for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5270             i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5271                memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5272
5273        ppid->id_len = id_len;
5274
5275        return 0;
5276}
5277
5278static void be_set_rx_mode(struct net_device *dev)
5279{
5280        struct be_adapter *adapter = netdev_priv(dev);
5281        struct be_cmd_work *work;
5282
5283        work = be_alloc_work(adapter, be_work_set_rx_mode);
5284        if (work)
5285                queue_work(be_wq, &work->work);
5286}
5287
5288static const struct net_device_ops be_netdev_ops = {
5289        .ndo_open               = be_open,
5290        .ndo_stop               = be_close,
5291        .ndo_start_xmit         = be_xmit,
5292        .ndo_set_rx_mode        = be_set_rx_mode,
5293        .ndo_set_mac_address    = be_mac_addr_set,
5294        .ndo_get_stats64        = be_get_stats64,
5295        .ndo_validate_addr      = eth_validate_addr,
5296        .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5297        .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5298        .ndo_set_vf_mac         = be_set_vf_mac,
5299        .ndo_set_vf_vlan        = be_set_vf_vlan,
5300        .ndo_set_vf_rate        = be_set_vf_tx_rate,
5301        .ndo_get_vf_config      = be_get_vf_config,
5302        .ndo_set_vf_link_state  = be_set_vf_link_state,
5303        .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5304        .ndo_tx_timeout         = be_tx_timeout,
5305#ifdef CONFIG_NET_POLL_CONTROLLER
5306        .ndo_poll_controller    = be_netpoll,
5307#endif
5308        .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5309        .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5310        .ndo_udp_tunnel_add     = be_add_vxlan_port,
5311        .ndo_udp_tunnel_del     = be_del_vxlan_port,
5312        .ndo_features_check     = be_features_check,
5313        .ndo_get_phys_port_id   = be_get_phys_port_id,
5314};
5315
5316static void be_netdev_init(struct net_device *netdev)
5317{
5318        struct be_adapter *adapter = netdev_priv(netdev);
5319
5320        netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5321                NETIF_F_GSO_UDP_TUNNEL |
5322                NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5323                NETIF_F_HW_VLAN_CTAG_TX;
5324        if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5325                netdev->hw_features |= NETIF_F_RXHASH;
5326
5327        netdev->features |= netdev->hw_features |
5328                NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5329
5330        netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5331                NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5332
5333        netdev->priv_flags |= IFF_UNICAST_FLT;
5334
5335        netdev->flags |= IFF_MULTICAST;
5336
5337        netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5338
5339        netdev->netdev_ops = &be_netdev_ops;
5340
5341        netdev->ethtool_ops = &be_ethtool_ops;
5342
5343        /* MTU range: 256 - 9000 */
5344        netdev->min_mtu = BE_MIN_MTU;
5345        netdev->max_mtu = BE_MAX_MTU;
5346}
5347
5348static void be_cleanup(struct be_adapter *adapter)
5349{
5350        struct net_device *netdev = adapter->netdev;
5351
5352        rtnl_lock();
5353        netif_device_detach(netdev);
5354        if (netif_running(netdev))
5355                be_close(netdev);
5356        rtnl_unlock();
5357
5358        be_clear(adapter);
5359}
5360
5361static int be_resume(struct be_adapter *adapter)
5362{
5363        struct net_device *netdev = adapter->netdev;
5364        int status;
5365
5366        status = be_setup(adapter);
5367        if (status)
5368                return status;
5369
5370        rtnl_lock();
5371        if (netif_running(netdev))
5372                status = be_open(netdev);
5373        rtnl_unlock();
5374
5375        if (status)
5376                return status;
5377
5378        netif_device_attach(netdev);
5379
5380        return 0;
5381}
5382
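/* Initiate a chip-wide soft reset by setting the SR bit in the SLIPORT
 * soft-reset register.
 */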
5383static void be_soft_reset(struct be_adapter *adapter)
5384{
5385        u32 val;
5386
5387        dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5388        val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5389        val |= SLIPORT_SOFTRESET_SR_MASK;
5390        iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5391}
5392
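/* Decide whether a detected HW error may be recovered from: the POST stage
 * must report a recoverable error code, enough time must have passed since
 * probe and since the last recovery, and the error must not be a repeat of
 * the previous one.
 */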
5393static bool be_err_is_recoverable(struct be_adapter *adapter)
5394{
5395        struct be_error_recovery *err_rec = &adapter->error_recovery;
5396        unsigned long initial_idle_time =
5397                msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5398        unsigned long recovery_interval =
5399                msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5400        u16 ue_err_code;
5401        u32 val;
5402
5403        val = be_POST_stage_get(adapter);
5404        if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5405                return false;
5406        ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5407        if (ue_err_code == 0)
5408                return false;
5409
5410        dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5411                ue_err_code);
5412
5413        if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5414                dev_err(&adapter->pdev->dev,
5415                        "Cannot recover within %lu sec from driver load\n",
5416                        jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5417                return false;
5418        }
5419
5420        if (err_rec->last_recovery_time && time_before_eq(
5421                jiffies - err_rec->last_recovery_time, recovery_interval)) {
5422                dev_err(&adapter->pdev->dev,
5423                        "Cannot recover within %lu sec from last recovery\n",
5424                        jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5425                return false;
5426        }
5427
5428        if (ue_err_code == err_rec->last_err_code) {
5429                dev_err(&adapter->pdev->dev,
5430                        "Cannot recover from a consecutive TPE error\n");
5431                return false;
5432        }
5433
5434        err_rec->last_recovery_time = jiffies;
5435        err_rec->last_err_code = ue_err_code;
5436        return true;
5437}
5438
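/* TPE recovery state machine for non-Lancer chips: NONE -> DETECT ->
 * (RESET, on PF0 only) -> PRE_POLL -> REINIT. Each call advances one state
 * and sets resched_delay for the next step; returns 0 once re-initialization
 * can begin, -EAGAIN while recovery is in progress, or a fatal error otherwise.
 */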
5439static int be_tpe_recover(struct be_adapter *adapter)
5440{
5441        struct be_error_recovery *err_rec = &adapter->error_recovery;
5442        int status = -EAGAIN;
5443        u32 val;
5444
5445        switch (err_rec->recovery_state) {
5446        case ERR_RECOVERY_ST_NONE:
5447                err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5448                err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5449                break;
5450
5451        case ERR_RECOVERY_ST_DETECT:
5452                val = be_POST_stage_get(adapter);
5453                if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5454                    POST_STAGE_RECOVERABLE_ERR) {
5455                        dev_err(&adapter->pdev->dev,
5456                                "Unrecoverable HW error detected: 0x%x\n", val);
5457                        status = -EINVAL;
5458                        err_rec->resched_delay = 0;
5459                        break;
5460                }
5461
5462                dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5463
5464                /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5465                 * milliseconds before it checks for final error status in
5466                 * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5467                 * If they are, PF0 initiates a Soft Reset.
5468                 */
5469                if (adapter->pf_num == 0) {
5470                        err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5471                        err_rec->resched_delay = err_rec->ue_to_reset_time -
5472                                        ERR_RECOVERY_UE_DETECT_DURATION;
5473                        break;
5474                }
5475
5476                err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5477                err_rec->resched_delay = err_rec->ue_to_poll_time -
5478                                        ERR_RECOVERY_UE_DETECT_DURATION;
5479                break;
5480
5481        case ERR_RECOVERY_ST_RESET:
5482                if (!be_err_is_recoverable(adapter)) {
5483                        dev_err(&adapter->pdev->dev,
5484                                "Failed to meet recovery criteria\n");
5485                        status = -EIO;
5486                        err_rec->resched_delay = 0;
5487                        break;
5488                }
5489                be_soft_reset(adapter);
5490                err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5491                err_rec->resched_delay = err_rec->ue_to_poll_time -
5492                                        err_rec->ue_to_reset_time;
5493                break;
5494
5495        case ERR_RECOVERY_ST_PRE_POLL:
5496                err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5497                err_rec->resched_delay = 0;
5498                status = 0;                     /* done */
5499                break;
5500
5501        default:
5502                status = -EINVAL;
5503                err_rec->resched_delay = 0;
5504                break;
5505        }
5506
5507        return status;
5508}
5509
5510static int be_err_recover(struct be_adapter *adapter)
5511{
5512        int status;
5513
5514        if (!lancer_chip(adapter)) {
5515                if (!adapter->error_recovery.recovery_supported ||
5516                    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5517                        return -EIO;
5518                status = be_tpe_recover(adapter);
5519                if (status)
5520                        goto err;
5521        }
5522
5523        /* Wait for the adapter to reach a quiescent state before
5524         * destroying queues
5525         */
5526        status = be_fw_wait_ready(adapter);
5527        if (status)
5528                goto err;
5529
5530        adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5531
5532        be_cleanup(adapter);
5533
5534        status = be_resume(adapter);
5535        if (status)
5536                goto err;
5537
5538        adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5539
5540err:
5541        return status;
5542}
5543
5544static void be_err_detection_task(struct work_struct *work)
5545{
5546        struct be_error_recovery *err_rec =
5547                        container_of(work, struct be_error_recovery,
5548                                     err_detection_work.work);
5549        struct be_adapter *adapter =
5550                        container_of(err_rec, struct be_adapter,
5551                                     error_recovery);
5552        u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5553        struct device *dev = &adapter->pdev->dev;
5554        int recovery_status;
5555
5556        be_detect_error(adapter);
5557        if (!be_check_error(adapter, BE_ERROR_HW))
5558                goto reschedule_task;
5559
5560        recovery_status = be_err_recover(adapter);
5561        if (!recovery_status) {
5562                err_rec->recovery_retries = 0;
5563                err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5564                dev_info(dev, "Adapter recovery successful\n");
5565                goto reschedule_task;
5566        } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5567                /* BEx/SH recovery state machine */
5568                if (adapter->pf_num == 0 &&
5569                    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5570                        dev_err(&adapter->pdev->dev,
5571                                "Adapter recovery in progress\n");
5572                resched_delay = err_rec->resched_delay;
5573                goto reschedule_task;
5574        } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5575                /* For VFs, check every second whether the PF has
5576                 * allocated resources.
5577                 */
5578                dev_err(dev, "Re-trying adapter recovery\n");
5579                goto reschedule_task;
5580        } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5581                   ERR_RECOVERY_MAX_RETRY_COUNT) {
5582                /* In case of another error during recovery, it takes 30 sec
5583                 * for the adapter to come out of error. Retry error recovery after
5584                 * this time interval.
5585                 */
5586                dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5587                resched_delay = ERR_RECOVERY_RETRY_DELAY;
5588                goto reschedule_task;
5589        } else {
5590                dev_err(dev, "Adapter recovery failed\n");
5591                dev_err(dev, "Please reboot server to recover\n");
5592        }
5593
5594        return;
5595
5596reschedule_task:
5597        be_schedule_err_detection(adapter, resched_delay);
5598}
5599
5600static void be_log_sfp_info(struct be_adapter *adapter)
5601{
5602        int status;
5603
5604        status = be_cmd_query_sfp_info(adapter);
5605        if (!status) {
5606                dev_err(&adapter->pdev->dev,
5607                        "Port %c: %s Vendor: %s part no: %s",
5608                        adapter->port_name,
5609                        be_misconfig_evt_port_state[adapter->phy_state],
5610                        adapter->phy.vendor_name,
5611                        adapter->phy.vendor_pn);
5612        }
5613        adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5614}
5615
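/* Periodic (1 second) housekeeping: read the die temperature, reap MCC
 * completions while the interface is down, refresh stats, replenish RX queues
 * starved by allocation failures, update EQ delays (non-Skyhawk) and log SFP
 * info after a PHY misconfiguration event.
 */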
5616static void be_worker(struct work_struct *work)
5617{
5618        struct be_adapter *adapter =
5619                container_of(work, struct be_adapter, work.work);
5620        struct be_rx_obj *rxo;
5621        int i;
5622
5623        if (be_physfn(adapter) &&
5624            MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5625                be_cmd_get_die_temperature(adapter);
5626
5627        /* When interrupts are not yet enabled, just reap any pending
5628         * MCC completions
5629         */
5630        if (!netif_running(adapter->netdev)) {
5631                local_bh_disable();
5632                be_process_mcc(adapter);
5633                local_bh_enable();
5634                goto reschedule;
5635        }
5636
5637        if (!adapter->stats_cmd_sent) {
5638                if (lancer_chip(adapter))
5639                        lancer_cmd_get_pport_stats(adapter,
5640                                                   &adapter->stats_cmd);
5641                else
5642                        be_cmd_get_stats(adapter, &adapter->stats_cmd);
5643        }
5644
5645        for_all_rx_queues(adapter, rxo, i) {
5646                /* Replenish RX-queues starved due to memory
5647                 * allocation failures.
5648                 */
5649                if (rxo->rx_post_starved)
5650                        be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5651        }
5652
5653        /* EQ-delay update for Skyhawk is done while notifying EQ */
5654        if (!skyhawk_chip(adapter))
5655                be_eqd_update(adapter, false);
5656
5657        if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5658                be_log_sfp_info(adapter);
5659
5660reschedule:
5661        adapter->work_counter++;
5662        queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5663}
5664
5665static void be_unmap_pci_bars(struct be_adapter *adapter)
5666{
5667        if (adapter->csr)
5668                pci_iounmap(adapter->pdev, adapter->csr);
5669        if (adapter->db)
5670                pci_iounmap(adapter->pdev, adapter->db);
5671        if (adapter->pcicfg && adapter->pcicfg_mapped)
5672                pci_iounmap(adapter->pdev, adapter->pcicfg);
5673}
5674
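/* Doorbell BAR is BAR 0 on Lancer and on VFs; BAR 4 on other functions */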
5675static int db_bar(struct be_adapter *adapter)
5676{
5677        if (lancer_chip(adapter) || be_virtfn(adapter))
5678                return 0;
5679        else
5680                return 4;
5681}
5682
5683static int be_roce_map_pci_bars(struct be_adapter *adapter)
5684{
5685        if (skyhawk_chip(adapter)) {
5686                adapter->roce_db.size = 4096;
5687                adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5688                                                              db_bar(adapter));
5689                adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5690                                                               db_bar(adapter));
5691        }
5692        return 0;
5693}
5694
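/* Read SLI_INTF to learn the SLI family and VF status, then map the CSR BAR
 * (BEx PF only), the doorbell BAR and the PCICFG BAR (or its offset within
 * the doorbell BAR on Skyhawk/BEx VFs).
 */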
5695static int be_map_pci_bars(struct be_adapter *adapter)
5696{
5697        struct pci_dev *pdev = adapter->pdev;
5698        u8 __iomem *addr;
5699        u32 sli_intf;
5700
5701        pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5702        adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5703                                SLI_INTF_FAMILY_SHIFT;
5704        adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5705
5706        if (BEx_chip(adapter) && be_physfn(adapter)) {
5707                adapter->csr = pci_iomap(pdev, 2, 0);
5708                if (!adapter->csr)
5709                        return -ENOMEM;
5710        }
5711
5712        addr = pci_iomap(pdev, db_bar(adapter), 0);
5713        if (!addr)
5714                goto pci_map_err;
5715        adapter->db = addr;
5716
5717        if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5718                if (be_physfn(adapter)) {
5719                        /* PCICFG is the 2nd BAR in BE2 */
5720                        addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5721                        if (!addr)
5722                                goto pci_map_err;
5723                        adapter->pcicfg = addr;
5724                        adapter->pcicfg_mapped = true;
5725                } else {
5726                        adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5727                        adapter->pcicfg_mapped = false;
5728                }
5729        }
5730
5731        be_roce_map_pci_bars(adapter);
5732        return 0;
5733
5734pci_map_err:
5735        dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5736        be_unmap_pci_bars(adapter);
5737        return -ENOMEM;
5738}
5739
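/* Free the DMA-coherent buffers (mailbox, RX-filter cmd, stats cmd)
 * allocated in be_drv_init().
 */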
5740static void be_drv_cleanup(struct be_adapter *adapter)
5741{
5742        struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5743        struct device *dev = &adapter->pdev->dev;
5744
5745        if (mem->va)
5746                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5747
5748        mem = &adapter->rx_filter;
5749        if (mem->va)
5750                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5751
5752        mem = &adapter->stats_cmd;
5753        if (mem->va)
5754                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5755}
5756
5757/* Allocate and initialize various fields in be_adapter struct */
5758static int be_drv_init(struct be_adapter *adapter)
5759{
5760        struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5761        struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5762        struct be_dma_mem *rx_filter = &adapter->rx_filter;
5763        struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5764        struct device *dev = &adapter->pdev->dev;
5765        int status = 0;
5766
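        /* The FW mailbox is kept 16-byte aligned: over-allocate by 16 bytes
         * and align both the virtual and DMA addresses within the buffer.
         */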
5767        mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5768        mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5769                                                 &mbox_mem_alloc->dma,
5770                                                 GFP_KERNEL);
5771        if (!mbox_mem_alloc->va)
5772                return -ENOMEM;
5773
5774        mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5775        mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5776        mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5777
5778        rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5779        rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5780                                            &rx_filter->dma, GFP_KERNEL);
5781        if (!rx_filter->va) {
5782                status = -ENOMEM;
5783                goto free_mbox;
5784        }
5785
5786        if (lancer_chip(adapter))
5787                stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5788        else if (BE2_chip(adapter))
5789                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5790        else if (BE3_chip(adapter))
5791                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5792        else
5793                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5794        stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5795                                            &stats_cmd->dma, GFP_KERNEL);
5796        if (!stats_cmd->va) {
5797                status = -ENOMEM;
5798                goto free_rx_filter;
5799        }
5800
5801        mutex_init(&adapter->mbox_lock);
5802        mutex_init(&adapter->mcc_lock);
5803        mutex_init(&adapter->rx_filter_lock);
5804        spin_lock_init(&adapter->mcc_cq_lock);
5805        init_completion(&adapter->et_cmd_compl);
5806
5807        pci_save_state(adapter->pdev);
5808
5809        INIT_DELAYED_WORK(&adapter->work, be_worker);
5810
5811        adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5812        adapter->error_recovery.resched_delay = 0;
5813        INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5814                          be_err_detection_task);
5815
5816        adapter->rx_fc = true;
5817        adapter->tx_fc = true;
5818
5819        /* be_get_temp_freq must be a power of 2 or else MODULO will BUG_ON */
5820        adapter->be_get_temp_freq = 64;
5821
5822        INIT_LIST_HEAD(&adapter->vxlan_port_list);
5823        return 0;
5824
5825free_rx_filter:
5826        dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5827free_mbox:
5828        dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5829                          mbox_mem_alloc->dma);
5830        return status;
5831}
5832
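/* PCI remove: tear down in roughly the reverse order of be_probe():
 * detach RoCE, disable interrupts, stop error detection, unregister the
 * netdev, clear the adapter, release PCI resources and free the netdev.
 */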
5833static void be_remove(struct pci_dev *pdev)
5834{
5835        struct be_adapter *adapter = pci_get_drvdata(pdev);
5836
5837        if (!adapter)
5838                return;
5839
5840        be_roce_dev_remove(adapter);
5841        be_intr_set(adapter, false);
5842
5843        be_cancel_err_detection(adapter);
5844
5845        unregister_netdev(adapter->netdev);
5846
5847        be_clear(adapter);
5848
5849        if (!pci_vfs_assigned(adapter->pdev))
5850                be_cmd_reset_function(adapter);
5851
5852        /* tell fw we're done with firing cmds */
5853        be_cmd_fw_clean(adapter);
5854
5855        be_unmap_pci_bars(adapter);
5856        be_drv_cleanup(adapter);
5857
5858        pci_disable_pcie_error_reporting(pdev);
5859
5860        pci_release_regions(pdev);
5861        pci_disable_device(pdev);
5862
5863        free_netdev(adapter->netdev);
5864}
5865
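/* hwmon: report the cached on-die temperature in millidegrees Celsius,
 * or -EIO if no valid reading is available.
 */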
5866static ssize_t be_hwmon_show_temp(struct device *dev,
5867                                  struct device_attribute *dev_attr,
5868                                  char *buf)
5869{
5870        struct be_adapter *adapter = dev_get_drvdata(dev);
5871
5872        /* Unit: millidegree Celsius */
5873        if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5874                return -EIO;
5875        else
5876                return sprintf(buf, "%u\n",
5877                               adapter->hwmon_info.be_on_die_temp * 1000);
5878}
5879
5880static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5881                          be_hwmon_show_temp, NULL, 1);
5882
5883static struct attribute *be_hwmon_attrs[] = {
5884        &sensor_dev_attr_temp1_input.dev_attr.attr,
5885        NULL
5886};
5887
5888ATTRIBUTE_GROUPS(be_hwmon);
5889
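/* Human-readable name of the multi-channel mode this function runs in */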
5890static char *mc_name(struct be_adapter *adapter)
5891{
5892        char *str = ""; /* default */
5893
5894        switch (adapter->mc_type) {
5895        case UMC:
5896                str = "UMC";
5897                break;
5898        case FLEX10:
5899                str = "FLEX10";
5900                break;
5901        case vNIC1:
5902                str = "vNIC-1";
5903                break;
5904        case nPAR:
5905                str = "nPAR";
5906                break;
5907        case UFP:
5908                str = "UFP";
5909                break;
5910        case vNIC2:
5911                str = "vNIC-2";
5912                break;
5913        default:
5914                str = "";
5915        }
5916
5917        return str;
5918}
5919
5920static inline char *func_name(struct be_adapter *adapter)
5921{
5922        return be_physfn(adapter) ? "PF" : "VF";
5923}
5924
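/* Model name string for the adapter, selected by PCI device ID */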
5925static inline char *nic_name(struct pci_dev *pdev)
5926{
5927        switch (pdev->device) {
5928        case OC_DEVICE_ID1:
5929                return OC_NAME;
5930        case OC_DEVICE_ID2:
5931                return OC_NAME_BE;
5932        case OC_DEVICE_ID3:
5933        case OC_DEVICE_ID4:
5934                return OC_NAME_LANCER;
5935        case BE_DEVICE_ID2:
5936                return BE3_NAME;
5937        case OC_DEVICE_ID5:
5938        case OC_DEVICE_ID6:
5939                return OC_NAME_SH;
5940        default:
5941                return BE_NAME;
5942        }
5943}
5944
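/* PCI probe: enable the device, set the DMA mask, map BARs, allocate the
 * driver state (be_drv_init), bring up the adapter (be_setup), register
 * the netdev and start the error-detection worker.
 */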
5945static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5946{
5947        struct be_adapter *adapter;
5948        struct net_device *netdev;
5949        int status = 0;
5950
5951        dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5952
5953        status = pci_enable_device(pdev);
5954        if (status)
5955                goto do_none;
5956
5957        status = pci_request_regions(pdev, DRV_NAME);
5958        if (status)
5959                goto disable_dev;
5960        pci_set_master(pdev);
5961
5962        netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5963        if (!netdev) {
5964                status = -ENOMEM;
5965                goto rel_reg;
5966        }
5967        adapter = netdev_priv(netdev);
5968        adapter->pdev = pdev;
5969        pci_set_drvdata(pdev, adapter);
5970        adapter->netdev = netdev;
5971        SET_NETDEV_DEV(netdev, &pdev->dev);
5972
5973        status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5974        if (!status) {
5975                netdev->features |= NETIF_F_HIGHDMA;
5976        } else {
5977                status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5978                if (status) {
5979                        dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5980                        goto free_netdev;
5981                }
5982        }
5983
5984        status = pci_enable_pcie_error_reporting(pdev);
5985        if (!status)
5986                dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5987
5988        status = be_map_pci_bars(adapter);
5989        if (status)
5990                goto free_netdev;
5991
5992        status = be_drv_init(adapter);
5993        if (status)
5994                goto unmap_bars;
5995
5996        status = be_setup(adapter);
5997        if (status)
5998                goto drv_cleanup;
5999
6000        be_netdev_init(netdev);
6001        status = register_netdev(netdev);
6002        if (status != 0)
6003                goto unsetup;
6004
6005        be_roce_dev_add(adapter);
6006
6007        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6008        adapter->error_recovery.probe_time = jiffies;
6009
6010        /* On-die temperature is not supported on VFs. */
6011        if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
6012                adapter->hwmon_info.hwmon_dev =
6013                        devm_hwmon_device_register_with_groups(&pdev->dev,
6014                                                               DRV_NAME,
6015                                                               adapter,
6016                                                               be_hwmon_groups);
6017                adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
6018        }
6019
6020        dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
6021                 func_name(adapter), mc_name(adapter), adapter->port_name);
6022
6023        return 0;
6024
6025unsetup:
6026        be_clear(adapter);
6027drv_cleanup:
6028        be_drv_cleanup(adapter);
6029unmap_bars:
6030        be_unmap_pci_bars(adapter);
6031free_netdev:
6032        free_netdev(netdev);
6033rel_reg:
6034        pci_release_regions(pdev);
6035disable_dev:
6036        pci_disable_device(pdev);
6037do_none:
6038        dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
6039        return status;
6040}
6041
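/* Legacy PCI PM suspend: disable interrupts, stop error detection,
 * quiesce the adapter (be_cleanup), then save PCI state and power the
 * device down.
 */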
6042static int be_suspend(struct pci_dev *pdev, pm_message_t state)
6043{
6044        struct be_adapter *adapter = pci_get_drvdata(pdev);
6045
6046        be_intr_set(adapter, false);
6047        be_cancel_err_detection(adapter);
6048
6049        be_cleanup(adapter);
6050
6051        pci_save_state(pdev);
6052        pci_disable_device(pdev);
6053        pci_set_power_state(pdev, pci_choose_state(pdev, state));
6054        return 0;
6055}
6056
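/* Legacy PCI PM resume: re-enable the device, restore state, bring the
 * adapter back up and restart error detection.
 */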
6057static int be_pci_resume(struct pci_dev *pdev)
6058{
6059        struct be_adapter *adapter = pci_get_drvdata(pdev);
6060        int status = 0;
6061
6062        status = pci_enable_device(pdev);
6063        if (status)
6064                return status;
6065
6066        pci_restore_state(pdev);
6067
6068        status = be_resume(adapter);
6069        if (status)
6070                return status;
6071
6072        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6073
6074        return 0;
6075}
6076
6077/*
6078 * An FLR will stop BE from DMAing any data.
6079 */
6080static void be_shutdown(struct pci_dev *pdev)
6081{
6082        struct be_adapter *adapter = pci_get_drvdata(pdev);
6083
6084        if (!adapter)
6085                return;
6086
6087        be_roce_dev_shutdown(adapter);
6088        cancel_delayed_work_sync(&adapter->work);
6089        be_cancel_err_detection(adapter);
6090
6091        netif_device_detach(adapter->netdev);
6092
6093        be_cmd_reset_function(adapter);
6094
6095        pci_disable_device(pdev);
6096}
6097
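/* EEH/AER error_detected callback: flag the EEH error, stop error
 * detection and quiesce the adapter, then ask the core for a slot reset
 * (or disconnect on a permanent failure).
 */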
6098static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6099                                            pci_channel_state_t state)
6100{
6101        struct be_adapter *adapter = pci_get_drvdata(pdev);
6102
6103        dev_err(&adapter->pdev->dev, "EEH error detected\n");
6104
6105        be_roce_dev_remove(adapter);
6106
6107        if (!be_check_error(adapter, BE_ERROR_EEH)) {
6108                be_set_error(adapter, BE_ERROR_EEH);
6109
6110                be_cancel_err_detection(adapter);
6111
6112                be_cleanup(adapter);
6113        }
6114
6115        if (state == pci_channel_io_perm_failure)
6116                return PCI_ERS_RESULT_DISCONNECT;
6117
6118        pci_disable_device(pdev);
6119
6120        /* The error could cause the FW to trigger a flash debug dump.
6121         * Resetting the card while flash dump is in progress
6122         * can cause it not to recover; wait for it to finish.
6123         * Wait only on the first function, as the wait is needed only once
6124         * per adapter.
6125         */
6126        if (pdev->devfn == 0)
6127                ssleep(30);
6128
6129        return PCI_ERS_RESULT_NEED_RESET;
6130}
6131
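/* EEH slot_reset callback: re-enable the device, restore config space
 * and wait for the FW to become ready before declaring the slot
 * recovered.
 */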
6132static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6133{
6134        struct be_adapter *adapter = pci_get_drvdata(pdev);
6135        int status;
6136
6137        dev_info(&adapter->pdev->dev, "EEH reset\n");
6138
6139        status = pci_enable_device(pdev);
6140        if (status)
6141                return PCI_ERS_RESULT_DISCONNECT;
6142
6143        pci_set_master(pdev);
6144        pci_restore_state(pdev);
6145
6146        /* Check if card is ok and fw is ready */
6147        dev_info(&adapter->pdev->dev,
6148                 "Waiting for FW to be ready after EEH reset\n");
6149        status = be_fw_wait_ready(adapter);
6150        if (status)
6151                return PCI_ERS_RESULT_DISCONNECT;
6152
6153        be_clear_error(adapter, BE_CLEAR_ALL);
6154        return PCI_ERS_RESULT_RECOVERED;
6155}
6156
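/* EEH resume callback: bring the adapter back up and restart error
 * detection.
 */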
6157static void be_eeh_resume(struct pci_dev *pdev)
6158{
6159        int status = 0;
6160        struct be_adapter *adapter = pci_get_drvdata(pdev);
6161
6162        dev_info(&adapter->pdev->dev, "EEH resume\n");
6163
6164        pci_save_state(pdev);
6165
6166        status = be_resume(adapter);
6167        if (status)
6168                goto err;
6169
6170        be_roce_dev_add(adapter);
6171
6172        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6173        return;
6174err:
6175        dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6176}
6177
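/* sriov_configure: invoked when the admin writes the sriov_numvfs sysfs
 * attribute, e.g. "echo 4 > /sys/bus/pci/devices/<bdf>/sriov_numvfs".
 * Returns the number of VFs enabled, 0, or a negative error code.
 */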
6178static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6179{
6180        struct be_adapter *adapter = pci_get_drvdata(pdev);
6181        struct be_resources vft_res = {0};
6182        int status;
6183
6184        if (!num_vfs)
6185                be_vf_clear(adapter);
6186
6187        adapter->num_vfs = num_vfs;
6188
6189        if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6190                dev_warn(&pdev->dev,
6191                         "Cannot disable VFs while they are assigned\n");
6192                return -EBUSY;
6193        }
6194
6195        /* When the HW is in an SR-IOV capable configuration, the PF-pool
6196         * resources are distributed equally across the maximum number of VFs.
6197         * The user may request that only a subset of the max VFs be enabled.
6198         * Based on num_vfs, redistribute the resources across num_vfs so that
6199         * each VF gets a larger share of the resources.
6200         * This facility is not available in BE3 FW; on Lancer, the FW does
6201         * this redistribution itself.
6202         */
6203        if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6204                be_calculate_vf_res(adapter, adapter->num_vfs,
6205                                    &vft_res);
6206                status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6207                                                 adapter->num_vfs, &vft_res);
6208                if (status)
6209                        dev_err(&pdev->dev,
6210                                "Failed to optimize SR-IOV resources\n");
6211        }
6212
6213        status = be_get_resources(adapter);
6214        if (status)
6215                return be_cmd_status(status);
6216
6217        /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6218        rtnl_lock();
6219        status = be_update_queues(adapter);
6220        rtnl_unlock();
6221        if (status)
6222                return be_cmd_status(status);
6223
6224        if (adapter->num_vfs)
6225                status = be_vf_setup(adapter);
6226
6227        if (!status)
6228                return adapter->num_vfs;
6229
6230        return 0;
6231}
6232
6233static const struct pci_error_handlers be_eeh_handlers = {
6234        .error_detected = be_eeh_err_detected,
6235        .slot_reset = be_eeh_reset,
6236        .resume = be_eeh_resume,
6237};
6238
6239static struct pci_driver be_driver = {
6240        .name = DRV_NAME,
6241        .id_table = be_dev_ids,
6242        .probe = be_probe,
6243        .remove = be_remove,
6244        .suspend = be_suspend,
6245        .resume = be_pci_resume,
6246        .shutdown = be_shutdown,
6247        .sriov_configure = be_pci_sriov_configure,
6248        .err_handler = &be_eeh_handlers
6249};
6250
6251static int __init be_init_module(void)
6252{
6253        int status;
6254
6255        if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6256            rx_frag_size != 2048) {
6257                printk(KERN_WARNING DRV_NAME
6258                        " : Module param rx_frag_size must be 2048/4096/8192."
6259                        " Using 2048\n");
6260                rx_frag_size = 2048;
6261        }
6262
6263        if (num_vfs > 0) {
6264                pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6265                pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6266        }
6267
6268        be_wq = create_singlethread_workqueue("be_wq");
6269        if (!be_wq) {
6270                pr_warn(DRV_NAME " : workqueue creation failed\n");
6271                return -ENOMEM;
6272        }
6273
6274        be_err_recovery_workq =
6275                create_singlethread_workqueue("be_err_recover");
6276        if (!be_err_recovery_workq)
6277                pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6278
6279        status = pci_register_driver(&be_driver);
6280        if (status) {
6281                destroy_workqueue(be_wq);
6282                be_destroy_err_recovery_workq();
6283        }
6284        return status;
6285}
6286module_init(be_init_module);
6287
6288static void __exit be_exit_module(void)
6289{
6290        pci_unregister_driver(&be_driver);
6291
6292        be_destroy_err_recovery_workq();
6293
6294        if (be_wq)
6295                destroy_workqueue(be_wq);
6296}
6297module_exit(be_exit_module);
6298