linux/drivers/net/ethernet/emulex/benet/be_main.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2005 - 2016 Broadcom
   4 * All rights reserved.
   5 *
   6 * Contact Information:
   7 * linux-drivers@emulex.com
   8 *
   9 * Emulex
  10 * 3333 Susan Street
  11 * Costa Mesa, CA 92626
  12 */
  13
  14#include <linux/prefetch.h>
  15#include <linux/module.h>
  16#include "be.h"
  17#include "be_cmds.h"
  18#include <asm/div64.h>
  19#include <linux/aer.h>
  20#include <linux/if_bridge.h>
  21#include <net/busy_poll.h>
  22#include <net/vxlan.h>
  23
  24MODULE_VERSION(DRV_VER);
  25MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
  26MODULE_AUTHOR("Emulex Corporation");
  27MODULE_LICENSE("GPL");
  28
  29/* num_vfs module param is obsolete.
  30 * Use sysfs method to enable/disable VFs.
  31 */
  32static unsigned int num_vfs;
  33module_param(num_vfs, uint, 0444);
  34MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
  35
  36static ushort rx_frag_size = 2048;
  37module_param(rx_frag_size, ushort, 0444);
  38MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
  39
  40/* Per-module error detection/recovery workq shared across all functions.
  41 * Each function schedules its own work request on this shared workq.
  42 */
  43static struct workqueue_struct *be_err_recovery_workq;
  44
  45static const struct pci_device_id be_dev_ids[] = {
  46#ifdef CONFIG_BE2NET_BE2
  47        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
  48        { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
  49#endif /* CONFIG_BE2NET_BE2 */
  50#ifdef CONFIG_BE2NET_BE3
  51        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
  52        { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
  53#endif /* CONFIG_BE2NET_BE3 */
  54#ifdef CONFIG_BE2NET_LANCER
  55        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
  56        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
  57#endif /* CONFIG_BE2NET_LANCER */
  58#ifdef CONFIG_BE2NET_SKYHAWK
  59        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
  60        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
  61#endif /* CONFIG_BE2NET_SKYHAWK */
  62        { 0 }
  63};
  64MODULE_DEVICE_TABLE(pci, be_dev_ids);
  65
  66/* Workqueue used by all functions for deferring cmd calls to the adapter */
  67static struct workqueue_struct *be_wq;
  68
  69/* UE Status Low CSR */
  70static const char * const ue_status_low_desc[] = {
  71        "CEV",
  72        "CTX",
  73        "DBUF",
  74        "ERX",
  75        "Host",
  76        "MPU",
  77        "NDMA",
  78        "PTC ",
  79        "RDMA ",
  80        "RXF ",
  81        "RXIPS ",
  82        "RXULP0 ",
  83        "RXULP1 ",
  84        "RXULP2 ",
  85        "TIM ",
  86        "TPOST ",
  87        "TPRE ",
  88        "TXIPS ",
  89        "TXULP0 ",
  90        "TXULP1 ",
  91        "UC ",
  92        "WDMA ",
  93        "TXULP2 ",
  94        "HOST1 ",
  95        "P0_OB_LINK ",
  96        "P1_OB_LINK ",
  97        "HOST_GPIO ",
  98        "MBOX ",
  99        "ERX2 ",
 100        "SPARE ",
 101        "JTAG ",
 102        "MPU_INTPEND "
 103};
 104
 105/* UE Status High CSR */
 106static const char * const ue_status_hi_desc[] = {
 107        "LPCMEMHOST",
 108        "MGMT_MAC",
 109        "PCS0ONLINE",
 110        "MPU_IRAM",
 111        "PCS1ONLINE",
 112        "PCTL0",
 113        "PCTL1",
 114        "PMEM",
 115        "RR",
 116        "TXPB",
 117        "RXPP",
 118        "XAUI",
 119        "TXP",
 120        "ARM",
 121        "IPC",
 122        "HOST2",
 123        "HOST3",
 124        "HOST4",
 125        "HOST5",
 126        "HOST6",
 127        "HOST7",
 128        "ECRC",
 129        "Poison TLP",
 130        "NETC",
 131        "PERIPH",
 132        "LLTXULP",
 133        "D2P",
 134        "RCON",
 135        "LDMA",
 136        "LLTXP",
 137        "LLTXPB",
 138        "Unknown"
 139};
 140
 141#define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
 142                                 BE_IF_FLAGS_BROADCAST | \
 143                                 BE_IF_FLAGS_MULTICAST | \
 144                                 BE_IF_FLAGS_PASS_L3L4_ERRORS)
 145
 146static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
 147{
 148        struct be_dma_mem *mem = &q->dma_mem;
 149
 150        if (mem->va) {
 151                dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
 152                                  mem->dma);
 153                mem->va = NULL;
 154        }
 155}
 156
 157static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
 158                          u16 len, u16 entry_size)
 159{
 160        struct be_dma_mem *mem = &q->dma_mem;
 161
 162        memset(q, 0, sizeof(*q));
 163        q->len = len;
 164        q->entry_size = entry_size;
 165        mem->size = len * entry_size;
 166        mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
 167                                     &mem->dma, GFP_KERNEL);
 168        if (!mem->va)
 169                return -ENOMEM;
 170        return 0;
 171}
 172
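    /* Enable/disable host interrupt delivery by toggling the HOSTINTR bit in
     * the PCICFG MEMBAR interrupt control register. Used as a fallback when
     * the be_cmd_intr_set() FW cmd fails (see be_intr_set() below).
     */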
 173static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
 174{
 175        u32 reg, enabled;
 176
 177        pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
 178                              &reg);
 179        enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 180
 181        if (!enabled && enable)
 182                reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 183        else if (enabled && !enable)
 184                reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 185        else
 186                return;
 187
 188        pci_write_config_dword(adapter->pdev,
 189                               PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
 190}
 191
 192static void be_intr_set(struct be_adapter *adapter, bool enable)
 193{
 194        int status = 0;
 195
 196        /* On Lancer, interrupts can't be controlled via this register */
 197        if (lancer_chip(adapter))
 198                return;
 199
 200        if (be_check_error(adapter, BE_ERROR_EEH))
 201                return;
 202
 203        status = be_cmd_intr_set(adapter, enable);
 204        if (status)
 205                be_reg_intr_set(adapter, enable);
 206}
 207
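    /* Ring the RX queue doorbell to hand 'posted' new descriptors to the
     * adapter. The wmb() ensures the descriptor writes are visible before
     * the doorbell write.
     */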
 208static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
 209{
 210        u32 val = 0;
 211
 212        if (be_check_error(adapter, BE_ERROR_HW))
 213                return;
 214
 215        val |= qid & DB_RQ_RING_ID_MASK;
 216        val |= posted << DB_RQ_NUM_POSTED_SHIFT;
 217
 218        wmb();
 219        iowrite32(val, adapter->db + DB_RQ_OFFSET);
 220}
 221
 222static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
 223                          u16 posted)
 224{
 225        u32 val = 0;
 226
 227        if (be_check_error(adapter, BE_ERROR_HW))
 228                return;
 229
 230        val |= txo->q.id & DB_TXULP_RING_ID_MASK;
 231        val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
 232
 233        wmb();
 234        iowrite32(val, adapter->db + txo->db_offset);
 235}
 236
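    /* Ring the event queue doorbell: acknowledges 'num_popped' EQ entries
     * and optionally re-arms the EQ, clears the interrupt and programs the
     * interrupt delay multiplier encoding.
     */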
 237static void be_eq_notify(struct be_adapter *adapter, u16 qid,
 238                         bool arm, bool clear_int, u16 num_popped,
 239                         u32 eq_delay_mult_enc)
 240{
 241        u32 val = 0;
 242
 243        val |= qid & DB_EQ_RING_ID_MASK;
 244        val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
 245
 246        if (be_check_error(adapter, BE_ERROR_HW))
 247                return;
 248
 249        if (arm)
 250                val |= 1 << DB_EQ_REARM_SHIFT;
 251        if (clear_int)
 252                val |= 1 << DB_EQ_CLR_SHIFT;
 253        val |= 1 << DB_EQ_EVNT_SHIFT;
 254        val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
 255        val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
 256        iowrite32(val, adapter->db + DB_EQ_OFFSET);
 257}
 258
 259void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
 260{
 261        u32 val = 0;
 262
 263        val |= qid & DB_CQ_RING_ID_MASK;
 264        val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
 265                        DB_CQ_RING_ID_EXT_MASK_SHIFT);
 266
 267        if (be_check_error(adapter, BE_ERROR_HW))
 268                return;
 269
 270        if (arm)
 271                val |= 1 << DB_CQ_REARM_SHIFT;
 272        val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
 273        iowrite32(val, adapter->db + DB_CQ_OFFSET);
 274}
 275
 276static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
 277{
 278        int i;
 279
 280        /* Check if mac has already been added as part of uc-list */
 281        for (i = 0; i < adapter->uc_macs; i++) {
 282                if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
 283                        /* mac already added, skip addition */
 284                        adapter->pmac_id[0] = adapter->pmac_id[i + 1];
 285                        return 0;
 286                }
 287        }
 288
 289        return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
 290                               &adapter->pmac_id[0], 0);
 291}
 292
 293static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
 294{
 295        int i;
 296
 297        /* Skip deletion if the programmed mac is
 298         * being used in uc-list
 299         */
 300        for (i = 0; i < adapter->uc_macs; i++) {
 301                if (adapter->pmac_id[i + 1] == pmac_id)
 302                        return;
 303        }
 304        be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
 305}
 306
 307static int be_mac_addr_set(struct net_device *netdev, void *p)
 308{
 309        struct be_adapter *adapter = netdev_priv(netdev);
 310        struct device *dev = &adapter->pdev->dev;
 311        struct sockaddr *addr = p;
 312        int status;
 313        u8 mac[ETH_ALEN];
 314        u32 old_pmac_id = adapter->pmac_id[0];
 315
 316        if (!is_valid_ether_addr(addr->sa_data))
 317                return -EADDRNOTAVAIL;
 318
 319        /* Proceed further only if the user-provided MAC is different
 320         * from the active MAC
 321         */
 322        if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
 323                return 0;
 324
 325        /* BE3 VFs without FILTMGMT privilege are not allowed to set their
 326         * MAC address
 327         */
 328        if (BEx_chip(adapter) && be_virtfn(adapter) &&
 329            !check_privilege(adapter, BE_PRIV_FILTMGMT))
 330                return -EPERM;
 331
 332        /* if device is not running, copy MAC to netdev->dev_addr */
 333        if (!netif_running(netdev))
 334                goto done;
 335
 336        /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
 337         * privilege or if PF did not provision the new MAC address.
 338         * On BE3, this cmd will always fail if the VF doesn't have the
 339         * FILTMGMT privilege. This failure is OK only if the PF has
 340         * programmed the MAC for the VF.
 341         */
 342        mutex_lock(&adapter->rx_filter_lock);
 343        status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
 344        if (!status) {
 345
 346                /* Delete the old programmed MAC. This call may fail if the
 347                 * old MAC was already deleted by the PF driver.
 348                 */
 349                if (adapter->pmac_id[0] != old_pmac_id)
 350                        be_dev_mac_del(adapter, old_pmac_id);
 351        }
 352
 353        mutex_unlock(&adapter->rx_filter_lock);
 354        /* Decide if the new MAC is successfully activated only after
 355         * querying the FW
 356         */
 357        status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
 358                                       adapter->if_handle, true, 0);
 359        if (status)
 360                goto err;
 361
 362        /* The MAC change did not happen, either due to lack of privilege
 363         * or because the PF didn't pre-provision it.
 364         */
 365        if (!ether_addr_equal(addr->sa_data, mac)) {
 366                status = -EPERM;
 367                goto err;
 368        }
 369
 370        /* Remember currently programmed MAC */
 371        ether_addr_copy(adapter->dev_mac, addr->sa_data);
 372done:
 373        ether_addr_copy(netdev->dev_addr, addr->sa_data);
 374        dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
 375        return 0;
 376err:
 377        dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
 378        return status;
 379}
 380
 381/* BE2 supports only v0 cmd */
 382static void *hw_stats_from_cmd(struct be_adapter *adapter)
 383{
 384        if (BE2_chip(adapter)) {
 385                struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
 386
 387                return &cmd->hw_stats;
 388        } else if (BE3_chip(adapter)) {
 389                struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
 390
 391                return &cmd->hw_stats;
 392        } else {
 393                struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
 394
 395                return &cmd->hw_stats;
 396        }
 397}
 398
 399/* BE2 supports only v0 cmd */
 400static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
 401{
 402        if (BE2_chip(adapter)) {
 403                struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 404
 405                return &hw_stats->erx;
 406        } else if (BE3_chip(adapter)) {
 407                struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 408
 409                return &hw_stats->erx;
 410        } else {
 411                struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 412
 413                return &hw_stats->erx;
 414        }
 415}
 416
 417static void populate_be_v0_stats(struct be_adapter *adapter)
 418{
 419        struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 420        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 421        struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
 422        struct be_port_rxf_stats_v0 *port_stats =
 423                                        &rxf_stats->port[adapter->port_num];
 424        struct be_drv_stats *drvs = &adapter->drv_stats;
 425
 426        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 427        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 428        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 429        drvs->rx_control_frames = port_stats->rx_control_frames;
 430        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 431        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 432        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 433        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 434        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 435        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 436        drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
 437        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 438        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 439        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 440        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 441        drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
 442        drvs->rx_dropped_header_too_small =
 443                port_stats->rx_dropped_header_too_small;
 444        drvs->rx_address_filtered =
 445                                        port_stats->rx_address_filtered +
 446                                        port_stats->rx_vlan_filtered;
 447        drvs->rx_alignment_symbol_errors =
 448                port_stats->rx_alignment_symbol_errors;
 449
 450        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 451        drvs->tx_controlframes = port_stats->tx_controlframes;
 452
 453        if (adapter->port_num)
 454                drvs->jabber_events = rxf_stats->port1_jabber_events;
 455        else
 456                drvs->jabber_events = rxf_stats->port0_jabber_events;
 457        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 458        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 459        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 460        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 461        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 462        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 463        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 464}
 465
 466static void populate_be_v1_stats(struct be_adapter *adapter)
 467{
 468        struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 469        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 470        struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
 471        struct be_port_rxf_stats_v1 *port_stats =
 472                                        &rxf_stats->port[adapter->port_num];
 473        struct be_drv_stats *drvs = &adapter->drv_stats;
 474
 475        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 476        drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 477        drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 478        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 479        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 480        drvs->rx_control_frames = port_stats->rx_control_frames;
 481        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 482        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 483        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 484        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 485        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 486        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 487        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 488        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 489        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 490        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 491        drvs->rx_dropped_header_too_small =
 492                port_stats->rx_dropped_header_too_small;
 493        drvs->rx_input_fifo_overflow_drop =
 494                port_stats->rx_input_fifo_overflow_drop;
 495        drvs->rx_address_filtered = port_stats->rx_address_filtered;
 496        drvs->rx_alignment_symbol_errors =
 497                port_stats->rx_alignment_symbol_errors;
 498        drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 499        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 500        drvs->tx_controlframes = port_stats->tx_controlframes;
 501        drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 502        drvs->jabber_events = port_stats->jabber_events;
 503        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 504        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 505        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 506        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 507        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 508        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 509        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 510}
 511
 512static void populate_be_v2_stats(struct be_adapter *adapter)
 513{
 514        struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 515        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 516        struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
 517        struct be_port_rxf_stats_v2 *port_stats =
 518                                        &rxf_stats->port[adapter->port_num];
 519        struct be_drv_stats *drvs = &adapter->drv_stats;
 520
 521        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 522        drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 523        drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 524        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 525        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 526        drvs->rx_control_frames = port_stats->rx_control_frames;
 527        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 528        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 529        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 530        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 531        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 532        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 533        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 534        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 535        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 536        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 537        drvs->rx_dropped_header_too_small =
 538                port_stats->rx_dropped_header_too_small;
 539        drvs->rx_input_fifo_overflow_drop =
 540                port_stats->rx_input_fifo_overflow_drop;
 541        drvs->rx_address_filtered = port_stats->rx_address_filtered;
 542        drvs->rx_alignment_symbol_errors =
 543                port_stats->rx_alignment_symbol_errors;
 544        drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 545        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 546        drvs->tx_controlframes = port_stats->tx_controlframes;
 547        drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 548        drvs->jabber_events = port_stats->jabber_events;
 549        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 550        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 551        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 552        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 553        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 554        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 555        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 556        if (be_roce_supported(adapter)) {
 557                drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
 558                drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
 559                drvs->rx_roce_frames = port_stats->roce_frames_received;
 560                drvs->roce_drops_crc = port_stats->roce_drops_crc;
 561                drvs->roce_drops_payload_len =
 562                        port_stats->roce_drops_payload_len;
 563        }
 564}
 565
 566static void populate_lancer_stats(struct be_adapter *adapter)
 567{
 568        struct be_drv_stats *drvs = &adapter->drv_stats;
 569        struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
 570
 571        be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
 572        drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
 573        drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
 574        drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
 575        drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
 576        drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
 577        drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
 578        drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
 579        drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
 580        drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
 581        drvs->rx_dropped_tcp_length =
 582                                pport_stats->rx_dropped_invalid_tcp_length;
 583        drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
 584        drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
 585        drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
 586        drvs->rx_dropped_header_too_small =
 587                                pport_stats->rx_dropped_header_too_small;
 588        drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 589        drvs->rx_address_filtered =
 590                                        pport_stats->rx_address_filtered +
 591                                        pport_stats->rx_vlan_filtered;
 592        drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
 593        drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 594        drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
 595        drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
 596        drvs->jabber_events = pport_stats->rx_jabbers;
 597        drvs->forwarded_packets = pport_stats->num_forwards_lo;
 598        drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
 599        drvs->rx_drops_too_many_frags =
 600                                pport_stats->rx_drops_too_many_frags_lo;
 601}
 602
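    /* Fold a 16-bit HW counter reading into a 32-bit software accumulator.
     * The HW counter wraps at 65535; a wrap is detected when the new reading
     * is smaller than the low 16 bits of the accumulator.
     */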
 603static void accumulate_16bit_val(u32 *acc, u16 val)
 604{
 605#define lo(x)                   (x & 0xFFFF)
 606#define hi(x)                   (x & 0xFFFF0000)
 607        bool wrapped = val < lo(*acc);
 608        u32 newacc = hi(*acc) + val;
 609
 610        if (wrapped)
 611                newacc += 65536;
 612        WRITE_ONCE(*acc, newacc);
 613}
 614
 615static void populate_erx_stats(struct be_adapter *adapter,
 616                               struct be_rx_obj *rxo, u32 erx_stat)
 617{
 618        if (!BEx_chip(adapter))
 619                rx_stats(rxo)->rx_drops_no_frags = erx_stat;
 620        else
 621                /* the erx HW counter below can wrap around after
 622                 * 65535; the driver accumulates it into a 32-bit value
 623                 */
 624                accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
 625                                     (u16)erx_stat);
 626}
 627
 628void be_parse_stats(struct be_adapter *adapter)
 629{
 630        struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
 631        struct be_rx_obj *rxo;
 632        int i;
 633        u32 erx_stat;
 634
 635        if (lancer_chip(adapter)) {
 636                populate_lancer_stats(adapter);
 637        } else {
 638                if (BE2_chip(adapter))
 639                        populate_be_v0_stats(adapter);
 640                else if (BE3_chip(adapter))
 641                        /* for BE3 */
 642                        populate_be_v1_stats(adapter);
 643                else
 644                        populate_be_v2_stats(adapter);
 645
 646                /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
 647                for_all_rx_queues(adapter, rxo, i) {
 648                        erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
 649                        populate_erx_stats(adapter, rxo, erx_stat);
 650                }
 651        }
 652}
 653
 654static void be_get_stats64(struct net_device *netdev,
 655                           struct rtnl_link_stats64 *stats)
 656{
 657        struct be_adapter *adapter = netdev_priv(netdev);
 658        struct be_drv_stats *drvs = &adapter->drv_stats;
 659        struct be_rx_obj *rxo;
 660        struct be_tx_obj *txo;
 661        u64 pkts, bytes;
 662        unsigned int start;
 663        int i;
 664
 665        for_all_rx_queues(adapter, rxo, i) {
 666                const struct be_rx_stats *rx_stats = rx_stats(rxo);
 667
 668                do {
 669                        start = u64_stats_fetch_begin_irq(&rx_stats->sync);
 670                        pkts = rx_stats(rxo)->rx_pkts;
 671                        bytes = rx_stats(rxo)->rx_bytes;
 672                } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
 673                stats->rx_packets += pkts;
 674                stats->rx_bytes += bytes;
 675                stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
 676                stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
 677                                        rx_stats(rxo)->rx_drops_no_frags;
 678        }
 679
 680        for_all_tx_queues(adapter, txo, i) {
 681                const struct be_tx_stats *tx_stats = tx_stats(txo);
 682
 683                do {
 684                        start = u64_stats_fetch_begin_irq(&tx_stats->sync);
 685                        pkts = tx_stats(txo)->tx_pkts;
 686                        bytes = tx_stats(txo)->tx_bytes;
 687                } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
 688                stats->tx_packets += pkts;
 689                stats->tx_bytes += bytes;
 690        }
 691
 692        /* bad pkts received */
 693        stats->rx_errors = drvs->rx_crc_errors +
 694                drvs->rx_alignment_symbol_errors +
 695                drvs->rx_in_range_errors +
 696                drvs->rx_out_range_errors +
 697                drvs->rx_frame_too_long +
 698                drvs->rx_dropped_too_small +
 699                drvs->rx_dropped_too_short +
 700                drvs->rx_dropped_header_too_small +
 701                drvs->rx_dropped_tcp_length +
 702                drvs->rx_dropped_runt;
 703
 704        /* detailed rx errors */
 705        stats->rx_length_errors = drvs->rx_in_range_errors +
 706                drvs->rx_out_range_errors +
 707                drvs->rx_frame_too_long;
 708
 709        stats->rx_crc_errors = drvs->rx_crc_errors;
 710
 711        /* frame alignment errors */
 712        stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
 713
 714        /* receiver fifo overrun */
 715        /* drops_no_pbuf is not per i/f, it's per BE card */
 716        stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
 717                                drvs->rx_input_fifo_overflow_drop +
 718                                drvs->rx_drops_no_pbuf;
 719}
 720
 721void be_link_status_update(struct be_adapter *adapter, u8 link_status)
 722{
 723        struct net_device *netdev = adapter->netdev;
 724
 725        if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
 726                netif_carrier_off(netdev);
 727                adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
 728        }
 729
 730        if (link_status)
 731                netif_carrier_on(netdev);
 732        else
 733                netif_carrier_off(netdev);
 734
 735        netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
 736}
 737
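    /* Length of the headers (up to and including the TCP header) that get
     * replicated in each segment of a TSO packet.
     */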
 738static int be_gso_hdr_len(struct sk_buff *skb)
 739{
 740        if (skb->encapsulation)
 741                return skb_inner_transport_offset(skb) +
 742                       inner_tcp_hdrlen(skb);
 743        return skb_transport_offset(skb) + tcp_hdrlen(skb);
 744}
 745
 746static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
 747{
 748        struct be_tx_stats *stats = tx_stats(txo);
 749        u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
 750        /* Account for headers which get duplicated in TSO pkt */
 751        u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
 752
 753        u64_stats_update_begin(&stats->sync);
 754        stats->tx_reqs++;
 755        stats->tx_bytes += skb->len + dup_hdr_len;
 756        stats->tx_pkts += tx_pkts;
 757        if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
 758                stats->tx_vxlan_offload_pkts += tx_pkts;
 759        u64_stats_update_end(&stats->sync);
 760}
 761
 762/* Returns number of WRBs needed for the skb */
 763static u32 skb_wrb_cnt(struct sk_buff *skb)
 764{
 765        /* +1 for the header wrb */
 766        return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
 767}
 768
 769static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
 770{
 771        wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
 772        wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
 773        wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
 774        wrb->rsvd0 = 0;
 775}
 776
 777/* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
 778 * to avoid the swap and shift/mask operations in wrb_fill().
 779 */
 780static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
 781{
 782        wrb->frag_pa_hi = 0;
 783        wrb->frag_pa_lo = 0;
 784        wrb->frag_len = 0;
 785        wrb->rsvd0 = 0;
 786}
 787
 788static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
 789                                     struct sk_buff *skb)
 790{
 791        u8 vlan_prio;
 792        u16 vlan_tag;
 793
 794        vlan_tag = skb_vlan_tag_get(skb);
 795        vlan_prio = skb_vlan_tag_get_prio(skb);
 796        /* If the vlan priority provided by the OS is NOT in the available bmap */
 797        if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
 798                vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
 799                                adapter->recommended_prio_bits;
 800
 801        return vlan_tag;
 802}
 803
 804/* Used only for IP tunnel packets */
 805static u16 skb_inner_ip_proto(struct sk_buff *skb)
 806{
 807        return (inner_ip_hdr(skb)->version == 4) ?
 808                inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
 809}
 810
 811static u16 skb_ip_proto(struct sk_buff *skb)
 812{
 813        return (ip_hdr(skb)->version == 4) ?
 814                ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
 815}
 816
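    /* The TXQ is considered full when it may no longer have room for a
     * worst-case packet of BE_MAX_TX_FRAG_COUNT WRBs.
     */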
 817static inline bool be_is_txq_full(struct be_tx_obj *txo)
 818{
 819        return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
 820}
 821
 822static inline bool be_can_txq_wake(struct be_tx_obj *txo)
 823{
 824        return atomic_read(&txo->q.used) < txo->q.len / 2;
 825}
 826
 827static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
 828{
 829        return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
 830}
 831
 832static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
 833                                       struct sk_buff *skb,
 834                                       struct be_wrb_params *wrb_params)
 835{
 836        u16 proto;
 837
 838        if (skb_is_gso(skb)) {
 839                BE_WRB_F_SET(wrb_params->features, LSO, 1);
 840                wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
 841                if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
 842                        BE_WRB_F_SET(wrb_params->features, LSO6, 1);
 843        } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 844                if (skb->encapsulation) {
 845                        BE_WRB_F_SET(wrb_params->features, IPCS, 1);
 846                        proto = skb_inner_ip_proto(skb);
 847                } else {
 848                        proto = skb_ip_proto(skb);
 849                }
 850                if (proto == IPPROTO_TCP)
 851                        BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
 852                else if (proto == IPPROTO_UDP)
 853                        BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
 854        }
 855
 856        if (skb_vlan_tag_present(skb)) {
 857                BE_WRB_F_SET(wrb_params->features, VLAN, 1);
 858                wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
 859        }
 860
 861        BE_WRB_F_SET(wrb_params->features, CRC, 1);
 862}
 863
 864static void wrb_fill_hdr(struct be_adapter *adapter,
 865                         struct be_eth_hdr_wrb *hdr,
 866                         struct be_wrb_params *wrb_params,
 867                         struct sk_buff *skb)
 868{
 869        memset(hdr, 0, sizeof(*hdr));
 870
 871        SET_TX_WRB_HDR_BITS(crc, hdr,
 872                            BE_WRB_F_GET(wrb_params->features, CRC));
 873        SET_TX_WRB_HDR_BITS(ipcs, hdr,
 874                            BE_WRB_F_GET(wrb_params->features, IPCS));
 875        SET_TX_WRB_HDR_BITS(tcpcs, hdr,
 876                            BE_WRB_F_GET(wrb_params->features, TCPCS));
 877        SET_TX_WRB_HDR_BITS(udpcs, hdr,
 878                            BE_WRB_F_GET(wrb_params->features, UDPCS));
 879
 880        SET_TX_WRB_HDR_BITS(lso, hdr,
 881                            BE_WRB_F_GET(wrb_params->features, LSO));
 882        SET_TX_WRB_HDR_BITS(lso6, hdr,
 883                            BE_WRB_F_GET(wrb_params->features, LSO6));
 884        SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
 885
 886        /* The hack to skip HW VLAN tagging needs evt = 1, compl = 0. When
 887         * this hack is not needed, the evt bit is set while ringing the DB.
 888         */
 889        SET_TX_WRB_HDR_BITS(event, hdr,
 890                            BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
 891        SET_TX_WRB_HDR_BITS(vlan, hdr,
 892                            BE_WRB_F_GET(wrb_params->features, VLAN));
 893        SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
 894
 895        SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
 896        SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
 897        SET_TX_WRB_HDR_BITS(mgmt, hdr,
 898                            BE_WRB_F_GET(wrb_params->features, OS2BMC));
 899}
 900
 901static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
 902                          bool unmap_single)
 903{
 904        dma_addr_t dma;
 905        u32 frag_len = le32_to_cpu(wrb->frag_len);
 906
 908        dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
 909                (u64)le32_to_cpu(wrb->frag_pa_lo);
 910        if (frag_len) {
 911                if (unmap_single)
 912                        dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
 913                else
 914                        dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
 915        }
 916}
 917
 918/* Grab a WRB header for xmit */
 919static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
 920{
 921        u32 head = txo->q.head;
 922
 923        queue_head_inc(&txo->q);
 924        return head;
 925}
 926
 927/* Set up the WRB header for xmit */
 928static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
 929                                struct be_tx_obj *txo,
 930                                struct be_wrb_params *wrb_params,
 931                                struct sk_buff *skb, u16 head)
 932{
 933        u32 num_frags = skb_wrb_cnt(skb);
 934        struct be_queue_info *txq = &txo->q;
 935        struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
 936
 937        wrb_fill_hdr(adapter, hdr, wrb_params, skb);
 938        be_dws_cpu_to_le(hdr, sizeof(*hdr));
 939
 940        BUG_ON(txo->sent_skb_list[head]);
 941        txo->sent_skb_list[head] = skb;
 942        txo->last_req_hdr = head;
 943        atomic_add(num_frags, &txq->used);
 944        txo->last_req_wrb_cnt = num_frags;
 945        txo->pend_wrb_cnt += num_frags;
 946}
 947
 948/* Setup a WRB fragment (buffer descriptor) for xmit */
 949static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
 950                                 int len)
 951{
 952        struct be_eth_wrb *wrb;
 953        struct be_queue_info *txq = &txo->q;
 954
 955        wrb = queue_head_node(txq);
 956        wrb_fill(wrb, busaddr, len);
 957        queue_head_inc(txq);
 958}
 959
 960/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
 961 * was invoked. The producer index is restored to the previous packet and the
 962 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
 963 */
 964static void be_xmit_restore(struct be_adapter *adapter,
 965                            struct be_tx_obj *txo, u32 head, bool map_single,
 966                            u32 copied)
 967{
 968        struct device *dev;
 969        struct be_eth_wrb *wrb;
 970        struct be_queue_info *txq = &txo->q;
 971
 972        dev = &adapter->pdev->dev;
 973        txq->head = head;
 974
 975        /* skip the first wrb (hdr); it's not mapped */
 976        queue_head_inc(txq);
 977        while (copied) {
 978                wrb = queue_head_node(txq);
 979                unmap_tx_frag(dev, wrb, map_single);
 980                map_single = false;
 981                copied -= le32_to_cpu(wrb->frag_len);
 982                queue_head_inc(txq);
 983        }
 984
 985        txq->head = head;
 986}
 987
 988/* Enqueue the given packet for transmit. This routine allocates WRBs for the
 989 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
 990 * of WRBs used up by the packet.
 991 */
 992static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
 993                           struct sk_buff *skb,
 994                           struct be_wrb_params *wrb_params)
 995{
 996        u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
 997        struct device *dev = &adapter->pdev->dev;
 998        bool map_single = false;
 999        u32 head;
1000        dma_addr_t busaddr;
1001        int len;
1002
1003        head = be_tx_get_wrb_hdr(txo);
1004
1005        if (skb->len > skb->data_len) {
1006                len = skb_headlen(skb);
1007
1008                busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1009                if (dma_mapping_error(dev, busaddr))
1010                        goto dma_err;
1011                map_single = true;
1012                be_tx_setup_wrb_frag(txo, busaddr, len);
1013                copied += len;
1014        }
1015
1016        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1017                const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1018                len = skb_frag_size(frag);
1019
1020                busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1021                if (dma_mapping_error(dev, busaddr))
1022                        goto dma_err;
1023                be_tx_setup_wrb_frag(txo, busaddr, len);
1024                copied += len;
1025        }
1026
1027        be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1028
1029        be_tx_stats_update(txo, skb);
1030        return wrb_cnt;
1031
1032dma_err:
1033        adapter->drv_stats.dma_map_errors++;
1034        be_xmit_restore(adapter, txo, head, map_single, copied);
1035        return 0;
1036}
1037
1038static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1039{
1040        return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1041}
1042
1043static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1044                                             struct sk_buff *skb,
1045                                             struct be_wrb_params
1046                                             *wrb_params)
1047{
1048        bool insert_vlan = false;
1049        u16 vlan_tag = 0;
1050
1051        skb = skb_share_check(skb, GFP_ATOMIC);
1052        if (unlikely(!skb))
1053                return skb;
1054
1055        if (skb_vlan_tag_present(skb)) {
1056                vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1057                insert_vlan = true;
1058        }
1059
1060        if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1061                if (!insert_vlan) {
1062                        vlan_tag = adapter->pvid;
1063                        insert_vlan = true;
1064                }
1065                /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W
1066                 * to skip VLAN insertion
1067                 */
1068                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1069        }
1070
1071        if (insert_vlan) {
1072                skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1073                                                vlan_tag);
1074                if (unlikely(!skb))
1075                        return skb;
1076                __vlan_hwaccel_clear_tag(skb);
1077        }
1078
1079        /* Insert the outer VLAN, if any */
1080        if (adapter->qnq_vid) {
1081                vlan_tag = adapter->qnq_vid;
1082                skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1083                                                vlan_tag);
1084                if (unlikely(!skb))
1085                        return skb;
1086                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1087        }
1088
1089        return skb;
1090}
1091
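    /* Detect the offending IPv6 packets (non-TCP/UDP next header whose first
     * extension header has a length byte of 0xff) that can stall BE3 TX when
     * HW VLAN tagging is requested; used by be_ipv6_tx_stall_chk() below.
     */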
1092static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1093{
1094        struct ethhdr *eh = (struct ethhdr *)skb->data;
1095        u16 offset = ETH_HLEN;
1096
1097        if (eh->h_proto == htons(ETH_P_IPV6)) {
1098                struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1099
1100                offset += sizeof(struct ipv6hdr);
1101                if (ip6h->nexthdr != NEXTHDR_TCP &&
1102                    ip6h->nexthdr != NEXTHDR_UDP) {
1103                        struct ipv6_opt_hdr *ehdr =
1104                                (struct ipv6_opt_hdr *)(skb->data + offset);
1105
1106                        /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1107                        if (ehdr->hdrlen == 0xff)
1108                                return true;
1109                }
1110        }
1111        return false;
1112}
1113
1114static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1115{
1116        return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1117}
1118
1119static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1120{
1121        return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1122}
1123
1124static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1125                                                  struct sk_buff *skb,
1126                                                  struct be_wrb_params
1127                                                  *wrb_params)
1128{
1129        struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1130        unsigned int eth_hdr_len;
1131        struct iphdr *ip;
1132
1133        /* For padded packets, BE HW modifies the tot_len field in the IP
1134         * header incorrectly when the VLAN tag is inserted by HW.
1135         * For padded packets, Lancer computes an incorrect checksum.
1136         */
1137        eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1138                                                VLAN_ETH_HLEN : ETH_HLEN;
1139        if (skb->len <= 60 &&
1140            (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1141            is_ipv4_pkt(skb)) {
1142                ip = (struct iphdr *)ip_hdr(skb);
1143                pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1144        }
1145
1146        /* If vlan tag is already inlined in the packet, skip HW VLAN
1147         * tagging in pvid-tagging mode
1148         */
1149        if (be_pvid_tagging_enabled(adapter) &&
1150            veh->h_vlan_proto == htons(ETH_P_8021Q))
1151                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1152
1153        /* HW has a bug wherein it will calculate CSUM for VLAN
1154         * pkts even when CSUM offload is not requested.
1155         * Manually insert the VLAN in the pkt as a workaround.
1156         */
1157        if (skb->ip_summed != CHECKSUM_PARTIAL &&
1158            skb_vlan_tag_present(skb)) {
1159                skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1160                if (unlikely(!skb))
1161                        goto err;
1162        }
1163
1164        /* HW may lock up when VLAN HW tagging is requested on
1165         * certain ipv6 packets. Drop such pkts if the FW has not enabled
1166         * the workaround to skip HW tagging.
1167         */
1168        if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1169                     (adapter->pvid || adapter->qnq_vid) &&
1170                     !qnq_async_evt_rcvd(adapter)))
1171                goto tx_drop;
1172
1173        /* Manual VLAN tag insertion to prevent:
1174         * ASIC lockup when the ASIC inserts VLAN tag into
1175         * certain ipv6 packets. Insert VLAN tags in driver,
1176         * and set event, completion, vlan bits accordingly
1177         * in the Tx WRB.
1178         */
1179        if (be_ipv6_tx_stall_chk(adapter, skb) &&
1180            be_vlan_tag_tx_chk(adapter, skb)) {
1181                skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1182                if (unlikely(!skb))
1183                        goto err;
1184        }
1185
1186        return skb;
1187tx_drop:
1188        dev_kfree_skb_any(skb);
1189err:
1190        return NULL;
1191}
1192
1193static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1194                                           struct sk_buff *skb,
1195                                           struct be_wrb_params *wrb_params)
1196{
1197        int err;
1198
1199        /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1200         * packets that are 32 bytes or less may cause a transmit stall
1201         * on that port. The workaround is to pad such packets
1202         * (len <= 32 bytes) to a minimum length of 36 bytes.
1203         */
1204        if (skb->len <= 32) {
1205                if (skb_put_padto(skb, 36))
1206                        return NULL;
1207        }
1208
1209        if (BEx_chip(adapter) || lancer_chip(adapter)) {
1210                skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1211                if (!skb)
1212                        return NULL;
1213        }
1214
1215        /* The stack can send us skbs with length greater than
1216         * what the HW can handle. Trim the extra bytes.
1217         */
1218        WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1219        err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1220        WARN_ON(err);
1221
1222        return skb;
1223}
1224
1225static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1226{
1227        struct be_queue_info *txq = &txo->q;
1228        struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1229
1230        /* Mark the last request eventable if it hasn't been marked already */
1231        if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1232                hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1233
1234        /* compose a dummy wrb if there is an odd number of wrbs to notify */
1235        if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1236                wrb_fill_dummy(queue_head_node(txq));
1237                queue_head_inc(txq);
1238                atomic_inc(&txq->used);
1239                txo->pend_wrb_cnt++;
1240                hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1241                                           TX_HDR_WRB_NUM_SHIFT);
1242                hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1243                                          TX_HDR_WRB_NUM_SHIFT);
1244        }
1245        be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1246        txo->pend_wrb_cnt = 0;
1247}
1248
1249/* OS2BMC related */
1250
1251#define DHCP_CLIENT_PORT        68
1252#define DHCP_SERVER_PORT        67
1253#define NET_BIOS_PORT1          137
1254#define NET_BIOS_PORT2          138
1255#define DHCPV6_RAS_PORT         547
1256
1257#define is_mc_allowed_on_bmc(adapter, eh)       \
1258        (!is_multicast_filt_enabled(adapter) && \
1259         is_multicast_ether_addr(eh->h_dest) && \
1260         !is_broadcast_ether_addr(eh->h_dest))
1261
1262#define is_bc_allowed_on_bmc(adapter, eh)       \
1263        (!is_broadcast_filt_enabled(adapter) && \
1264         is_broadcast_ether_addr(eh->h_dest))
1265
1266#define is_arp_allowed_on_bmc(adapter, skb)     \
1267        (is_arp(skb) && is_arp_filt_enabled(adapter))
1268
1269#define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1270
1271#define is_arp_filt_enabled(adapter)    \
1272                (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1273
1274#define is_dhcp_client_filt_enabled(adapter)    \
1275                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1276
1277#define is_dhcp_srvr_filt_enabled(adapter)      \
1278                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1279
1280#define is_nbios_filt_enabled(adapter)  \
1281                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1282
1283#define is_ipv6_na_filt_enabled(adapter)        \
1284                (adapter->bmc_filt_mask &       \
1285                        BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1286
1287#define is_ipv6_ra_filt_enabled(adapter)        \
1288                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1289
1290#define is_ipv6_ras_filt_enabled(adapter)       \
1291                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1292
1293#define is_broadcast_filt_enabled(adapter)      \
1294                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1295
1296#define is_multicast_filt_enabled(adapter)      \
1297                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1298
1299static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1300                               struct sk_buff **skb)
1301{
1302        struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1303        bool os2bmc = false;
1304
1305        if (!be_is_os2bmc_enabled(adapter))
1306                goto done;
1307
1308        if (!is_multicast_ether_addr(eh->h_dest))
1309                goto done;
1310
1311        if (is_mc_allowed_on_bmc(adapter, eh) ||
1312            is_bc_allowed_on_bmc(adapter, eh) ||
1313            is_arp_allowed_on_bmc(adapter, (*skb))) {
1314                os2bmc = true;
1315                goto done;
1316        }
1317
1318        if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1319                struct ipv6hdr *hdr = ipv6_hdr((*skb));
1320                u8 nexthdr = hdr->nexthdr;
1321
1322                if (nexthdr == IPPROTO_ICMPV6) {
1323                        struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1324
1325                        switch (icmp6->icmp6_type) {
1326                        case NDISC_ROUTER_ADVERTISEMENT:
1327                                os2bmc = is_ipv6_ra_filt_enabled(adapter);
1328                                goto done;
1329                        case NDISC_NEIGHBOUR_ADVERTISEMENT:
1330                                os2bmc = is_ipv6_na_filt_enabled(adapter);
1331                                goto done;
1332                        default:
1333                                break;
1334                        }
1335                }
1336        }
1337
1338        if (is_udp_pkt((*skb))) {
1339                struct udphdr *udp = udp_hdr((*skb));
1340
1341                switch (ntohs(udp->dest)) {
1342                case DHCP_CLIENT_PORT:
1343                        os2bmc = is_dhcp_client_filt_enabled(adapter);
1344                        goto done;
1345                case DHCP_SERVER_PORT:
1346                        os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1347                        goto done;
1348                case NET_BIOS_PORT1:
1349                case NET_BIOS_PORT2:
1350                        os2bmc = is_nbios_filt_enabled(adapter);
1351                        goto done;
1352                case DHCPV6_RAS_PORT:
1353                        os2bmc = is_ipv6_ras_filt_enabled(adapter);
1354                        goto done;
1355                default:
1356                        break;
1357                }
1358        }
1359done:
1360        /* For VLAN packets destined to the BMC, the ASIC expects the
1361         * VLAN tag to be inline in the packet.
1362         */
1363        if (os2bmc)
1364                *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1365
1366        return os2bmc;
1367}
1368
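    /* Main transmit handler: applies HW workarounds, DMA-maps the skb and
     * enqueues its WRBs, optionally enqueues a second copy for the BMC
     * (OS2BMC), and rings the TX doorbell when there is nothing more to batch.
     */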
1369static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1370{
1371        struct be_adapter *adapter = netdev_priv(netdev);
1372        u16 q_idx = skb_get_queue_mapping(skb);
1373        struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1374        struct be_wrb_params wrb_params = { 0 };
1375        bool flush = !netdev_xmit_more();
1376        u16 wrb_cnt;
1377
1378        skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1379        if (unlikely(!skb))
1380                goto drop;
1381
1382        be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1383
1384        wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1385        if (unlikely(!wrb_cnt)) {
1386                dev_kfree_skb_any(skb);
1387                goto drop;
1388        }
1389
1390        /* If OS2BMC is enabled and the packet is destined to the BMC,
1391         * enqueue the packet a second time with the mgmt bit set.
1392         */
1393        if (be_send_pkt_to_bmc(adapter, &skb)) {
1394                BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1395                wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1396                if (unlikely(!wrb_cnt))
1397                        goto drop;
1398                else
1399                        skb_get(skb);
1400        }
1401
1402        if (be_is_txq_full(txo)) {
1403                netif_stop_subqueue(netdev, q_idx);
1404                tx_stats(txo)->tx_stops++;
1405        }
1406
1407        if (flush || __netif_subqueue_stopped(netdev, q_idx))
1408                be_xmit_flush(adapter, txo);
1409
1410        return NETDEV_TX_OK;
1411drop:
1412        tx_stats(txo)->tx_drv_drops++;
1413        /* Flush the already enqueued tx requests */
1414        if (flush && txo->pend_wrb_cnt)
1415                be_xmit_flush(adapter, txo);
1416
1417        return NETDEV_TX_OK;
1418}
1419
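    /* TX timeout handler: dump the TX and TX-CQ descriptor rings and the
     * pending skbs of every TX queue for debugging; on Lancer, also request
     * a firmware-level reset of the physical device.
     */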
1420static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1421{
1422        struct be_adapter *adapter = netdev_priv(netdev);
1423        struct device *dev = &adapter->pdev->dev;
1424        struct be_tx_obj *txo;
1425        struct sk_buff *skb;
1426        struct tcphdr *tcphdr;
1427        struct udphdr *udphdr;
1428        u32 *entry;
1429        int status;
1430        int i, j;
1431
1432        for_all_tx_queues(adapter, txo, i) {
1433                dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1434                         i, txo->q.head, txo->q.tail,
1435                         atomic_read(&txo->q.used), txo->q.id);
1436
1437                entry = txo->q.dma_mem.va;
1438                for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1439                        if (entry[j] != 0 || entry[j + 1] != 0 ||
1440                            entry[j + 2] != 0 || entry[j + 3] != 0) {
1441                                dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1442                                         j, entry[j], entry[j + 1],
1443                                         entry[j + 2], entry[j + 3]);
1444                        }
1445                }
1446
1447                entry = txo->cq.dma_mem.va;
1448                dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1449                         i, txo->cq.head, txo->cq.tail,
1450                         atomic_read(&txo->cq.used));
1451                for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1452                        if (entry[j] != 0 || entry[j + 1] != 0 ||
1453                            entry[j + 2] != 0 || entry[j + 3] != 0) {
1454                                dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1455                                         j, entry[j], entry[j + 1],
1456                                         entry[j + 2], entry[j + 3]);
1457                        }
1458                }
1459
1460                for (j = 0; j < TX_Q_LEN; j++) {
1461                        if (txo->sent_skb_list[j]) {
1462                                skb = txo->sent_skb_list[j];
1463                                if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1464                                        tcphdr = tcp_hdr(skb);
1465                                        dev_info(dev, "TCP source port %d\n",
1466                                                 ntohs(tcphdr->source));
1467                                        dev_info(dev, "TCP dest port %d\n",
1468                                                 ntohs(tcphdr->dest));
1469                                        dev_info(dev, "TCP sequence num %u\n",
1470                                                 ntohl(tcphdr->seq));
1471                                        dev_info(dev, "TCP ack_seq %u\n",
1472                                                 ntohl(tcphdr->ack_seq));
1473                                } else if (ip_hdr(skb)->protocol ==
1474                                           IPPROTO_UDP) {
1475                                        udphdr = udp_hdr(skb);
1476                                        dev_info(dev, "UDP source port %d\n",
1477                                                 ntohs(udphdr->source));
1478                                        dev_info(dev, "UDP dest port %d\n",
1479                                                 ntohs(udphdr->dest));
1480                                }
1481                                dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1482                                         j, skb, skb->len, skb->protocol);
1483                        }
1484                }
1485        }
1486
1487        if (lancer_chip(adapter)) {
1488                dev_info(dev, "Initiating reset due to tx timeout\n");
1489                dev_info(dev, "Resetting adapter\n");
1490                status = lancer_physdev_ctrl(adapter,
1491                                             PHYSDEV_CONTROL_FW_RESET_MASK);
1492                if (status)
1493                        dev_err(dev, "Reset failed; reboot the server\n");
1494        }
1495}
1496
1497static inline bool be_in_all_promisc(struct be_adapter *adapter)
1498{
1499        return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1500                        BE_IF_FLAGS_ALL_PROMISCUOUS;
1501}
1502
1503static int be_set_vlan_promisc(struct be_adapter *adapter)
1504{
1505        struct device *dev = &adapter->pdev->dev;
1506        int status;
1507
1508        if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1509                return 0;
1510
1511        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1512        if (!status) {
1513                dev_info(dev, "Enabled VLAN promiscuous mode\n");
1514                adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1515        } else {
1516                dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1517        }
1518        return status;
1519}
1520
1521static int be_clear_vlan_promisc(struct be_adapter *adapter)
1522{
1523        struct device *dev = &adapter->pdev->dev;
1524        int status;
1525
1526        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1527        if (!status) {
1528                dev_info(dev, "Disabled VLAN promiscuous mode\n");
1529                adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1530        }
1531        return status;
1532}
1533
1534/*
1535 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1536 * If the user configures more, place BE in vlan promiscuous mode.
1537 */
1538static int be_vid_config(struct be_adapter *adapter)
1539{
1540        struct device *dev = &adapter->pdev->dev;
1541        u16 vids[BE_NUM_VLANS_SUPPORTED];
1542        u16 num = 0, i = 0;
1543        int status = 0;
1544
1545        /* No need to change the VLAN state if the I/F is in promiscuous mode */
1546        if (adapter->netdev->flags & IFF_PROMISC)
1547                return 0;
1548
1549        if (adapter->vlans_added > be_max_vlans(adapter))
1550                return be_set_vlan_promisc(adapter);
1551
1552        if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1553                status = be_clear_vlan_promisc(adapter);
1554                if (status)
1555                        return status;
1556        }
1557        /* Construct VLAN Table to give to HW */
1558        for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1559                vids[num++] = cpu_to_le16(i);
1560
1561        status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1562        if (status) {
1563                dev_err(dev, "Setting HW VLAN filtering failed\n");
1564                /* Set to VLAN promisc mode as setting VLAN filter failed */
1565                if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1566                    addl_status(status) ==
1567                                MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1568                        return be_set_vlan_promisc(adapter);
1569        }
1570        return status;
1571}
1572
1573static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1574{
1575        struct be_adapter *adapter = netdev_priv(netdev);
1576        int status = 0;
1577
1578        mutex_lock(&adapter->rx_filter_lock);
1579
1580        /* Packets with VID 0 are always received by Lancer by default */
1581        if (lancer_chip(adapter) && vid == 0)
1582                goto done;
1583
1584        if (test_bit(vid, adapter->vids))
1585                goto done;
1586
1587        set_bit(vid, adapter->vids);
1588        adapter->vlans_added++;
1589
1590        status = be_vid_config(adapter);
1591done:
1592        mutex_unlock(&adapter->rx_filter_lock);
1593        return status;
1594}
1595
1596static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1597{
1598        struct be_adapter *adapter = netdev_priv(netdev);
1599        int status = 0;
1600
1601        mutex_lock(&adapter->rx_filter_lock);
1602
1603        /* Packets with VID 0 are always received by Lancer by default */
1604        if (lancer_chip(adapter) && vid == 0)
1605                goto done;
1606
1607        if (!test_bit(vid, adapter->vids))
1608                goto done;
1609
1610        clear_bit(vid, adapter->vids);
1611        adapter->vlans_added--;
1612
1613        status = be_vid_config(adapter);
1614done:
1615        mutex_unlock(&adapter->rx_filter_lock);
1616        return status;
1617}
1618
1619static void be_set_all_promisc(struct be_adapter *adapter)
1620{
1621        be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1622        adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1623}
1624
1625static void be_set_mc_promisc(struct be_adapter *adapter)
1626{
1627        int status;
1628
1629        if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1630                return;
1631
1632        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1633        if (!status)
1634                adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1635}
1636
1637static void be_set_uc_promisc(struct be_adapter *adapter)
1638{
1639        int status;
1640
1641        if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1642                return;
1643
1644        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1645        if (!status)
1646                adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1647}
1648
1649static void be_clear_uc_promisc(struct be_adapter *adapter)
1650{
1651        int status;
1652
1653        if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1654                return;
1655
1656        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1657        if (!status)
1658                adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1659}
1660
1661/* The two functions below are the callbacks passed to __dev_uc_sync() and
1662 * __dev_mc_sync(). A single callback is used for both sync and unsync; it does
1663 * not actually add or remove addresses, but only flags that the uc/mc list has
1664 * changed. The entire uc/mc list is programmed in be_set_rx_mode().
1665 */
1666static int be_uc_list_update(struct net_device *netdev,
1667                             const unsigned char *addr)
1668{
1669        struct be_adapter *adapter = netdev_priv(netdev);
1670
1671        adapter->update_uc_list = true;
1672        return 0;
1673}
1674
1675static int be_mc_list_update(struct net_device *netdev,
1676                             const unsigned char *addr)
1677{
1678        struct be_adapter *adapter = netdev_priv(netdev);
1679
1680        adapter->update_mc_list = true;
1681        return 0;
1682}
1683
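    /* Sync the kernel's multicast list into the adapter and program the RX
     * filter; fall back to multicast-promiscuous mode when IFF_ALLMULTI is
     * set or more addresses are configured than the HW supports.
     */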
1684static void be_set_mc_list(struct be_adapter *adapter)
1685{
1686        struct net_device *netdev = adapter->netdev;
1687        struct netdev_hw_addr *ha;
1688        bool mc_promisc = false;
1689        int status;
1690
1691        netif_addr_lock_bh(netdev);
1692        __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1693
1694        if (netdev->flags & IFF_PROMISC) {
1695                adapter->update_mc_list = false;
1696        } else if (netdev->flags & IFF_ALLMULTI ||
1697                   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1698                /* Enable multicast promiscuous mode if the number of
1699                 * configured multicast addresses exceeds what we support.
1700                 */
1701                mc_promisc = true;
1702                adapter->update_mc_list = false;
1703        } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1704                /* Update mc-list unconditionally if the iface was previously
1705                 * in mc-promisc mode and now is out of that mode.
1706                 */
1707                adapter->update_mc_list = true;
1708        }
1709
1710        if (adapter->update_mc_list) {
1711                int i = 0;
1712
1713                /* cache the mc-list in adapter */
1714                netdev_for_each_mc_addr(ha, netdev) {
1715                        ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1716                        i++;
1717                }
1718                adapter->mc_count = netdev_mc_count(netdev);
1719        }
1720        netif_addr_unlock_bh(netdev);
1721
1722        if (mc_promisc) {
1723                be_set_mc_promisc(adapter);
1724        } else if (adapter->update_mc_list) {
1725                status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1726                if (!status)
1727                        adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1728                else
1729                        be_set_mc_promisc(adapter);
1730
1731                adapter->update_mc_list = false;
1732        }
1733}
1734
1735static void be_clear_mc_list(struct be_adapter *adapter)
1736{
1737        struct net_device *netdev = adapter->netdev;
1738
1739        __dev_mc_unsync(netdev, NULL);
1740        be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1741        adapter->mc_count = 0;
1742}
1743
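    /* Add the unicast MAC at index uc_idx to the interface. If it matches the
     * device MAC, reuse pmac_id[0] instead of consuming another pmac entry.
     */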
1744static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1745{
1746        if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1747                adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1748                return 0;
1749        }
1750
1751        return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1752                               adapter->if_handle,
1753                               &adapter->pmac_id[uc_idx + 1], 0);
1754}
1755
1756static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1757{
1758        if (pmac_id == adapter->pmac_id[0])
1759                return;
1760
1761        be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1762}
1763
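    /* Sync the kernel's unicast list into the adapter and re-program the
     * per-interface MAC entries; fall back to unicast-promiscuous mode when
     * more addresses are configured than the HW supports.
     */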
1764static void be_set_uc_list(struct be_adapter *adapter)
1765{
1766        struct net_device *netdev = adapter->netdev;
1767        struct netdev_hw_addr *ha;
1768        bool uc_promisc = false;
1769        int curr_uc_macs = 0, i;
1770
1771        netif_addr_lock_bh(netdev);
1772        __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1773
1774        if (netdev->flags & IFF_PROMISC) {
1775                adapter->update_uc_list = false;
1776        } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1777                uc_promisc = true;
1778                adapter->update_uc_list = false;
1779        }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1780                /* Update uc-list unconditionally if the iface was previously
1781                 * in uc-promisc mode and now is out of that mode.
1782                 */
1783                adapter->update_uc_list = true;
1784        }
1785
1786        if (adapter->update_uc_list) {
1787                /* cache the uc-list in adapter array */
1788                i = 0;
1789                netdev_for_each_uc_addr(ha, netdev) {
1790                        ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1791                        i++;
1792                }
1793                curr_uc_macs = netdev_uc_count(netdev);
1794        }
1795        netif_addr_unlock_bh(netdev);
1796
1797        if (uc_promisc) {
1798                be_set_uc_promisc(adapter);
1799        } else if (adapter->update_uc_list) {
1800                be_clear_uc_promisc(adapter);
1801
1802                for (i = 0; i < adapter->uc_macs; i++)
1803                        be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1804
1805                for (i = 0; i < curr_uc_macs; i++)
1806                        be_uc_mac_add(adapter, i);
1807                adapter->uc_macs = curr_uc_macs;
1808                adapter->update_uc_list = false;
1809        }
1810}
1811
1812static void be_clear_uc_list(struct be_adapter *adapter)
1813{
1814        struct net_device *netdev = adapter->netdev;
1815        int i;
1816
1817        __dev_uc_unsync(netdev, NULL);
1818        for (i = 0; i < adapter->uc_macs; i++)
1819                be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1820
1821        adapter->uc_macs = 0;
1822}
1823
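    /* Program the RX filters (promiscuous flags, VLAN, UC and MC lists) to
     * match the current netdev flags and address lists.
     */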
1824static void __be_set_rx_mode(struct be_adapter *adapter)
1825{
1826        struct net_device *netdev = adapter->netdev;
1827
1828        mutex_lock(&adapter->rx_filter_lock);
1829
1830        if (netdev->flags & IFF_PROMISC) {
1831                if (!be_in_all_promisc(adapter))
1832                        be_set_all_promisc(adapter);
1833        } else if (be_in_all_promisc(adapter)) {
1834                /* We need to re-program the vlan-list or clear
1835                 * vlan-promisc mode (if needed) when the interface
1836                 * comes out of promisc mode.
1837                 */
1838                be_vid_config(adapter);
1839        }
1840
1841        be_set_uc_list(adapter);
1842        be_set_mc_list(adapter);
1843
1844        mutex_unlock(&adapter->rx_filter_lock);
1845}
1846
1847static void be_work_set_rx_mode(struct work_struct *work)
1848{
1849        struct be_cmd_work *cmd_work =
1850                                container_of(work, struct be_cmd_work, work);
1851
1852        __be_set_rx_mode(cmd_work->adapter);
1853        kfree(cmd_work);
1854}
1855
1856static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1857{
1858        struct be_adapter *adapter = netdev_priv(netdev);
1859        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1860        int status;
1861
1862        if (!sriov_enabled(adapter))
1863                return -EPERM;
1864
1865        if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1866                return -EINVAL;
1867
1868        /* Proceed further only if the user-provided MAC is different
1869         * from the active MAC.
1870         */
1871        if (ether_addr_equal(mac, vf_cfg->mac_addr))
1872                return 0;
1873
1874        if (BEx_chip(adapter)) {
1875                be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1876                                vf + 1);
1877
1878                status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1879                                         &vf_cfg->pmac_id, vf + 1);
1880        } else {
1881                status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1882                                        vf + 1);
1883        }
1884
1885        if (status) {
1886                dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1887                        mac, vf, status);
1888                return be_cmd_status(status);
1889        }
1890
1891        ether_addr_copy(vf_cfg->mac_addr, mac);
1892
1893        return 0;
1894}
1895
1896static int be_get_vf_config(struct net_device *netdev, int vf,
1897                            struct ifla_vf_info *vi)
1898{
1899        struct be_adapter *adapter = netdev_priv(netdev);
1900        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1901
1902        if (!sriov_enabled(adapter))
1903                return -EPERM;
1904
1905        if (vf >= adapter->num_vfs)
1906                return -EINVAL;
1907
1908        vi->vf = vf;
1909        vi->max_tx_rate = vf_cfg->tx_rate;
1910        vi->min_tx_rate = 0;
1911        vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1912        vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1913        memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1914        vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1915        vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1916
1917        return 0;
1918}
1919
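    /* Enable Transparent VLAN Tagging (TVT) on a VF: program the VLAN in the
     * switch config, clear any guest-programmed VLAN filters and revoke the
     * VF's FILTMGMT privilege so it cannot override the tag.
     */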
1920static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1921{
1922        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1923        u16 vids[BE_NUM_VLANS_SUPPORTED];
1924        int vf_if_id = vf_cfg->if_handle;
1925        int status;
1926
1927        /* Enable Transparent VLAN Tagging */
1928        status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1929        if (status)
1930                return status;
1931
1932        /* When TVT is enabled, clear any VLAN filters pre-programmed on the VF */
1933        vids[0] = 0;
1934        status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1935        if (!status)
1936                dev_info(&adapter->pdev->dev,
1937                         "Cleared guest VLANs on VF%d", vf);
1938
1939        /* After TVT is enabled, disallow the VF from programming VLAN filters */
1940        if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1941                status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1942                                                  ~BE_PRIV_FILTMGMT, vf + 1);
1943                if (!status)
1944                        vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1945        }
1946        return 0;
1947}
1948
1949static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1950{
1951        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1952        struct device *dev = &adapter->pdev->dev;
1953        int status;
1954
1955        /* Reset Transparent VLAN Tagging. */
1956        status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1957                                       vf_cfg->if_handle, 0, 0);
1958        if (status)
1959                return status;
1960
1961        /* Allow VFs to program VLAN filtering */
1962        if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1963                status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1964                                                  BE_PRIV_FILTMGMT, vf + 1);
1965                if (!status) {
1966                        vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1967                        dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1968                }
1969        }
1970
1971        dev_info(dev,
1972                 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1973        return 0;
1974}
1975
1976static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1977                          __be16 vlan_proto)
1978{
1979        struct be_adapter *adapter = netdev_priv(netdev);
1980        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1981        int status;
1982
1983        if (!sriov_enabled(adapter))
1984                return -EPERM;
1985
1986        if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1987                return -EINVAL;
1988
1989        if (vlan_proto != htons(ETH_P_8021Q))
1990                return -EPROTONOSUPPORT;
1991
1992        if (vlan || qos) {
1993                vlan |= qos << VLAN_PRIO_SHIFT;
1994                status = be_set_vf_tvt(adapter, vf, vlan);
1995        } else {
1996                status = be_clear_vf_tvt(adapter, vf);
1997        }
1998
1999        if (status) {
2000                dev_err(&adapter->pdev->dev,
2001                        "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2002                        status);
2003                return be_cmd_status(status);
2004        }
2005
2006        vf_cfg->vlan_tag = vlan;
2007        return 0;
2008}
2009
2010static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2011                             int min_tx_rate, int max_tx_rate)
2012{
2013        struct be_adapter *adapter = netdev_priv(netdev);
2014        struct device *dev = &adapter->pdev->dev;
2015        int percent_rate, status = 0;
2016        u16 link_speed = 0;
2017        u8 link_status;
2018
2019        if (!sriov_enabled(adapter))
2020                return -EPERM;
2021
2022        if (vf >= adapter->num_vfs)
2023                return -EINVAL;
2024
2025        if (min_tx_rate)
2026                return -EINVAL;
2027
2028        if (!max_tx_rate)
2029                goto config_qos;
2030
2031        status = be_cmd_link_status_query(adapter, &link_speed,
2032                                          &link_status, 0);
2033        if (status)
2034                goto err;
2035
2036        if (!link_status) {
2037                dev_err(dev, "TX-rate setting not allowed when link is down\n");
2038                status = -ENETDOWN;
2039                goto err;
2040        }
2041
2042        if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2043                dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2044                        link_speed);
2045                status = -EINVAL;
2046                goto err;
2047        }
2048
2049        /* On Skyhawk the QoS setting must be specified as a percentage of the link speed */
2050        percent_rate = link_speed / 100;
2051        if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2052                dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2053                        percent_rate);
2054                status = -EINVAL;
2055                goto err;
2056        }
2057
2058config_qos:
2059        status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2060        if (status)
2061                goto err;
2062
2063        adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2064        return 0;
2065
2066err:
2067        dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2068                max_tx_rate, vf);
2069        return be_cmd_status(status);
2070}
2071
2072static int be_set_vf_link_state(struct net_device *netdev, int vf,
2073                                int link_state)
2074{
2075        struct be_adapter *adapter = netdev_priv(netdev);
2076        int status;
2077
2078        if (!sriov_enabled(adapter))
2079                return -EPERM;
2080
2081        if (vf >= adapter->num_vfs)
2082                return -EINVAL;
2083
2084        status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2085        if (status) {
2086                dev_err(&adapter->pdev->dev,
2087                        "Link state change on VF %d failed: %#x\n", vf, status);
2088                return be_cmd_status(status);
2089        }
2090
2091        adapter->vf_cfg[vf].plink_tracking = link_state;
2092
2093        return 0;
2094}
2095
2096static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2097{
2098        struct be_adapter *adapter = netdev_priv(netdev);
2099        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2100        u8 spoofchk;
2101        int status;
2102
2103        if (!sriov_enabled(adapter))
2104                return -EPERM;
2105
2106        if (vf >= adapter->num_vfs)
2107                return -EINVAL;
2108
2109        if (BEx_chip(adapter))
2110                return -EOPNOTSUPP;
2111
2112        if (enable == vf_cfg->spoofchk)
2113                return 0;
2114
2115        spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2116
2117        status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2118                                       0, spoofchk);
2119        if (status) {
2120                dev_err(&adapter->pdev->dev,
2121                        "Spoofchk change on VF %d failed: %#x\n", vf, status);
2122                return be_cmd_status(status);
2123        }
2124
2125        vf_cfg->spoofchk = enable;
2126        return 0;
2127}
2128
2129static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2130                          ulong now)
2131{
2132        aic->rx_pkts_prev = rx_pkts;
2133        aic->tx_reqs_prev = tx_pkts;
2134        aic->jiffies = now;
2135}
2136
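    /* Adaptive interrupt coalescing: derive a new EQ delay from the RX/TX
     * packet rate observed on this EQ since the last sample, clamped to the
     * [min_eqd, max_eqd] range configured for the EQ.
     */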
2137static int be_get_new_eqd(struct be_eq_obj *eqo)
2138{
2139        struct be_adapter *adapter = eqo->adapter;
2140        int eqd, start;
2141        struct be_aic_obj *aic;
2142        struct be_rx_obj *rxo;
2143        struct be_tx_obj *txo;
2144        u64 rx_pkts = 0, tx_pkts = 0;
2145        ulong now;
2146        u32 pps, delta;
2147        int i;
2148
2149        aic = &adapter->aic_obj[eqo->idx];
2150        if (!adapter->aic_enabled) {
2151                if (aic->jiffies)
2152                        aic->jiffies = 0;
2153                eqd = aic->et_eqd;
2154                return eqd;
2155        }
2156
2157        for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2158                do {
2159                        start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2160                        rx_pkts += rxo->stats.rx_pkts;
2161                } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2162        }
2163
2164        for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2165                do {
2166                        start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2167                        tx_pkts += txo->stats.tx_reqs;
2168                } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2169        }
2170
2171        /* Skip if the counters wrapped around, or on the first calculation */
2172        now = jiffies;
2173        if (!aic->jiffies || time_before(now, aic->jiffies) ||
2174            rx_pkts < aic->rx_pkts_prev ||
2175            tx_pkts < aic->tx_reqs_prev) {
2176                be_aic_update(aic, rx_pkts, tx_pkts, now);
2177                return aic->prev_eqd;
2178        }
2179
2180        delta = jiffies_to_msecs(now - aic->jiffies);
2181        if (delta == 0)
2182                return aic->prev_eqd;
2183
2184        pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2185                (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
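            /* Scale the packet rate into an EQ delay value,
             * e.g. 120,000 pkts/s -> (120000 / 15000) << 2 = 32.
             */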
2186        eqd = (pps / 15000) << 2;
2187
2188        if (eqd < 8)
2189                eqd = 0;
2190        eqd = min_t(u32, eqd, aic->max_eqd);
2191        eqd = max_t(u32, eqd, aic->min_eqd);
2192
2193        be_aic_update(aic, rx_pkts, tx_pkts, now);
2194
2195        return eqd;
2196}
2197
2198/* For Skyhawk-R only */
2199static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2200{
2201        struct be_adapter *adapter = eqo->adapter;
2202        struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2203        ulong now = jiffies;
2204        int eqd;
2205        u32 mult_enc;
2206
2207        if (!adapter->aic_enabled)
2208                return 0;
2209
2210        if (jiffies_to_msecs(now - aic->jiffies) < 1)
2211                eqd = aic->prev_eqd;
2212        else
2213                eqd = be_get_new_eqd(eqo);
2214
2215        if (eqd > 100)
2216                mult_enc = R2I_DLY_ENC_1;
2217        else if (eqd > 60)
2218                mult_enc = R2I_DLY_ENC_2;
2219        else if (eqd > 20)
2220                mult_enc = R2I_DLY_ENC_3;
2221        else
2222                mult_enc = R2I_DLY_ENC_0;
2223
2224        aic->prev_eqd = eqd;
2225
2226        return mult_enc;
2227}
2228
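    /* Recompute the EQ delay for every event queue and push any values that
     * changed (or all of them, if force_update) to the adapter in one command.
     */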
2229void be_eqd_update(struct be_adapter *adapter, bool force_update)
2230{
2231        struct be_set_eqd set_eqd[MAX_EVT_QS];
2232        struct be_aic_obj *aic;
2233        struct be_eq_obj *eqo;
2234        int i, num = 0, eqd;
2235
2236        for_all_evt_queues(adapter, eqo, i) {
2237                aic = &adapter->aic_obj[eqo->idx];
2238                eqd = be_get_new_eqd(eqo);
2239                if (force_update || eqd != aic->prev_eqd) {
2240                        set_eqd[num].delay_multiplier = (eqd * 65)/100;
2241                        set_eqd[num].eq_id = eqo->q.id;
2242                        aic->prev_eqd = eqd;
2243                        num++;
2244                }
2245        }
2246
2247        if (num)
2248                be_cmd_modify_eqd(adapter, set_eqd, num);
2249}
2250
2251static void be_rx_stats_update(struct be_rx_obj *rxo,
2252                               struct be_rx_compl_info *rxcp)
2253{
2254        struct be_rx_stats *stats = rx_stats(rxo);
2255
2256        u64_stats_update_begin(&stats->sync);
2257        stats->rx_compl++;
2258        stats->rx_bytes += rxcp->pkt_size;
2259        stats->rx_pkts++;
2260        if (rxcp->tunneled)
2261                stats->rx_vxlan_offload_pkts++;
2262        if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2263                stats->rx_mcast_pkts++;
2264        if (rxcp->err)
2265                stats->rx_compl_err++;
2266        u64_stats_update_end(&stats->sync);
2267}
2268
2269static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2270{
2271        /* The L4 checksum is not reliable for non-TCP/UDP packets.
2272         * Also ignore ipcksm for IPv6 packets.
2273         */
2274        return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2275                (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2276}
2277
2278static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2279{
2280        struct be_adapter *adapter = rxo->adapter;
2281        struct be_rx_page_info *rx_page_info;
2282        struct be_queue_info *rxq = &rxo->q;
2283        u32 frag_idx = rxq->tail;
2284
2285        rx_page_info = &rxo->page_info_tbl[frag_idx];
2286        BUG_ON(!rx_page_info->page);
2287
2288        if (rx_page_info->last_frag) {
2289                dma_unmap_page(&adapter->pdev->dev,
2290                               dma_unmap_addr(rx_page_info, bus),
2291                               adapter->big_page_size, DMA_FROM_DEVICE);
2292                rx_page_info->last_frag = false;
2293        } else {
2294                dma_sync_single_for_cpu(&adapter->pdev->dev,
2295                                        dma_unmap_addr(rx_page_info, bus),
2296                                        rx_frag_size, DMA_FROM_DEVICE);
2297        }
2298
2299        queue_tail_inc(rxq);
2300        atomic_dec(&rxq->used);
2301        return rx_page_info;
2302}
2303
2304/* Throw away the data in the Rx completion */
2305static void be_rx_compl_discard(struct be_rx_obj *rxo,
2306                                struct be_rx_compl_info *rxcp)
2307{
2308        struct be_rx_page_info *page_info;
2309        u16 i, num_rcvd = rxcp->num_rcvd;
2310
2311        for (i = 0; i < num_rcvd; i++) {
2312                page_info = get_rx_page_info(rxo);
2313                put_page(page_info->page);
2314                memset(page_info, 0, sizeof(*page_info));
2315        }
2316}
2317
2318/*
2319 * skb_fill_rx_data forms a complete skb for an ether frame
2320 * indicated by rxcp.
2321 */
2322static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2323                             struct be_rx_compl_info *rxcp)
2324{
2325        struct be_rx_page_info *page_info;
2326        u16 i, j;
2327        u16 hdr_len, curr_frag_len, remaining;
2328        u8 *start;
2329
2330        page_info = get_rx_page_info(rxo);
2331        start = page_address(page_info->page) + page_info->page_offset;
2332        prefetch(start);
2333
2334        /* Copy data in the first descriptor of this completion */
2335        curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2336
2337        skb->len = curr_frag_len;
2338        if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2339                memcpy(skb->data, start, curr_frag_len);
2340                /* Complete packet has now been moved to data */
2341                put_page(page_info->page);
2342                skb->data_len = 0;
2343                skb->tail += curr_frag_len;
2344        } else {
2345                hdr_len = ETH_HLEN;
2346                memcpy(skb->data, start, hdr_len);
2347                skb_shinfo(skb)->nr_frags = 1;
2348                skb_frag_set_page(skb, 0, page_info->page);
2349                skb_frag_off_set(&skb_shinfo(skb)->frags[0],
2350                                 page_info->page_offset + hdr_len);
2351                skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2352                                  curr_frag_len - hdr_len);
2353                skb->data_len = curr_frag_len - hdr_len;
2354                skb->truesize += rx_frag_size;
2355                skb->tail += hdr_len;
2356        }
2357        page_info->page = NULL;
2358
2359        if (rxcp->pkt_size <= rx_frag_size) {
2360                BUG_ON(rxcp->num_rcvd != 1);
2361                return;
2362        }
2363
2364        /* More frags present for this completion */
2365        remaining = rxcp->pkt_size - curr_frag_len;
2366        for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2367                page_info = get_rx_page_info(rxo);
2368                curr_frag_len = min(remaining, rx_frag_size);
2369
2370                /* Coalesce all frags from the same physical page in one slot */
2371                if (page_info->page_offset == 0) {
2372                        /* Fresh page */
2373                        j++;
2374                        skb_frag_set_page(skb, j, page_info->page);
2375                        skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2376                                         page_info->page_offset);
2377                        skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2378                        skb_shinfo(skb)->nr_frags++;
2379                } else {
2380                        put_page(page_info->page);
2381                }
2382
2383                skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2384                skb->len += curr_frag_len;
2385                skb->data_len += curr_frag_len;
2386                skb->truesize += rx_frag_size;
2387                remaining -= curr_frag_len;
2388                page_info->page = NULL;
2389        }
2390        BUG_ON(j > MAX_SKB_FRAGS);
2391}
2392
2393/* Process the RX completion indicated by rxcp when GRO is disabled */
2394static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2395                                struct be_rx_compl_info *rxcp)
2396{
2397        struct be_adapter *adapter = rxo->adapter;
2398        struct net_device *netdev = adapter->netdev;
2399        struct sk_buff *skb;
2400
2401        skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2402        if (unlikely(!skb)) {
2403                rx_stats(rxo)->rx_drops_no_skbs++;
2404                be_rx_compl_discard(rxo, rxcp);
2405                return;
2406        }
2407
2408        skb_fill_rx_data(rxo, skb, rxcp);
2409
2410        if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2411                skb->ip_summed = CHECKSUM_UNNECESSARY;
2412        else
2413                skb_checksum_none_assert(skb);
2414
2415        skb->protocol = eth_type_trans(skb, netdev);
2416        skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2417        if (netdev->features & NETIF_F_RXHASH)
2418                skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2419
2420        skb->csum_level = rxcp->tunneled;
2421        skb_mark_napi_id(skb, napi);
2422
2423        if (rxcp->vlanf)
2424                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2425
2426        netif_receive_skb(skb);
2427}
2428
2429/* Process the RX completion indicated by rxcp when GRO is enabled */
2430static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2431                                    struct napi_struct *napi,
2432                                    struct be_rx_compl_info *rxcp)
2433{
2434        struct be_adapter *adapter = rxo->adapter;
2435        struct be_rx_page_info *page_info;
2436        struct sk_buff *skb = NULL;
2437        u16 remaining, curr_frag_len;
2438        u16 i, j;
2439
2440        skb = napi_get_frags(napi);
2441        if (!skb) {
2442                be_rx_compl_discard(rxo, rxcp);
2443                return;
2444        }
2445
2446        remaining = rxcp->pkt_size;
2447        for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2448                page_info = get_rx_page_info(rxo);
2449
2450                curr_frag_len = min(remaining, rx_frag_size);
2451
2452                /* Coalesce all frags from the same physical page in one slot */
2453                if (i == 0 || page_info->page_offset == 0) {
2454                        /* First frag or Fresh page */
2455                        j++;
2456                        skb_frag_set_page(skb, j, page_info->page);
2457                        skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2458                                         page_info->page_offset);
2459                        skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2460                } else {
2461                        put_page(page_info->page);
2462                }
2463                skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2464                skb->truesize += rx_frag_size;
2465                remaining -= curr_frag_len;
2466                memset(page_info, 0, sizeof(*page_info));
2467        }
2468        BUG_ON(j > MAX_SKB_FRAGS);
2469
2470        skb_shinfo(skb)->nr_frags = j + 1;
2471        skb->len = rxcp->pkt_size;
2472        skb->data_len = rxcp->pkt_size;
2473        skb->ip_summed = CHECKSUM_UNNECESSARY;
2474        skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2475        if (adapter->netdev->features & NETIF_F_RXHASH)
2476                skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2477
2478        skb->csum_level = rxcp->tunneled;
2479
2480        if (rxcp->vlanf)
2481                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2482
2483        napi_gro_frags(napi);
2484}
2485
2486static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2487                                 struct be_rx_compl_info *rxcp)
2488{
2489        rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2490        rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2491        rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2492        rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2493        rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2494        rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2495        rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2496        rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2497        rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2498        rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2499        rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2500        if (rxcp->vlanf) {
2501                rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2502                rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2503        }
2504        rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2505        rxcp->tunneled =
2506                GET_RX_COMPL_V1_BITS(tunneled, compl);
2507}
2508
2509static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2510                                 struct be_rx_compl_info *rxcp)
2511{
2512        rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2513        rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2514        rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2515        rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2516        rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2517        rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2518        rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2519        rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2520        rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2521        rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2522        rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2523        if (rxcp->vlanf) {
2524                rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2525                rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2526        }
2527        rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2528        rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2529}
2530
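    /* Return the next valid RX completion on this queue, parsed into rxo->rxcp,
     * or NULL if none is pending. The completion's valid bit is cleared so it
     * is not processed again.
     */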
2531static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2532{
2533        struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2534        struct be_rx_compl_info *rxcp = &rxo->rxcp;
2535        struct be_adapter *adapter = rxo->adapter;
2536
2537        /* For checking the valid bit it is OK to use either definition, as the
2538         * valid bit is at the same position in both v0 and v1 Rx compls */
2539        if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2540                return NULL;
2541
2542        rmb();
2543        be_dws_le_to_cpu(compl, sizeof(*compl));
2544
2545        if (adapter->be3_native)
2546                be_parse_rx_compl_v1(compl, rxcp);
2547        else
2548                be_parse_rx_compl_v0(compl, rxcp);
2549
2550        if (rxcp->ip_frag)
2551                rxcp->l4_csum = 0;
2552
2553        if (rxcp->vlanf) {
2554                /* In QNQ modes, if qnq bit is not set, then the packet was
2555                 * tagged only with the transparent outer vlan-tag and must
2556                 * not be treated as a vlan packet by host
2557                 */
2558                if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2559                        rxcp->vlanf = 0;
2560
2561                if (!lancer_chip(adapter))
2562                        rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2563
2564                if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2565                    !test_bit(rxcp->vlan_tag, adapter->vids))
2566                        rxcp->vlanf = 0;
2567        }
2568
2569        /* As the compl has been parsed, reset it; we won't touch it again */
2570        compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2571
2572        queue_tail_inc(&rxo->cq);
2573        return rxcp;
2574}
2575
2576static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2577{
2578        u32 order = get_order(size);
2579
2580        if (order > 0)
2581                gfp |= __GFP_COMP;
2582        return  alloc_pages(gfp, order);
2583}
2584
2585/*
2586 * Allocate a page, split it into fragments of size rx_frag_size and post
2587 * them as receive buffers to BE
2588 */
2589static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2590{
2591        struct be_adapter *adapter = rxo->adapter;
2592        struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2593        struct be_queue_info *rxq = &rxo->q;
2594        struct page *pagep = NULL;
2595        struct device *dev = &adapter->pdev->dev;
2596        struct be_eth_rx_d *rxd;
2597        u64 page_dmaaddr = 0, frag_dmaaddr;
2598        u32 posted, page_offset = 0, notify = 0;
2599
2600        page_info = &rxo->page_info_tbl[rxq->head];
2601        for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2602                if (!pagep) {
2603                        pagep = be_alloc_pages(adapter->big_page_size, gfp);
2604                        if (unlikely(!pagep)) {
2605                                rx_stats(rxo)->rx_post_fail++;
2606                                break;
2607                        }
2608                        page_dmaaddr = dma_map_page(dev, pagep, 0,
2609                                                    adapter->big_page_size,
2610                                                    DMA_FROM_DEVICE);
2611                        if (dma_mapping_error(dev, page_dmaaddr)) {
2612                                put_page(pagep);
2613                                pagep = NULL;
2614                                adapter->drv_stats.dma_map_errors++;
2615                                break;
2616                        }
2617                        page_offset = 0;
2618                } else {
2619                        get_page(pagep);
2620                        page_offset += rx_frag_size;
2621                }
2622                page_info->page_offset = page_offset;
2623                page_info->page = pagep;
2624
2625                rxd = queue_head_node(rxq);
2626                frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2627                rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2628                rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2629
2630                /* Any space left in the current big page for another frag? */
2631                if ((page_offset + rx_frag_size + rx_frag_size) >
2632                                        adapter->big_page_size) {
2633                        pagep = NULL;
2634                        page_info->last_frag = true;
2635                        dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2636                } else {
2637                        dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2638                }
2639
2640                prev_page_info = page_info;
2641                queue_head_inc(rxq);
2642                page_info = &rxo->page_info_tbl[rxq->head];
2643        }
2644
2645        /* Mark the last frag of a page when we break out of the above loop
2646         * with no more slots available in the RXQ
2647         */
2648        if (pagep) {
2649                prev_page_info->last_frag = true;
2650                dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2651        }
2652
2653        if (posted) {
2654                atomic_add(posted, &rxq->used);
2655                if (rxo->rx_post_starved)
2656                        rxo->rx_post_starved = false;
2657                do {
2658                        notify = min(MAX_NUM_POST_ERX_DB, posted);
2659                        be_rxq_notify(adapter, rxq->id, notify);
2660                        posted -= notify;
2661                } while (posted);
2662        } else if (atomic_read(&rxq->used) == 0) {
2663                /* Let be_worker replenish when memory is available */
2664                rxo->rx_post_starved = true;
2665        }
2666}
2667
2668static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2669{
2670        switch (status) {
2671        case BE_TX_COMP_HDR_PARSE_ERR:
2672                tx_stats(txo)->tx_hdr_parse_err++;
2673                break;
2674        case BE_TX_COMP_NDMA_ERR:
2675                tx_stats(txo)->tx_dma_err++;
2676                break;
2677        case BE_TX_COMP_ACL_ERR:
2678                tx_stats(txo)->tx_spoof_check_err++;
2679                break;
2680        }
2681}
2682
2683static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2684{
2685        switch (status) {
2686        case LANCER_TX_COMP_LSO_ERR:
2687                tx_stats(txo)->tx_tso_err++;
2688                break;
2689        case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2690        case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2691                tx_stats(txo)->tx_spoof_check_err++;
2692                break;
2693        case LANCER_TX_COMP_QINQ_ERR:
2694                tx_stats(txo)->tx_qinq_err++;
2695                break;
2696        case LANCER_TX_COMP_PARITY_ERR:
2697                tx_stats(txo)->tx_internal_parity_err++;
2698                break;
2699        case LANCER_TX_COMP_DMA_ERR:
2700                tx_stats(txo)->tx_dma_err++;
2701                break;
2702        case LANCER_TX_COMP_SGE_ERR:
2703                tx_stats(txo)->tx_sge_err++;
2704                break;
2705        }
2706}
2707
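    /* Return the next valid TX completion on this queue, or NULL if none is
     * pending or a fatal TX error has been flagged on the adapter.
     */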
2708static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2709                                                struct be_tx_obj *txo)
2710{
2711        struct be_queue_info *tx_cq = &txo->cq;
2712        struct be_tx_compl_info *txcp = &txo->txcp;
2713        struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2714
2715        if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2716                return NULL;
2717
2718        /* Ensure load ordering of valid bit dword and other dwords below */
2719        rmb();
2720        be_dws_le_to_cpu(compl, sizeof(*compl));
2721
2722        txcp->status = GET_TX_COMPL_BITS(status, compl);
2723        txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2724
2725        if (txcp->status) {
2726                if (lancer_chip(adapter)) {
2727                        lancer_update_tx_err(txo, txcp->status);
2728                        /* Reset the adapter in case of TSO,
2729                         * SGE or parity errors
2730                         */
2731                        if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2732                            txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2733                            txcp->status == LANCER_TX_COMP_SGE_ERR)
2734                                be_set_error(adapter, BE_ERROR_TX);
2735                } else {
2736                        be_update_tx_err(txo, txcp->status);
2737                }
2738        }
2739
2740        if (be_check_error(adapter, BE_ERROR_TX))
2741                return NULL;
2742
2743        compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2744        queue_tail_inc(tx_cq);
2745        return txcp;
2746}
2747
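    /* Unmap and free the skb(s) completed up to last_index and return the
     * number of WRBs (header + fragments) that were reclaimed.
     */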
2748static u16 be_tx_compl_process(struct be_adapter *adapter,
2749                               struct be_tx_obj *txo, u16 last_index)
2750{
2751        struct sk_buff **sent_skbs = txo->sent_skb_list;
2752        struct be_queue_info *txq = &txo->q;
2753        struct sk_buff *skb = NULL;
2754        bool unmap_skb_hdr = false;
2755        struct be_eth_wrb *wrb;
2756        u16 num_wrbs = 0;
2757        u32 frag_index;
2758
2759        do {
2760                if (sent_skbs[txq->tail]) {
2761                        /* Free skb from prev req */
2762                        if (skb)
2763                                dev_consume_skb_any(skb);
2764                        skb = sent_skbs[txq->tail];
2765                        sent_skbs[txq->tail] = NULL;
2766                        queue_tail_inc(txq);  /* skip hdr wrb */
2767                        num_wrbs++;
2768                        unmap_skb_hdr = true;
2769                }
2770                wrb = queue_tail_node(txq);
2771                frag_index = txq->tail;
2772                unmap_tx_frag(&adapter->pdev->dev, wrb,
2773                              (unmap_skb_hdr && skb_headlen(skb)));
2774                unmap_skb_hdr = false;
2775                queue_tail_inc(txq);
2776                num_wrbs++;
2777        } while (frag_index != last_index);
2778        dev_consume_skb_any(skb);
2779
2780        return num_wrbs;
2781}
2782
2783/* Return the number of events in the event queue */
2784static inline int events_get(struct be_eq_obj *eqo)
2785{
2786        struct be_eq_entry *eqe;
2787        int num = 0;
2788
2789        do {
2790                eqe = queue_tail_node(&eqo->q);
2791                if (eqe->evt == 0)
2792                        break;
2793
2794                rmb();
2795                eqe->evt = 0;
2796                num++;
2797                queue_tail_inc(&eqo->q);
2798        } while (true);
2799
2800        return num;
2801}
2802
2803/* Leaves the EQ in a disarmed state */
2804static void be_eq_clean(struct be_eq_obj *eqo)
2805{
2806        int num = events_get(eqo);
2807
2808        be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2809}
2810
2811/* Free posted rx buffers that were not used */
2812static void be_rxq_clean(struct be_rx_obj *rxo)
2813{
2814        struct be_queue_info *rxq = &rxo->q;
2815        struct be_rx_page_info *page_info;
2816
2817        while (atomic_read(&rxq->used) > 0) {
2818                page_info = get_rx_page_info(rxo);
2819                put_page(page_info->page);
2820                memset(page_info, 0, sizeof(*page_info));
2821        }
2822        BUG_ON(atomic_read(&rxq->used));
2823        rxq->tail = 0;
2824        rxq->head = 0;
2825}
2826
2827static void be_rx_cq_clean(struct be_rx_obj *rxo)
2828{
2829        struct be_queue_info *rx_cq = &rxo->cq;
2830        struct be_rx_compl_info *rxcp;
2831        struct be_adapter *adapter = rxo->adapter;
2832        int flush_wait = 0;
2833
2834        /* Consume pending rx completions.
2835         * Wait for the flush completion (identified by zero num_rcvd)
2836         * to arrive. Notify CQ even when there are no more CQ entries
2837         * for HW to flush partially coalesced CQ entries.
2838         * In Lancer, there is no need to wait for flush compl.
2839         */
2840        for (;;) {
2841                rxcp = be_rx_compl_get(rxo);
2842                if (!rxcp) {
2843                        if (lancer_chip(adapter))
2844                                break;
2845
2846                        if (flush_wait++ > 50 ||
2847                            be_check_error(adapter,
2848                                           BE_ERROR_HW)) {
2849                                dev_warn(&adapter->pdev->dev,
2850                                         "did not receive flush compl\n");
2851                                break;
2852                        }
2853                        be_cq_notify(adapter, rx_cq->id, true, 0);
2854                        mdelay(1);
2855                } else {
2856                        be_rx_compl_discard(rxo, rxcp);
2857                        be_cq_notify(adapter, rx_cq->id, false, 1);
2858                        if (rxcp->num_rcvd == 0)
2859                                break;
2860                }
2861        }
2862
2863        /* After cleanup, leave the CQ in unarmed state */
2864        be_cq_notify(adapter, rx_cq->id, false, 0);
2865}
2866
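/* Drain TX completions on all TXQs until the HW has been silent for
 * ~10ms (or a HW error is detected), then free any wrbs that were
 * queued but never notified to the HW and rewind the TXQ indices.
 */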
2867static void be_tx_compl_clean(struct be_adapter *adapter)
2868{
2869        struct device *dev = &adapter->pdev->dev;
2870        u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2871        struct be_tx_compl_info *txcp;
2872        struct be_queue_info *txq;
2873        u32 end_idx, notified_idx;
2874        struct be_tx_obj *txo;
2875        int i, pending_txqs;
2876
2877        /* Stop polling for compls when HW has been silent for 10ms */
2878        do {
2879                pending_txqs = adapter->num_tx_qs;
2880
2881                for_all_tx_queues(adapter, txo, i) {
2882                        cmpl = 0;
2883                        num_wrbs = 0;
2884                        txq = &txo->q;
2885                        while ((txcp = be_tx_compl_get(adapter, txo))) {
2886                                num_wrbs +=
2887                                        be_tx_compl_process(adapter, txo,
2888                                                            txcp->end_index);
2889                                cmpl++;
2890                        }
2891                        if (cmpl) {
2892                                be_cq_notify(adapter, txo->cq.id, false, cmpl);
2893                                atomic_sub(num_wrbs, &txq->used);
2894                                timeo = 0;
2895                        }
2896                        if (!be_is_tx_compl_pending(txo))
2897                                pending_txqs--;
2898                }
2899
2900                if (pending_txqs == 0 || ++timeo > 10 ||
2901                    be_check_error(adapter, BE_ERROR_HW))
2902                        break;
2903
2904                mdelay(1);
2905        } while (true);
2906
2907        /* Free enqueued TX that was never notified to HW */
2908        for_all_tx_queues(adapter, txo, i) {
2909                txq = &txo->q;
2910
2911                if (atomic_read(&txq->used)) {
2912                        dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2913                                 i, atomic_read(&txq->used));
2914                        notified_idx = txq->tail;
2915                        end_idx = txq->tail;
2916                        index_adv(&end_idx, atomic_read(&txq->used) - 1,
2917                                  txq->len);
2918                        /* Use the tx-compl process logic to handle requests
2919                         * that were not sent to the HW.
2920                         */
2921                        num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2922                        atomic_sub(num_wrbs, &txq->used);
2923                        BUG_ON(atomic_read(&txq->used));
2924                        txo->pend_wrb_cnt = 0;
2925                        /* Since hw was never notified of these requests,
2926                         * reset TXQ indices
2927                         */
2928                        txq->head = notified_idx;
2929                        txq->tail = notified_idx;
2930                }
2931        }
2932}
2933
2934static void be_evt_queues_destroy(struct be_adapter *adapter)
2935{
2936        struct be_eq_obj *eqo;
2937        int i;
2938
2939        for_all_evt_queues(adapter, eqo, i) {
2940                if (eqo->q.created) {
2941                        be_eq_clean(eqo);
2942                        be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2943                        netif_napi_del(&eqo->napi);
2944                        free_cpumask_var(eqo->affinity_mask);
2945                }
2946                be_queue_free(adapter, &eqo->q);
2947        }
2948}
2949
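/* Allocate and create the event queues: one EQ per interrupt, enough to
 * cover both the RX and TX rings. Each EQ gets adaptive interrupt
 * coalescing state, a NUMA-aware CPU affinity hint and a NAPI context.
 */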
2950static int be_evt_queues_create(struct be_adapter *adapter)
2951{
2952        struct be_queue_info *eq;
2953        struct be_eq_obj *eqo;
2954        struct be_aic_obj *aic;
2955        int i, rc;
2956
2957        /* need enough EQs to service both RX and TX queues */
2958        adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2959                                    max(adapter->cfg_num_rx_irqs,
2960                                        adapter->cfg_num_tx_irqs));
2961
2962        adapter->aic_enabled = true;
2963
2964        for_all_evt_queues(adapter, eqo, i) {
2965                int numa_node = dev_to_node(&adapter->pdev->dev);
2966
2967                aic = &adapter->aic_obj[i];
2968                eqo->adapter = adapter;
2969                eqo->idx = i;
2970                aic->max_eqd = BE_MAX_EQD;
2971
2972                eq = &eqo->q;
2973                rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2974                                    sizeof(struct be_eq_entry));
2975                if (rc)
2976                        return rc;
2977
2978                rc = be_cmd_eq_create(adapter, eqo);
2979                if (rc)
2980                        return rc;
2981
2982                if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2983                        return -ENOMEM;
2984                cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2985                                eqo->affinity_mask);
2986                netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2987                               BE_NAPI_WEIGHT);
2988        }
2989        return 0;
2990}
2991
2992static void be_mcc_queues_destroy(struct be_adapter *adapter)
2993{
2994        struct be_queue_info *q;
2995
2996        q = &adapter->mcc_obj.q;
2997        if (q->created)
2998                be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2999        be_queue_free(adapter, q);
3000
3001        q = &adapter->mcc_obj.cq;
3002        if (q->created)
3003                be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3004        be_queue_free(adapter, q);
3005}
3006
3007/* Must be called only after TX qs are created as MCC shares TX EQ */
3008static int be_mcc_queues_create(struct be_adapter *adapter)
3009{
3010        struct be_queue_info *q, *cq;
3011
3012        cq = &adapter->mcc_obj.cq;
3013        if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3014                           sizeof(struct be_mcc_compl)))
3015                goto err;
3016
3017        /* Use the default EQ for MCC completions */
3018        if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3019                goto mcc_cq_free;
3020
3021        q = &adapter->mcc_obj.q;
3022        if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3023                goto mcc_cq_destroy;
3024
3025        if (be_cmd_mccq_create(adapter, q, cq))
3026                goto mcc_q_free;
3027
3028        return 0;
3029
3030mcc_q_free:
3031        be_queue_free(adapter, q);
3032mcc_cq_destroy:
3033        be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3034mcc_cq_free:
3035        be_queue_free(adapter, cq);
3036err:
3037        return -1;
3038}
3039
3040static void be_tx_queues_destroy(struct be_adapter *adapter)
3041{
3042        struct be_queue_info *q;
3043        struct be_tx_obj *txo;
3044        u8 i;
3045
3046        for_all_tx_queues(adapter, txo, i) {
3047                q = &txo->q;
3048                if (q->created)
3049                        be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3050                be_queue_free(adapter, q);
3051
3052                q = &txo->cq;
3053                if (q->created)
3054                        be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3055                be_queue_free(adapter, q);
3056        }
3057}
3058
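/* Create a completion queue and a TX ring per TX queue. TX CQs are
 * spread across the available EQs round-robin, and XPS is configured
 * using the affinity mask of the EQ servicing each ring.
 */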
3059static int be_tx_qs_create(struct be_adapter *adapter)
3060{
3061        struct be_queue_info *cq;
3062        struct be_tx_obj *txo;
3063        struct be_eq_obj *eqo;
3064        int status, i;
3065
3066        adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3067
3068        for_all_tx_queues(adapter, txo, i) {
3069                cq = &txo->cq;
3070                status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3071                                        sizeof(struct be_eth_tx_compl));
3072                if (status)
3073                        return status;
3074
3075                u64_stats_init(&txo->stats.sync);
3076                u64_stats_init(&txo->stats.sync_compl);
3077
3078                /* If num_evt_qs is less than num_tx_qs, then more than
3079                 * one txq shares an eq
3080                 */
3081                eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3082                status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3083                if (status)
3084                        return status;
3085
3086                status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3087                                        sizeof(struct be_eth_wrb));
3088                if (status)
3089                        return status;
3090
3091                status = be_cmd_txq_create(adapter, txo);
3092                if (status)
3093                        return status;
3094
3095                netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3096                                    eqo->idx);
3097        }
3098
3099        dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3100                 adapter->num_tx_qs);
3101        return 0;
3102}
3103
3104static void be_rx_cqs_destroy(struct be_adapter *adapter)
3105{
3106        struct be_queue_info *q;
3107        struct be_rx_obj *rxo;
3108        int i;
3109
3110        for_all_rx_queues(adapter, rxo, i) {
3111                q = &rxo->cq;
3112                if (q->created)
3113                        be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3114                be_queue_free(adapter, q);
3115        }
3116}
3117
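/* Decide how many RX rings (RSS rings plus an optional default RXQ) to
 * use and create a completion queue for each; the RXQs themselves are
 * created later, in the open path (be_rx_qs_create()).
 */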
3118static int be_rx_cqs_create(struct be_adapter *adapter)
3119{
3120        struct be_queue_info *eq, *cq;
3121        struct be_rx_obj *rxo;
3122        int rc, i;
3123
3124        adapter->num_rss_qs =
3125                        min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3126
3127        /* We'll use RSS only if at least 2 RSS rings are supported. */
3128        if (adapter->num_rss_qs < 2)
3129                adapter->num_rss_qs = 0;
3130
3131        adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3132
3133        /* When the interface is not capable of RSS rings (and there is no
3134         * need to create a default RXQ) we'll still need one RXQ
3135         */
3136        if (adapter->num_rx_qs == 0)
3137                adapter->num_rx_qs = 1;
3138
3139        adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3140        for_all_rx_queues(adapter, rxo, i) {
3141                rxo->adapter = adapter;
3142                cq = &rxo->cq;
3143                rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3144                                    sizeof(struct be_eth_rx_compl));
3145                if (rc)
3146                        return rc;
3147
3148                u64_stats_init(&rxo->stats.sync);
3149                eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3150                rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3151                if (rc)
3152                        return rc;
3153        }
3154
3155        dev_info(&adapter->pdev->dev,
3156                 "created %d RX queue(s)\n", adapter->num_rx_qs);
3157        return 0;
3158}
3159
3160static irqreturn_t be_intx(int irq, void *dev)
3161{
3162        struct be_eq_obj *eqo = dev;
3163        struct be_adapter *adapter = eqo->adapter;
3164        int num_evts = 0;
3165
3166        /* IRQ is not expected when NAPI is scheduled as the EQ
3167         * will not be armed.
3168         * But this can happen on Lancer INTx where it takes
3169         * a while to de-assert INTx or in BE2 where occasionally
3170         * an interrupt may be raised even when EQ is unarmed.
3171         * If NAPI is already scheduled, then counting & notifying
3172         * events will orphan them.
3173         */
3174        if (napi_schedule_prep(&eqo->napi)) {
3175                num_evts = events_get(eqo);
3176                __napi_schedule(&eqo->napi);
3177                if (num_evts)
3178                        eqo->spurious_intr = 0;
3179        }
3180        be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3181
3182        /* Return IRQ_HANDLED only for the first spurious intr
3183         * after a valid intr to stop the kernel from branding
3184         * this irq as a bad one!
3185         */
3186        if (num_evts || eqo->spurious_intr++ == 0)
3187                return IRQ_HANDLED;
3188        else
3189                return IRQ_NONE;
3190}
3191
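/* MSI-X handler: ack the interrupt without re-arming the EQ and hand
 * over to NAPI; events are counted and consumed in be_poll().
 */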
3192static irqreturn_t be_msix(int irq, void *dev)
3193{
3194        struct be_eq_obj *eqo = dev;
3195
3196        be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3197        napi_schedule(&eqo->napi);
3198        return IRQ_HANDLED;
3199}
3200
3201static inline bool do_gro(struct be_rx_compl_info *rxcp)
3202{
3203        return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3204}
3205
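/* Consume up to @budget RX completions on this RX ring: drop flush and
 * partial/stray completions, pass good packets up via GRO or the regular
 * receive path, and replenish RX fragments when the ring runs low.
 */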
3206static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3207                         int budget)
3208{
3209        struct be_adapter *adapter = rxo->adapter;
3210        struct be_queue_info *rx_cq = &rxo->cq;
3211        struct be_rx_compl_info *rxcp;
3212        u32 work_done;
3213        u32 frags_consumed = 0;
3214
3215        for (work_done = 0; work_done < budget; work_done++) {
3216                rxcp = be_rx_compl_get(rxo);
3217                if (!rxcp)
3218                        break;
3219
3220                /* Is it a flush compl that has no data */
3221                if (unlikely(rxcp->num_rcvd == 0))
3222                        goto loop_continue;
3223
3224                /* Discard compl with partial DMA Lancer B0 */
3225                if (unlikely(!rxcp->pkt_size)) {
3226                        be_rx_compl_discard(rxo, rxcp);
3227                        goto loop_continue;
3228                }
3229
3230                /* On BE, drop pkts that arrive due to imperfect filtering in
3231                 * promiscuous mode on some SKUs
3232                 */
3233                if (unlikely(rxcp->port != adapter->port_num &&
3234                             !lancer_chip(adapter))) {
3235                        be_rx_compl_discard(rxo, rxcp);
3236                        goto loop_continue;
3237                }
3238
3239                if (do_gro(rxcp))
3240                        be_rx_compl_process_gro(rxo, napi, rxcp);
3241                else
3242                        be_rx_compl_process(rxo, napi, rxcp);
3243
3244loop_continue:
3245                frags_consumed += rxcp->num_rcvd;
3246                be_rx_stats_update(rxo, rxcp);
3247        }
3248
3249        if (work_done) {
3250                be_cq_notify(adapter, rx_cq->id, true, work_done);
3251
3252                /* When an rx-obj gets into post_starved state, just
3253                 * let be_worker do the posting.
3254                 */
3255                if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3256                    !rxo->rx_post_starved)
3257                        be_post_rx_frags(rxo, GFP_ATOMIC,
3258                                         max_t(u32, MAX_RX_POST,
3259                                               frags_consumed));
3260        }
3261
3262        return work_done;
3263}
3264
3265
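/* Reap TX completions for one TX ring, free the associated wrbs and
 * wake the netdev subqueue if it was stopped for lack of wrbs.
 */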
3266static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3267                          int idx)
3268{
3269        int num_wrbs = 0, work_done = 0;
3270        struct be_tx_compl_info *txcp;
3271
3272        while ((txcp = be_tx_compl_get(adapter, txo))) {
3273                num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3274                work_done++;
3275        }
3276
3277        if (work_done) {
3278                be_cq_notify(adapter, txo->cq.id, true, work_done);
3279                atomic_sub(num_wrbs, &txo->q.used);
3280
3281                /* As Tx wrbs have been freed up, wake up netdev queue
3282                 * if it was stopped due to lack of tx wrbs. */
3283                if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3284                    be_can_txq_wake(txo)) {
3285                        netif_wake_subqueue(adapter->netdev, idx);
3286                }
3287
3288                u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3289                tx_stats(txo)->tx_compl += work_done;
3290                u64_stats_update_end(&tx_stats(txo)->sync_compl);
3291        }
3292}
3293
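/* NAPI handler shared by all EQs: service the TX and RX rings mapped to
 * this EQ (and the MCC queue on the EQ that owns it), then either re-arm
 * the EQ when under budget or stay in polling mode.
 */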
3294int be_poll(struct napi_struct *napi, int budget)
3295{
3296        struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3297        struct be_adapter *adapter = eqo->adapter;
3298        int max_work = 0, work, i, num_evts;
3299        struct be_rx_obj *rxo;
3300        struct be_tx_obj *txo;
3301        u32 mult_enc = 0;
3302
3303        num_evts = events_get(eqo);
3304
3305        for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3306                be_process_tx(adapter, txo, i);
3307
3308        /* This loop will iterate twice for EQ0 in which
3309         * completions of the last RXQ (default one) are also processed.
3310         * For other EQs, the loop iterates only once
3311         */
3312        for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3313                work = be_process_rx(rxo, napi, budget);
3314                max_work = max(work, max_work);
3315        }
3316
3317        if (is_mcc_eqo(eqo))
3318                be_process_mcc(adapter);
3319
3320        if (max_work < budget) {
3321                napi_complete_done(napi, max_work);
3322
3323                /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3324                 * delay via a delay multiplier encoding value
3325                 */
3326                if (skyhawk_chip(adapter))
3327                        mult_enc = be_get_eq_delay_mult_enc(eqo);
3328
3329                be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3330                             mult_enc);
3331        } else {
3332                /* As we'll continue in polling mode, count and clear events */
3333                be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3334        }
3335        return max_work;
3336}
3337
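/* Check the adapter for unrecoverable errors: the SLIPORT status
 * registers on Lancer, or the unmasked UE status bits on other chips
 * (filtering out UEs that BE3 can report spuriously), and mark the
 * adapter as being in HW error state when one is found.
 */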
3338void be_detect_error(struct be_adapter *adapter)
3339{
3340        u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3341        u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3342        struct device *dev = &adapter->pdev->dev;
3343        u16 val;
3344        u32 i;
3345
3346        if (be_check_error(adapter, BE_ERROR_HW))
3347                return;
3348
3349        if (lancer_chip(adapter)) {
3350                sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3351                if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3352                        be_set_error(adapter, BE_ERROR_UE);
3353                        sliport_err1 = ioread32(adapter->db +
3354                                                SLIPORT_ERROR1_OFFSET);
3355                        sliport_err2 = ioread32(adapter->db +
3356                                                SLIPORT_ERROR2_OFFSET);
3357                        /* Do not log error messages if it's a FW reset */
3358                        if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3359                            sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3360                                dev_info(dev, "Reset is in progress\n");
3361                        } else {
3362                                dev_err(dev, "Error detected in the card\n");
3363                                dev_err(dev, "ERR: sliport status 0x%x\n",
3364                                        sliport_status);
3365                                dev_err(dev, "ERR: sliport error1 0x%x\n",
3366                                        sliport_err1);
3367                                dev_err(dev, "ERR: sliport error2 0x%x\n",
3368                                        sliport_err2);
3369                        }
3370                }
3371        } else {
3372                ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3373                ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3374                ue_lo_mask = ioread32(adapter->pcicfg +
3375                                      PCICFG_UE_STATUS_LOW_MASK);
3376                ue_hi_mask = ioread32(adapter->pcicfg +
3377                                      PCICFG_UE_STATUS_HI_MASK);
3378
3379                ue_lo = (ue_lo & ~ue_lo_mask);
3380                ue_hi = (ue_hi & ~ue_hi_mask);
3381
3382                if (ue_lo || ue_hi) {
3383                        /* On certain platforms BE3 hardware can indicate
3384                         * spurious UEs. In case of a UE in the chip,
3385                         * the POST register correctly reports either a
3386                         * FAT_LOG_START state (FW is currently dumping
3387                         * FAT log data) or an ARMFW_UE state. Check for the
3388                         * above states to ascertain if the UE is valid or not.
3389                         */
3390                        if (BE3_chip(adapter)) {
3391                                val = be_POST_stage_get(adapter);
3392                                if ((val & POST_STAGE_FAT_LOG_START)
3393                                     != POST_STAGE_FAT_LOG_START &&
3394                                    (val & POST_STAGE_ARMFW_UE)
3395                                     != POST_STAGE_ARMFW_UE &&
3396                                    (val & POST_STAGE_RECOVERABLE_ERR)
3397                                     != POST_STAGE_RECOVERABLE_ERR)
3398                                        return;
3399                        }
3400
3401                        dev_err(dev, "Error detected in the adapter\n");
3402                        be_set_error(adapter, BE_ERROR_UE);
3403
3404                        for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3405                                if (ue_lo & 1)
3406                                        dev_err(dev, "UE: %s bit set\n",
3407                                                ue_status_low_desc[i]);
3408                        }
3409                        for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3410                                if (ue_hi & 1)
3411                                        dev_err(dev, "UE: %s bit set\n",
3412                                                ue_status_hi_desc[i]);
3413                        }
3414                }
3415        }
3416}
3417
3418static void be_msix_disable(struct be_adapter *adapter)
3419{
3420        if (msix_enabled(adapter)) {
3421                pci_disable_msix(adapter->pdev);
3422                adapter->num_msix_vec = 0;
3423                adapter->num_msix_roce_vec = 0;
3424        }
3425}
3426
3427static int be_msix_enable(struct be_adapter *adapter)
3428{
3429        unsigned int i, max_roce_eqs;
3430        struct device *dev = &adapter->pdev->dev;
3431        int num_vec;
3432
3433        /* If RoCE is supported, program the max number of vectors that
3434         * could be used for NIC and RoCE, else, just program the number
3435         * we'll use initially.
3436         */
3437        if (be_roce_supported(adapter)) {
3438                max_roce_eqs =
3439                        be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3440                max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3441                num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3442        } else {
3443                num_vec = max(adapter->cfg_num_rx_irqs,
3444                              adapter->cfg_num_tx_irqs);
3445        }
3446
3447        for (i = 0; i < num_vec; i++)
3448                adapter->msix_entries[i].entry = i;
3449
3450        num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3451                                        MIN_MSIX_VECTORS, num_vec);
3452        if (num_vec < 0)
3453                goto fail;
3454
3455        if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3456                adapter->num_msix_roce_vec = num_vec / 2;
3457                dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3458                         adapter->num_msix_roce_vec);
3459        }
3460
3461        adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3462
3463        dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3464                 adapter->num_msix_vec);
3465        return 0;
3466
3467fail:
3468        dev_warn(dev, "MSIx enable failed\n");
3469
3470        /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3471        if (be_virtfn(adapter))
3472                return num_vec;
3473        return 0;
3474}
3475
3476static inline int be_msix_vec_get(struct be_adapter *adapter,
3477                                  struct be_eq_obj *eqo)
3478{
3479        return adapter->msix_entries[eqo->msix_idx].vector;
3480}
3481
3482static int be_msix_register(struct be_adapter *adapter)
3483{
3484        struct net_device *netdev = adapter->netdev;
3485        struct be_eq_obj *eqo;
3486        int status, i, vec;
3487
3488        for_all_evt_queues(adapter, eqo, i) {
3489                sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3490                vec = be_msix_vec_get(adapter, eqo);
3491                status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3492                if (status)
3493                        goto err_msix;
3494
3495                irq_set_affinity_hint(vec, eqo->affinity_mask);
3496        }
3497
3498        return 0;
3499err_msix:
3500        for (i--; i >= 0; i--) {
3501                eqo = &adapter->eq_obj[i];
3502                free_irq(be_msix_vec_get(adapter, eqo), eqo);
3503        }
3504        dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3505                 status);
3506        be_msix_disable(adapter);
3507        return status;
3508}
3509
3510static int be_irq_register(struct be_adapter *adapter)
3511{
3512        struct net_device *netdev = adapter->netdev;
3513        int status;
3514
3515        if (msix_enabled(adapter)) {
3516                status = be_msix_register(adapter);
3517                if (status == 0)
3518                        goto done;
3519                /* INTx is not supported for VF */
3520                if (be_virtfn(adapter))
3521                        return status;
3522        }
3523
3524        /* INTx: only the first EQ is used */
3525        netdev->irq = adapter->pdev->irq;
3526        status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3527                             &adapter->eq_obj[0]);
3528        if (status) {
3529                dev_err(&adapter->pdev->dev,
3530                        "INTx request IRQ failed - err %d\n", status);
3531                return status;
3532        }
3533done:
3534        adapter->isr_registered = true;
3535        return 0;
3536}
3537
3538static void be_irq_unregister(struct be_adapter *adapter)
3539{
3540        struct net_device *netdev = adapter->netdev;
3541        struct be_eq_obj *eqo;
3542        int i, vec;
3543
3544        if (!adapter->isr_registered)
3545                return;
3546
3547        /* INTx */
3548        if (!msix_enabled(adapter)) {
3549                free_irq(netdev->irq, &adapter->eq_obj[0]);
3550                goto done;
3551        }
3552
3553        /* MSIx */
3554        for_all_evt_queues(adapter, eqo, i) {
3555                vec = be_msix_vec_get(adapter, eqo);
3556                irq_set_affinity_hint(vec, NULL);
3557                free_irq(vec, eqo);
3558        }
3559
3560done:
3561        adapter->isr_registered = false;
3562}
3563
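/* Destroy the RX rings: on Lancer first re-post buffers to avoid an HW
 * stall, then destroy each RXQ, drain its CQ, free the unused buffers
 * and finally disable RSS.
 */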
3564static void be_rx_qs_destroy(struct be_adapter *adapter)
3565{
3566        struct rss_info *rss = &adapter->rss_info;
3567        struct be_queue_info *q;
3568        struct be_rx_obj *rxo;
3569        int i;
3570
3571        for_all_rx_queues(adapter, rxo, i) {
3572                q = &rxo->q;
3573                if (q->created) {
3574                        /* If RXQs are destroyed while in an "out of buffer"
3575                         * state, there is a possibility of an HW stall on
3576                         * Lancer. So, post 64 buffers to each queue to relieve
3577                         * the "out of buffer" condition.
3578                         * Make sure there's space in the RXQ before posting.
3579                         */
3580                        if (lancer_chip(adapter)) {
3581                                be_rx_cq_clean(rxo);
3582                                if (atomic_read(&q->used) == 0)
3583                                        be_post_rx_frags(rxo, GFP_KERNEL,
3584                                                         MAX_RX_POST);
3585                        }
3586
3587                        be_cmd_rxq_destroy(adapter, q);
3588                        be_rx_cq_clean(rxo);
3589                        be_rxq_clean(rxo);
3590                }
3591                be_queue_free(adapter, q);
3592        }
3593
3594        if (rss->rss_flags) {
3595                rss->rss_flags = RSS_ENABLE_NONE;
3596                be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3597                                  128, rss->rss_hkey);
3598        }
3599}
3600
3601static void be_disable_if_filters(struct be_adapter *adapter)
3602{
3603        /* Don't delete MAC on BE3 VFs without FILTMGMT privilege */
3604        if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3605            check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3606                be_dev_mac_del(adapter, adapter->pmac_id[0]);
3607                eth_zero_addr(adapter->dev_mac);
3608        }
3609
3610        be_clear_uc_list(adapter);
3611        be_clear_mc_list(adapter);
3612
3613        /* The IFACE flags are enabled in the open path and cleared
3614         * in the close path. When a VF gets detached from the host and
3615         * assigned to a VM the following happens:
3616         *      - VF's IFACE flags get cleared in the detach path
3617         *      - IFACE create is issued by the VF in the attach path
3618         * Due to a bug in the BE3/Skyhawk-R FW
3619         * (Lancer FW doesn't have the bug), the IFACE capability flags
3620         * specified along with the IFACE create cmd issued by a VF are not
3621         * honoured by FW.  As a consequence, if a *new* driver
3622         * honoured by FW. As a consequence, if a *new* driver
3623         * (that enables/disables IFACE flags in open/close)
3624         * is loaded in the host and an *old* driver is used by a VM/VF,
3625         * To avoid this, disable RX-filter flags only for Lancer.
3626         */
3627        if (lancer_chip(adapter)) {
3628                be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3629                adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3630        }
3631}
3632
3633static int be_close(struct net_device *netdev)
3634{
3635        struct be_adapter *adapter = netdev_priv(netdev);
3636        struct be_eq_obj *eqo;
3637        int i;
3638
3639        /* This protection is needed as be_close() may be called even when the
3640         * adapter is in cleared state (after eeh perm failure)
3641         */
3642        if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3643                return 0;
3644
3645        /* Before attempting cleanup ensure all the pending cmds in the
3646         * config_wq have finished execution
3647         */
3648        flush_workqueue(be_wq);
3649
3650        be_disable_if_filters(adapter);
3651
3652        if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3653                for_all_evt_queues(adapter, eqo, i) {
3654                        napi_disable(&eqo->napi);
3655                }
3656                adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3657        }
3658
3659        be_async_mcc_disable(adapter);
3660
3661        /* Wait for all pending tx completions to arrive so that
3662         * all tx skbs are freed.
3663         */
3664        netif_tx_disable(netdev);
3665        be_tx_compl_clean(adapter);
3666
3667        be_rx_qs_destroy(adapter);
3668
3669        for_all_evt_queues(adapter, eqo, i) {
3670                if (msix_enabled(adapter))
3671                        synchronize_irq(be_msix_vec_get(adapter, eqo));
3672                else
3673                        synchronize_irq(netdev->irq);
3674                be_eq_clean(eqo);
3675        }
3676
3677        be_irq_unregister(adapter);
3678
3679        return 0;
3680}
3681
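/* Create the RX rings (default RXQ and/or RSS rings), program the RSS
 * indirection table and hash key when more than one ring is in use, and
 * post receive buffers to every ring.
 */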
3682static int be_rx_qs_create(struct be_adapter *adapter)
3683{
3684        struct rss_info *rss = &adapter->rss_info;
3685        u8 rss_key[RSS_HASH_KEY_LEN];
3686        struct be_rx_obj *rxo;
3687        int rc, i, j;
3688
3689        for_all_rx_queues(adapter, rxo, i) {
3690                rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3691                                    sizeof(struct be_eth_rx_d));
3692                if (rc)
3693                        return rc;
3694        }
3695
3696        if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3697                rxo = default_rxo(adapter);
3698                rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3699                                       rx_frag_size, adapter->if_handle,
3700                                       false, &rxo->rss_id);
3701                if (rc)
3702                        return rc;
3703        }
3704
3705        for_all_rss_queues(adapter, rxo, i) {
3706                rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3707                                       rx_frag_size, adapter->if_handle,
3708                                       true, &rxo->rss_id);
3709                if (rc)
3710                        return rc;
3711        }
3712
3713        if (be_multi_rxq(adapter)) {
3714                for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3715                        for_all_rss_queues(adapter, rxo, i) {
3716                                if ((j + i) >= RSS_INDIR_TABLE_LEN)
3717                                        break;
3718                                rss->rsstable[j + i] = rxo->rss_id;
3719                                rss->rss_queue[j + i] = i;
3720                        }
3721                }
3722                rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3723                        RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3724
3725                if (!BEx_chip(adapter))
3726                        rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3727                                RSS_ENABLE_UDP_IPV6;
3728
3729                netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3730                rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3731                                       RSS_INDIR_TABLE_LEN, rss_key);
3732                if (rc) {
3733                        rss->rss_flags = RSS_ENABLE_NONE;
3734                        return rc;
3735                }
3736
3737                memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3738        } else {
3739                /* Disable RSS, if only default RX Q is created */
3740                rss->rss_flags = RSS_ENABLE_NONE;
3741        }
3742
3743
3744        /* Post 1 less than RXQ-len to avoid head being equal to tail,
3745         * which is a queue empty condition
3746         */
3747        for_all_rx_queues(adapter, rxo, i)
3748                be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3749
3750        return 0;
3751}
3752
3753static int be_enable_if_filters(struct be_adapter *adapter)
3754{
3755        int status;
3756
3757        status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3758        if (status)
3759                return status;
3760
3761        /* Normally this condition is true as the ->dev_mac is zeroed.
3762         * But on BE3 VFs the initial MAC is pre-programmed by PF and
3763         * subsequent be_dev_mac_add() can fail (after fresh boot)
3764         */
3765        if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3766                int old_pmac_id = -1;
3767
3768                /* Remember old programmed MAC if any - can happen on BE3 VF */
3769                if (!is_zero_ether_addr(adapter->dev_mac))
3770                        old_pmac_id = adapter->pmac_id[0];
3771
3772                status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3773                if (status)
3774                        return status;
3775
3776                /* Delete the old programmed MAC as we successfully programmed
3777                 * a new MAC
3778                 */
3779                if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3780                        be_dev_mac_del(adapter, old_pmac_id);
3781
3782                ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3783        }
3784
3785        if (adapter->vlans_added)
3786                be_vid_config(adapter);
3787
3788        __be_set_rx_mode(adapter);
3789
3790        return 0;
3791}
3792
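/* ndo_open handler: create the RX rings, program the RX filters, register
 * the IRQs, arm all CQs/EQs, enable NAPI and async MCC processing, and
 * start the TX queues.
 */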
3793static int be_open(struct net_device *netdev)
3794{
3795        struct be_adapter *adapter = netdev_priv(netdev);
3796        struct be_eq_obj *eqo;
3797        struct be_rx_obj *rxo;
3798        struct be_tx_obj *txo;
3799        u8 link_status;
3800        int status, i;
3801
3802        status = be_rx_qs_create(adapter);
3803        if (status)
3804                goto err;
3805
3806        status = be_enable_if_filters(adapter);
3807        if (status)
3808                goto err;
3809
3810        status = be_irq_register(adapter);
3811        if (status)
3812                goto err;
3813
3814        for_all_rx_queues(adapter, rxo, i)
3815                be_cq_notify(adapter, rxo->cq.id, true, 0);
3816
3817        for_all_tx_queues(adapter, txo, i)
3818                be_cq_notify(adapter, txo->cq.id, true, 0);
3819
3820        be_async_mcc_enable(adapter);
3821
3822        for_all_evt_queues(adapter, eqo, i) {
3823                napi_enable(&eqo->napi);
3824                be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3825        }
3826        adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3827
3828        status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3829        if (!status)
3830                be_link_status_update(adapter, link_status);
3831
3832        netif_tx_start_all_queues(netdev);
3833        if (skyhawk_chip(adapter))
3834                udp_tunnel_get_rx_info(netdev);
3835
3836        return 0;
3837err:
3838        be_close(adapter->netdev);
3839        return -EIO;
3840}
3841
3842static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3843{
3844        u32 addr;
3845
3846        addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3847
3848        mac[5] = (u8)(addr & 0xFF);
3849        mac[4] = (u8)((addr >> 8) & 0xFF);
3850        mac[3] = (u8)((addr >> 16) & 0xFF);
3851        /* Use the OUI from the current MAC address */
3852        memcpy(mac, adapter->netdev->dev_addr, 3);
3853}
3854
3855/*
3856 * Generate a seed MAC address from the PF MAC Address using jhash.
3857 * MAC addresses for VFs are assigned incrementally starting from the seed.
3858 * These addresses are programmed in the ASIC by the PF and the VF driver
3859 * queries for the MAC address during its probe.
3860 */
3861static int be_vf_eth_addr_config(struct be_adapter *adapter)
3862{
3863        u32 vf;
3864        int status = 0;
3865        u8 mac[ETH_ALEN];
3866        struct be_vf_cfg *vf_cfg;
3867
3868        be_vf_eth_addr_generate(adapter, mac);
3869
3870        for_all_vfs(adapter, vf_cfg, vf) {
3871                if (BEx_chip(adapter))
3872                        status = be_cmd_pmac_add(adapter, mac,
3873                                                 vf_cfg->if_handle,
3874                                                 &vf_cfg->pmac_id, vf + 1);
3875                else
3876                        status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3877                                                vf + 1);
3878
3879                if (status)
3880                        dev_err(&adapter->pdev->dev,
3881                                "MAC address assignment failed for VF %d\n",
3882                                vf);
3883                else
3884                        memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3885
3886                mac[5] += 1;
3887        }
3888        return status;
3889}
3890
3891static int be_vfs_mac_query(struct be_adapter *adapter)
3892{
3893        int status, vf;
3894        u8 mac[ETH_ALEN];
3895        struct be_vf_cfg *vf_cfg;
3896
3897        for_all_vfs(adapter, vf_cfg, vf) {
3898                status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3899                                               mac, vf_cfg->if_handle,
3900                                               false, vf+1);
3901                if (status)
3902                        return status;
3903                memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3904        }
3905        return 0;
3906}
3907
3908static void be_vf_clear(struct be_adapter *adapter)
3909{
3910        struct be_vf_cfg *vf_cfg;
3911        u32 vf;
3912
3913        if (pci_vfs_assigned(adapter->pdev)) {
3914                dev_warn(&adapter->pdev->dev,
3915                         "VFs are assigned to VMs: not disabling VFs\n");
3916                goto done;
3917        }
3918
3919        pci_disable_sriov(adapter->pdev);
3920
3921        for_all_vfs(adapter, vf_cfg, vf) {
3922                if (BEx_chip(adapter))
3923                        be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3924                                        vf_cfg->pmac_id, vf + 1);
3925                else
3926                        be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3927                                       vf + 1);
3928
3929                be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3930        }
3931
3932        if (BE3_chip(adapter))
3933                be_cmd_set_hsw_config(adapter, 0, 0,
3934                                      adapter->if_handle,
3935                                      PORT_FWD_TYPE_PASSTHRU, 0);
3936done:
3937        kfree(adapter->vf_cfg);
3938        adapter->num_vfs = 0;
3939        adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3940}
3941
3942static void be_clear_queues(struct be_adapter *adapter)
3943{
3944        be_mcc_queues_destroy(adapter);
3945        be_rx_cqs_destroy(adapter);
3946        be_tx_queues_destroy(adapter);
3947        be_evt_queues_destroy(adapter);
3948}
3949
3950static void be_cancel_worker(struct be_adapter *adapter)
3951{
3952        if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3953                cancel_delayed_work_sync(&adapter->work);
3954                adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3955        }
3956}
3957
3958static void be_cancel_err_detection(struct be_adapter *adapter)
3959{
3960        struct be_error_recovery *err_rec = &adapter->error_recovery;
3961
3962        if (!be_err_recovery_workq)
3963                return;
3964
3965        if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3966                cancel_delayed_work_sync(&err_rec->err_detection_work);
3967                adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3968        }
3969}
3970
3971static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3972{
3973        struct net_device *netdev = adapter->netdev;
3974        struct device *dev = &adapter->pdev->dev;
3975        struct be_vxlan_port *vxlan_port;
3976        __be16 port;
3977        int status;
3978
3979        vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3980                                      struct be_vxlan_port, list);
3981        port = vxlan_port->port;
3982
3983        status = be_cmd_manage_iface(adapter, adapter->if_handle,
3984                                     OP_CONVERT_NORMAL_TO_TUNNEL);
3985        if (status) {
3986                dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3987                return status;
3988        }
3989        adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3990
3991        status = be_cmd_set_vxlan_port(adapter, port);
3992        if (status) {
3993                dev_warn(dev, "Failed to add VxLAN port\n");
3994                return status;
3995        }
3996        adapter->vxlan_port = port;
3997
3998        netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3999                                   NETIF_F_TSO | NETIF_F_TSO6 |
4000                                   NETIF_F_GSO_UDP_TUNNEL;
4001
4002        dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4003                 be16_to_cpu(port));
4004        return 0;
4005}
4006
4007static void be_disable_vxlan_offloads(struct be_adapter *adapter)
4008{
4009        struct net_device *netdev = adapter->netdev;
4010
4011        if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4012                be_cmd_manage_iface(adapter, adapter->if_handle,
4013                                    OP_CONVERT_TUNNEL_TO_NORMAL);
4014
4015        if (adapter->vxlan_port)
4016                be_cmd_set_vxlan_port(adapter, 0);
4017
4018        adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4019        adapter->vxlan_port = 0;
4020
4021        netdev->hw_enc_features = 0;
4022}
4023
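/* Compute the per-VF slice of the PF pool resources (queues, unicast
 * MACs, VLANs, interfaces, MCC queues) for @num_vfs VFs, adjusting only
 * the fields that FW reports as modifiable.
 */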
4024static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4025                                struct be_resources *vft_res)
4026{
4027        struct be_resources res = adapter->pool_res;
4028        u32 vf_if_cap_flags = res.vf_if_cap_flags;
4029        struct be_resources res_mod = {0};
4030        u16 num_vf_qs = 1;
4031
4032        /* Distribute the queue resources among the PF and its VFs */
4033        if (num_vfs) {
4034                /* Divide the rx queues evenly among the VFs and the PF, capped
4035                 * at VF-EQ-count. Any remainder queues belong to the PF.
4036                 */
4037                num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4038                                res.max_rss_qs / (num_vfs + 1));
4039
4040                /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4041                 * RSS Tables per port. Provide RSS on VFs, only if number of
4042                 * VFs requested is less than its PF pool's RSS Tables limit.
4043                 */
4044                if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4045                        num_vf_qs = 1;
4046        }
4047
4048        /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4049         * which are modifiable using SET_PROFILE_CONFIG cmd.
4050         */
4051        be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4052                                  RESOURCE_MODIFIABLE, 0);
4053
4054        /* If RSS IFACE capability flags are modifiable for a VF, set the
4055         * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4056         * more than 1 RSSQ is available for a VF.
4057         * Otherwise, provision only 1 queue pair for VF.
4058         */
4059        if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4060                vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4061                if (num_vf_qs > 1) {
4062                        vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4063                        if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4064                                vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4065                } else {
4066                        vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4067                                             BE_IF_FLAGS_DEFQ_RSS);
4068                }
4069        } else {
4070                num_vf_qs = 1;
4071        }
4072
4073        if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4074                vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4075                vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4076        }
4077
4078        vft_res->vf_if_cap_flags = vf_if_cap_flags;
4079        vft_res->max_rx_qs = num_vf_qs;
4080        vft_res->max_rss_qs = num_vf_qs;
4081        vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4082        vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4083
4084        /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4085         * among the PF and its VFs, if the fields are changeable
4086         */
4087        if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4088                vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4089
4090        if (res_mod.max_vlans == FIELD_MODIFIABLE)
4091                vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4092
4093        if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4094                vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4095
4096        if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4097                vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4098}
4099
4100static void be_if_destroy(struct be_adapter *adapter)
4101{
4102        be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4103
4104        kfree(adapter->pmac_id);
4105        adapter->pmac_id = NULL;
4106
4107        kfree(adapter->mc_list);
4108        adapter->mc_list = NULL;
4109
4110        kfree(adapter->uc_list);
4111        adapter->uc_list = NULL;
4112}
4113
4114static int be_clear(struct be_adapter *adapter)
4115{
4116        struct pci_dev *pdev = adapter->pdev;
4117        struct  be_resources vft_res = {0};
4118
4119        be_cancel_worker(adapter);
4120
4121        flush_workqueue(be_wq);
4122
4123        if (sriov_enabled(adapter))
4124                be_vf_clear(adapter);
4125
4126        /* Re-configure FW to distribute resources evenly across max-supported
4127         * number of VFs, only when VFs are not already enabled.
4128         */
4129        if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4130            !pci_vfs_assigned(pdev)) {
4131                be_calculate_vf_res(adapter,
4132                                    pci_sriov_get_totalvfs(pdev),
4133                                    &vft_res);
4134                be_cmd_set_sriov_config(adapter, adapter->pool_res,
4135                                        pci_sriov_get_totalvfs(pdev),
4136                                        &vft_res);
4137        }
4138
4139        be_disable_vxlan_offloads(adapter);
4140
4141        be_if_destroy(adapter);
4142
4143        be_clear_queues(adapter);
4144
4145        be_msix_disable(adapter);
4146        adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4147        return 0;
4148}
4149
4150static int be_vfs_if_create(struct be_adapter *adapter)
4151{
4152        struct be_resources res = {0};
4153        u32 cap_flags, en_flags, vf;
4154        struct be_vf_cfg *vf_cfg;
4155        int status;
4156
4157        /* If a FW profile exists, then cap_flags are updated */
4158        cap_flags = BE_VF_IF_EN_FLAGS;
4159
4160        for_all_vfs(adapter, vf_cfg, vf) {
4161                if (!BE3_chip(adapter)) {
4162                        status = be_cmd_get_profile_config(adapter, &res, NULL,
4163                                                           ACTIVE_PROFILE_TYPE,
4164                                                           RESOURCE_LIMITS,
4165                                                           vf + 1);
4166                        if (!status) {
4167                                cap_flags = res.if_cap_flags;
4168                                /* Prevent VFs from enabling VLAN promiscuous
4169                                 * mode
4170                                 */
4171                                cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4172                        }
4173                }
4174
4175                /* PF should enable IF flags during proxy if_create call */
4176                en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4177                status = be_cmd_if_create(adapter, cap_flags, en_flags,
4178                                          &vf_cfg->if_handle, vf + 1);
4179                if (status)
4180                        return status;
4181        }
4182
4183        return 0;
4184}
4185
4186static int be_vf_setup_init(struct be_adapter *adapter)
4187{
4188        struct be_vf_cfg *vf_cfg;
4189        int vf;
4190
4191        adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4192                                  GFP_KERNEL);
4193        if (!adapter->vf_cfg)
4194                return -ENOMEM;
4195
4196        for_all_vfs(adapter, vf_cfg, vf) {
4197                vf_cfg->if_handle = -1;
4198                vf_cfg->pmac_id = -1;
4199        }
4200        return 0;
4201}
4202
4203static int be_vf_setup(struct be_adapter *adapter)
4204{
4205        struct device *dev = &adapter->pdev->dev;
4206        struct be_vf_cfg *vf_cfg;
4207        int status, old_vfs, vf;
4208        bool spoofchk;
4209
4210        old_vfs = pci_num_vf(adapter->pdev);
4211
4212        status = be_vf_setup_init(adapter);
4213        if (status)
4214                goto err;
4215
4216        if (old_vfs) {
4217                for_all_vfs(adapter, vf_cfg, vf) {
4218                        status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4219                        if (status)
4220                                goto err;
4221                }
4222
4223                status = be_vfs_mac_query(adapter);
4224                if (status)
4225                        goto err;
4226        } else {
4227                status = be_vfs_if_create(adapter);
4228                if (status)
4229                        goto err;
4230
4231                status = be_vf_eth_addr_config(adapter);
4232                if (status)
4233                        goto err;
4234        }
4235
4236        for_all_vfs(adapter, vf_cfg, vf) {
4237                /* Allow VFs to program MAC/VLAN filters */
4238                status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4239                                                  vf + 1);
4240                if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4241                        status = be_cmd_set_fn_privileges(adapter,
4242                                                          vf_cfg->privileges |
4243                                                          BE_PRIV_FILTMGMT,
4244                                                          vf + 1);
4245                        if (!status) {
4246                                vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4247                                dev_info(dev, "VF%d has FILTMGMT privilege\n",
4248                                         vf);
4249                        }
4250                }
4251
4252                /* Allow full available bandwidth */
4253                if (!old_vfs)
4254                        be_cmd_config_qos(adapter, 0, 0, vf + 1);
4255
4256                status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4257                                               vf_cfg->if_handle, NULL,
4258                                               &spoofchk);
4259                if (!status)
4260                        vf_cfg->spoofchk = spoofchk;
4261
4262                if (!old_vfs) {
4263                        be_cmd_enable_vf(adapter, vf + 1);
4264                        be_cmd_set_logical_link_config(adapter,
4265                                                       IFLA_VF_LINK_STATE_AUTO,
4266                                                       vf+1);
4267                }
4268        }
4269
4270        if (!old_vfs) {
4271                status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4272                if (status) {
4273                        dev_err(dev, "SRIOV enable failed\n");
4274                        adapter->num_vfs = 0;
4275                        goto err;
4276                }
4277        }
4278
4279        if (BE3_chip(adapter)) {
4280                /* On BE3, enable VEB only when SRIOV is enabled */
4281                status = be_cmd_set_hsw_config(adapter, 0, 0,
4282                                               adapter->if_handle,
4283                                               PORT_FWD_TYPE_VEB, 0);
4284                if (status)
4285                        goto err;
4286        }
4287
4288        adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4289        return 0;
4290err:
4291        dev_err(dev, "VF setup failed\n");
4292        be_vf_clear(adapter);
4293        return status;
4294}
4295
4296/* Converting function_mode bits on BE3 to SH mc_type enums */
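/* Precedence (highest first): VNIC_MODE + QNQ_MODE -> vNIC1, QNQ_MODE alone ->
 * FLEX10, VNIC_MODE alone -> vNIC2, UMC_ENABLED -> UMC, otherwise MC_NONE.
 */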
4297
4298static u8 be_convert_mc_type(u32 function_mode)
4299{
4300        if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4301                return vNIC1;
4302        else if (function_mode & QNQ_MODE)
4303                return FLEX10;
4304        else if (function_mode & VNIC_MODE)
4305                return vNIC2;
4306        else if (function_mode & UMC_ENABLED)
4307                return UMC;
4308        else
4309                return MC_NONE;
4310}
4311
4312/* On BE2/BE3, FW does not report the supported limits */
4313static void BEx_get_resources(struct be_adapter *adapter,
4314                              struct be_resources *res)
4315{
4316        bool use_sriov = adapter->num_vfs ? 1 : 0;
4317
4318        if (be_physfn(adapter))
4319                res->max_uc_mac = BE_UC_PMAC_COUNT;
4320        else
4321                res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4322
4323        adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4324
4325        if (be_is_mc(adapter)) {
4326                /* Assuming that there are 4 channels per port,
4327                 * when multi-channel is enabled
4328                 */
4329                if (be_is_qnq_mode(adapter))
4330                        res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4331                else
4332                        /* In a non-qnq multichannel mode, the pvid
4333                         * takes up one vlan entry
4334                         */
4335                        res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4336        } else {
4337                res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4338        }
4339
4340        res->max_mcast_mac = BE_MAX_MC;
4341
4342        /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4343         * 2) Create multiple TX rings on a BE3-R multi-channel interface
4344         *    *only* if it is RSS-capable.
4345         */
4346        if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4347            be_virtfn(adapter) ||
4348            (be_is_mc(adapter) &&
4349             !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4350                res->max_tx_qs = 1;
4351        } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4352                struct be_resources super_nic_res = {0};
4353
4354                /* On a SuperNIC profile, the driver needs to use the
4355                 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4356                 */
4357                be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4358                                          ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4359                                          0);
4360                /* Some old versions of BE3 FW don't report max_tx_qs value */
4361                res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4362        } else {
4363                res->max_tx_qs = BE3_MAX_TX_QS;
4364        }
4365
4366        if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4367            !use_sriov && be_physfn(adapter))
4368                res->max_rss_qs = (adapter->be3_native) ?
4369                                           BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4370        res->max_rx_qs = res->max_rss_qs + 1;
4371
4372        if (be_physfn(adapter))
4373                res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4374                                        BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4375        else
4376                res->max_evt_qs = 1;
4377
4378        res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4379        res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4380        if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4381                res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4382}
4383
4384static void be_setup_init(struct be_adapter *adapter)
4385{
4386        adapter->vlan_prio_bmap = 0xff;
4387        adapter->phy.link_speed = -1;
4388        adapter->if_handle = -1;
4389        adapter->be3_native = false;
4390        adapter->if_flags = 0;
4391        adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4392        if (be_physfn(adapter))
4393                adapter->cmd_privileges = MAX_PRIVILEGES;
4394        else
4395                adapter->cmd_privileges = MIN_PRIVILEGES;
4396}
4397
4398/* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4399 * However, this HW limitation is not exposed to the host via any SLI cmd.
4400 * As a result, in the case of SRIOV and in particular multi-partition configs
4401 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4402 * for distribution between the VFs. This self-imposed limit will determine the
4403 * number of VFs for which RSS can be enabled.
4404 */
4405static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4406{
4407        struct be_port_resources port_res = {0};
4408        u8 rss_tables_on_port;
4409        u16 max_vfs = be_max_vfs(adapter);
4410
4411        be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4412                                  RESOURCE_LIMITS, 0);
4413
4414        rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4415
4416        /* Each PF Pool's RSS Tables limit =
4417         * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4418         */
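        /* Illustrative numbers only: if 15 RSS tables remain on the port after
         * the nic_pfs are accounted for, and this PF owns 32 of the port's 64
         * VFs, the PF pool gets 32 * 15 / 64 = 7 tables.
         */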
4419        adapter->pool_res.max_rss_tables =
4420                max_vfs * rss_tables_on_port / port_res.max_vfs;
4421}
4422
4423static int be_get_sriov_config(struct be_adapter *adapter)
4424{
4425        struct be_resources res = {0};
4426        int max_vfs, old_vfs;
4427
4428        be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4429                                  RESOURCE_LIMITS, 0);
4430
4431        /* Some old versions of BE3 FW don't report max_vfs value */
4432        if (BE3_chip(adapter) && !res.max_vfs) {
4433                max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4434                res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4435        }
4436
4437        adapter->pool_res = res;
4438
4439        /* If, during the previous unload of the driver, the VFs were not disabled,
4440         * then we cannot rely on the PF POOL limits for the TotalVFs value.
4441         * Instead use the TotalVFs value stored in the pci-dev struct.
4442         */
4443        old_vfs = pci_num_vf(adapter->pdev);
4444        if (old_vfs) {
4445                dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4446                         old_vfs);
4447
4448                adapter->pool_res.max_vfs =
4449                        pci_sriov_get_totalvfs(adapter->pdev);
4450                adapter->num_vfs = old_vfs;
4451        }
4452
4453        if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4454                be_calculate_pf_pool_rss_tables(adapter);
4455                dev_info(&adapter->pdev->dev,
4456                         "RSS can be enabled for all VFs if num_vfs <= %d\n",
4457                         be_max_pf_pool_rss_tables(adapter));
4458        }
4459        return 0;
4460}
4461
4462static void be_alloc_sriov_res(struct be_adapter *adapter)
4463{
4464        int old_vfs = pci_num_vf(adapter->pdev);
4465        struct be_resources vft_res = {0};
4466        int status;
4467
4468        be_get_sriov_config(adapter);
4469
4470        if (!old_vfs)
4471                pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4472
4473        /* When the HW is in an SRIOV-capable configuration, the PF-pool
4474         * resources are given to the PF during driver load, if there are no
4475         * old VFs. This facility is not available in BE3 FW.
4476         * On the Lancer chip, this is done by FW itself.
4477         */
4478        if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4479                be_calculate_vf_res(adapter, 0, &vft_res);
4480                status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4481                                                 &vft_res);
4482                if (status)
4483                        dev_err(&adapter->pdev->dev,
4484                                "Failed to optimize SRIOV resources\n");
4485        }
4486}
4487
4488static int be_get_resources(struct be_adapter *adapter)
4489{
4490        struct device *dev = &adapter->pdev->dev;
4491        struct be_resources res = {0};
4492        int status;
4493
4494        /* For Lancer, SH etc., read per-function resource limits from FW.
4495         * GET_FUNC_CONFIG returns per-function guaranteed limits.
4496         * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4497         */
4498        if (BEx_chip(adapter)) {
4499                BEx_get_resources(adapter, &res);
4500        } else {
4501                status = be_cmd_get_func_config(adapter, &res);
4502                if (status)
4503                        return status;
4504
4505                /* If a default RXQ must be created, we'll use up one RSSQ */
4506                if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4507                    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4508                        res.max_rss_qs -= 1;
4509        }
4510
4511        /* If RoCE is supported, stash away half the EQs for RoCE */
4512        res.max_nic_evt_qs = be_roce_supported(adapter) ?
4513                                res.max_evt_qs / 2 : res.max_evt_qs;
4514        adapter->res = res;
4515
4516        /* If FW supports RSS default queue, then skip creating non-RSS
4517         * queue for non-IP traffic.
4518         */
4519        adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4520                                 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4521
4522        dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4523                 be_max_txqs(adapter), be_max_rxqs(adapter),
4524                 be_max_rss(adapter), be_max_nic_eqs(adapter),
4525                 be_max_vfs(adapter));
4526        dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4527                 be_max_uc(adapter), be_max_mc(adapter),
4528                 be_max_vlans(adapter));
4529
4530        /* Ensure RX and TX queues are created in pairs at init time */
4531        adapter->cfg_num_rx_irqs =
4532                                min_t(u16, netif_get_num_default_rss_queues(),
4533                                      be_max_qp_irqs(adapter));
4534        adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4535        return 0;
4536}
4537
4538static int be_get_config(struct be_adapter *adapter)
4539{
4540        int status, level;
4541        u16 profile_id;
4542
4543        status = be_cmd_get_cntl_attributes(adapter);
4544        if (status)
4545                return status;
4546
4547        status = be_cmd_query_fw_cfg(adapter);
4548        if (status)
4549                return status;
4550
4551        if (!lancer_chip(adapter) && be_physfn(adapter))
4552                be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4553
4554        if (BEx_chip(adapter)) {
4555                level = be_cmd_get_fw_log_level(adapter);
4556                adapter->msg_enable =
4557                        level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4558        }
4559
4560        be_cmd_get_acpi_wol_cap(adapter);
4561        pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4562        pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4563
4564        be_cmd_query_port_name(adapter);
4565
4566        if (be_physfn(adapter)) {
4567                status = be_cmd_get_active_profile(adapter, &profile_id);
4568                if (!status)
4569                        dev_info(&adapter->pdev->dev,
4570                                 "Using profile 0x%x\n", profile_id);
4571        }
4572
4573        return 0;
4574}
4575
4576static int be_mac_setup(struct be_adapter *adapter)
4577{
4578        u8 mac[ETH_ALEN];
4579        int status;
4580
4581        if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4582                status = be_cmd_get_perm_mac(adapter, mac);
4583                if (status)
4584                        return status;
4585
4586                memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4587                memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4588
4589                /* Initial MAC for BE3 VFs is already programmed by PF */
4590                if (BEx_chip(adapter) && be_virtfn(adapter))
4591                        memcpy(adapter->dev_mac, mac, ETH_ALEN);
4592        }
4593
4594        return 0;
4595}
4596
4597static void be_schedule_worker(struct be_adapter *adapter)
4598{
4599        queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4600        adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4601}
4602
4603static void be_destroy_err_recovery_workq(void)
4604{
4605        if (!be_err_recovery_workq)
4606                return;
4607
4608        flush_workqueue(be_err_recovery_workq);
4609        destroy_workqueue(be_err_recovery_workq);
4610        be_err_recovery_workq = NULL;
4611}
4612
4613static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4614{
4615        struct be_error_recovery *err_rec = &adapter->error_recovery;
4616
4617        if (!be_err_recovery_workq)
4618                return;
4619
4620        queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4621                           msecs_to_jiffies(delay));
4622        adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4623}
4624
4625static int be_setup_queues(struct be_adapter *adapter)
4626{
4627        struct net_device *netdev = adapter->netdev;
4628        int status;
4629
4630        status = be_evt_queues_create(adapter);
4631        if (status)
4632                goto err;
4633
4634        status = be_tx_qs_create(adapter);
4635        if (status)
4636                goto err;
4637
4638        status = be_rx_cqs_create(adapter);
4639        if (status)
4640                goto err;
4641
4642        status = be_mcc_queues_create(adapter);
4643        if (status)
4644                goto err;
4645
4646        status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4647        if (status)
4648                goto err;
4649
4650        status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4651        if (status)
4652                goto err;
4653
4654        return 0;
4655err:
4656        dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4657        return status;
4658}
4659
4660static int be_if_create(struct be_adapter *adapter)
4661{
4662        u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4663        u32 cap_flags = be_if_cap_flags(adapter);
4664        int status;
4665
4666        /* alloc required memory for other filtering fields */
4667        adapter->pmac_id = kcalloc(be_max_uc(adapter),
4668                                   sizeof(*adapter->pmac_id), GFP_KERNEL);
4669        if (!adapter->pmac_id)
4670                return -ENOMEM;
4671
4672        adapter->mc_list = kcalloc(be_max_mc(adapter),
4673                                   sizeof(*adapter->mc_list), GFP_KERNEL);
4674        if (!adapter->mc_list)
4675                return -ENOMEM;
4676
4677        adapter->uc_list = kcalloc(be_max_uc(adapter),
4678                                   sizeof(*adapter->uc_list), GFP_KERNEL);
4679        if (!adapter->uc_list)
4680                return -ENOMEM;
4681
4682        if (adapter->cfg_num_rx_irqs == 1)
4683                cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4684
4685        en_flags &= cap_flags;
4686        /* will enable all the needed filter flags in be_open() */
4687        status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4688                                  &adapter->if_handle, 0);
4689
4690        if (status)
4691                return status;
4692
4693        return 0;
4694}
4695
4696int be_update_queues(struct be_adapter *adapter)
4697{
4698        struct net_device *netdev = adapter->netdev;
4699        int status;
4700
4701        if (netif_running(netdev)) {
4702                /* be_tx_timeout() must not run concurrently with this
4703                 * function, synchronize with an already-running dev_watchdog
4704                 */
4705                netif_tx_lock_bh(netdev);
4706                /* device cannot transmit now, avoid dev_watchdog timeouts */
4707                netif_carrier_off(netdev);
4708                netif_tx_unlock_bh(netdev);
4709
4710                be_close(netdev);
4711        }
4712
4713        be_cancel_worker(adapter);
4714
4715        /* If any vectors have been shared with RoCE we cannot re-program
4716         * the MSIx table.
4717         */
4718        if (!adapter->num_msix_roce_vec)
4719                be_msix_disable(adapter);
4720
4721        be_clear_queues(adapter);
4722        status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4723        if (status)
4724                return status;
4725
4726        if (!msix_enabled(adapter)) {
4727                status = be_msix_enable(adapter);
4728                if (status)
4729                        return status;
4730        }
4731
4732        status = be_if_create(adapter);
4733        if (status)
4734                return status;
4735
4736        status = be_setup_queues(adapter);
4737        if (status)
4738                return status;
4739
4740        be_schedule_worker(adapter);
4741
4742        /* The IF was destroyed and re-created. We need to clear
4743         * all promiscuous flags valid for the destroyed IF.
4744         * Without this, promisc mode is not restored during
4745         * be_open() because the driver thinks that it is
4746         * already enabled in HW.
4747         */
4748        adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4749
4750        if (netif_running(netdev))
4751                status = be_open(netdev);
4752
4753        return status;
4754}
4755
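/* Parse the leading major number out of a dotted FW version string;
 * e.g. a string such as "4.0.335.0" (illustrative) yields 4.
 */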
4756static inline int fw_major_num(const char *fw_ver)
4757{
4758        int fw_major = 0, i;
4759
4760        i = sscanf(fw_ver, "%d.", &fw_major);
4761        if (i != 1)
4762                return 0;
4763
4764        return fw_major;
4765}
4766
4767/* If it is error recovery, FLR the PF.
4768 * Otherwise, don't FLR the PF if any VFs are already enabled.
4769 */
4770static bool be_reset_required(struct be_adapter *adapter)
4771{
4772        if (be_error_recovering(adapter))
4773                return true;
4774        else
4775                return pci_num_vf(adapter->pdev) == 0;
4776}
4777
4778/* Wait for the FW to be ready and perform the required initialization */
4779static int be_func_init(struct be_adapter *adapter)
4780{
4781        int status;
4782
4783        status = be_fw_wait_ready(adapter);
4784        if (status)
4785                return status;
4786
4787        /* FW is now ready; clear errors to allow cmds/doorbell */
4788        be_clear_error(adapter, BE_CLEAR_ALL);
4789
4790        if (be_reset_required(adapter)) {
4791                status = be_cmd_reset_function(adapter);
4792                if (status)
4793                        return status;
4794
4795                /* Wait for interrupts to quiesce after an FLR */
4796                msleep(100);
4797        }
4798
4799        /* Tell FW we're ready to fire cmds */
4800        status = be_cmd_fw_init(adapter);
4801        if (status)
4802                return status;
4803
4804        /* Allow interrupts for other ULPs running on NIC function */
4805        be_intr_set(adapter, true);
4806
4807        return 0;
4808}
4809
4810static int be_setup(struct be_adapter *adapter)
4811{
4812        struct device *dev = &adapter->pdev->dev;
4813        int status;
4814
4815        status = be_func_init(adapter);
4816        if (status)
4817                return status;
4818
4819        be_setup_init(adapter);
4820
4821        if (!lancer_chip(adapter))
4822                be_cmd_req_native_mode(adapter);
4823
4824        /* Invoke this cmd first to get pf_num and vf_num, which are needed
4825         * for issuing profile-related cmds.
4826         */
4827        if (!BEx_chip(adapter)) {
4828                status = be_cmd_get_func_config(adapter, NULL);
4829                if (status)
4830                        return status;
4831        }
4832
4833        status = be_get_config(adapter);
4834        if (status)
4835                goto err;
4836
4837        if (!BE2_chip(adapter) && be_physfn(adapter))
4838                be_alloc_sriov_res(adapter);
4839
4840        status = be_get_resources(adapter);
4841        if (status)
4842                goto err;
4843
4844        status = be_msix_enable(adapter);
4845        if (status)
4846                goto err;
4847
4848        /* will enable all the needed filter flags in be_open() */
4849        status = be_if_create(adapter);
4850        if (status)
4851                goto err;
4852
4853        /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4854        rtnl_lock();
4855        status = be_setup_queues(adapter);
4856        rtnl_unlock();
4857        if (status)
4858                goto err;
4859
4860        be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4861
4862        status = be_mac_setup(adapter);
4863        if (status)
4864                goto err;
4865
4866        be_cmd_get_fw_ver(adapter);
4867        dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4868
4869        if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4870                dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4871                        adapter->fw_ver);
4872                dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4873        }
4874
4875        status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4876                                         adapter->rx_fc);
4877        if (status)
4878                be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4879                                        &adapter->rx_fc);
4880
4881        dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4882                 adapter->tx_fc, adapter->rx_fc);
4883
4884        if (be_physfn(adapter))
4885                be_cmd_set_logical_link_config(adapter,
4886                                               IFLA_VF_LINK_STATE_AUTO, 0);
4887
4888        /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4889         * confusing any Linux bridge or OVS it might be connected to.
4890         * Set the EVB to PASSTHRU mode, which effectively disables the EVB,
4891         * when SRIOV is not enabled.
4892         */
4893        if (BE3_chip(adapter))
4894                be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4895                                      PORT_FWD_TYPE_PASSTHRU, 0);
4896
4897        if (adapter->num_vfs)
4898                be_vf_setup(adapter);
4899
4900        status = be_cmd_get_phy_info(adapter);
4901        if (!status && be_pause_supported(adapter))
4902                adapter->phy.fc_autoneg = 1;
4903
4904        if (be_physfn(adapter) && !lancer_chip(adapter))
4905                be_cmd_set_features(adapter);
4906
4907        be_schedule_worker(adapter);
4908        adapter->flags |= BE_FLAGS_SETUP_DONE;
4909        return 0;
4910err:
4911        be_clear(adapter);
4912        return status;
4913}
4914
4915#ifdef CONFIG_NET_POLL_CONTROLLER
4916static void be_netpoll(struct net_device *netdev)
4917{
4918        struct be_adapter *adapter = netdev_priv(netdev);
4919        struct be_eq_obj *eqo;
4920        int i;
4921
4922        for_all_evt_queues(adapter, eqo, i) {
4923                be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4924                napi_schedule(&eqo->napi);
4925        }
4926}
4927#endif
4928
4929int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4930{
4931        const struct firmware *fw;
4932        int status;
4933
4934        if (!netif_running(adapter->netdev)) {
4935                dev_err(&adapter->pdev->dev,
4936                        "Firmware load not allowed (interface is down)\n");
4937                return -ENETDOWN;
4938        }
4939
4940        status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4941        if (status)
4942                goto fw_exit;
4943
4944        dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4945
4946        if (lancer_chip(adapter))
4947                status = lancer_fw_download(adapter, fw);
4948        else
4949                status = be_fw_download(adapter, fw);
4950
4951        if (!status)
4952                be_cmd_get_fw_ver(adapter);
4953
4954fw_exit:
4955        release_firmware(fw);
4956        return status;
4957}
4958
4959static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4960                                 u16 flags, struct netlink_ext_ack *extack)
4961{
4962        struct be_adapter *adapter = netdev_priv(dev);
4963        struct nlattr *attr, *br_spec;
4964        int rem;
4965        int status = 0;
4966        u16 mode = 0;
4967
4968        if (!sriov_enabled(adapter))
4969                return -EOPNOTSUPP;
4970
4971        br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4972        if (!br_spec)
4973                return -EINVAL;
4974
4975        nla_for_each_nested(attr, br_spec, rem) {
4976                if (nla_type(attr) != IFLA_BRIDGE_MODE)
4977                        continue;
4978
4979                if (nla_len(attr) < sizeof(mode))
4980                        return -EINVAL;
4981
4982                mode = nla_get_u16(attr);
4983                if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4984                        return -EOPNOTSUPP;
4985
4986                if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4987                        return -EINVAL;
4988
4989                status = be_cmd_set_hsw_config(adapter, 0, 0,
4990                                               adapter->if_handle,
4991                                               mode == BRIDGE_MODE_VEPA ?
4992                                               PORT_FWD_TYPE_VEPA :
4993                                               PORT_FWD_TYPE_VEB, 0);
4994                if (status)
4995                        goto err;
4996
4997                dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4998                         mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4999
5000                return status;
5001        }
5002err:
5003        dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5004                mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5005
5006        return status;
5007}
5008
5009static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5010                                 struct net_device *dev, u32 filter_mask,
5011                                 int nlflags)
5012{
5013        struct be_adapter *adapter = netdev_priv(dev);
5014        int status = 0;
5015        u8 hsw_mode;
5016
5017        /* BE and Lancer chips support VEB mode only */
5018        if (BEx_chip(adapter) || lancer_chip(adapter)) {
5019                /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5020                if (!pci_sriov_get_totalvfs(adapter->pdev))
5021                        return 0;
5022                hsw_mode = PORT_FWD_TYPE_VEB;
5023        } else {
5024                status = be_cmd_get_hsw_config(adapter, NULL, 0,
5025                                               adapter->if_handle, &hsw_mode,
5026                                               NULL);
5027                if (status)
5028                        return 0;
5029
5030                if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5031                        return 0;
5032        }
5033
5034        return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5035                                       hsw_mode == PORT_FWD_TYPE_VEPA ?
5036                                       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5037                                       0, 0, nlflags, filter_mask, NULL);
5038}
5039
5040static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5041                                         void (*func)(struct work_struct *))
5042{
5043        struct be_cmd_work *work;
5044
5045        work = kzalloc(sizeof(*work), GFP_ATOMIC);
5046        if (!work) {
5047                dev_err(&adapter->pdev->dev,
5048                        "be_work memory allocation failed\n");
5049                return NULL;
5050        }
5051
5052        INIT_WORK(&work->work, func);
5053        work->adapter = adapter;
5054        return work;
5055}
5056
5057/* VxLAN offload Notes:
5058 *
5059 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5060 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5061 * is expected to work across all types of IP tunnels once exported. Skyhawk
5062 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5063 * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5064 * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5065 * those other tunnels are unexported on the fly through ndo_features_check().
5066 *
5067 * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5068 * adds more than one port, disable offloads and re-enable them again when
5069 * there's only one port left. We maintain a list of ports for this purpose.
5070 */
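/* Illustrative sequence (not from the original source): adding port A enables
 * offloads for A; adding a second port B disables them; deleting B leaves a
 * single port and offloads are re-enabled for it. Re-adding an already-known
 * port only bumps its alias count.
 */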
5071static void be_work_add_vxlan_port(struct work_struct *work)
5072{
5073        struct be_cmd_work *cmd_work =
5074                                container_of(work, struct be_cmd_work, work);
5075        struct be_adapter *adapter = cmd_work->adapter;
5076        struct device *dev = &adapter->pdev->dev;
5077        __be16 port = cmd_work->info.vxlan_port;
5078        struct be_vxlan_port *vxlan_port;
5079        int status;
5080
5081        /* Bump up the alias count if it is an existing port */
5082        list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5083                if (vxlan_port->port == port) {
5084                        vxlan_port->port_aliases++;
5085                        goto done;
5086                }
5087        }
5088
5089        /* Add a new port to our list. We don't need a lock here since port
5090         * add/delete are done only in the context of a single-threaded work
5091         * queue (be_wq).
5092         */
5093        vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
5094        if (!vxlan_port)
5095                goto done;
5096
5097        vxlan_port->port = port;
5098        INIT_LIST_HEAD(&vxlan_port->list);
5099        list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
5100        adapter->vxlan_port_count++;
5101
5102        if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5103                dev_info(dev,
5104                         "Only one UDP port supported for VxLAN offloads\n");
5105                dev_info(dev, "Disabling VxLAN offloads\n");
5106                goto err;
5107        }
5108
5109        if (adapter->vxlan_port_count > 1)
5110                goto done;
5111
5112        status = be_enable_vxlan_offloads(adapter);
5113        if (!status)
5114                goto done;
5115
5116err:
5117        be_disable_vxlan_offloads(adapter);
5118done:
5119        kfree(cmd_work);
5120        return;
5121}
5122
5123static void be_work_del_vxlan_port(struct work_struct *work)
5124{
5125        struct be_cmd_work *cmd_work =
5126                                container_of(work, struct be_cmd_work, work);
5127        struct be_adapter *adapter = cmd_work->adapter;
5128        __be16 port = cmd_work->info.vxlan_port;
5129        struct be_vxlan_port *vxlan_port;
5130
5131        /* Nothing to be done if a port alias is being deleted */
5132        list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5133                if (vxlan_port->port == port) {
5134                        if (vxlan_port->port_aliases) {
5135                                vxlan_port->port_aliases--;
5136                                goto done;
5137                        }
5138                        break;
5139                }
5140        }
5141
5142        /* No port aliases left; delete the port from the list */
5143        list_del(&vxlan_port->list);
5144        adapter->vxlan_port_count--;
5145
5146        /* Disable VxLAN offload if this is the offloaded port */
5147        if (adapter->vxlan_port == vxlan_port->port) {
5148                WARN_ON(adapter->vxlan_port_count);
5149                be_disable_vxlan_offloads(adapter);
5150                dev_info(&adapter->pdev->dev,
5151                         "Disabled VxLAN offloads for UDP port %d\n",
5152                         be16_to_cpu(port));
5153                goto out;
5154        }
5155
5156        /* If only 1 port is left, re-enable VxLAN offload */
5157        if (adapter->vxlan_port_count == 1)
5158                be_enable_vxlan_offloads(adapter);
5159
5160out:
5161        kfree(vxlan_port);
5162done:
5163        kfree(cmd_work);
5164}
5165
5166static void be_cfg_vxlan_port(struct net_device *netdev,
5167                              struct udp_tunnel_info *ti,
5168                              void (*func)(struct work_struct *))
5169{
5170        struct be_adapter *adapter = netdev_priv(netdev);
5171        struct be_cmd_work *cmd_work;
5172
5173        if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5174                return;
5175
5176        if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5177                return;
5178
5179        cmd_work = be_alloc_work(adapter, func);
5180        if (cmd_work) {
5181                cmd_work->info.vxlan_port = ti->port;
5182                queue_work(be_wq, &cmd_work->work);
5183        }
5184}
5185
5186static void be_del_vxlan_port(struct net_device *netdev,
5187                              struct udp_tunnel_info *ti)
5188{
5189        be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5190}
5191
5192static void be_add_vxlan_port(struct net_device *netdev,
5193                              struct udp_tunnel_info *ti)
5194{
5195        be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5196}
5197
5198static netdev_features_t be_features_check(struct sk_buff *skb,
5199                                           struct net_device *dev,
5200                                           netdev_features_t features)
5201{
5202        struct be_adapter *adapter = netdev_priv(dev);
5203        u8 l4_hdr = 0;
5204
5205        if (skb_is_gso(skb)) {
5206                /* IPv6 TSO requests with extension hdrs are a problem
5207                 * for Lancer and BE3 HW. Disable the TSO6 feature.
5208                 */
5209                if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5210                        features &= ~NETIF_F_TSO6;
5211
5212                /* Lancer cannot handle packets with an MSS less than 256.
5213                 * Also, it can't handle a TSO packet with a single segment.
5214                 * Disable GSO support in such cases.
5215                 */
5216                if (lancer_chip(adapter) &&
5217                    (skb_shinfo(skb)->gso_size < 256 ||
5218                     skb_shinfo(skb)->gso_segs == 1))
5219                        features &= ~NETIF_F_GSO_MASK;
5220        }
5221
5222        /* The code below restricts offload features for some tunneled and
5223         * Q-in-Q packets.
5224         * Offload features for normal (non tunnel) packets are unchanged.
5225         */
5226        features = vlan_features_check(skb, features);
5227        if (!skb->encapsulation ||
5228            !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5229                return features;
5230
5231        /* It's an encapsulated packet and VxLAN offloads are enabled. We
5232         * should disable tunnel offload features if it's not a VxLAN packet,
5233         * as tunnel offloads have been enabled only for VxLAN. This is done to
5234         * allow other tunneled traffic like GRE to work fine while VxLAN
5235         * offloads are configured in Skyhawk-R.
5236         */
5237        switch (vlan_get_protocol(skb)) {
5238        case htons(ETH_P_IP):
5239                l4_hdr = ip_hdr(skb)->protocol;
5240                break;
5241        case htons(ETH_P_IPV6):
5242                l4_hdr = ipv6_hdr(skb)->nexthdr;
5243                break;
5244        default:
5245                return features;
5246        }
5247
5248        if (l4_hdr != IPPROTO_UDP ||
5249            skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5250            skb->inner_protocol != htons(ETH_P_TEB) ||
5251            skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5252                sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5253            !adapter->vxlan_port ||
5254            udp_hdr(skb)->dest != adapter->vxlan_port)
5255                return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5256
5257        return features;
5258}
5259
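/* Build the physical port id: byte 0 is the HBA port number (1-based), the
 * remaining bytes are the controller serial-number words copied in reverse
 * word order.
 */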
5260static int be_get_phys_port_id(struct net_device *dev,
5261                               struct netdev_phys_item_id *ppid)
5262{
5263        int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5264        struct be_adapter *adapter = netdev_priv(dev);
5265        u8 *id;
5266
5267        if (MAX_PHYS_ITEM_ID_LEN < id_len)
5268                return -ENOSPC;
5269
5270        ppid->id[0] = adapter->hba_port_num + 1;
5271        id = &ppid->id[1];
5272        for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5273             i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5274                memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5275
5276        ppid->id_len = id_len;
5277
5278        return 0;
5279}
5280
5281static void be_set_rx_mode(struct net_device *dev)
5282{
5283        struct be_adapter *adapter = netdev_priv(dev);
5284        struct be_cmd_work *work;
5285
5286        work = be_alloc_work(adapter, be_work_set_rx_mode);
5287        if (work)
5288                queue_work(be_wq, &work->work);
5289}
5290
5291static const struct net_device_ops be_netdev_ops = {
5292        .ndo_open               = be_open,
5293        .ndo_stop               = be_close,
5294        .ndo_start_xmit         = be_xmit,
5295        .ndo_set_rx_mode        = be_set_rx_mode,
5296        .ndo_set_mac_address    = be_mac_addr_set,
5297        .ndo_get_stats64        = be_get_stats64,
5298        .ndo_validate_addr      = eth_validate_addr,
5299        .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5300        .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5301        .ndo_set_vf_mac         = be_set_vf_mac,
5302        .ndo_set_vf_vlan        = be_set_vf_vlan,
5303        .ndo_set_vf_rate        = be_set_vf_tx_rate,
5304        .ndo_get_vf_config      = be_get_vf_config,
5305        .ndo_set_vf_link_state  = be_set_vf_link_state,
5306        .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5307        .ndo_tx_timeout         = be_tx_timeout,
5308#ifdef CONFIG_NET_POLL_CONTROLLER
5309        .ndo_poll_controller    = be_netpoll,
5310#endif
5311        .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5312        .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5313        .ndo_udp_tunnel_add     = be_add_vxlan_port,
5314        .ndo_udp_tunnel_del     = be_del_vxlan_port,
5315        .ndo_features_check     = be_features_check,
5316        .ndo_get_phys_port_id   = be_get_phys_port_id,
5317};
5318
5319static void be_netdev_init(struct net_device *netdev)
5320{
5321        struct be_adapter *adapter = netdev_priv(netdev);
5322
5323        netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5324                NETIF_F_GSO_UDP_TUNNEL |
5325                NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5326                NETIF_F_HW_VLAN_CTAG_TX;
5327        if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5328                netdev->hw_features |= NETIF_F_RXHASH;
5329
5330        netdev->features |= netdev->hw_features |
5331                NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5332
5333        netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5334                NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5335
5336        netdev->priv_flags |= IFF_UNICAST_FLT;
5337
5338        netdev->flags |= IFF_MULTICAST;
5339
5340        netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5341
5342        netdev->netdev_ops = &be_netdev_ops;
5343
5344        netdev->ethtool_ops = &be_ethtool_ops;
5345
5346        /* MTU range: 256 - 9000 */
5347        netdev->min_mtu = BE_MIN_MTU;
5348        netdev->max_mtu = BE_MAX_MTU;
5349}
5350
5351static void be_cleanup(struct be_adapter *adapter)
5352{
5353        struct net_device *netdev = adapter->netdev;
5354
5355        rtnl_lock();
5356        netif_device_detach(netdev);
5357        if (netif_running(netdev))
5358                be_close(netdev);
5359        rtnl_unlock();
5360
5361        be_clear(adapter);
5362}
5363
5364static int be_resume(struct be_adapter *adapter)
5365{
5366        struct net_device *netdev = adapter->netdev;
5367        int status;
5368
5369        status = be_setup(adapter);
5370        if (status)
5371                return status;
5372
5373        rtnl_lock();
5374        if (netif_running(netdev))
5375                status = be_open(netdev);
5376        rtnl_unlock();
5377
5378        if (status)
5379                return status;
5380
5381        netif_device_attach(netdev);
5382
5383        return 0;
5384}
5385
5386static void be_soft_reset(struct be_adapter *adapter)
5387{
5388        u32 val;
5389
5390        dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5391        val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5392        val |= SLIPORT_SOFTRESET_SR_MASK;
5393        iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5394}
5395
5396static bool be_err_is_recoverable(struct be_adapter *adapter)
5397{
5398        struct be_error_recovery *err_rec = &adapter->error_recovery;
5399        unsigned long initial_idle_time =
5400                msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5401        unsigned long recovery_interval =
5402                msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5403        u16 ue_err_code;
5404        u32 val;
5405
5406        val = be_POST_stage_get(adapter);
5407        if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5408                return false;
5409        ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5410        if (ue_err_code == 0)
5411                return false;
5412
5413        dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5414                ue_err_code);
5415
5416        if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5417                dev_err(&adapter->pdev->dev,
5418                        "Cannot recover within %lu sec from driver load\n",
5419                        jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5420                return false;
5421        }
5422
5423        if (err_rec->last_recovery_time && time_before_eq(
5424                jiffies - err_rec->last_recovery_time, recovery_interval)) {
5425                dev_err(&adapter->pdev->dev,
5426                        "Cannot recover within %lu sec from last recovery\n",
5427                        jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5428                return false;
5429        }
5430
5431        if (ue_err_code == err_rec->last_err_code) {
5432                dev_err(&adapter->pdev->dev,
5433                        "Cannot recover from a consecutive TPE error\n");
5434                return false;
5435        }
5436
5437        err_rec->last_recovery_time = jiffies;
5438        err_rec->last_err_code = ue_err_code;
5439        return true;
5440}
5441
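/* Summary of the state machine below: ERR_RECOVERY_ST_NONE moves to
 * ERR_RECOVERY_ST_DETECT after the UE detection interval; from there PF0 goes
 * through ERR_RECOVERY_ST_RESET (chip soft reset) to ERR_RECOVERY_ST_PRE_POLL,
 * while other PFs go straight to ERR_RECOVERY_ST_PRE_POLL; that state then
 * moves to ERR_RECOVERY_ST_REINIT and reports success. The reschedule delays
 * are derived from ue_to_reset_time and ue_to_poll_time.
 */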
5442static int be_tpe_recover(struct be_adapter *adapter)
5443{
5444        struct be_error_recovery *err_rec = &adapter->error_recovery;
5445        int status = -EAGAIN;
5446        u32 val;
5447
5448        switch (err_rec->recovery_state) {
5449        case ERR_RECOVERY_ST_NONE:
5450                err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5451                err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5452                break;
5453
5454        case ERR_RECOVERY_ST_DETECT:
5455                val = be_POST_stage_get(adapter);
5456                if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5457                    POST_STAGE_RECOVERABLE_ERR) {
5458                        dev_err(&adapter->pdev->dev,
5459                                "Unrecoverable HW error detected: 0x%x\n", val);
5460                        status = -EINVAL;
5461                        err_rec->resched_delay = 0;
5462                        break;
5463                }
5464
5465                dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5466
5467                /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5468                 * milliseconds before it checks for final error status in
5469                 * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5470                 * If they are, PF0 initiates a Soft Reset.
5471                 */
5472                if (adapter->pf_num == 0) {
5473                        err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5474                        err_rec->resched_delay = err_rec->ue_to_reset_time -
5475                                        ERR_RECOVERY_UE_DETECT_DURATION;
5476                        break;
5477                }
5478
5479                err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5480                err_rec->resched_delay = err_rec->ue_to_poll_time -
5481                                        ERR_RECOVERY_UE_DETECT_DURATION;
5482                break;
5483
5484        case ERR_RECOVERY_ST_RESET:
5485                if (!be_err_is_recoverable(adapter)) {
5486                        dev_err(&adapter->pdev->dev,
5487                                "Failed to meet recovery criteria\n");
5488                        status = -EIO;
5489                        err_rec->resched_delay = 0;
5490                        break;
5491                }
5492                be_soft_reset(adapter);
5493                err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5494                err_rec->resched_delay = err_rec->ue_to_poll_time -
5495                                        err_rec->ue_to_reset_time;
5496                break;
5497
5498        case ERR_RECOVERY_ST_PRE_POLL:
5499                err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5500                err_rec->resched_delay = 0;
5501                status = 0;                     /* done */
5502                break;
5503
5504        default:
5505                status = -EINVAL;
5506                err_rec->resched_delay = 0;
5507                break;
5508        }
5509
5510        return status;
5511}
5512
5513static int be_err_recover(struct be_adapter *adapter)
5514{
5515        int status;
5516
5517        if (!lancer_chip(adapter)) {
5518                if (!adapter->error_recovery.recovery_supported ||
5519                    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5520                        return -EIO;
5521                status = be_tpe_recover(adapter);
5522                if (status)
5523                        goto err;
5524        }
5525
5526        /* Wait for adapter to reach quiescent state before
5527         * destroying queues
5528         */
5529        status = be_fw_wait_ready(adapter);
5530        if (status)
5531                goto err;
5532
5533        adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5534
5535        be_cleanup(adapter);
5536
5537        status = be_resume(adapter);
5538        if (status)
5539                goto err;
5540
5541        adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5542
5543err:
5544        return status;
5545}
5546
5547static void be_err_detection_task(struct work_struct *work)
5548{
5549        struct be_error_recovery *err_rec =
5550                        container_of(work, struct be_error_recovery,
5551                                     err_detection_work.work);
5552        struct be_adapter *adapter =
5553                        container_of(err_rec, struct be_adapter,
5554                                     error_recovery);
5555        u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5556        struct device *dev = &adapter->pdev->dev;
5557        int recovery_status;
5558
5559        be_detect_error(adapter);
5560        if (!be_check_error(adapter, BE_ERROR_HW))
5561                goto reschedule_task;
5562
5563        recovery_status = be_err_recover(adapter);
5564        if (!recovery_status) {
5565                err_rec->recovery_retries = 0;
5566                err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5567                dev_info(dev, "Adapter recovery successful\n");
5568                goto reschedule_task;
5569        } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5570                /* BEx/SH recovery state machine */
5571                if (adapter->pf_num == 0 &&
5572                    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5573                        dev_err(&adapter->pdev->dev,
5574                                "Adapter recovery in progress\n");
5575                resched_delay = err_rec->resched_delay;
5576                goto reschedule_task;
5577        } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5578                /* For VFs, check every second whether the PF has
5579                 * allocated resources.
5580                 */
5581                dev_err(dev, "Re-trying adapter recovery\n");
5582                goto reschedule_task;
5583        } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5584                   ERR_RECOVERY_MAX_RETRY_COUNT) {
5585                /* In case of another error during recovery, it takes 30 sec
5586                 * for the adapter to come out of error. Retry error recovery after
5587                 * this time interval.
5588                 */
5589                dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5590                resched_delay = ERR_RECOVERY_RETRY_DELAY;
5591                goto reschedule_task;
5592        } else {
5593                dev_err(dev, "Adapter recovery failed\n");
5594                dev_err(dev, "Please reboot server to recover\n");
5595        }
5596
5597        return;
5598
5599reschedule_task:
5600        be_schedule_err_detection(adapter, resched_delay);
5601}
5602
5603static void be_log_sfp_info(struct be_adapter *adapter)
5604{
5605        int status;
5606
5607        status = be_cmd_query_sfp_info(adapter);
5608        if (!status) {
5609                dev_err(&adapter->pdev->dev,
5610                        "Port %c: %s Vendor: %s part no: %s\n",
5611                        adapter->port_name,
5612                        be_misconfig_evt_port_state[adapter->phy_state],
5613                        adapter->phy.vendor_name,
5614                        adapter->phy.vendor_pn);
5615        }
5616        adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5617}
5618
5619static void be_worker(struct work_struct *work)
5620{
5621        struct be_adapter *adapter =
5622                container_of(work, struct be_adapter, work.work);
5623        struct be_rx_obj *rxo;
5624        int i;
5625
5626        if (be_physfn(adapter) &&
5627            MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5628                be_cmd_get_die_temperature(adapter);
5629
5630        /* when interrupts are not yet enabled, just reap any pending
5631         * mcc completions
5632         */
5633        if (!netif_running(adapter->netdev)) {
5634                be_process_mcc(adapter);
5635                goto reschedule;
5636        }
5637
5638        if (!adapter->stats_cmd_sent) {
5639                if (lancer_chip(adapter))
5640                        lancer_cmd_get_pport_stats(adapter,
5641                                                   &adapter->stats_cmd);
5642                else
5643                        be_cmd_get_stats(adapter, &adapter->stats_cmd);
5644        }
5645
5646        for_all_rx_queues(adapter, rxo, i) {
5647                /* Replenish RX-queues starved due to memory
5648                 * allocation failures.
5649                 */
5650                if (rxo->rx_post_starved)
5651                        be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5652        }
5653
5654        /* EQ-delay update for Skyhawk is done while notifying EQ */
5655        if (!skyhawk_chip(adapter))
5656                be_eqd_update(adapter, false);
5657
5658        if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5659                be_log_sfp_info(adapter);
5660
5661reschedule:
5662        adapter->work_counter++;
5663        queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5664}
5665
5666static void be_unmap_pci_bars(struct be_adapter *adapter)
5667{
5668        if (adapter->csr)
5669                pci_iounmap(adapter->pdev, adapter->csr);
5670        if (adapter->db)
5671                pci_iounmap(adapter->pdev, adapter->db);
5672        if (adapter->pcicfg && adapter->pcicfg_mapped)
5673                pci_iounmap(adapter->pdev, adapter->pcicfg);
5674}
5675
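/* The doorbell region lives in BAR 0 on Lancer chips and on VFs, and in
 * BAR 4 on other (PF) functions.
 */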
5676static int db_bar(struct be_adapter *adapter)
5677{
5678        if (lancer_chip(adapter) || be_virtfn(adapter))
5679                return 0;
5680        else
5681                return 4;
5682}
5683
5684static int be_roce_map_pci_bars(struct be_adapter *adapter)
5685{
5686        if (skyhawk_chip(adapter)) {
5687                adapter->roce_db.size = 4096;
5688                adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5689                                                              db_bar(adapter));
5690                adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5691                                                               db_bar(adapter));
5692        }
5693        return 0;
5694}
5695
5696static int be_map_pci_bars(struct be_adapter *adapter)
5697{
5698        struct pci_dev *pdev = adapter->pdev;
5699        u8 __iomem *addr;
5700        u32 sli_intf;
5701
5702        pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5703        adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5704                                SLI_INTF_FAMILY_SHIFT;
5705        adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5706
5707        if (BEx_chip(adapter) && be_physfn(adapter)) {
5708                adapter->csr = pci_iomap(pdev, 2, 0);
5709                if (!adapter->csr)
5710                        return -ENOMEM;
5711        }
5712
5713        addr = pci_iomap(pdev, db_bar(adapter), 0);
5714        if (!addr)
5715                goto pci_map_err;
5716        adapter->db = addr;
5717
5718        if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5719                if (be_physfn(adapter)) {
5720                        /* PCICFG is the 2nd BAR in BE2 */
5721                        addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5722                        if (!addr)
5723                                goto pci_map_err;
5724                        adapter->pcicfg = addr;
5725                        adapter->pcicfg_mapped = true;
5726                } else {
5727                        adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5728                        adapter->pcicfg_mapped = false;
5729                }
5730        }
5731
5732        be_roce_map_pci_bars(adapter);
5733        return 0;
5734
5735pci_map_err:
5736        dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5737        be_unmap_pci_bars(adapter);
5738        return -ENOMEM;
5739}
5740
5741static void be_drv_cleanup(struct be_adapter *adapter)
5742{
5743        struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5744        struct device *dev = &adapter->pdev->dev;
5745
5746        if (mem->va)
5747                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5748
5749        mem = &adapter->rx_filter;
5750        if (mem->va)
5751                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5752
5753        mem = &adapter->stats_cmd;
5754        if (mem->va)
5755                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5756}
5757
5758/* Allocate and initialize various fields in be_adapter struct */
5759static int be_drv_init(struct be_adapter *adapter)
5760{
5761        struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5762        struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5763        struct be_dma_mem *rx_filter = &adapter->rx_filter;
5764        struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5765        struct device *dev = &adapter->pdev->dev;
5766        int status = 0;
5767
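        /* The bootstrap mailbox appears to require 16-byte alignment: the raw
         * allocation below is padded by 16 bytes, and mbox_mem is the
         * PTR_ALIGN()ed view that is actually used for mailbox commands.
         */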
5768        mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5769        mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5770                                                &mbox_mem_alloc->dma,
5771                                                GFP_KERNEL);
5772        if (!mbox_mem_alloc->va)
5773                return -ENOMEM;
5774
5775        mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5776        mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5777        mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5778
5779        rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5780        rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5781                                           &rx_filter->dma, GFP_KERNEL);
5782        if (!rx_filter->va) {
5783                status = -ENOMEM;
5784                goto free_mbox;
5785        }
5786
5787        if (lancer_chip(adapter))
5788                stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5789        else if (BE2_chip(adapter))
5790                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5791        else if (BE3_chip(adapter))
5792                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5793        else
5794                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5795        stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5796                                           &stats_cmd->dma, GFP_KERNEL);
5797        if (!stats_cmd->va) {
5798                status = -ENOMEM;
5799                goto free_rx_filter;
5800        }
5801
5802        mutex_init(&adapter->mbox_lock);
5803        mutex_init(&adapter->mcc_lock);
5804        mutex_init(&adapter->rx_filter_lock);
5805        spin_lock_init(&adapter->mcc_cq_lock);
5806        init_completion(&adapter->et_cmd_compl);
5807
5808        pci_save_state(adapter->pdev);
5809
5810        INIT_DELAYED_WORK(&adapter->work, be_worker);
5811
5812        adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5813        adapter->error_recovery.resched_delay = 0;
5814        INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5815                          be_err_detection_task);
5816
5817        adapter->rx_fc = true;
5818        adapter->tx_fc = true;
5819
5820        /* Must be a power of 2 or else MODULO will BUG_ON */
5821        adapter->be_get_temp_freq = 64;
5822
5823        INIT_LIST_HEAD(&adapter->vxlan_port_list);
5824        return 0;
5825
5826free_rx_filter:
5827        dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5828free_mbox:
5829        dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5830                          mbox_mem_alloc->dma);
5831        return status;
5832}
5833
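/* Teardown mirrors probe: detach RoCE, mask interrupts, stop the error
 * detection worker, unregister the netdev, tear down queues and firmware
 * state, then release BAR mappings, DMA memory and the PCI device itself.
 */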
5834static void be_remove(struct pci_dev *pdev)
5835{
5836        struct be_adapter *adapter = pci_get_drvdata(pdev);
5837
5838        if (!adapter)
5839                return;
5840
5841        be_roce_dev_remove(adapter);
5842        be_intr_set(adapter, false);
5843
5844        be_cancel_err_detection(adapter);
5845
5846        unregister_netdev(adapter->netdev);
5847
5848        be_clear(adapter);
5849
5850        if (!pci_vfs_assigned(adapter->pdev))
5851                be_cmd_reset_function(adapter);
5852
5853        /* tell fw we're done with firing cmds */
5854        be_cmd_fw_clean(adapter);
5855
5856        be_unmap_pci_bars(adapter);
5857        be_drv_cleanup(adapter);
5858
5859        pci_disable_pcie_error_reporting(pdev);
5860
5861        pci_release_regions(pdev);
5862        pci_disable_device(pdev);
5863
5864        free_netdev(adapter->netdev);
5865}
5866
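/* hwmon hook: report the cached on-die temperature (updated elsewhere by the
 * driver) in millidegrees Celsius, or -EIO while no valid reading exists.
 */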
5867static ssize_t be_hwmon_show_temp(struct device *dev,
5868                                  struct device_attribute *dev_attr,
5869                                  char *buf)
5870{
5871        struct be_adapter *adapter = dev_get_drvdata(dev);
5872
5873        /* Unit: millidegree Celsius */
5874        if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5875                return -EIO;
5876        else
5877                return sprintf(buf, "%u\n",
5878                               adapter->hwmon_info.be_on_die_temp * 1000);
5879}
5880
5881static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5882                          be_hwmon_show_temp, NULL, 1);
5883
5884static struct attribute *be_hwmon_attrs[] = {
5885        &sensor_dev_attr_temp1_input.dev_attr.attr,
5886        NULL
5887};
5888
5889ATTRIBUTE_GROUPS(be_hwmon);
5890
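/* mc_name()/func_name()/nic_name() build the human-readable identification
 * printed at the end of probe: multi-channel mode, PF/VF role and ASIC name.
 */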
5891static char *mc_name(struct be_adapter *adapter)
5892{
5893        char *str = ""; /* default */
5894
5895        switch (adapter->mc_type) {
5896        case UMC:
5897                str = "UMC";
5898                break;
5899        case FLEX10:
5900                str = "FLEX10";
5901                break;
5902        case vNIC1:
5903                str = "vNIC-1";
5904                break;
5905        case nPAR:
5906                str = "nPAR";
5907                break;
5908        case UFP:
5909                str = "UFP";
5910                break;
5911        case vNIC2:
5912                str = "vNIC-2";
5913                break;
5914        default:
5915                str = "";
5916        }
5917
5918        return str;
5919}
5920
5921static inline char *func_name(struct be_adapter *adapter)
5922{
5923        return be_physfn(adapter) ? "PF" : "VF";
5924}
5925
5926static inline char *nic_name(struct pci_dev *pdev)
5927{
5928        switch (pdev->device) {
5929        case OC_DEVICE_ID1:
5930                return OC_NAME;
5931        case OC_DEVICE_ID2:
5932                return OC_NAME_BE;
5933        case OC_DEVICE_ID3:
5934        case OC_DEVICE_ID4:
5935                return OC_NAME_LANCER;
5936        case BE_DEVICE_ID2:
5937                return BE3_NAME;
5938        case OC_DEVICE_ID5:
5939        case OC_DEVICE_ID6:
5940                return OC_NAME_SH;
5941        default:
5942                return BE_NAME;
5943        }
5944}
5945
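/* Probe sequence: enable the PCI device, claim its regions, pick a DMA mask,
 * map BARs, allocate driver-private DMA buffers (be_drv_init), bring the
 * adapter up (be_setup), and only then register the netdev so the stack never
 * sees a half-initialized device. Failures unwind in reverse order through
 * the labels at the bottom of the function.
 */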
5946static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5947{
5948        struct be_adapter *adapter;
5949        struct net_device *netdev;
5950        int status = 0;
5951
5952        dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5953
5954        status = pci_enable_device(pdev);
5955        if (status)
5956                goto do_none;
5957
5958        status = pci_request_regions(pdev, DRV_NAME);
5959        if (status)
5960                goto disable_dev;
5961        pci_set_master(pdev);
5962
5963        netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5964        if (!netdev) {
5965                status = -ENOMEM;
5966                goto rel_reg;
5967        }
5968        adapter = netdev_priv(netdev);
5969        adapter->pdev = pdev;
5970        pci_set_drvdata(pdev, adapter);
5971        adapter->netdev = netdev;
5972        SET_NETDEV_DEV(netdev, &pdev->dev);
5973
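        /* Prefer a 64-bit DMA mask and advertise NETIF_F_HIGHDMA if it is
         * granted; otherwise fall back to a 32-bit mask or fail the probe.
         */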
5974        status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5975        if (!status) {
5976                netdev->features |= NETIF_F_HIGHDMA;
5977        } else {
5978                status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5979                if (status) {
5980                        dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5981                        goto free_netdev;
5982                }
5983        }
5984
5985        status = pci_enable_pcie_error_reporting(pdev);
5986        if (!status)
5987                dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5988
5989        status = be_map_pci_bars(adapter);
5990        if (status)
5991                goto free_netdev;
5992
5993        status = be_drv_init(adapter);
5994        if (status)
5995                goto unmap_bars;
5996
5997        status = be_setup(adapter);
5998        if (status)
5999                goto drv_cleanup;
6000
6001        be_netdev_init(netdev);
6002        status = register_netdev(netdev);
6003        if (status != 0)
6004                goto unsetup;
6005
6006        be_roce_dev_add(adapter);
6007
6008        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6009        adapter->error_recovery.probe_time = jiffies;
6010
        /* On-die temperature is not supported for VFs. */
6012        if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
6013                adapter->hwmon_info.hwmon_dev =
6014                        devm_hwmon_device_register_with_groups(&pdev->dev,
6015                                                               DRV_NAME,
6016                                                               adapter,
6017                                                               be_hwmon_groups);
6018                adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
6019        }
6020
6021        dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
6022                 func_name(adapter), mc_name(adapter), adapter->port_name);
6023
6024        return 0;
6025
6026unsetup:
6027        be_clear(adapter);
6028drv_cleanup:
6029        be_drv_cleanup(adapter);
6030unmap_bars:
6031        be_unmap_pci_bars(adapter);
6032free_netdev:
6033        free_netdev(netdev);
6034rel_reg:
6035        pci_release_regions(pdev);
6036disable_dev:
6037        pci_disable_device(pdev);
6038do_none:
6039        dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
6040        return status;
6041}
6042
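/* Legacy PCI power-management hooks: suspend quiesces the adapter and powers
 * the device down; resume re-enables it, restores config space, rebuilds the
 * adapter via be_resume() and re-arms error detection.
 */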
6043static int be_suspend(struct pci_dev *pdev, pm_message_t state)
6044{
6045        struct be_adapter *adapter = pci_get_drvdata(pdev);
6046
6047        be_intr_set(adapter, false);
6048        be_cancel_err_detection(adapter);
6049
6050        be_cleanup(adapter);
6051
6052        pci_save_state(pdev);
6053        pci_disable_device(pdev);
6054        pci_set_power_state(pdev, pci_choose_state(pdev, state));
6055        return 0;
6056}
6057
6058static int be_pci_resume(struct pci_dev *pdev)
6059{
6060        struct be_adapter *adapter = pci_get_drvdata(pdev);
6061        int status = 0;
6062
6063        status = pci_enable_device(pdev);
6064        if (status)
6065                return status;
6066
6067        pci_restore_state(pdev);
6068
6069        status = be_resume(adapter);
6070        if (status)
6071                return status;
6072
6073        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6074
6075        return 0;
6076}
6077
/*
 * An FLR (function-level reset) stops BE from DMAing any data.
 */
6081static void be_shutdown(struct pci_dev *pdev)
6082{
6083        struct be_adapter *adapter = pci_get_drvdata(pdev);
6084
6085        if (!adapter)
6086                return;
6087
6088        be_roce_dev_shutdown(adapter);
6089        cancel_delayed_work_sync(&adapter->work);
6090        be_cancel_err_detection(adapter);
6091
6092        netif_device_detach(adapter->netdev);
6093
6094        be_cmd_reset_function(adapter);
6095
6096        pci_disable_device(pdev);
6097}
6098
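/* EEH (PCI error recovery) callbacks: error_detected tears the adapter down
 * and asks for a slot reset, slot_reset waits for the firmware to become
 * ready again, and resume rebuilds the adapter and restarts error detection.
 */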
6099static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6100                                            pci_channel_state_t state)
6101{
6102        struct be_adapter *adapter = pci_get_drvdata(pdev);
6103
6104        dev_err(&adapter->pdev->dev, "EEH error detected\n");
6105
6106        be_roce_dev_remove(adapter);
6107
6108        if (!be_check_error(adapter, BE_ERROR_EEH)) {
6109                be_set_error(adapter, BE_ERROR_EEH);
6110
6111                be_cancel_err_detection(adapter);
6112
6113                be_cleanup(adapter);
6114        }
6115
6116        if (state == pci_channel_io_perm_failure)
6117                return PCI_ERS_RESULT_DISCONNECT;
6118
6119        pci_disable_device(pdev);
6120
        /* The error could cause the FW to trigger a flash debug dump.
         * Resetting the card while a flash dump is in progress
         * can cause it not to recover; wait for the dump to finish.
         * Wait only on the first function, as the wait is needed only
         * once per adapter.
         */
6127        if (pdev->devfn == 0)
6128                ssleep(30);
6129
6130        return PCI_ERS_RESULT_NEED_RESET;
6131}
6132
6133static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6134{
6135        struct be_adapter *adapter = pci_get_drvdata(pdev);
6136        int status;
6137
6138        dev_info(&adapter->pdev->dev, "EEH reset\n");
6139
6140        status = pci_enable_device(pdev);
6141        if (status)
6142                return PCI_ERS_RESULT_DISCONNECT;
6143
6144        pci_set_master(pdev);
6145        pci_restore_state(pdev);
6146
6147        /* Check if card is ok and fw is ready */
6148        dev_info(&adapter->pdev->dev,
6149                 "Waiting for FW to be ready after EEH reset\n");
6150        status = be_fw_wait_ready(adapter);
6151        if (status)
6152                return PCI_ERS_RESULT_DISCONNECT;
6153
6154        be_clear_error(adapter, BE_CLEAR_ALL);
6155        return PCI_ERS_RESULT_RECOVERED;
6156}
6157
6158static void be_eeh_resume(struct pci_dev *pdev)
6159{
6160        int status = 0;
6161        struct be_adapter *adapter = pci_get_drvdata(pdev);
6162
6163        dev_info(&adapter->pdev->dev, "EEH resume\n");
6164
6165        pci_save_state(pdev);
6166
6167        status = be_resume(adapter);
6168        if (status)
6169                goto err;
6170
6171        be_roce_dev_add(adapter);
6172
6173        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6174        return;
6175err:
6176        dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6177}
6178
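/* sriov_configure hook, invoked by the PCI core when sriov_numvfs is written:
 * num_vfs == 0 tears down existing VFs; a non-zero value redistributes
 * PF-pool resources (Skyhawk only) and then creates the requested VFs.
 * Returns the number of VFs enabled, or a negative error code.
 */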
6179static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6180{
6181        struct be_adapter *adapter = pci_get_drvdata(pdev);
6182        struct be_resources vft_res = {0};
6183        int status;
6184
6185        if (!num_vfs)
6186                be_vf_clear(adapter);
6187
6188        adapter->num_vfs = num_vfs;
6189
6190        if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6191                dev_warn(&pdev->dev,
6192                         "Cannot disable VFs while they are assigned\n");
6193                return -EBUSY;
6194        }
6195
        /* When the HW is in an SRIOV-capable configuration, the PF-pool
         * resources are equally distributed across the maximum number of VFs.
         * The user may request that only a subset of the max VFs be enabled.
         * Based on num_vfs, redistribute the resources across num_vfs so that
         * each VF gets access to a larger share of the resources.
         * This facility is not available in BE3 FW.
         * On the Lancer chip this redistribution is done by the FW itself.
         */
6204        if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6205                be_calculate_vf_res(adapter, adapter->num_vfs,
6206                                    &vft_res);
6207                status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6208                                                 adapter->num_vfs, &vft_res);
6209                if (status)
6210                        dev_err(&pdev->dev,
6211                                "Failed to optimize SR-IOV resources\n");
6212        }
6213
6214        status = be_get_resources(adapter);
6215        if (status)
6216                return be_cmd_status(status);
6217
6218        /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6219        rtnl_lock();
6220        status = be_update_queues(adapter);
6221        rtnl_unlock();
6222        if (status)
6223                return be_cmd_status(status);
6224
6225        if (adapter->num_vfs)
6226                status = be_vf_setup(adapter);
6227
6228        if (!status)
6229                return adapter->num_vfs;
6230
6231        return 0;
6232}
6233
6234static const struct pci_error_handlers be_eeh_handlers = {
6235        .error_detected = be_eeh_err_detected,
6236        .slot_reset = be_eeh_reset,
6237        .resume = be_eeh_resume,
6238};
6239
6240static struct pci_driver be_driver = {
6241        .name = DRV_NAME,
6242        .id_table = be_dev_ids,
6243        .probe = be_probe,
6244        .remove = be_remove,
6245        .suspend = be_suspend,
6246        .resume = be_pci_resume,
6247        .shutdown = be_shutdown,
6248        .sriov_configure = be_pci_sriov_configure,
6249        .err_handler = &be_eeh_handlers
6250};
6251
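/* Module init: validate module parameters, create the shared command and
 * error-recovery workqueues, then register the PCI driver. The error-recovery
 * workqueue is treated as optional; failure to create it only logs a warning.
 */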
6252static int __init be_init_module(void)
6253{
6254        int status;
6255
6256        if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6257            rx_frag_size != 2048) {
                pr_warn(DRV_NAME " : Module param rx_frag_size must be 2048/4096/8192. Using 2048\n");
6261                rx_frag_size = 2048;
6262        }
6263
6264        if (num_vfs > 0) {
                pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6266                pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6267        }
6268
6269        be_wq = create_singlethread_workqueue("be_wq");
6270        if (!be_wq) {
                pr_warn(DRV_NAME " : workqueue creation failed\n");
                return -ENOMEM;
6273        }
6274
6275        be_err_recovery_workq =
6276                create_singlethread_workqueue("be_err_recover");
6277        if (!be_err_recovery_workq)
                pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6279
6280        status = pci_register_driver(&be_driver);
6281        if (status) {
6282                destroy_workqueue(be_wq);
6283                be_destroy_err_recovery_workq();
6284        }
6285        return status;
6286}
6287module_init(be_init_module);
6288
6289static void __exit be_exit_module(void)
6290{
6291        pci_unregister_driver(&be_driver);
6292
6293        be_destroy_err_recovery_workq();
6294
6295        if (be_wq)
6296                destroy_workqueue(be_wq);
6297}
6298module_exit(be_exit_module);
6299