linux/drivers/net/ethernet/emulex/benet/be_main.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2005 - 2016 Broadcom
   4 * All rights reserved.
   5 *
   6 * Contact Information:
   7 * linux-drivers@emulex.com
   8 *
   9 * Emulex
  10 * 3333 Susan Street
  11 * Costa Mesa, CA 92626
  12 */
  13
  14#include <linux/prefetch.h>
  15#include <linux/module.h>
  16#include "be.h"
  17#include "be_cmds.h"
  18#include <asm/div64.h>
  19#include <linux/aer.h>
  20#include <linux/if_bridge.h>
  21#include <net/busy_poll.h>
  22#include <net/vxlan.h>
  23
  24MODULE_VERSION(DRV_VER);
  25MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
  26MODULE_AUTHOR("Emulex Corporation");
  27MODULE_LICENSE("GPL");
  28
  29/* num_vfs module param is obsolete.
  30 * Use sysfs method to enable/disable VFs.
  31 */
  32static unsigned int num_vfs;
  33module_param(num_vfs, uint, 0444);
  34MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
  35
  36static ushort rx_frag_size = 2048;
  37module_param(rx_frag_size, ushort, 0444);
  38MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
  39
  40/* Per-module error detection/recovery workq shared across all functions.
  41 * Each function schedules its own work request on this shared workq.
  42 */
  43static struct workqueue_struct *be_err_recovery_workq;
  44
  45static const struct pci_device_id be_dev_ids[] = {
  46#ifdef CONFIG_BE2NET_BE2
  47        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
  48        { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
  49#endif /* CONFIG_BE2NET_BE2 */
  50#ifdef CONFIG_BE2NET_BE3
  51        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
  52        { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
  53#endif /* CONFIG_BE2NET_BE3 */
  54#ifdef CONFIG_BE2NET_LANCER
  55        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
  56        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
  57#endif /* CONFIG_BE2NET_LANCER */
  58#ifdef CONFIG_BE2NET_SKYHAWK
  59        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
  60        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
  61#endif /* CONFIG_BE2NET_SKYHAWK */
  62        { 0 }
  63};
  64MODULE_DEVICE_TABLE(pci, be_dev_ids);
  65
   66/* Workqueue used by all functions for deferring cmd calls to the adapter */
  67static struct workqueue_struct *be_wq;
  68
  69/* UE Status Low CSR */
  70static const char * const ue_status_low_desc[] = {
  71        "CEV",
  72        "CTX",
  73        "DBUF",
  74        "ERX",
  75        "Host",
  76        "MPU",
  77        "NDMA",
  78        "PTC ",
  79        "RDMA ",
  80        "RXF ",
  81        "RXIPS ",
  82        "RXULP0 ",
  83        "RXULP1 ",
  84        "RXULP2 ",
  85        "TIM ",
  86        "TPOST ",
  87        "TPRE ",
  88        "TXIPS ",
  89        "TXULP0 ",
  90        "TXULP1 ",
  91        "UC ",
  92        "WDMA ",
  93        "TXULP2 ",
  94        "HOST1 ",
  95        "P0_OB_LINK ",
  96        "P1_OB_LINK ",
  97        "HOST_GPIO ",
  98        "MBOX ",
  99        "ERX2 ",
 100        "SPARE ",
 101        "JTAG ",
 102        "MPU_INTPEND "
 103};
 104
 105/* UE Status High CSR */
 106static const char * const ue_status_hi_desc[] = {
 107        "LPCMEMHOST",
 108        "MGMT_MAC",
 109        "PCS0ONLINE",
 110        "MPU_IRAM",
 111        "PCS1ONLINE",
 112        "PCTL0",
 113        "PCTL1",
 114        "PMEM",
 115        "RR",
 116        "TXPB",
 117        "RXPP",
 118        "XAUI",
 119        "TXP",
 120        "ARM",
 121        "IPC",
 122        "HOST2",
 123        "HOST3",
 124        "HOST4",
 125        "HOST5",
 126        "HOST6",
 127        "HOST7",
 128        "ECRC",
 129        "Poison TLP",
 130        "NETC",
 131        "PERIPH",
 132        "LLTXULP",
 133        "D2P",
 134        "RCON",
 135        "LDMA",
 136        "LLTXP",
 137        "LLTXPB",
 138        "Unknown"
 139};
 140
 141#define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
 142                                 BE_IF_FLAGS_BROADCAST | \
 143                                 BE_IF_FLAGS_MULTICAST | \
 144                                 BE_IF_FLAGS_PASS_L3L4_ERRORS)
 145
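/* Each BE queue ring (EQ/CQ/TXQ/RXQ/MCCQ) is backed by one coherent DMA
 * buffer of len * entry_size bytes; the two helpers below allocate and
 * free that backing memory.
 */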
 146static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
 147{
 148        struct be_dma_mem *mem = &q->dma_mem;
 149
 150        if (mem->va) {
 151                dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
 152                                  mem->dma);
 153                mem->va = NULL;
 154        }
 155}
 156
 157static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
 158                          u16 len, u16 entry_size)
 159{
 160        struct be_dma_mem *mem = &q->dma_mem;
 161
 162        memset(q, 0, sizeof(*q));
 163        q->len = len;
 164        q->entry_size = entry_size;
 165        mem->size = len * entry_size;
 166        mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
 167                                     &mem->dma, GFP_KERNEL);
 168        if (!mem->va)
 169                return -ENOMEM;
 170        return 0;
 171}
 172
 173static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
 174{
 175        u32 reg, enabled;
 176
 177        pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
 178                              &reg);
 179        enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 180
 181        if (!enabled && enable)
 182                reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 183        else if (enabled && !enable)
 184                reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 185        else
 186                return;
 187
 188        pci_write_config_dword(adapter->pdev,
 189                               PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
 190}
 191
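/* Host interrupts are normally toggled via the INTR_SET FW command; if that
 * command fails, fall back to flipping the HOSTINTR bit in PCI config space
 * (be_reg_intr_set() above). Lancer does not use this path at all.
 */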
 192static void be_intr_set(struct be_adapter *adapter, bool enable)
 193{
 194        int status = 0;
 195
 196        /* On lancer interrupts can't be controlled via this register */
 197        if (lancer_chip(adapter))
 198                return;
 199
 200        if (be_check_error(adapter, BE_ERROR_EEH))
 201                return;
 202
 203        status = be_cmd_intr_set(adapter, enable);
 204        if (status)
 205                be_reg_intr_set(adapter, enable);
 206}
 207
 208static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
 209{
 210        u32 val = 0;
 211
 212        if (be_check_error(adapter, BE_ERROR_HW))
 213                return;
 214
 215        val |= qid & DB_RQ_RING_ID_MASK;
 216        val |= posted << DB_RQ_NUM_POSTED_SHIFT;
 217
 218        wmb();
 219        iowrite32(val, adapter->db + DB_RQ_OFFSET);
 220}
 221
 222static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
 223                          u16 posted)
 224{
 225        u32 val = 0;
 226
 227        if (be_check_error(adapter, BE_ERROR_HW))
 228                return;
 229
 230        val |= txo->q.id & DB_TXULP_RING_ID_MASK;
 231        val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
 232
 233        wmb();
 234        iowrite32(val, adapter->db + txo->db_offset);
 235}
 236
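/* Ring the EQ doorbell: the written word encodes the ring id (plus its
 * extension bits), the re-arm and clear-interrupt flags, the number of
 * popped EQ entries and an encoded EQ-delay multiplier.
 */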
 237static void be_eq_notify(struct be_adapter *adapter, u16 qid,
 238                         bool arm, bool clear_int, u16 num_popped,
 239                         u32 eq_delay_mult_enc)
 240{
 241        u32 val = 0;
 242
 243        val |= qid & DB_EQ_RING_ID_MASK;
 244        val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
 245
 246        if (be_check_error(adapter, BE_ERROR_HW))
 247                return;
 248
 249        if (arm)
 250                val |= 1 << DB_EQ_REARM_SHIFT;
 251        if (clear_int)
 252                val |= 1 << DB_EQ_CLR_SHIFT;
 253        val |= 1 << DB_EQ_EVNT_SHIFT;
 254        val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
 255        val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
 256        iowrite32(val, adapter->db + DB_EQ_OFFSET);
 257}
 258
 259void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
 260{
 261        u32 val = 0;
 262
 263        val |= qid & DB_CQ_RING_ID_MASK;
 264        val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
 265                        DB_CQ_RING_ID_EXT_MASK_SHIFT);
 266
 267        if (be_check_error(adapter, BE_ERROR_HW))
 268                return;
 269
 270        if (arm)
 271                val |= 1 << DB_CQ_REARM_SHIFT;
 272        val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
 273        iowrite32(val, adapter->db + DB_CQ_OFFSET);
 274}
 275
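/* pmac_id[0] tracks the currently programmed (primary) MAC, while
 * pmac_id[1..uc_macs] track the uc-list entries. The add/del helpers below
 * avoid programming or deleting a MAC that is already owned by the uc-list.
 */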
 276static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
 277{
 278        int i;
 279
 280        /* Check if mac has already been added as part of uc-list */
 281        for (i = 0; i < adapter->uc_macs; i++) {
 282                if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
 283                        /* mac already added, skip addition */
 284                        adapter->pmac_id[0] = adapter->pmac_id[i + 1];
 285                        return 0;
 286                }
 287        }
 288
 289        return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
 290                               &adapter->pmac_id[0], 0);
 291}
 292
 293static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
 294{
 295        int i;
 296
 297        /* Skip deletion if the programmed mac is
 298         * being used in uc-list
 299         */
 300        for (i = 0; i < adapter->uc_macs; i++) {
 301                if (adapter->pmac_id[i + 1] == pmac_id)
 302                        return;
 303        }
 304        be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
 305}
 306
 307static int be_mac_addr_set(struct net_device *netdev, void *p)
 308{
 309        struct be_adapter *adapter = netdev_priv(netdev);
 310        struct device *dev = &adapter->pdev->dev;
 311        struct sockaddr *addr = p;
 312        int status;
 313        u8 mac[ETH_ALEN];
 314        u32 old_pmac_id = adapter->pmac_id[0];
 315
 316        if (!is_valid_ether_addr(addr->sa_data))
 317                return -EADDRNOTAVAIL;
 318
  319        /* Proceed further only if the user-provided MAC is different
  320         * from the active MAC
  321         */
 322        if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
 323                return 0;
 324
  325        /* BE3 VFs without FILTMGMT privilege are not allowed to set their
  326         * MAC address
  327         */
 328        if (BEx_chip(adapter) && be_virtfn(adapter) &&
 329            !check_privilege(adapter, BE_PRIV_FILTMGMT))
 330                return -EPERM;
 331
 332        /* if device is not running, copy MAC to netdev->dev_addr */
 333        if (!netif_running(netdev))
 334                goto done;
 335
 336        /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
 337         * privilege or if PF did not provision the new MAC address.
 338         * On BE3, this cmd will always fail if the VF doesn't have the
  339         * FILTMGMT privilege. This failure is OK only if the PF programmed
  340         * the MAC for the VF.
 341         */
 342        mutex_lock(&adapter->rx_filter_lock);
 343        status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
 344        if (!status) {
 345
 346                /* Delete the old programmed MAC. This call may fail if the
 347                 * old MAC was already deleted by the PF driver.
 348                 */
 349                if (adapter->pmac_id[0] != old_pmac_id)
 350                        be_dev_mac_del(adapter, old_pmac_id);
 351        }
 352
 353        mutex_unlock(&adapter->rx_filter_lock);
  354        /* Decide whether the new MAC was successfully activated only after
  355         * querying the FW
 356         */
 357        status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
 358                                       adapter->if_handle, true, 0);
 359        if (status)
 360                goto err;
 361
 362        /* The MAC change did not happen, either due to lack of privilege
  363         * or because the PF didn't pre-provision the new MAC.
 364         */
 365        if (!ether_addr_equal(addr->sa_data, mac)) {
 366                status = -EPERM;
 367                goto err;
 368        }
 369
 370        /* Remember currently programmed MAC */
 371        ether_addr_copy(adapter->dev_mac, addr->sa_data);
 372done:
 373        ether_addr_copy(netdev->dev_addr, addr->sa_data);
 374        dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
 375        return 0;
 376err:
 377        dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
 378        return status;
 379}
 380
 381/* BE2 supports only v0 cmd */
 382static void *hw_stats_from_cmd(struct be_adapter *adapter)
 383{
 384        if (BE2_chip(adapter)) {
 385                struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
 386
 387                return &cmd->hw_stats;
 388        } else if (BE3_chip(adapter)) {
 389                struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
 390
 391                return &cmd->hw_stats;
 392        } else {
 393                struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
 394
 395                return &cmd->hw_stats;
 396        }
 397}
 398
 399/* BE2 supports only v0 cmd */
 400static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
 401{
 402        if (BE2_chip(adapter)) {
 403                struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 404
 405                return &hw_stats->erx;
 406        } else if (BE3_chip(adapter)) {
 407                struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 408
 409                return &hw_stats->erx;
 410        } else {
 411                struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 412
 413                return &hw_stats->erx;
 414        }
 415}
 416
 417static void populate_be_v0_stats(struct be_adapter *adapter)
 418{
 419        struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 420        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 421        struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
 422        struct be_port_rxf_stats_v0 *port_stats =
 423                                        &rxf_stats->port[adapter->port_num];
 424        struct be_drv_stats *drvs = &adapter->drv_stats;
 425
 426        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 427        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 428        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 429        drvs->rx_control_frames = port_stats->rx_control_frames;
 430        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 431        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 432        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 433        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 434        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 435        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 436        drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
 437        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 438        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 439        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 440        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 441        drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
 442        drvs->rx_dropped_header_too_small =
 443                port_stats->rx_dropped_header_too_small;
 444        drvs->rx_address_filtered =
 445                                        port_stats->rx_address_filtered +
 446                                        port_stats->rx_vlan_filtered;
 447        drvs->rx_alignment_symbol_errors =
 448                port_stats->rx_alignment_symbol_errors;
 449
 450        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 451        drvs->tx_controlframes = port_stats->tx_controlframes;
 452
 453        if (adapter->port_num)
 454                drvs->jabber_events = rxf_stats->port1_jabber_events;
 455        else
 456                drvs->jabber_events = rxf_stats->port0_jabber_events;
 457        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 458        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 459        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 460        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 461        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 462        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 463        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 464}
 465
 466static void populate_be_v1_stats(struct be_adapter *adapter)
 467{
 468        struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 469        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 470        struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
 471        struct be_port_rxf_stats_v1 *port_stats =
 472                                        &rxf_stats->port[adapter->port_num];
 473        struct be_drv_stats *drvs = &adapter->drv_stats;
 474
 475        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 476        drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 477        drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 478        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 479        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 480        drvs->rx_control_frames = port_stats->rx_control_frames;
 481        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 482        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 483        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 484        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 485        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 486        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 487        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 488        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 489        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 490        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 491        drvs->rx_dropped_header_too_small =
 492                port_stats->rx_dropped_header_too_small;
 493        drvs->rx_input_fifo_overflow_drop =
 494                port_stats->rx_input_fifo_overflow_drop;
 495        drvs->rx_address_filtered = port_stats->rx_address_filtered;
 496        drvs->rx_alignment_symbol_errors =
 497                port_stats->rx_alignment_symbol_errors;
 498        drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 499        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 500        drvs->tx_controlframes = port_stats->tx_controlframes;
 501        drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 502        drvs->jabber_events = port_stats->jabber_events;
 503        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 504        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 505        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 506        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 507        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 508        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 509        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 510}
 511
 512static void populate_be_v2_stats(struct be_adapter *adapter)
 513{
 514        struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 515        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 516        struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
 517        struct be_port_rxf_stats_v2 *port_stats =
 518                                        &rxf_stats->port[adapter->port_num];
 519        struct be_drv_stats *drvs = &adapter->drv_stats;
 520
 521        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 522        drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 523        drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 524        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 525        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 526        drvs->rx_control_frames = port_stats->rx_control_frames;
 527        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 528        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 529        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 530        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 531        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 532        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 533        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 534        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 535        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 536        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 537        drvs->rx_dropped_header_too_small =
 538                port_stats->rx_dropped_header_too_small;
 539        drvs->rx_input_fifo_overflow_drop =
 540                port_stats->rx_input_fifo_overflow_drop;
 541        drvs->rx_address_filtered = port_stats->rx_address_filtered;
 542        drvs->rx_alignment_symbol_errors =
 543                port_stats->rx_alignment_symbol_errors;
 544        drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 545        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 546        drvs->tx_controlframes = port_stats->tx_controlframes;
 547        drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 548        drvs->jabber_events = port_stats->jabber_events;
 549        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 550        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 551        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 552        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 553        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 554        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 555        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 556        if (be_roce_supported(adapter)) {
 557                drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
 558                drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
 559                drvs->rx_roce_frames = port_stats->roce_frames_received;
 560                drvs->roce_drops_crc = port_stats->roce_drops_crc;
 561                drvs->roce_drops_payload_len =
 562                        port_stats->roce_drops_payload_len;
 563        }
 564}
 565
 566static void populate_lancer_stats(struct be_adapter *adapter)
 567{
 568        struct be_drv_stats *drvs = &adapter->drv_stats;
 569        struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
 570
 571        be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
 572        drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
 573        drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
 574        drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
 575        drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
 576        drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
 577        drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
 578        drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
 579        drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
 580        drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
 581        drvs->rx_dropped_tcp_length =
 582                                pport_stats->rx_dropped_invalid_tcp_length;
 583        drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
 584        drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
 585        drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
 586        drvs->rx_dropped_header_too_small =
 587                                pport_stats->rx_dropped_header_too_small;
 588        drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 589        drvs->rx_address_filtered =
 590                                        pport_stats->rx_address_filtered +
 591                                        pport_stats->rx_vlan_filtered;
 592        drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
 593        drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 594        drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
 595        drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
 596        drvs->jabber_events = pport_stats->rx_jabbers;
 597        drvs->forwarded_packets = pport_stats->num_forwards_lo;
 598        drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
 599        drvs->rx_drops_too_many_frags =
 600                                pport_stats->rx_drops_too_many_frags_lo;
 601}
 602
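/* Fold a 16-bit HW counter into a 32-bit driver accumulator: the low 16 bits
 * mirror the last HW reading and the high 16 bits count wrap-arounds. A new
 * reading smaller than the stored low half means the HW counter wrapped once,
 * e.g. *acc = 0x0001fff0, val = 0x0005 -> new *acc = 0x00020005.
 */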
 603static void accumulate_16bit_val(u32 *acc, u16 val)
 604{
 605#define lo(x)                   (x & 0xFFFF)
 606#define hi(x)                   (x & 0xFFFF0000)
 607        bool wrapped = val < lo(*acc);
 608        u32 newacc = hi(*acc) + val;
 609
 610        if (wrapped)
 611                newacc += 65536;
 612        WRITE_ONCE(*acc, newacc);
 613}
 614
 615static void populate_erx_stats(struct be_adapter *adapter,
 616                               struct be_rx_obj *rxo, u32 erx_stat)
 617{
 618        if (!BEx_chip(adapter))
 619                rx_stats(rxo)->rx_drops_no_frags = erx_stat;
 620        else
  621                /* the erx HW counter below can actually wrap around after
  622                 * 65535; the driver accumulates it into a 32-bit value
  623                 */
 624                accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
 625                                     (u16)erx_stat);
 626}
 627
 628void be_parse_stats(struct be_adapter *adapter)
 629{
 630        struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
 631        struct be_rx_obj *rxo;
 632        int i;
 633        u32 erx_stat;
 634
 635        if (lancer_chip(adapter)) {
 636                populate_lancer_stats(adapter);
 637        } else {
 638                if (BE2_chip(adapter))
 639                        populate_be_v0_stats(adapter);
 640                else if (BE3_chip(adapter))
 641                        /* for BE3 */
 642                        populate_be_v1_stats(adapter);
 643                else
 644                        populate_be_v2_stats(adapter);
 645
 646                /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
 647                for_all_rx_queues(adapter, rxo, i) {
 648                        erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
 649                        populate_erx_stats(adapter, rxo, erx_stat);
 650                }
 651        }
 652}
 653
 654static void be_get_stats64(struct net_device *netdev,
 655                           struct rtnl_link_stats64 *stats)
 656{
 657        struct be_adapter *adapter = netdev_priv(netdev);
 658        struct be_drv_stats *drvs = &adapter->drv_stats;
 659        struct be_rx_obj *rxo;
 660        struct be_tx_obj *txo;
 661        u64 pkts, bytes;
 662        unsigned int start;
 663        int i;
 664
 665        for_all_rx_queues(adapter, rxo, i) {
 666                const struct be_rx_stats *rx_stats = rx_stats(rxo);
 667
 668                do {
 669                        start = u64_stats_fetch_begin_irq(&rx_stats->sync);
 670                        pkts = rx_stats(rxo)->rx_pkts;
 671                        bytes = rx_stats(rxo)->rx_bytes;
 672                } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
 673                stats->rx_packets += pkts;
 674                stats->rx_bytes += bytes;
 675                stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
 676                stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
 677                                        rx_stats(rxo)->rx_drops_no_frags;
 678        }
 679
 680        for_all_tx_queues(adapter, txo, i) {
 681                const struct be_tx_stats *tx_stats = tx_stats(txo);
 682
 683                do {
 684                        start = u64_stats_fetch_begin_irq(&tx_stats->sync);
 685                        pkts = tx_stats(txo)->tx_pkts;
 686                        bytes = tx_stats(txo)->tx_bytes;
 687                } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
 688                stats->tx_packets += pkts;
 689                stats->tx_bytes += bytes;
 690        }
 691
 692        /* bad pkts received */
 693        stats->rx_errors = drvs->rx_crc_errors +
 694                drvs->rx_alignment_symbol_errors +
 695                drvs->rx_in_range_errors +
 696                drvs->rx_out_range_errors +
 697                drvs->rx_frame_too_long +
 698                drvs->rx_dropped_too_small +
 699                drvs->rx_dropped_too_short +
 700                drvs->rx_dropped_header_too_small +
 701                drvs->rx_dropped_tcp_length +
 702                drvs->rx_dropped_runt;
 703
 704        /* detailed rx errors */
 705        stats->rx_length_errors = drvs->rx_in_range_errors +
 706                drvs->rx_out_range_errors +
 707                drvs->rx_frame_too_long;
 708
 709        stats->rx_crc_errors = drvs->rx_crc_errors;
 710
 711        /* frame alignment errors */
 712        stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
 713
 714        /* receiver fifo overrun */
  715        /* drops_no_pbuf is not per i/f, it's per BE card */
 716        stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
 717                                drvs->rx_input_fifo_overflow_drop +
 718                                drvs->rx_drops_no_pbuf;
 719}
 720
 721void be_link_status_update(struct be_adapter *adapter, u8 link_status)
 722{
 723        struct net_device *netdev = adapter->netdev;
 724
 725        if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
 726                netif_carrier_off(netdev);
 727                adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
 728        }
 729
 730        if (link_status)
 731                netif_carrier_on(netdev);
 732        else
 733                netif_carrier_off(netdev);
 734
 735        netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
 736}
 737
 738static int be_gso_hdr_len(struct sk_buff *skb)
 739{
 740        if (skb->encapsulation)
 741                return skb_inner_transport_offset(skb) +
 742                       inner_tcp_hdrlen(skb);
 743        return skb_transport_offset(skb) + tcp_hdrlen(skb);
 744}
 745
 746static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
 747{
 748        struct be_tx_stats *stats = tx_stats(txo);
 749        u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
 750        /* Account for headers which get duplicated in TSO pkt */
 751        u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
 752
 753        u64_stats_update_begin(&stats->sync);
 754        stats->tx_reqs++;
 755        stats->tx_bytes += skb->len + dup_hdr_len;
 756        stats->tx_pkts += tx_pkts;
 757        if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
 758                stats->tx_vxlan_offload_pkts += tx_pkts;
 759        u64_stats_update_end(&stats->sync);
 760}
 761
 762/* Returns number of WRBs needed for the skb */
 763static u32 skb_wrb_cnt(struct sk_buff *skb)
 764{
 765        /* +1 for the header wrb */
 766        return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
 767}
 768
 769static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
 770{
 771        wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
 772        wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
 773        wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
 774        wrb->rsvd0 = 0;
 775}
 776
 777/* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
 778 * to avoid the swap and shift/mask operations in wrb_fill().
 779 */
 780static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
 781{
 782        wrb->frag_pa_hi = 0;
 783        wrb->frag_pa_lo = 0;
 784        wrb->frag_len = 0;
 785        wrb->rsvd0 = 0;
 786}
 787
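/* Build the TX VLAN tag: if the skb's 802.1p priority is not set in the
 * priority bitmap allowed for this function, replace the PCP bits with the
 * FW-recommended priority while keeping the VLAN id.
 */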
 788static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
 789                                     struct sk_buff *skb)
 790{
 791        u8 vlan_prio;
 792        u16 vlan_tag;
 793
 794        vlan_tag = skb_vlan_tag_get(skb);
 795        vlan_prio = skb_vlan_tag_get_prio(skb);
 796        /* If vlan priority provided by OS is NOT in available bmap */
 797        if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
 798                vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
 799                                adapter->recommended_prio_bits;
 800
 801        return vlan_tag;
 802}
 803
 804/* Used only for IP tunnel packets */
 805static u16 skb_inner_ip_proto(struct sk_buff *skb)
 806{
 807        return (inner_ip_hdr(skb)->version == 4) ?
 808                inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
 809}
 810
 811static u16 skb_ip_proto(struct sk_buff *skb)
 812{
 813        return (ip_hdr(skb)->version == 4) ?
 814                ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
 815}
 816
 817static inline bool be_is_txq_full(struct be_tx_obj *txo)
 818{
 819        return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
 820}
 821
 822static inline bool be_can_txq_wake(struct be_tx_obj *txo)
 823{
 824        return atomic_read(&txo->q.used) < txo->q.len / 2;
 825}
 826
 827static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
 828{
 829        return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
 830}
 831
 832static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
 833                                       struct sk_buff *skb,
 834                                       struct be_wrb_params *wrb_params)
 835{
 836        u16 proto;
 837
 838        if (skb_is_gso(skb)) {
 839                BE_WRB_F_SET(wrb_params->features, LSO, 1);
 840                wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
 841                if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
 842                        BE_WRB_F_SET(wrb_params->features, LSO6, 1);
 843        } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 844                if (skb->encapsulation) {
 845                        BE_WRB_F_SET(wrb_params->features, IPCS, 1);
 846                        proto = skb_inner_ip_proto(skb);
 847                } else {
 848                        proto = skb_ip_proto(skb);
 849                }
 850                if (proto == IPPROTO_TCP)
 851                        BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
 852                else if (proto == IPPROTO_UDP)
 853                        BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
 854        }
 855
 856        if (skb_vlan_tag_present(skb)) {
 857                BE_WRB_F_SET(wrb_params->features, VLAN, 1);
 858                wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
 859        }
 860
 861        BE_WRB_F_SET(wrb_params->features, CRC, 1);
 862}
 863
 864static void wrb_fill_hdr(struct be_adapter *adapter,
 865                         struct be_eth_hdr_wrb *hdr,
 866                         struct be_wrb_params *wrb_params,
 867                         struct sk_buff *skb)
 868{
 869        memset(hdr, 0, sizeof(*hdr));
 870
 871        SET_TX_WRB_HDR_BITS(crc, hdr,
 872                            BE_WRB_F_GET(wrb_params->features, CRC));
 873        SET_TX_WRB_HDR_BITS(ipcs, hdr,
 874                            BE_WRB_F_GET(wrb_params->features, IPCS));
 875        SET_TX_WRB_HDR_BITS(tcpcs, hdr,
 876                            BE_WRB_F_GET(wrb_params->features, TCPCS));
 877        SET_TX_WRB_HDR_BITS(udpcs, hdr,
 878                            BE_WRB_F_GET(wrb_params->features, UDPCS));
 879
 880        SET_TX_WRB_HDR_BITS(lso, hdr,
 881                            BE_WRB_F_GET(wrb_params->features, LSO));
 882        SET_TX_WRB_HDR_BITS(lso6, hdr,
 883                            BE_WRB_F_GET(wrb_params->features, LSO6));
 884        SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
 885
 886        /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
 887         * hack is not needed, the evt bit is set while ringing DB.
 888         */
 889        SET_TX_WRB_HDR_BITS(event, hdr,
 890                            BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
 891        SET_TX_WRB_HDR_BITS(vlan, hdr,
 892                            BE_WRB_F_GET(wrb_params->features, VLAN));
 893        SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
 894
 895        SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
 896        SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
 897        SET_TX_WRB_HDR_BITS(mgmt, hdr,
 898                            BE_WRB_F_GET(wrb_params->features, OS2BMC));
 899}
 900
 901static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
 902                          bool unmap_single)
 903{
 904        dma_addr_t dma;
 905        u32 frag_len = le32_to_cpu(wrb->frag_len);
 906
 907
 908        dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
 909                (u64)le32_to_cpu(wrb->frag_pa_lo);
 910        if (frag_len) {
 911                if (unmap_single)
 912                        dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
 913                else
 914                        dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
 915        }
 916}
 917
 918/* Grab a WRB header for xmit */
 919static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
 920{
 921        u32 head = txo->q.head;
 922
 923        queue_head_inc(&txo->q);
 924        return head;
 925}
 926
 927/* Set up the WRB header for xmit */
 928static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
 929                                struct be_tx_obj *txo,
 930                                struct be_wrb_params *wrb_params,
 931                                struct sk_buff *skb, u16 head)
 932{
 933        u32 num_frags = skb_wrb_cnt(skb);
 934        struct be_queue_info *txq = &txo->q;
 935        struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
 936
 937        wrb_fill_hdr(adapter, hdr, wrb_params, skb);
 938        be_dws_cpu_to_le(hdr, sizeof(*hdr));
 939
 940        BUG_ON(txo->sent_skb_list[head]);
 941        txo->sent_skb_list[head] = skb;
 942        txo->last_req_hdr = head;
 943        atomic_add(num_frags, &txq->used);
 944        txo->last_req_wrb_cnt = num_frags;
 945        txo->pend_wrb_cnt += num_frags;
 946}
 947
 948/* Setup a WRB fragment (buffer descriptor) for xmit */
 949static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
 950                                 int len)
 951{
 952        struct be_eth_wrb *wrb;
 953        struct be_queue_info *txq = &txo->q;
 954
 955        wrb = queue_head_node(txq);
 956        wrb_fill(wrb, busaddr, len);
 957        queue_head_inc(txq);
 958}
 959
 960/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
 961 * was invoked. The producer index is restored to the previous packet and the
 962 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
 963 */
 964static void be_xmit_restore(struct be_adapter *adapter,
 965                            struct be_tx_obj *txo, u32 head, bool map_single,
 966                            u32 copied)
 967{
 968        struct device *dev;
 969        struct be_eth_wrb *wrb;
 970        struct be_queue_info *txq = &txo->q;
 971
 972        dev = &adapter->pdev->dev;
 973        txq->head = head;
 974
 975        /* skip the first wrb (hdr); it's not mapped */
 976        queue_head_inc(txq);
 977        while (copied) {
 978                wrb = queue_head_node(txq);
 979                unmap_tx_frag(dev, wrb, map_single);
 980                map_single = false;
 981                copied -= le32_to_cpu(wrb->frag_len);
 982                queue_head_inc(txq);
 983        }
 984
 985        txq->head = head;
 986}
 987
 988/* Enqueue the given packet for transmit. This routine allocates WRBs for the
 989 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
 990 * of WRBs used up by the packet.
 991 */
 992static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
 993                           struct sk_buff *skb,
 994                           struct be_wrb_params *wrb_params)
 995{
 996        u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
 997        struct device *dev = &adapter->pdev->dev;
 998        bool map_single = false;
 999        u32 head;
1000        dma_addr_t busaddr;
1001        int len;
1002
1003        head = be_tx_get_wrb_hdr(txo);
1004
1005        if (skb->len > skb->data_len) {
1006                len = skb_headlen(skb);
1007
1008                busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1009                if (dma_mapping_error(dev, busaddr))
1010                        goto dma_err;
1011                map_single = true;
1012                be_tx_setup_wrb_frag(txo, busaddr, len);
1013                copied += len;
1014        }
1015
1016        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1017                const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1018                len = skb_frag_size(frag);
1019
1020                busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1021                if (dma_mapping_error(dev, busaddr))
1022                        goto dma_err;
1023                be_tx_setup_wrb_frag(txo, busaddr, len);
1024                copied += len;
1025        }
1026
1027        be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1028
1029        be_tx_stats_update(txo, skb);
1030        return wrb_cnt;
1031
1032dma_err:
1033        adapter->drv_stats.dma_map_errors++;
1034        be_xmit_restore(adapter, txo, head, map_single, copied);
1035        return 0;
1036}
1037
1038static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1039{
1040        return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1041}
1042
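/* Insert the VLAN tag(s) into the packet payload in software instead of
 * relying on HW tag insertion; used by the TX workarounds and by OS2BMC.
 * In QnQ mode the outer qnq_vid tag is inserted as well, and VLAN_SKIP_HW
 * tells the FW not to add another tag.
 */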
1043static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1044                                             struct sk_buff *skb,
1045                                             struct be_wrb_params
1046                                             *wrb_params)
1047{
1048        bool insert_vlan = false;
1049        u16 vlan_tag = 0;
1050
1051        skb = skb_share_check(skb, GFP_ATOMIC);
1052        if (unlikely(!skb))
1053                return skb;
1054
1055        if (skb_vlan_tag_present(skb)) {
1056                vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1057                insert_vlan = true;
1058        }
1059
1060        if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1061                if (!insert_vlan) {
1062                        vlan_tag = adapter->pvid;
1063                        insert_vlan = true;
1064                }
 1065                /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W to
1066                 * skip VLAN insertion
1067                 */
1068                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1069        }
1070
1071        if (insert_vlan) {
1072                skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1073                                                vlan_tag);
1074                if (unlikely(!skb))
1075                        return skb;
1076                __vlan_hwaccel_clear_tag(skb);
1077        }
1078
1079        /* Insert the outer VLAN, if any */
1080        if (adapter->qnq_vid) {
1081                vlan_tag = adapter->qnq_vid;
1082                skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1083                                                vlan_tag);
1084                if (unlikely(!skb))
1085                        return skb;
1086                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1087        }
1088
1089        return skb;
1090}
1091
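/* Returns true for IPv6 packets that carry an extension header with
 * hdrlen == 0xff instead of going straight to TCP/UDP; these are the packets
 * that can stall BE3 TX when HW VLAN tagging is requested (see the
 * workarounds below).
 */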
1092static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1093{
1094        struct ethhdr *eh = (struct ethhdr *)skb->data;
1095        u16 offset = ETH_HLEN;
1096
1097        if (eh->h_proto == htons(ETH_P_IPV6)) {
1098                struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1099
1100                offset += sizeof(struct ipv6hdr);
1101                if (ip6h->nexthdr != NEXTHDR_TCP &&
1102                    ip6h->nexthdr != NEXTHDR_UDP) {
1103                        struct ipv6_opt_hdr *ehdr =
1104                                (struct ipv6_opt_hdr *)(skb->data + offset);
1105
1106                        /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1107                        if (ehdr->hdrlen == 0xff)
1108                                return true;
1109                }
1110        }
1111        return false;
1112}
1113
1114static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1115{
1116        return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1117}
1118
1119static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1120{
1121        return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1122}
1123
1124static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1125                                                  struct sk_buff *skb,
1126                                                  struct be_wrb_params
1127                                                  *wrb_params)
1128{
1129        struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1130        unsigned int eth_hdr_len;
1131        struct iphdr *ip;
1132
1133        /* For padded packets, BE HW modifies tot_len field in IP header
 1134         * incorrectly when VLAN tag is inserted by HW.
1135         * For padded packets, Lancer computes incorrect checksum.
1136         */
1137        eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1138                                                VLAN_ETH_HLEN : ETH_HLEN;
1139        if (skb->len <= 60 &&
1140            (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1141            is_ipv4_pkt(skb)) {
1142                ip = (struct iphdr *)ip_hdr(skb);
1143                pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1144        }
1145
1146        /* If vlan tag is already inlined in the packet, skip HW VLAN
1147         * tagging in pvid-tagging mode
1148         */
1149        if (be_pvid_tagging_enabled(adapter) &&
1150            veh->h_vlan_proto == htons(ETH_P_8021Q))
1151                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1152
1153        /* HW has a bug wherein it will calculate CSUM for VLAN
 1154         * pkts even though checksum offload is disabled.
1155         * Manually insert VLAN in pkt.
1156         */
1157        if (skb->ip_summed != CHECKSUM_PARTIAL &&
1158            skb_vlan_tag_present(skb)) {
1159                skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1160                if (unlikely(!skb))
1161                        goto err;
1162        }
1163
 1164        /* HW may lock up when VLAN HW tagging is requested on
1165         * certain ipv6 packets. Drop such pkts if the HW workaround to
1166         * skip HW tagging is not enabled by FW.
1167         */
1168        if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1169                     (adapter->pvid || adapter->qnq_vid) &&
1170                     !qnq_async_evt_rcvd(adapter)))
1171                goto tx_drop;
1172
1173        /* Manual VLAN tag insertion to prevent:
1174         * ASIC lockup when the ASIC inserts VLAN tag into
1175         * certain ipv6 packets. Insert VLAN tags in driver,
1176         * and set event, completion, vlan bits accordingly
1177         * in the Tx WRB.
1178         */
1179        if (be_ipv6_tx_stall_chk(adapter, skb) &&
1180            be_vlan_tag_tx_chk(adapter, skb)) {
1181                skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1182                if (unlikely(!skb))
1183                        goto err;
1184        }
1185
1186        return skb;
1187tx_drop:
1188        dev_kfree_skb_any(skb);
1189err:
1190        return NULL;
1191}
1192
1193static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1194                                           struct sk_buff *skb,
1195                                           struct be_wrb_params *wrb_params)
1196{
1197        int err;
1198
1199        /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
 1200         * packets that are 32 bytes or less may cause a transmit stall
 1201         * on that port. The workaround is to pad such packets
 1202         * (len <= 32 bytes) to a minimum length of 36 bytes.
1203         */
1204        if (skb->len <= 32) {
1205                if (skb_put_padto(skb, 36))
1206                        return NULL;
1207        }
1208
1209        if (BEx_chip(adapter) || lancer_chip(adapter)) {
1210                skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1211                if (!skb)
1212                        return NULL;
1213        }
1214
1215        /* The stack can send us skbs with length greater than
1216         * what the HW can handle. Trim the extra bytes.
1217         */
1218        WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1219        err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1220        WARN_ON(err);
1221
1222        return skb;
1223}
1224
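/* Ring the TX doorbell for all WRBs queued since the last flush. The last
 * header WRB is made eventable, and on non-Lancer chips an odd pending WRB
 * count is padded with an all-zero dummy WRB (folded into the header's
 * num_wrb field) before notifying the HW.
 */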
1225static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1226{
1227        struct be_queue_info *txq = &txo->q;
1228        struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1229
1230        /* Mark the last request eventable if it hasn't been marked already */
1231        if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1232                hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1233
 1234        /* compose a dummy wrb if there is an odd number of wrbs to notify */
1235        if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1236                wrb_fill_dummy(queue_head_node(txq));
1237                queue_head_inc(txq);
1238                atomic_inc(&txq->used);
1239                txo->pend_wrb_cnt++;
1240                hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1241                                           TX_HDR_WRB_NUM_SHIFT);
1242                hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1243                                          TX_HDR_WRB_NUM_SHIFT);
1244        }
1245        be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1246        txo->pend_wrb_cnt = 0;
1247}
1248
1249/* OS2BMC related */
1250
1251#define DHCP_CLIENT_PORT        68
1252#define DHCP_SERVER_PORT        67
1253#define NET_BIOS_PORT1          137
1254#define NET_BIOS_PORT2          138
1255#define DHCPV6_RAS_PORT         547
1256
1257#define is_mc_allowed_on_bmc(adapter, eh)       \
1258        (!is_multicast_filt_enabled(adapter) && \
1259         is_multicast_ether_addr(eh->h_dest) && \
1260         !is_broadcast_ether_addr(eh->h_dest))
1261
1262#define is_bc_allowed_on_bmc(adapter, eh)       \
1263        (!is_broadcast_filt_enabled(adapter) && \
1264         is_broadcast_ether_addr(eh->h_dest))
1265
1266#define is_arp_allowed_on_bmc(adapter, skb)     \
1267        (is_arp(skb) && is_arp_filt_enabled(adapter))
1268
1269#define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1270
1271#define is_arp_filt_enabled(adapter)    \
1272                (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1273
1274#define is_dhcp_client_filt_enabled(adapter)    \
1275                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1276
1277#define is_dhcp_srvr_filt_enabled(adapter)      \
1278                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1279
1280#define is_nbios_filt_enabled(adapter)  \
1281                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1282
1283#define is_ipv6_na_filt_enabled(adapter)        \
1284                (adapter->bmc_filt_mask &       \
1285                        BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1286
1287#define is_ipv6_ra_filt_enabled(adapter)        \
1288                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1289
1290#define is_ipv6_ras_filt_enabled(adapter)       \
1291                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1292
1293#define is_broadcast_filt_enabled(adapter)      \
1294                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1295
1296#define is_multicast_filt_enabled(adapter)      \
1297                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1298
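/* Decide whether a copy of this TX packet must also be delivered to the BMC
 * (OS2BMC): broadcast/multicast management traffic such as ARP, DHCP,
 * NetBIOS, DHCPv6 and IPv6 RA/NA is matched against the BMC filter mask
 * reported by the FW.
 */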
1299static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1300                               struct sk_buff **skb)
1301{
1302        struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1303        bool os2bmc = false;
1304
1305        if (!be_is_os2bmc_enabled(adapter))
1306                goto done;
1307
1308        if (!is_multicast_ether_addr(eh->h_dest))
1309                goto done;
1310
1311        if (is_mc_allowed_on_bmc(adapter, eh) ||
1312            is_bc_allowed_on_bmc(adapter, eh) ||
1313            is_arp_allowed_on_bmc(adapter, (*skb))) {
1314                os2bmc = true;
1315                goto done;
1316        }
1317
1318        if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1319                struct ipv6hdr *hdr = ipv6_hdr((*skb));
1320                u8 nexthdr = hdr->nexthdr;
1321
1322                if (nexthdr == IPPROTO_ICMPV6) {
1323                        struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1324
1325                        switch (icmp6->icmp6_type) {
1326                        case NDISC_ROUTER_ADVERTISEMENT:
1327                                os2bmc = is_ipv6_ra_filt_enabled(adapter);
1328                                goto done;
1329                        case NDISC_NEIGHBOUR_ADVERTISEMENT:
1330                                os2bmc = is_ipv6_na_filt_enabled(adapter);
1331                                goto done;
1332                        default:
1333                                break;
1334                        }
1335                }
1336        }
1337
1338        if (is_udp_pkt((*skb))) {
1339                struct udphdr *udp = udp_hdr((*skb));
1340
1341                switch (ntohs(udp->dest)) {
1342                case DHCP_CLIENT_PORT:
1343                        os2bmc = is_dhcp_client_filt_enabled(adapter);
1344                        goto done;
1345                case DHCP_SERVER_PORT:
1346                        os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1347                        goto done;
1348                case NET_BIOS_PORT1:
1349                case NET_BIOS_PORT2:
1350                        os2bmc = is_nbios_filt_enabled(adapter);
1351                        goto done;
1352                case DHCPV6_RAS_PORT:
1353                        os2bmc = is_ipv6_ras_filt_enabled(adapter);
1354                        goto done;
1355                default:
1356                        break;
1357                }
1358        }
1359done:
 1360        /* For VLAN packets destined to the BMC, the ASIC expects
 1361         * the VLAN tag to be inline in the packet.
1362         */
1363        if (os2bmc)
1364                *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1365
1366        return os2bmc;
1367}
1368
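/* Main TX entry point: apply chip-specific workarounds, build the WRB
 * parameters, enqueue the skb (and, for OS2BMC traffic, a second copy with
 * the mgmt bit set), stop the subqueue when it fills up, and ring the TX
 * doorbell when xmit_more is not set or the queue was stopped.
 */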
1369static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1370{
1371        struct be_adapter *adapter = netdev_priv(netdev);
1372        u16 q_idx = skb_get_queue_mapping(skb);
1373        struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1374        struct be_wrb_params wrb_params = { 0 };
1375        bool flush = !netdev_xmit_more();
1376        u16 wrb_cnt;
1377
1378        skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1379        if (unlikely(!skb))
1380                goto drop;
1381
1382        be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1383
1384        wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1385        if (unlikely(!wrb_cnt)) {
1386                dev_kfree_skb_any(skb);
1387                goto drop;
1388        }
1389
1390        /* if os2bmc is enabled and if the pkt is destined to bmc,
1391         * enqueue the pkt a 2nd time with mgmt bit set.
1392         */
1393        if (be_send_pkt_to_bmc(adapter, &skb)) {
1394                BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1395                wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1396                if (unlikely(!wrb_cnt))
1397                        goto drop;
1398                else
1399                        skb_get(skb);
1400        }
1401
1402        if (be_is_txq_full(txo)) {
1403                netif_stop_subqueue(netdev, q_idx);
1404                tx_stats(txo)->tx_stops++;
1405        }
1406
1407        if (flush || __netif_subqueue_stopped(netdev, q_idx))
1408                be_xmit_flush(adapter, txo);
1409
1410        return NETDEV_TX_OK;
1411drop:
1412        tx_stats(txo)->tx_drv_drops++;
1413        /* Flush the already enqueued tx requests */
1414        if (flush && txo->pend_wrb_cnt)
1415                be_xmit_flush(adapter, txo);
1416
1417        return NETDEV_TX_OK;
1418}
1419
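/* On a TX timeout, dump the non-zero TX queue and TX completion queue
 * descriptors along with header details of any skbs still awaiting
 * completion, to aid debugging. On Lancer chips, also request a firmware
 * reset of the adapter.
 */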
1420static void be_tx_timeout(struct net_device *netdev)
1421{
1422        struct be_adapter *adapter = netdev_priv(netdev);
1423        struct device *dev = &adapter->pdev->dev;
1424        struct be_tx_obj *txo;
1425        struct sk_buff *skb;
1426        struct tcphdr *tcphdr;
1427        struct udphdr *udphdr;
1428        u32 *entry;
1429        int status;
1430        int i, j;
1431
1432        for_all_tx_queues(adapter, txo, i) {
1433                dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1434                         i, txo->q.head, txo->q.tail,
1435                         atomic_read(&txo->q.used), txo->q.id);
1436
1437                entry = txo->q.dma_mem.va;
1438                for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1439                        if (entry[j] != 0 || entry[j + 1] != 0 ||
1440                            entry[j + 2] != 0 || entry[j + 3] != 0) {
1441                                dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1442                                         j, entry[j], entry[j + 1],
1443                                         entry[j + 2], entry[j + 3]);
1444                        }
1445                }
1446
1447                entry = txo->cq.dma_mem.va;
1448                dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1449                         i, txo->cq.head, txo->cq.tail,
1450                         atomic_read(&txo->cq.used));
1451                for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1452                        if (entry[j] != 0 || entry[j + 1] != 0 ||
1453                            entry[j + 2] != 0 || entry[j + 3] != 0) {
1454                                dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1455                                         j, entry[j], entry[j + 1],
1456                                         entry[j + 2], entry[j + 3]);
1457                        }
1458                }
1459
1460                for (j = 0; j < TX_Q_LEN; j++) {
1461                        if (txo->sent_skb_list[j]) {
1462                                skb = txo->sent_skb_list[j];
1463                                if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1464                                        tcphdr = tcp_hdr(skb);
1465                                        dev_info(dev, "TCP source port %d\n",
1466                                                 ntohs(tcphdr->source));
1467                                        dev_info(dev, "TCP dest port %d\n",
1468                                                 ntohs(tcphdr->dest));
1469                                        dev_info(dev, "TCP sequence num %u\n",
1470                                                 ntohl(tcphdr->seq));
1471                                        dev_info(dev, "TCP ack_seq %u\n",
1472                                                 ntohl(tcphdr->ack_seq));
1473                                } else if (ip_hdr(skb)->protocol ==
1474                                           IPPROTO_UDP) {
1475                                        udphdr = udp_hdr(skb);
1476                                        dev_info(dev, "UDP source port %d\n",
1477                                                 ntohs(udphdr->source));
1478                                        dev_info(dev, "UDP dest port %d\n",
1479                                                 ntohs(udphdr->dest));
1480                                }
1481                                dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1482                                         j, skb, skb->len, ntohs(skb->protocol));
1483                        }
1484                }
1485        }
1486
1487        if (lancer_chip(adapter)) {
1488                dev_info(dev, "Initiating reset due to tx timeout\n");
1489                dev_info(dev, "Resetting adapter\n");
1490                status = lancer_physdev_ctrl(adapter,
1491                                             PHYSDEV_CONTROL_FW_RESET_MASK);
1492                if (status)
1493                        dev_err(dev, "Reset failed .. Reboot server\n");
1494        }
1495}
1496
1497static inline bool be_in_all_promisc(struct be_adapter *adapter)
1498{
1499        return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1500                        BE_IF_FLAGS_ALL_PROMISCUOUS;
1501}
1502
1503static int be_set_vlan_promisc(struct be_adapter *adapter)
1504{
1505        struct device *dev = &adapter->pdev->dev;
1506        int status;
1507
1508        if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1509                return 0;
1510
1511        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1512        if (!status) {
1513                dev_info(dev, "Enabled VLAN promiscuous mode\n");
1514                adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1515        } else {
1516                dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1517        }
1518        return status;
1519}
1520
1521static int be_clear_vlan_promisc(struct be_adapter *adapter)
1522{
1523        struct device *dev = &adapter->pdev->dev;
1524        int status;
1525
1526        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1527        if (!status) {
1528                dev_info(dev, "Disabled VLAN promiscuous mode\n");
1529                adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1530        }
1531        return status;
1532}
1533
1534/*
1535 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1536 * If the user configures more, place BE in vlan promiscuous mode.
1537 */
1538static int be_vid_config(struct be_adapter *adapter)
1539{
1540        struct device *dev = &adapter->pdev->dev;
1541        u16 vids[BE_NUM_VLANS_SUPPORTED];
1542        u16 num = 0, i = 0;
1543        int status = 0;
1544
1545        /* No need to change the VLAN state if the I/F is in promiscuous */
1546        if (adapter->netdev->flags & IFF_PROMISC)
1547                return 0;
1548
1549        if (adapter->vlans_added > be_max_vlans(adapter))
1550                return be_set_vlan_promisc(adapter);
1551
1552        if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1553                status = be_clear_vlan_promisc(adapter);
1554                if (status)
1555                        return status;
1556        }
1557        /* Construct VLAN Table to give to HW */
1558        for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1559                vids[num++] = cpu_to_le16(i);
1560
1561        status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1562        if (status) {
1563                dev_err(dev, "Setting HW VLAN filtering failed\n");
1564                /* Set to VLAN promisc mode as setting VLAN filter failed */
1565                if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1566                    addl_status(status) ==
1567                                MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1568                        return be_set_vlan_promisc(adapter);
1569        }
1570        return status;
1571}
1572
1573static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1574{
1575        struct be_adapter *adapter = netdev_priv(netdev);
1576        int status = 0;
1577
1578        mutex_lock(&adapter->rx_filter_lock);
1579
1580        /* Packets with VID 0 are always received by Lancer by default */
1581        if (lancer_chip(adapter) && vid == 0)
1582                goto done;
1583
1584        if (test_bit(vid, adapter->vids))
1585                goto done;
1586
1587        set_bit(vid, adapter->vids);
1588        adapter->vlans_added++;
1589
1590        status = be_vid_config(adapter);
1591done:
1592        mutex_unlock(&adapter->rx_filter_lock);
1593        return status;
1594}
1595
1596static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1597{
1598        struct be_adapter *adapter = netdev_priv(netdev);
1599        int status = 0;
1600
1601        mutex_lock(&adapter->rx_filter_lock);
1602
1603        /* Packets with VID 0 are always received by Lancer by default */
1604        if (lancer_chip(adapter) && vid == 0)
1605                goto done;
1606
1607        if (!test_bit(vid, adapter->vids))
1608                goto done;
1609
1610        clear_bit(vid, adapter->vids);
1611        adapter->vlans_added--;
1612
1613        status = be_vid_config(adapter);
1614done:
1615        mutex_unlock(&adapter->rx_filter_lock);
1616        return status;
1617}
1618
1619static void be_set_all_promisc(struct be_adapter *adapter)
1620{
1621        be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1622        adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1623}
1624
1625static void be_set_mc_promisc(struct be_adapter *adapter)
1626{
1627        int status;
1628
1629        if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1630                return;
1631
1632        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1633        if (!status)
1634                adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1635}
1636
1637static void be_set_uc_promisc(struct be_adapter *adapter)
1638{
1639        int status;
1640
1641        if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1642                return;
1643
1644        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1645        if (!status)
1646                adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1647}
1648
1649static void be_clear_uc_promisc(struct be_adapter *adapter)
1650{
1651        int status;
1652
1653        if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1654                return;
1655
1656        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1657        if (!status)
1658                adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1659}
1660
1661/* The two functions below are the callbacks passed to __dev_mc_sync() and
1662 * __dev_uc_sync(). The same callback is used for both sync and unsync. We
1663 * don't actually add/remove addresses here; the callbacks only flag changes
1664 * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1665 */
1666static int be_uc_list_update(struct net_device *netdev,
1667                             const unsigned char *addr)
1668{
1669        struct be_adapter *adapter = netdev_priv(netdev);
1670
1671        adapter->update_uc_list = true;
1672        return 0;
1673}
1674
1675static int be_mc_list_update(struct net_device *netdev,
1676                             const unsigned char *addr)
1677{
1678        struct be_adapter *adapter = netdev_priv(netdev);
1679
1680        adapter->update_mc_list = true;
1681        return 0;
1682}
1683
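/* Sync the netdev multicast list into the adapter and program it to the HW.
 * Falls back to multicast-promiscuous mode when IFF_ALLMULTI is set or the
 * number of addresses exceeds what the interface supports; nothing is
 * reprogrammed while the interface is in promiscuous mode.
 */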
1684static void be_set_mc_list(struct be_adapter *adapter)
1685{
1686        struct net_device *netdev = adapter->netdev;
1687        struct netdev_hw_addr *ha;
1688        bool mc_promisc = false;
1689        int status;
1690
1691        netif_addr_lock_bh(netdev);
1692        __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1693
1694        if (netdev->flags & IFF_PROMISC) {
1695                adapter->update_mc_list = false;
1696        } else if (netdev->flags & IFF_ALLMULTI ||
1697                   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1698                /* Enable multicast promisc if num configured exceeds
1699                 * what we support
1700                 */
1701                mc_promisc = true;
1702                adapter->update_mc_list = false;
1703        } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1704                /* Update mc-list unconditionally if the iface was previously
1705                 * in mc-promisc mode and now is out of that mode.
1706                 */
1707                adapter->update_mc_list = true;
1708        }
1709
1710        if (adapter->update_mc_list) {
1711                int i = 0;
1712
1713                /* cache the mc-list in adapter */
1714                netdev_for_each_mc_addr(ha, netdev) {
1715                        ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1716                        i++;
1717                }
1718                adapter->mc_count = netdev_mc_count(netdev);
1719        }
1720        netif_addr_unlock_bh(netdev);
1721
1722        if (mc_promisc) {
1723                be_set_mc_promisc(adapter);
1724        } else if (adapter->update_mc_list) {
1725                status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1726                if (!status)
1727                        adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1728                else
1729                        be_set_mc_promisc(adapter);
1730
1731                adapter->update_mc_list = false;
1732        }
1733}
1734
1735static void be_clear_mc_list(struct be_adapter *adapter)
1736{
1737        struct net_device *netdev = adapter->netdev;
1738
1739        __dev_mc_unsync(netdev, NULL);
1740        be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1741        adapter->mc_count = 0;
1742}
1743
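/* Program the uc-list entry at uc_idx as a MAC filter on the interface.
 * If the address matches the primary MAC (dev_mac), reuse pmac_id[0]
 * instead of consuming another filter entry.
 */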
1744static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1745{
1746        if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1747                adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1748                return 0;
1749        }
1750
1751        return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1752                               adapter->if_handle,
1753                               &adapter->pmac_id[uc_idx + 1], 0);
1754}
1755
1756static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1757{
1758        if (pmac_id == adapter->pmac_id[0])
1759                return;
1760
1761        be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1762}
1763
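/* Sync the netdev unicast list into the adapter and program it to the HW.
 * Falls back to unicast-promiscuous mode when the number of addresses
 * exceeds be_max_uc() - 1; otherwise the previously programmed filters are
 * deleted and the current list is re-added.
 */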
1764static void be_set_uc_list(struct be_adapter *adapter)
1765{
1766        struct net_device *netdev = adapter->netdev;
1767        struct netdev_hw_addr *ha;
1768        bool uc_promisc = false;
1769        int curr_uc_macs = 0, i;
1770
1771        netif_addr_lock_bh(netdev);
1772        __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1773
1774        if (netdev->flags & IFF_PROMISC) {
1775                adapter->update_uc_list = false;
1776        } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1777                uc_promisc = true;
1778                adapter->update_uc_list = false;
1779        }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1780                /* Update uc-list unconditionally if the iface was previously
1781                 * in uc-promisc mode and now is out of that mode.
1782                 */
1783                adapter->update_uc_list = true;
1784        }
1785
1786        if (adapter->update_uc_list) {
1787                /* cache the uc-list in adapter array */
1788                i = 0;
1789                netdev_for_each_uc_addr(ha, netdev) {
1790                        ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1791                        i++;
1792                }
1793                curr_uc_macs = netdev_uc_count(netdev);
1794        }
1795        netif_addr_unlock_bh(netdev);
1796
1797        if (uc_promisc) {
1798                be_set_uc_promisc(adapter);
1799        } else if (adapter->update_uc_list) {
1800                be_clear_uc_promisc(adapter);
1801
1802                for (i = 0; i < adapter->uc_macs; i++)
1803                        be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1804
1805                for (i = 0; i < curr_uc_macs; i++)
1806                        be_uc_mac_add(adapter, i);
1807                adapter->uc_macs = curr_uc_macs;
1808                adapter->update_uc_list = false;
1809        }
1810}
1811
1812static void be_clear_uc_list(struct be_adapter *adapter)
1813{
1814        struct net_device *netdev = adapter->netdev;
1815        int i;
1816
1817        __dev_uc_unsync(netdev, NULL);
1818        for (i = 0; i < adapter->uc_macs; i++)
1819                be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1820
1821        adapter->uc_macs = 0;
1822}
1823
1824static void __be_set_rx_mode(struct be_adapter *adapter)
1825{
1826        struct net_device *netdev = adapter->netdev;
1827
1828        mutex_lock(&adapter->rx_filter_lock);
1829
1830        if (netdev->flags & IFF_PROMISC) {
1831                if (!be_in_all_promisc(adapter))
1832                        be_set_all_promisc(adapter);
1833        } else if (be_in_all_promisc(adapter)) {
1834                /* We need to re-program the vlan-list or clear
1835                 * vlan-promisc mode (if needed) when the interface
1836                 * comes out of promisc mode.
1837                 */
1838                be_vid_config(adapter);
1839        }
1840
1841        be_set_uc_list(adapter);
1842        be_set_mc_list(adapter);
1843
1844        mutex_unlock(&adapter->rx_filter_lock);
1845}
1846
1847static void be_work_set_rx_mode(struct work_struct *work)
1848{
1849        struct be_cmd_work *cmd_work =
1850                                container_of(work, struct be_cmd_work, work);
1851
1852        __be_set_rx_mode(cmd_work->adapter);
1853        kfree(cmd_work);
1854}
1855
1856static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1857{
1858        struct be_adapter *adapter = netdev_priv(netdev);
1859        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1860        int status;
1861
1862        if (!sriov_enabled(adapter))
1863                return -EPERM;
1864
1865        if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1866                return -EINVAL;
1867
1868        /* Proceed further only if user provided MAC is different
1869         * from active MAC
1870         */
1871        if (ether_addr_equal(mac, vf_cfg->mac_addr))
1872                return 0;
1873
1874        if (BEx_chip(adapter)) {
1875                be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1876                                vf + 1);
1877
1878                status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1879                                         &vf_cfg->pmac_id, vf + 1);
1880        } else {
1881                status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1882                                        vf + 1);
1883        }
1884
1885        if (status) {
1886                dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x\n",
1887                        mac, vf, status);
1888                return be_cmd_status(status);
1889        }
1890
1891        ether_addr_copy(vf_cfg->mac_addr, mac);
1892
1893        return 0;
1894}
1895
1896static int be_get_vf_config(struct net_device *netdev, int vf,
1897                            struct ifla_vf_info *vi)
1898{
1899        struct be_adapter *adapter = netdev_priv(netdev);
1900        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1901
1902        if (!sriov_enabled(adapter))
1903                return -EPERM;
1904
1905        if (vf >= adapter->num_vfs)
1906                return -EINVAL;
1907
1908        vi->vf = vf;
1909        vi->max_tx_rate = vf_cfg->tx_rate;
1910        vi->min_tx_rate = 0;
1911        vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1912        vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1913        memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1914        vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1915        vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1916
1917        return 0;
1918}
1919
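/* Enable Transparent VLAN Tagging (TVT) on a VF: program the VLAN in the
 * host-switch config, clear any guest-programmed VLAN filters and revoke
 * the VF's FILTMGMT privilege so it can no longer program its own filters.
 */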
1920static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1921{
1922        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1923        u16 vids[BE_NUM_VLANS_SUPPORTED];
1924        int vf_if_id = vf_cfg->if_handle;
1925        int status;
1926
1927        /* Enable Transparent VLAN Tagging */
1928        status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1929        if (status)
1930                return status;
1931
1932        /* Now that TVT is on, clear any pre-programmed VLAN filters on the VF */
1933        vids[0] = 0;
1934        status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1935        if (!status)
1936                dev_info(&adapter->pdev->dev,
1937                         "Cleared guest VLANs on VF%d", vf);
1938
1939        /* After TVT is enabled, disallow VFs to program VLAN filters */
1940        if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1941                status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1942                                                  ~BE_PRIV_FILTMGMT, vf + 1);
1943                if (!status)
1944                        vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1945        }
1946        return 0;
1947}
1948
1949static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1950{
1951        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1952        struct device *dev = &adapter->pdev->dev;
1953        int status;
1954
1955        /* Reset Transparent VLAN Tagging. */
1956        status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1957                                       vf_cfg->if_handle, 0, 0);
1958        if (status)
1959                return status;
1960
1961        /* Allow VFs to program VLAN filtering */
1962        if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1963                status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1964                                                  BE_PRIV_FILTMGMT, vf + 1);
1965                if (!status) {
1966                        vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1967                        dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1968                }
1969        }
1970
1971        dev_info(dev,
1972                 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1973        return 0;
1974}
1975
1976static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1977                          __be16 vlan_proto)
1978{
1979        struct be_adapter *adapter = netdev_priv(netdev);
1980        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1981        int status;
1982
1983        if (!sriov_enabled(adapter))
1984                return -EPERM;
1985
1986        if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1987                return -EINVAL;
1988
1989        if (vlan_proto != htons(ETH_P_8021Q))
1990                return -EPROTONOSUPPORT;
1991
1992        if (vlan || qos) {
1993                vlan |= qos << VLAN_PRIO_SHIFT;
1994                status = be_set_vf_tvt(adapter, vf, vlan);
1995        } else {
1996                status = be_clear_vf_tvt(adapter, vf);
1997        }
1998
1999        if (status) {
2000                dev_err(&adapter->pdev->dev,
2001                        "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2002                        status);
2003                return be_cmd_status(status);
2004        }
2005
2006        vf_cfg->vlan_tag = vlan;
2007        return 0;
2008}
2009
2010static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2011                             int min_tx_rate, int max_tx_rate)
2012{
2013        struct be_adapter *adapter = netdev_priv(netdev);
2014        struct device *dev = &adapter->pdev->dev;
2015        int percent_rate, status = 0;
2016        u16 link_speed = 0;
2017        u8 link_status;
2018
2019        if (!sriov_enabled(adapter))
2020                return -EPERM;
2021
2022        if (vf >= adapter->num_vfs)
2023                return -EINVAL;
2024
2025        if (min_tx_rate)
2026                return -EINVAL;
2027
2028        if (!max_tx_rate)
2029                goto config_qos;
2030
2031        status = be_cmd_link_status_query(adapter, &link_speed,
2032                                          &link_status, 0);
2033        if (status)
2034                goto err;
2035
2036        if (!link_status) {
2037                dev_err(dev, "TX-rate setting not allowed when link is down\n");
2038                status = -ENETDOWN;
2039                goto err;
2040        }
2041
2042        if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2043                dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2044                        link_speed);
2045                status = -EINVAL;
2046                goto err;
2047        }
2048
2049        /* On Skyhawk the QOS setting must be done only as a % value */
2050        percent_rate = link_speed / 100;
2051        if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2052                dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2053                        percent_rate);
2054                status = -EINVAL;
2055                goto err;
2056        }
2057
2058config_qos:
2059        status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2060        if (status)
2061                goto err;
2062
2063        adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2064        return 0;
2065
2066err:
2067        dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2068                max_tx_rate, vf);
2069        return be_cmd_status(status);
2070}
2071
2072static int be_set_vf_link_state(struct net_device *netdev, int vf,
2073                                int link_state)
2074{
2075        struct be_adapter *adapter = netdev_priv(netdev);
2076        int status;
2077
2078        if (!sriov_enabled(adapter))
2079                return -EPERM;
2080
2081        if (vf >= adapter->num_vfs)
2082                return -EINVAL;
2083
2084        status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2085        if (status) {
2086                dev_err(&adapter->pdev->dev,
2087                        "Link state change on VF %d failed: %#x\n", vf, status);
2088                return be_cmd_status(status);
2089        }
2090
2091        adapter->vf_cfg[vf].plink_tracking = link_state;
2092
2093        return 0;
2094}
2095
2096static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2097{
2098        struct be_adapter *adapter = netdev_priv(netdev);
2099        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2100        u8 spoofchk;
2101        int status;
2102
2103        if (!sriov_enabled(adapter))
2104                return -EPERM;
2105
2106        if (vf >= adapter->num_vfs)
2107                return -EINVAL;
2108
2109        if (BEx_chip(adapter))
2110                return -EOPNOTSUPP;
2111
2112        if (enable == vf_cfg->spoofchk)
2113                return 0;
2114
2115        spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2116
2117        status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2118                                       0, spoofchk);
2119        if (status) {
2120                dev_err(&adapter->pdev->dev,
2121                        "Spoofchk change on VF %d failed: %#x\n", vf, status);
2122                return be_cmd_status(status);
2123        }
2124
2125        vf_cfg->spoofchk = enable;
2126        return 0;
2127}
2128
2129static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2130                          ulong now)
2131{
2132        aic->rx_pkts_prev = rx_pkts;
2133        aic->tx_reqs_prev = tx_pkts;
2134        aic->jiffies = now;
2135}
2136
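/* Compute a new EQ delay for adaptive interrupt coalescing based on the
 * combined RX + TX packet rate seen on this EQ since the last sample:
 *   pps = (rx_delta + tx_delta) * 1000 / delta_ms
 *   eqd = (pps / 15000) << 2, zeroed if below 8, then clamped to
 *         [min_eqd, max_eqd]
 * Returns the static et_eqd value when adaptive mode is disabled, and the
 * previous value on the first sample or when the counters wrapped.
 */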
2137static int be_get_new_eqd(struct be_eq_obj *eqo)
2138{
2139        struct be_adapter *adapter = eqo->adapter;
2140        int eqd, start;
2141        struct be_aic_obj *aic;
2142        struct be_rx_obj *rxo;
2143        struct be_tx_obj *txo;
2144        u64 rx_pkts = 0, tx_pkts = 0;
2145        ulong now;
2146        u32 pps, delta;
2147        int i;
2148
2149        aic = &adapter->aic_obj[eqo->idx];
2150        if (!aic->enable) {
2151                if (aic->jiffies)
2152                        aic->jiffies = 0;
2153                eqd = aic->et_eqd;
2154                return eqd;
2155        }
2156
2157        for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2158                do {
2159                        start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2160                        rx_pkts += rxo->stats.rx_pkts;
2161                } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2162        }
2163
2164        for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2165                do {
2166                        start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2167                        tx_pkts += txo->stats.tx_reqs;
2168                } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2169        }
2170
2171        /* Skip on the first calculation or if the counters wrapped around */
2172        now = jiffies;
2173        if (!aic->jiffies || time_before(now, aic->jiffies) ||
2174            rx_pkts < aic->rx_pkts_prev ||
2175            tx_pkts < aic->tx_reqs_prev) {
2176                be_aic_update(aic, rx_pkts, tx_pkts, now);
2177                return aic->prev_eqd;
2178        }
2179
2180        delta = jiffies_to_msecs(now - aic->jiffies);
2181        if (delta == 0)
2182                return aic->prev_eqd;
2183
2184        pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2185                (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2186        eqd = (pps / 15000) << 2;
2187
2188        if (eqd < 8)
2189                eqd = 0;
2190        eqd = min_t(u32, eqd, aic->max_eqd);
2191        eqd = max_t(u32, eqd, aic->min_eqd);
2192
2193        be_aic_update(aic, rx_pkts, tx_pkts, now);
2194
2195        return eqd;
2196}
2197
2198/* For Skyhawk-R only */
2199static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2200{
2201        struct be_adapter *adapter = eqo->adapter;
2202        struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2203        ulong now = jiffies;
2204        int eqd;
2205        u32 mult_enc;
2206
2207        if (!aic->enable)
2208                return 0;
2209
2210        if (jiffies_to_msecs(now - aic->jiffies) < 1)
2211                eqd = aic->prev_eqd;
2212        else
2213                eqd = be_get_new_eqd(eqo);
2214
2215        if (eqd > 100)
2216                mult_enc = R2I_DLY_ENC_1;
2217        else if (eqd > 60)
2218                mult_enc = R2I_DLY_ENC_2;
2219        else if (eqd > 20)
2220                mult_enc = R2I_DLY_ENC_3;
2221        else
2222                mult_enc = R2I_DLY_ENC_0;
2223
2224        aic->prev_eqd = eqd;
2225
2226        return mult_enc;
2227}
2228
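/* Recompute the EQ delay for every event queue and apply, via
 * be_cmd_modify_eqd(), the entries whose delay changed (or all of them
 * when force_update is set).
 */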
2229void be_eqd_update(struct be_adapter *adapter, bool force_update)
2230{
2231        struct be_set_eqd set_eqd[MAX_EVT_QS];
2232        struct be_aic_obj *aic;
2233        struct be_eq_obj *eqo;
2234        int i, num = 0, eqd;
2235
2236        for_all_evt_queues(adapter, eqo, i) {
2237                aic = &adapter->aic_obj[eqo->idx];
2238                eqd = be_get_new_eqd(eqo);
2239                if (force_update || eqd != aic->prev_eqd) {
2240                        set_eqd[num].delay_multiplier = (eqd * 65)/100;
2241                        set_eqd[num].eq_id = eqo->q.id;
2242                        aic->prev_eqd = eqd;
2243                        num++;
2244                }
2245        }
2246
2247        if (num)
2248                be_cmd_modify_eqd(adapter, set_eqd, num);
2249}
2250
2251static void be_rx_stats_update(struct be_rx_obj *rxo,
2252                               struct be_rx_compl_info *rxcp)
2253{
2254        struct be_rx_stats *stats = rx_stats(rxo);
2255
2256        u64_stats_update_begin(&stats->sync);
2257        stats->rx_compl++;
2258        stats->rx_bytes += rxcp->pkt_size;
2259        stats->rx_pkts++;
2260        if (rxcp->tunneled)
2261                stats->rx_vxlan_offload_pkts++;
2262        if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2263                stats->rx_mcast_pkts++;
2264        if (rxcp->err)
2265                stats->rx_compl_err++;
2266        u64_stats_update_end(&stats->sync);
2267}
2268
2269static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2270{
2271        /* L4 checksum is not reliable for non TCP/UDP packets.
2272         * Also ignore ipcksm for ipv6 pkts
2273         */
2274        return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2275                (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2276}
2277
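/* Pop the page_info at the RXQ tail. The backing "big page" is unmapped
 * only when its last fragment is consumed; for earlier fragments, only the
 * fragment itself is synced for CPU access.
 */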
2278static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2279{
2280        struct be_adapter *adapter = rxo->adapter;
2281        struct be_rx_page_info *rx_page_info;
2282        struct be_queue_info *rxq = &rxo->q;
2283        u32 frag_idx = rxq->tail;
2284
2285        rx_page_info = &rxo->page_info_tbl[frag_idx];
2286        BUG_ON(!rx_page_info->page);
2287
2288        if (rx_page_info->last_frag) {
2289                dma_unmap_page(&adapter->pdev->dev,
2290                               dma_unmap_addr(rx_page_info, bus),
2291                               adapter->big_page_size, DMA_FROM_DEVICE);
2292                rx_page_info->last_frag = false;
2293        } else {
2294                dma_sync_single_for_cpu(&adapter->pdev->dev,
2295                                        dma_unmap_addr(rx_page_info, bus),
2296                                        rx_frag_size, DMA_FROM_DEVICE);
2297        }
2298
2299        queue_tail_inc(rxq);
2300        atomic_dec(&rxq->used);
2301        return rx_page_info;
2302}
2303
2304/* Throw away the data in the Rx completion */
2305static void be_rx_compl_discard(struct be_rx_obj *rxo,
2306                                struct be_rx_compl_info *rxcp)
2307{
2308        struct be_rx_page_info *page_info;
2309        u16 i, num_rcvd = rxcp->num_rcvd;
2310
2311        for (i = 0; i < num_rcvd; i++) {
2312                page_info = get_rx_page_info(rxo);
2313                put_page(page_info->page);
2314                memset(page_info, 0, sizeof(*page_info));
2315        }
2316}
2317
2318/*
2319 * skb_fill_rx_data forms a complete skb for an ether frame
2320 * indicated by rxcp.
2321 */
2322static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2323                             struct be_rx_compl_info *rxcp)
2324{
2325        struct be_rx_page_info *page_info;
2326        u16 i, j;
2327        u16 hdr_len, curr_frag_len, remaining;
2328        u8 *start;
2329
2330        page_info = get_rx_page_info(rxo);
2331        start = page_address(page_info->page) + page_info->page_offset;
2332        prefetch(start);
2333
2334        /* Copy data in the first descriptor of this completion */
2335        curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2336
2337        skb->len = curr_frag_len;
2338        if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2339                memcpy(skb->data, start, curr_frag_len);
2340                /* Complete packet has now been moved to data */
2341                put_page(page_info->page);
2342                skb->data_len = 0;
2343                skb->tail += curr_frag_len;
2344        } else {
2345                hdr_len = ETH_HLEN;
2346                memcpy(skb->data, start, hdr_len);
2347                skb_shinfo(skb)->nr_frags = 1;
2348                skb_frag_set_page(skb, 0, page_info->page);
2349                skb_shinfo(skb)->frags[0].page_offset =
2350                                        page_info->page_offset + hdr_len;
2351                skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2352                                  curr_frag_len - hdr_len);
2353                skb->data_len = curr_frag_len - hdr_len;
2354                skb->truesize += rx_frag_size;
2355                skb->tail += hdr_len;
2356        }
2357        page_info->page = NULL;
2358
2359        if (rxcp->pkt_size <= rx_frag_size) {
2360                BUG_ON(rxcp->num_rcvd != 1);
2361                return;
2362        }
2363
2364        /* More frags present for this completion */
2365        remaining = rxcp->pkt_size - curr_frag_len;
2366        for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2367                page_info = get_rx_page_info(rxo);
2368                curr_frag_len = min(remaining, rx_frag_size);
2369
2370                /* Coalesce all frags from the same physical page in one slot */
2371                if (page_info->page_offset == 0) {
2372                        /* Fresh page */
2373                        j++;
2374                        skb_frag_set_page(skb, j, page_info->page);
2375                        skb_shinfo(skb)->frags[j].page_offset =
2376                                                        page_info->page_offset;
2377                        skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2378                        skb_shinfo(skb)->nr_frags++;
2379                } else {
2380                        put_page(page_info->page);
2381                }
2382
2383                skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2384                skb->len += curr_frag_len;
2385                skb->data_len += curr_frag_len;
2386                skb->truesize += rx_frag_size;
2387                remaining -= curr_frag_len;
2388                page_info->page = NULL;
2389        }
2390        BUG_ON(j > MAX_SKB_FRAGS);
2391}
2392
2393/* Process the RX completion indicated by rxcp when GRO is disabled */
2394static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2395                                struct be_rx_compl_info *rxcp)
2396{
2397        struct be_adapter *adapter = rxo->adapter;
2398        struct net_device *netdev = adapter->netdev;
2399        struct sk_buff *skb;
2400
2401        skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2402        if (unlikely(!skb)) {
2403                rx_stats(rxo)->rx_drops_no_skbs++;
2404                be_rx_compl_discard(rxo, rxcp);
2405                return;
2406        }
2407
2408        skb_fill_rx_data(rxo, skb, rxcp);
2409
2410        if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2411                skb->ip_summed = CHECKSUM_UNNECESSARY;
2412        else
2413                skb_checksum_none_assert(skb);
2414
2415        skb->protocol = eth_type_trans(skb, netdev);
2416        skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2417        if (netdev->features & NETIF_F_RXHASH)
2418                skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2419
2420        skb->csum_level = rxcp->tunneled;
2421        skb_mark_napi_id(skb, napi);
2422
2423        if (rxcp->vlanf)
2424                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2425
2426        netif_receive_skb(skb);
2427}
2428
2429/* Process the RX completion indicated by rxcp when GRO is enabled */
2430static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2431                                    struct napi_struct *napi,
2432                                    struct be_rx_compl_info *rxcp)
2433{
2434        struct be_adapter *adapter = rxo->adapter;
2435        struct be_rx_page_info *page_info;
2436        struct sk_buff *skb = NULL;
2437        u16 remaining, curr_frag_len;
2438        u16 i, j;
2439
2440        skb = napi_get_frags(napi);
2441        if (!skb) {
2442                be_rx_compl_discard(rxo, rxcp);
2443                return;
2444        }
2445
2446        remaining = rxcp->pkt_size;
2447        for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2448                page_info = get_rx_page_info(rxo);
2449
2450                curr_frag_len = min(remaining, rx_frag_size);
2451
2452                /* Coalesce all frags from the same physical page in one slot */
2453                if (i == 0 || page_info->page_offset == 0) {
2454                        /* First frag or Fresh page */
2455                        j++;
2456                        skb_frag_set_page(skb, j, page_info->page);
2457                        skb_shinfo(skb)->frags[j].page_offset =
2458                                                        page_info->page_offset;
2459                        skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2460                } else {
2461                        put_page(page_info->page);
2462                }
2463                skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2464                skb->truesize += rx_frag_size;
2465                remaining -= curr_frag_len;
2466                memset(page_info, 0, sizeof(*page_info));
2467        }
2468        BUG_ON(j > MAX_SKB_FRAGS);
2469
2470        skb_shinfo(skb)->nr_frags = j + 1;
2471        skb->len = rxcp->pkt_size;
2472        skb->data_len = rxcp->pkt_size;
2473        skb->ip_summed = CHECKSUM_UNNECESSARY;
2474        skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2475        if (adapter->netdev->features & NETIF_F_RXHASH)
2476                skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2477
2478        skb->csum_level = rxcp->tunneled;
2479
2480        if (rxcp->vlanf)
2481                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2482
2483        napi_gro_frags(napi);
2484}
2485
2486static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2487                                 struct be_rx_compl_info *rxcp)
2488{
2489        rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2490        rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2491        rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2492        rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2493        rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2494        rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2495        rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2496        rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2497        rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2498        rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2499        rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2500        if (rxcp->vlanf) {
2501                rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2502                rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2503        }
2504        rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2505        rxcp->tunneled =
2506                GET_RX_COMPL_V1_BITS(tunneled, compl);
2507}
2508
2509static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2510                                 struct be_rx_compl_info *rxcp)
2511{
2512        rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2513        rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2514        rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2515        rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2516        rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2517        rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2518        rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2519        rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2520        rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2521        rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2522        rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2523        if (rxcp->vlanf) {
2524                rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2525                rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2526        }
2527        rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2528        rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2529}
2530
2531static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2532{
2533        struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2534        struct be_rx_compl_info *rxcp = &rxo->rxcp;
2535        struct be_adapter *adapter = rxo->adapter;
2536
2537        /* For checking the valid bit it is Ok to use either definition as the
2538         * valid bit is at the same position in both v0 and v1 Rx compl */
2539        if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2540                return NULL;
2541
2542        rmb();
2543        be_dws_le_to_cpu(compl, sizeof(*compl));
2544
2545        if (adapter->be3_native)
2546                be_parse_rx_compl_v1(compl, rxcp);
2547        else
2548                be_parse_rx_compl_v0(compl, rxcp);
2549
2550        if (rxcp->ip_frag)
2551                rxcp->l4_csum = 0;
2552
2553        if (rxcp->vlanf) {
2554                /* In QNQ modes, if qnq bit is not set, then the packet was
2555                 * tagged only with the transparent outer vlan-tag and must
2556                 * not be treated as a vlan packet by host
2557                 */
2558                if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2559                        rxcp->vlanf = 0;
2560
2561                if (!lancer_chip(adapter))
2562                        rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2563
2564                if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2565                    !test_bit(rxcp->vlan_tag, adapter->vids))
2566                        rxcp->vlanf = 0;
2567        }
2568
2569        /* As the compl has been parsed, reset it; we won't touch it again */
2570        compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2571
2572        queue_tail_inc(&rxo->cq);
2573        return rxcp;
2574}
2575
2576static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2577{
2578        u32 order = get_order(size);
2579
2580        if (order > 0)
2581                gfp |= __GFP_COMP;
2582        return  alloc_pages(gfp, order);
2583}
2584
2585/*
2586 * Allocate a page, split it into fragments of size rx_frag_size and post
2587 * them as receive buffers to BE
2588 */
2589static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2590{
2591        struct be_adapter *adapter = rxo->adapter;
2592        struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2593        struct be_queue_info *rxq = &rxo->q;
2594        struct page *pagep = NULL;
2595        struct device *dev = &adapter->pdev->dev;
2596        struct be_eth_rx_d *rxd;
2597        u64 page_dmaaddr = 0, frag_dmaaddr;
2598        u32 posted, page_offset = 0, notify = 0;
2599
2600        page_info = &rxo->page_info_tbl[rxq->head];
2601        for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2602                if (!pagep) {
2603                        pagep = be_alloc_pages(adapter->big_page_size, gfp);
2604                        if (unlikely(!pagep)) {
2605                                rx_stats(rxo)->rx_post_fail++;
2606                                break;
2607                        }
2608                        page_dmaaddr = dma_map_page(dev, pagep, 0,
2609                                                    adapter->big_page_size,
2610                                                    DMA_FROM_DEVICE);
2611                        if (dma_mapping_error(dev, page_dmaaddr)) {
2612                                put_page(pagep);
2613                                pagep = NULL;
2614                                adapter->drv_stats.dma_map_errors++;
2615                                break;
2616                        }
2617                        page_offset = 0;
2618                } else {
2619                        get_page(pagep);
2620                        page_offset += rx_frag_size;
2621                }
2622                page_info->page_offset = page_offset;
2623                page_info->page = pagep;
2624
2625                rxd = queue_head_node(rxq);
2626                frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2627                rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2628                rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2629
2630                /* Any space left in the current big page for another frag? */
2631                if ((page_offset + rx_frag_size + rx_frag_size) >
2632                                        adapter->big_page_size) {
2633                        pagep = NULL;
2634                        page_info->last_frag = true;
2635                        dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2636                } else {
2637                        dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2638                }
2639
2640                prev_page_info = page_info;
2641                queue_head_inc(rxq);
2642                page_info = &rxo->page_info_tbl[rxq->head];
2643        }
2644
2645        /* Mark the last frag of a page when we break out of the above loop
2646         * with no more slots available in the RXQ
2647         */
2648        if (pagep) {
2649                prev_page_info->last_frag = true;
2650                dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2651        }
2652
2653        if (posted) {
2654                atomic_add(posted, &rxq->used);
2655                if (rxo->rx_post_starved)
2656                        rxo->rx_post_starved = false;
2657                do {
2658                        notify = min(MAX_NUM_POST_ERX_DB, posted);
2659                        be_rxq_notify(adapter, rxq->id, notify);
2660                        posted -= notify;
2661                } while (posted);
2662        } else if (atomic_read(&rxq->used) == 0) {
2663                /* Let be_worker replenish when memory is available */
2664                rxo->rx_post_starved = true;
2665        }
2666}
2667
2668static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2669{
2670        switch (status) {
2671        case BE_TX_COMP_HDR_PARSE_ERR:
2672                tx_stats(txo)->tx_hdr_parse_err++;
2673                break;
2674        case BE_TX_COMP_NDMA_ERR:
2675                tx_stats(txo)->tx_dma_err++;
2676                break;
2677        case BE_TX_COMP_ACL_ERR:
2678                tx_stats(txo)->tx_spoof_check_err++;
2679                break;
2680        }
2681}
2682
2683static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2684{
2685        switch (status) {
2686        case LANCER_TX_COMP_LSO_ERR:
2687                tx_stats(txo)->tx_tso_err++;
2688                break;
2689        case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2690        case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2691                tx_stats(txo)->tx_spoof_check_err++;
2692                break;
2693        case LANCER_TX_COMP_QINQ_ERR:
2694                tx_stats(txo)->tx_qinq_err++;
2695                break;
2696        case LANCER_TX_COMP_PARITY_ERR:
2697                tx_stats(txo)->tx_internal_parity_err++;
2698                break;
2699        case LANCER_TX_COMP_DMA_ERR:
2700                tx_stats(txo)->tx_dma_err++;
2701                break;
2702        case LANCER_TX_COMP_SGE_ERR:
2703                tx_stats(txo)->tx_sge_err++;
2704                break;
2705        }
2706}
2707
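/* Fetch the next valid TX completion from the TX CQ and record any error
 * status in the per-queue stats. On Lancer, LSO/SGE/parity errors also flag
 * a TX error on the adapter. Returns NULL when no valid completion is
 * available or a TX error has been flagged.
 */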
2708static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2709                                                struct be_tx_obj *txo)
2710{
2711        struct be_queue_info *tx_cq = &txo->cq;
2712        struct be_tx_compl_info *txcp = &txo->txcp;
2713        struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2714
2715        if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2716                return NULL;
2717
2718        /* Ensure load ordering of valid bit dword and other dwords below */
2719        rmb();
2720        be_dws_le_to_cpu(compl, sizeof(*compl));
2721
2722        txcp->status = GET_TX_COMPL_BITS(status, compl);
2723        txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2724
2725        if (txcp->status) {
2726                if (lancer_chip(adapter)) {
2727                        lancer_update_tx_err(txo, txcp->status);
2728                        /* Reset the adapter in case of TSO,
2729                         * SGE or parity error
2730                         */
2731                        if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2732                            txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2733                            txcp->status == LANCER_TX_COMP_SGE_ERR)
2734                                be_set_error(adapter, BE_ERROR_TX);
2735                } else {
2736                        be_update_tx_err(txo, txcp->status);
2737                }
2738        }
2739
2740        if (be_check_error(adapter, BE_ERROR_TX))
2741                return NULL;
2742
2743        compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2744        queue_tail_inc(tx_cq);
2745        return txcp;
2746}
2747
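/* Walk the TX queue from its tail up to and including last_index, unmapping
 * the header and fragment WRBs and freeing the completed skbs. Returns the
 * number of WRBs processed so the caller can credit them back to the queue.
 */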
2748static u16 be_tx_compl_process(struct be_adapter *adapter,
2749                               struct be_tx_obj *txo, u16 last_index)
2750{
2751        struct sk_buff **sent_skbs = txo->sent_skb_list;
2752        struct be_queue_info *txq = &txo->q;
2753        struct sk_buff *skb = NULL;
2754        bool unmap_skb_hdr = false;
2755        struct be_eth_wrb *wrb;
2756        u16 num_wrbs = 0;
2757        u32 frag_index;
2758
2759        do {
2760                if (sent_skbs[txq->tail]) {
2761                        /* Free skb from prev req */
2762                        if (skb)
2763                                dev_consume_skb_any(skb);
2764                        skb = sent_skbs[txq->tail];
2765                        sent_skbs[txq->tail] = NULL;
2766                        queue_tail_inc(txq);  /* skip hdr wrb */
2767                        num_wrbs++;
2768                        unmap_skb_hdr = true;
2769                }
2770                wrb = queue_tail_node(txq);
2771                frag_index = txq->tail;
2772                unmap_tx_frag(&adapter->pdev->dev, wrb,
2773                              (unmap_skb_hdr && skb_headlen(skb)));
2774                unmap_skb_hdr = false;
2775                queue_tail_inc(txq);
2776                num_wrbs++;
2777        } while (frag_index != last_index);
2778        dev_consume_skb_any(skb);
2779
2780        return num_wrbs;
2781}
2782
2783/* Return the number of events in the event queue */
2784static inline int events_get(struct be_eq_obj *eqo)
2785{
2786        struct be_eq_entry *eqe;
2787        int num = 0;
2788
2789        do {
2790                eqe = queue_tail_node(&eqo->q);
2791                if (eqe->evt == 0)
2792                        break;
2793
2794                rmb();
2795                eqe->evt = 0;
2796                num++;
2797                queue_tail_inc(&eqo->q);
2798        } while (true);
2799
2800        return num;
2801}
2802
2803/* Leaves the EQ in disarmed state */
2804static void be_eq_clean(struct be_eq_obj *eqo)
2805{
2806        int num = events_get(eqo);
2807
2808        be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2809}
2810
2811/* Free posted rx buffers that were not used */
2812static void be_rxq_clean(struct be_rx_obj *rxo)
2813{
2814        struct be_queue_info *rxq = &rxo->q;
2815        struct be_rx_page_info *page_info;
2816
2817        while (atomic_read(&rxq->used) > 0) {
2818                page_info = get_rx_page_info(rxo);
2819                put_page(page_info->page);
2820                memset(page_info, 0, sizeof(*page_info));
2821        }
2822        BUG_ON(atomic_read(&rxq->used));
2823        rxq->tail = 0;
2824        rxq->head = 0;
2825}
2826
2827static void be_rx_cq_clean(struct be_rx_obj *rxo)
2828{
2829        struct be_queue_info *rx_cq = &rxo->cq;
2830        struct be_rx_compl_info *rxcp;
2831        struct be_adapter *adapter = rxo->adapter;
2832        int flush_wait = 0;
2833
2834        /* Consume pending rx completions.
2835         * Wait for the flush completion (identified by zero num_rcvd)
2836         * to arrive. Notify CQ even when there are no more CQ entries
2837         * for HW to flush partially coalesced CQ entries.
2838         * In Lancer, there is no need to wait for flush compl.
2839         */
2840        for (;;) {
2841                rxcp = be_rx_compl_get(rxo);
2842                if (!rxcp) {
2843                        if (lancer_chip(adapter))
2844                                break;
2845
2846                        if (flush_wait++ > 50 ||
2847                            be_check_error(adapter,
2848                                           BE_ERROR_HW)) {
2849                                dev_warn(&adapter->pdev->dev,
2850                                         "did not receive flush compl\n");
2851                                break;
2852                        }
2853                        be_cq_notify(adapter, rx_cq->id, true, 0);
2854                        mdelay(1);
2855                } else {
2856                        be_rx_compl_discard(rxo, rxcp);
2857                        be_cq_notify(adapter, rx_cq->id, false, 1);
2858                        if (rxcp->num_rcvd == 0)
2859                                break;
2860                }
2861        }
2862
2863        /* After cleanup, leave the CQ in unarmed state */
2864        be_cq_notify(adapter, rx_cq->id, false, 0);
2865}
2866
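/* Drain TX completions on all TXQs until the HW has been silent for
 * ~10ms (or an HW error is detected), then free any wrbs that were
 * queued but never notified to the HW and rewind the TXQ indices.
 */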
2867static void be_tx_compl_clean(struct be_adapter *adapter)
2868{
2869        struct device *dev = &adapter->pdev->dev;
2870        u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2871        struct be_tx_compl_info *txcp;
2872        struct be_queue_info *txq;
2873        u32 end_idx, notified_idx;
2874        struct be_tx_obj *txo;
2875        int i, pending_txqs;
2876
2877        /* Stop polling for compls when HW has been silent for 10ms */
2878        do {
2879                pending_txqs = adapter->num_tx_qs;
2880
2881                for_all_tx_queues(adapter, txo, i) {
2882                        cmpl = 0;
2883                        num_wrbs = 0;
2884                        txq = &txo->q;
2885                        while ((txcp = be_tx_compl_get(adapter, txo))) {
2886                                num_wrbs +=
2887                                        be_tx_compl_process(adapter, txo,
2888                                                            txcp->end_index);
2889                                cmpl++;
2890                        }
2891                        if (cmpl) {
2892                                be_cq_notify(adapter, txo->cq.id, false, cmpl);
2893                                atomic_sub(num_wrbs, &txq->used);
2894                                timeo = 0;
2895                        }
2896                        if (!be_is_tx_compl_pending(txo))
2897                                pending_txqs--;
2898                }
2899
2900                if (pending_txqs == 0 || ++timeo > 10 ||
2901                    be_check_error(adapter, BE_ERROR_HW))
2902                        break;
2903
2904                mdelay(1);
2905        } while (true);
2906
2907        /* Free enqueued TX that was never notified to HW */
2908        for_all_tx_queues(adapter, txo, i) {
2909                txq = &txo->q;
2910
2911                if (atomic_read(&txq->used)) {
2912                        dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2913                                 i, atomic_read(&txq->used));
2914                        notified_idx = txq->tail;
2915                        end_idx = txq->tail;
2916                        index_adv(&end_idx, atomic_read(&txq->used) - 1,
2917                                  txq->len);
2918                        /* Use the tx-compl process logic to handle requests
2919                         * that were not sent to the HW.
2920                         */
2921                        num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2922                        atomic_sub(num_wrbs, &txq->used);
2923                        BUG_ON(atomic_read(&txq->used));
2924                        txo->pend_wrb_cnt = 0;
2925                        /* Since hw was never notified of these requests,
2926                         * reset TXQ indices
2927                         */
2928                        txq->head = notified_idx;
2929                        txq->tail = notified_idx;
2930                }
2931        }
2932}
2933
2934static void be_evt_queues_destroy(struct be_adapter *adapter)
2935{
2936        struct be_eq_obj *eqo;
2937        int i;
2938
2939        for_all_evt_queues(adapter, eqo, i) {
2940                if (eqo->q.created) {
2941                        be_eq_clean(eqo);
2942                        be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2943                        netif_napi_del(&eqo->napi);
2944                        free_cpumask_var(eqo->affinity_mask);
2945                }
2946                be_queue_free(adapter, &eqo->q);
2947        }
2948}
2949
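/* Create enough EQs to service both the RX and TX queues (bounded by the
 * number of available IRQs), initialise the interrupt-moderation (AIC)
 * defaults, build a NUMA-local CPU affinity mask for each EQ and register
 * its NAPI context.
 */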
2950static int be_evt_queues_create(struct be_adapter *adapter)
2951{
2952        struct be_queue_info *eq;
2953        struct be_eq_obj *eqo;
2954        struct be_aic_obj *aic;
2955        int i, rc;
2956
2957        /* need enough EQs to service both RX and TX queues */
2958        adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2959                                    max(adapter->cfg_num_rx_irqs,
2960                                        adapter->cfg_num_tx_irqs));
2961
2962        for_all_evt_queues(adapter, eqo, i) {
2963                int numa_node = dev_to_node(&adapter->pdev->dev);
2964
2965                aic = &adapter->aic_obj[i];
2966                eqo->adapter = adapter;
2967                eqo->idx = i;
2968                aic->max_eqd = BE_MAX_EQD;
2969                aic->enable = true;
2970
2971                eq = &eqo->q;
2972                rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2973                                    sizeof(struct be_eq_entry));
2974                if (rc)
2975                        return rc;
2976
2977                rc = be_cmd_eq_create(adapter, eqo);
2978                if (rc)
2979                        return rc;
2980
2981                if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2982                        return -ENOMEM;
2983                cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2984                                eqo->affinity_mask);
2985                netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2986                               BE_NAPI_WEIGHT);
2987        }
2988        return 0;
2989}
2990
2991static void be_mcc_queues_destroy(struct be_adapter *adapter)
2992{
2993        struct be_queue_info *q;
2994
2995        q = &adapter->mcc_obj.q;
2996        if (q->created)
2997                be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2998        be_queue_free(adapter, q);
2999
3000        q = &adapter->mcc_obj.cq;
3001        if (q->created)
3002                be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3003        be_queue_free(adapter, q);
3004}
3005
3006/* Must be called only after TX qs are created as MCC shares TX EQ */
3007static int be_mcc_queues_create(struct be_adapter *adapter)
3008{
3009        struct be_queue_info *q, *cq;
3010
3011        cq = &adapter->mcc_obj.cq;
3012        if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3013                           sizeof(struct be_mcc_compl)))
3014                goto err;
3015
3016        /* Use the default EQ for MCC completions */
3017        if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3018                goto mcc_cq_free;
3019
3020        q = &adapter->mcc_obj.q;
3021        if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3022                goto mcc_cq_destroy;
3023
3024        if (be_cmd_mccq_create(adapter, q, cq))
3025                goto mcc_q_free;
3026
3027        return 0;
3028
3029mcc_q_free:
3030        be_queue_free(adapter, q);
3031mcc_cq_destroy:
3032        be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3033mcc_cq_free:
3034        be_queue_free(adapter, cq);
3035err:
3036        return -1;
3037}
3038
3039static void be_tx_queues_destroy(struct be_adapter *adapter)
3040{
3041        struct be_queue_info *q;
3042        struct be_tx_obj *txo;
3043        u8 i;
3044
3045        for_all_tx_queues(adapter, txo, i) {
3046                q = &txo->q;
3047                if (q->created)
3048                        be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3049                be_queue_free(adapter, q);
3050
3051                q = &txo->cq;
3052                if (q->created)
3053                        be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3054                be_queue_free(adapter, q);
3055        }
3056}
3057
3058static int be_tx_qs_create(struct be_adapter *adapter)
3059{
3060        struct be_queue_info *cq;
3061        struct be_tx_obj *txo;
3062        struct be_eq_obj *eqo;
3063        int status, i;
3064
3065        adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3066
3067        for_all_tx_queues(adapter, txo, i) {
3068                cq = &txo->cq;
3069                status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3070                                        sizeof(struct be_eth_tx_compl));
3071                if (status)
3072                        return status;
3073
3074                u64_stats_init(&txo->stats.sync);
3075                u64_stats_init(&txo->stats.sync_compl);
3076
3077                /* If num_evt_qs is less than num_tx_qs, then more than
3078                 * one txq shares an eq
3079                 */
3080                eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3081                status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3082                if (status)
3083                        return status;
3084
3085                status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3086                                        sizeof(struct be_eth_wrb));
3087                if (status)
3088                        return status;
3089
3090                status = be_cmd_txq_create(adapter, txo);
3091                if (status)
3092                        return status;
3093
3094                netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3095                                    eqo->idx);
3096        }
3097
3098        dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3099                 adapter->num_tx_qs);
3100        return 0;
3101}
3102
3103static void be_rx_cqs_destroy(struct be_adapter *adapter)
3104{
3105        struct be_queue_info *q;
3106        struct be_rx_obj *rxo;
3107        int i;
3108
3109        for_all_rx_queues(adapter, rxo, i) {
3110                q = &rxo->cq;
3111                if (q->created)
3112                        be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3113                be_queue_free(adapter, q);
3114        }
3115}
3116
3117static int be_rx_cqs_create(struct be_adapter *adapter)
3118{
3119        struct be_queue_info *eq, *cq;
3120        struct be_rx_obj *rxo;
3121        int rc, i;
3122
3123        adapter->num_rss_qs =
3124                        min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3125
3126        /* We'll use RSS only if at least 2 RSS rings are supported. */
3127        if (adapter->num_rss_qs < 2)
3128                adapter->num_rss_qs = 0;
3129
3130        adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3131
3132        /* When the interface is not capable of RSS rings (and there is no
3133         * need to create a default RXQ) we'll still need one RXQ
3134         */
3135        if (adapter->num_rx_qs == 0)
3136                adapter->num_rx_qs = 1;
3137
3138        adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3139        for_all_rx_queues(adapter, rxo, i) {
3140                rxo->adapter = adapter;
3141                cq = &rxo->cq;
3142                rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3143                                    sizeof(struct be_eth_rx_compl));
3144                if (rc)
3145                        return rc;
3146
3147                u64_stats_init(&rxo->stats.sync);
3148                eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3149                rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3150                if (rc)
3151                        return rc;
3152        }
3153
3154        dev_info(&adapter->pdev->dev,
3155                 "created %d RX queue(s)\n", adapter->num_rx_qs);
3156        return 0;
3157}
3158
3159static irqreturn_t be_intx(int irq, void *dev)
3160{
3161        struct be_eq_obj *eqo = dev;
3162        struct be_adapter *adapter = eqo->adapter;
3163        int num_evts = 0;
3164
3165        /* IRQ is not expected when NAPI is scheduled as the EQ
3166         * will not be armed.
3167         * But, this can happen on Lancer INTx where it takes
3168         * a while to de-assert INTx or in BE2 where occasionally
3169         * an interrupt may be raised even when EQ is unarmed.
3170         * If NAPI is already scheduled, then counting & notifying
3171         * events will orphan them.
3172         */
3173        if (napi_schedule_prep(&eqo->napi)) {
3174                num_evts = events_get(eqo);
3175                __napi_schedule(&eqo->napi);
3176                if (num_evts)
3177                        eqo->spurious_intr = 0;
3178        }
3179        be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3180
3181        /* Return IRQ_HANDLED only for the first spurious intr
3182         * after a valid intr to stop the kernel from branding
3183         * this irq as a bad one!
3184         */
3185        if (num_evts || eqo->spurious_intr++ == 0)
3186                return IRQ_HANDLED;
3187        else
3188                return IRQ_NONE;
3189}
3190
3191static irqreturn_t be_msix(int irq, void *dev)
3192{
3193        struct be_eq_obj *eqo = dev;
3194
3195        be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3196        napi_schedule(&eqo->napi);
3197        return IRQ_HANDLED;
3198}
3199
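/* GRO is attempted only for error-free TCP completions with a valid L4 csum */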
3200static inline bool do_gro(struct be_rx_compl_info *rxcp)
3201{
3202        return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3203}
3204
3205static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3206                         int budget)
3207{
3208        struct be_adapter *adapter = rxo->adapter;
3209        struct be_queue_info *rx_cq = &rxo->cq;
3210        struct be_rx_compl_info *rxcp;
3211        u32 work_done;
3212        u32 frags_consumed = 0;
3213
3214        for (work_done = 0; work_done < budget; work_done++) {
3215                rxcp = be_rx_compl_get(rxo);
3216                if (!rxcp)
3217                        break;
3218
3219                /* Is it a flush compl that has no data */
3220                if (unlikely(rxcp->num_rcvd == 0))
3221                        goto loop_continue;
3222
3223                /* Discard compl with partial DMA Lancer B0 */
3224                if (unlikely(!rxcp->pkt_size)) {
3225                        be_rx_compl_discard(rxo, rxcp);
3226                        goto loop_continue;
3227                }
3228
3229                /* On BE drop pkts that arrive due to imperfect filtering in
3230                 * promiscuous mode on some SKUs
3231                 */
3232                if (unlikely(rxcp->port != adapter->port_num &&
3233                             !lancer_chip(adapter))) {
3234                        be_rx_compl_discard(rxo, rxcp);
3235                        goto loop_continue;
3236                }
3237
3238                if (do_gro(rxcp))
3239                        be_rx_compl_process_gro(rxo, napi, rxcp);
3240                else
3241                        be_rx_compl_process(rxo, napi, rxcp);
3242
3243loop_continue:
3244                frags_consumed += rxcp->num_rcvd;
3245                be_rx_stats_update(rxo, rxcp);
3246        }
3247
3248        if (work_done) {
3249                be_cq_notify(adapter, rx_cq->id, true, work_done);
3250
3251                /* When an rx-obj gets into post_starved state, just
3252                 * let be_worker do the posting.
3253                 */
3254                if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3255                    !rxo->rx_post_starved)
3256                        be_post_rx_frags(rxo, GFP_ATOMIC,
3257                                         max_t(u32, MAX_RX_POST,
3258                                               frags_consumed));
3259        }
3260
3261        return work_done;
3262}
3263
3265static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3266                          int idx)
3267{
3268        int num_wrbs = 0, work_done = 0;
3269        struct be_tx_compl_info *txcp;
3270
3271        while ((txcp = be_tx_compl_get(adapter, txo))) {
3272                num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3273                work_done++;
3274        }
3275
3276        if (work_done) {
3277                be_cq_notify(adapter, txo->cq.id, true, work_done);
3278                atomic_sub(num_wrbs, &txo->q.used);
3279
3280                /* As Tx wrbs have been freed up, wake up netdev queue
3281                 * if it was stopped due to lack of tx wrbs.  */
3282                if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3283                    be_can_txq_wake(txo)) {
3284                        netif_wake_subqueue(adapter->netdev, idx);
3285                }
3286
3287                u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3288                tx_stats(txo)->tx_compl += work_done;
3289                u64_stats_update_end(&tx_stats(txo)->sync_compl);
3290        }
3291}
3292
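/* NAPI poll handler: reap TX completions, process RX completions up to
 * the budget on each RXQ of this EQ, service MCC completions on the MCC
 * EQ and re-arm the EQ only when the work done stayed below the budget.
 */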
3293int be_poll(struct napi_struct *napi, int budget)
3294{
3295        struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3296        struct be_adapter *adapter = eqo->adapter;
3297        int max_work = 0, work, i, num_evts;
3298        struct be_rx_obj *rxo;
3299        struct be_tx_obj *txo;
3300        u32 mult_enc = 0;
3301
3302        num_evts = events_get(eqo);
3303
3304        for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3305                be_process_tx(adapter, txo, i);
3306
3307        /* This loop will iterate twice for EQ0 in which
3308         * completions of the last RXQ (default one) are also processed.
3309         * For other EQs the loop iterates only once
3310         */
3311        for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3312                work = be_process_rx(rxo, napi, budget);
3313                max_work = max(work, max_work);
3314        }
3315
3316        if (is_mcc_eqo(eqo))
3317                be_process_mcc(adapter);
3318
3319        if (max_work < budget) {
3320                napi_complete_done(napi, max_work);
3321
3322                /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3323                 * delay via a delay multiplier encoding value
3324                 */
3325                if (skyhawk_chip(adapter))
3326                        mult_enc = be_get_eq_delay_mult_enc(eqo);
3327
3328                be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3329                             mult_enc);
3330        } else {
3331                /* As we'll continue in polling mode, count and clear events */
3332                be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3333        }
3334        return max_work;
3335}
3336
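/* Check for unrecoverable errors: the SLIPORT status registers on Lancer,
 * the UE status low/high registers on other chips. On error, latch
 * BE_ERROR_UE in the adapter state and log the failing blocks.
 */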
3337void be_detect_error(struct be_adapter *adapter)
3338{
3339        u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3340        u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3341        struct device *dev = &adapter->pdev->dev;
3342        u16 val;
3343        u32 i;
3344
3345        if (be_check_error(adapter, BE_ERROR_HW))
3346                return;
3347
3348        if (lancer_chip(adapter)) {
3349                sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3350                if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3351                        be_set_error(adapter, BE_ERROR_UE);
3352                        sliport_err1 = ioread32(adapter->db +
3353                                                SLIPORT_ERROR1_OFFSET);
3354                        sliport_err2 = ioread32(adapter->db +
3355                                                SLIPORT_ERROR2_OFFSET);
3356                        /* Do not log error messages if it's a FW reset */
3357                        if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3358                            sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3359                                dev_info(dev, "Reset is in progress\n");
3360                        } else {
3361                                dev_err(dev, "Error detected in the card\n");
3362                                dev_err(dev, "ERR: sliport status 0x%x\n",
3363                                        sliport_status);
3364                                dev_err(dev, "ERR: sliport error1 0x%x\n",
3365                                        sliport_err1);
3366                                dev_err(dev, "ERR: sliport error2 0x%x\n",
3367                                        sliport_err2);
3368                        }
3369                }
3370        } else {
3371                ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3372                ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3373                ue_lo_mask = ioread32(adapter->pcicfg +
3374                                      PCICFG_UE_STATUS_LOW_MASK);
3375                ue_hi_mask = ioread32(adapter->pcicfg +
3376                                      PCICFG_UE_STATUS_HI_MASK);
3377
3378                ue_lo = (ue_lo & ~ue_lo_mask);
3379                ue_hi = (ue_hi & ~ue_hi_mask);
3380
3381                if (ue_lo || ue_hi) {
3382                        /* On certain platforms BE3 hardware can indicate
3383                         * spurious UEs. In case of a UE in the chip,
3384                         * the POST register correctly reports either a
3385                         * FAT_LOG_START state (FW is currently dumping
3386                         * FAT log data) or an ARMFW_UE state. Check for the
3387                         * above states to ascertain if the UE is valid or not.
3388                         */
3389                        if (BE3_chip(adapter)) {
3390                                val = be_POST_stage_get(adapter);
3391                                if ((val & POST_STAGE_FAT_LOG_START)
3392                                     != POST_STAGE_FAT_LOG_START &&
3393                                    (val & POST_STAGE_ARMFW_UE)
3394                                     != POST_STAGE_ARMFW_UE &&
3395                                    (val & POST_STAGE_RECOVERABLE_ERR)
3396                                     != POST_STAGE_RECOVERABLE_ERR)
3397                                        return;
3398                        }
3399
3400                        dev_err(dev, "Error detected in the adapter");
3401                        be_set_error(adapter, BE_ERROR_UE);
3402
3403                        for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3404                                if (ue_lo & 1)
3405                                        dev_err(dev, "UE: %s bit set\n",
3406                                                ue_status_low_desc[i]);
3407                        }
3408                        for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3409                                if (ue_hi & 1)
3410                                        dev_err(dev, "UE: %s bit set\n",
3411                                                ue_status_hi_desc[i]);
3412                        }
3413                }
3414        }
3415}
3416
3417static void be_msix_disable(struct be_adapter *adapter)
3418{
3419        if (msix_enabled(adapter)) {
3420                pci_disable_msix(adapter->pdev);
3421                adapter->num_msix_vec = 0;
3422                adapter->num_msix_roce_vec = 0;
3423        }
3424}
3425
3426static int be_msix_enable(struct be_adapter *adapter)
3427{
3428        unsigned int i, max_roce_eqs;
3429        struct device *dev = &adapter->pdev->dev;
3430        int num_vec;
3431
3432        /* If RoCE is supported, program the max number of vectors that
3433         * could be used for NIC and RoCE, else, just program the number
3434         * we'll use initially.
3435         */
3436        if (be_roce_supported(adapter)) {
3437                max_roce_eqs =
3438                        be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3439                max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3440                num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3441        } else {
3442                num_vec = max(adapter->cfg_num_rx_irqs,
3443                              adapter->cfg_num_tx_irqs);
3444        }
3445
3446        for (i = 0; i < num_vec; i++)
3447                adapter->msix_entries[i].entry = i;
3448
3449        num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3450                                        MIN_MSIX_VECTORS, num_vec);
3451        if (num_vec < 0)
3452                goto fail;
3453
3454        if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3455                adapter->num_msix_roce_vec = num_vec / 2;
3456                dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3457                         adapter->num_msix_roce_vec);
3458        }
3459
3460        adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3461
3462        dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3463                 adapter->num_msix_vec);
3464        return 0;
3465
3466fail:
3467        dev_warn(dev, "MSIx enable failed\n");
3468
3469        /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3470        if (be_virtfn(adapter))
3471                return num_vec;
3472        return 0;
3473}
3474
3475static inline int be_msix_vec_get(struct be_adapter *adapter,
3476                                  struct be_eq_obj *eqo)
3477{
3478        return adapter->msix_entries[eqo->msix_idx].vector;
3479}
3480
3481static int be_msix_register(struct be_adapter *adapter)
3482{
3483        struct net_device *netdev = adapter->netdev;
3484        struct be_eq_obj *eqo;
3485        int status, i, vec;
3486
3487        for_all_evt_queues(adapter, eqo, i) {
3488                sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3489                vec = be_msix_vec_get(adapter, eqo);
3490                status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3491                if (status)
3492                        goto err_msix;
3493
3494                irq_set_affinity_hint(vec, eqo->affinity_mask);
3495        }
3496
3497        return 0;
3498err_msix:
3499        for (i--; i >= 0; i--) {
3500                eqo = &adapter->eq_obj[i];
3501                free_irq(be_msix_vec_get(adapter, eqo), eqo);
3502        }
3503        dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3504                 status);
3505        be_msix_disable(adapter);
3506        return status;
3507}
3508
3509static int be_irq_register(struct be_adapter *adapter)
3510{
3511        struct net_device *netdev = adapter->netdev;
3512        int status;
3513
3514        if (msix_enabled(adapter)) {
3515                status = be_msix_register(adapter);
3516                if (status == 0)
3517                        goto done;
3518                /* INTx is not supported for VF */
3519                if (be_virtfn(adapter))
3520                        return status;
3521        }
3522
3523        /* INTx: only the first EQ is used */
3524        netdev->irq = adapter->pdev->irq;
3525        status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3526                             &adapter->eq_obj[0]);
3527        if (status) {
3528                dev_err(&adapter->pdev->dev,
3529                        "INTx request IRQ failed - err %d\n", status);
3530                return status;
3531        }
3532done:
3533        adapter->isr_registered = true;
3534        return 0;
3535}
3536
3537static void be_irq_unregister(struct be_adapter *adapter)
3538{
3539        struct net_device *netdev = adapter->netdev;
3540        struct be_eq_obj *eqo;
3541        int i, vec;
3542
3543        if (!adapter->isr_registered)
3544                return;
3545
3546        /* INTx */
3547        if (!msix_enabled(adapter)) {
3548                free_irq(netdev->irq, &adapter->eq_obj[0]);
3549                goto done;
3550        }
3551
3552        /* MSIx */
3553        for_all_evt_queues(adapter, eqo, i) {
3554                vec = be_msix_vec_get(adapter, eqo);
3555                irq_set_affinity_hint(vec, NULL);
3556                free_irq(vec, eqo);
3557        }
3558
3559done:
3560        adapter->isr_registered = false;
3561}
3562
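/* Destroy all RXQs: on Lancer, first replenish an empty RXQ to avoid a
 * potential HW stall, then destroy each queue, drain its CQ, free the
 * posted buffers and finally disable RSS if it was enabled.
 */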
3563static void be_rx_qs_destroy(struct be_adapter *adapter)
3564{
3565        struct rss_info *rss = &adapter->rss_info;
3566        struct be_queue_info *q;
3567        struct be_rx_obj *rxo;
3568        int i;
3569
3570        for_all_rx_queues(adapter, rxo, i) {
3571                q = &rxo->q;
3572                if (q->created) {
3573                        /* If RXQs are destroyed while in an "out of buffer"
3574                         * state, there is a possibility of an HW stall on
3575                         * Lancer. So, post 64 buffers to each queue to relieve
3576                         * the "out of buffer" condition.
3577                         * Make sure there's space in the RXQ before posting.
3578                         */
3579                        if (lancer_chip(adapter)) {
3580                                be_rx_cq_clean(rxo);
3581                                if (atomic_read(&q->used) == 0)
3582                                        be_post_rx_frags(rxo, GFP_KERNEL,
3583                                                         MAX_RX_POST);
3584                        }
3585
3586                        be_cmd_rxq_destroy(adapter, q);
3587                        be_rx_cq_clean(rxo);
3588                        be_rxq_clean(rxo);
3589                }
3590                be_queue_free(adapter, q);
3591        }
3592
3593        if (rss->rss_flags) {
3594                rss->rss_flags = RSS_ENABLE_NONE;
3595                be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3596                                  128, rss->rss_hkey);
3597        }
3598}
3599
3600static void be_disable_if_filters(struct be_adapter *adapter)
3601{
3602        /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3603        if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3604            check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3605                be_dev_mac_del(adapter, adapter->pmac_id[0]);
3606                eth_zero_addr(adapter->dev_mac);
3607        }
3608
3609        be_clear_uc_list(adapter);
3610        be_clear_mc_list(adapter);
3611
3612        /* The IFACE flags are enabled in the open path and cleared
3613         * in the close path. When a VF gets detached from the host and
3614         * assigned to a VM the following happens:
3615         *      - VF's IFACE flags get cleared in the detach path
3616         *      - IFACE create is issued by the VF in the attach path
3617         * Due to a bug in the BE3/Skyhawk-R FW
3618         * (Lancer FW doesn't have the bug), the IFACE capability flags
3619         * specified along with the IFACE create cmd issued by a VF are not
3620         * honoured by FW.  As a consequence, if a *new* driver
3621         * (that enables/disables IFACE flags in open/close)
3622         * is loaded in the host and an *old* driver is used by a VM/VF,
3623         * the IFACE gets created *without* the needed flags.
3624         * To avoid this, disable RX-filter flags only for Lancer.
3625         */
3626        if (lancer_chip(adapter)) {
3627                be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3628                adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3629        }
3630}
3631
3632static int be_close(struct net_device *netdev)
3633{
3634        struct be_adapter *adapter = netdev_priv(netdev);
3635        struct be_eq_obj *eqo;
3636        int i;
3637
3638        /* This protection is needed as be_close() may be called even when the
3639         * adapter is in cleared state (after eeh perm failure)
3640         */
3641        if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3642                return 0;
3643
3644        /* Before attempting cleanup ensure all the pending cmds in the
3645         * config_wq have finished execution
3646         */
3647        flush_workqueue(be_wq);
3648
3649        be_disable_if_filters(adapter);
3650
3651        if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3652                for_all_evt_queues(adapter, eqo, i) {
3653                        napi_disable(&eqo->napi);
3654                }
3655                adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3656        }
3657
3658        be_async_mcc_disable(adapter);
3659
3660        /* Wait for all pending tx completions to arrive so that
3661         * all tx skbs are freed.
3662         */
3663        netif_tx_disable(netdev);
3664        be_tx_compl_clean(adapter);
3665
3666        be_rx_qs_destroy(adapter);
3667
3668        for_all_evt_queues(adapter, eqo, i) {
3669                if (msix_enabled(adapter))
3670                        synchronize_irq(be_msix_vec_get(adapter, eqo));
3671                else
3672                        synchronize_irq(netdev->irq);
3673                be_eq_clean(eqo);
3674        }
3675
3676        be_irq_unregister(adapter);
3677
3678        return 0;
3679}
3680
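/* Allocate and create the RX rings (default RXQ plus RSS queues), program
 * the RSS indirection table and hash key when more than one RXQ exists
 * and post receive buffers to every ring.
 */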
3681static int be_rx_qs_create(struct be_adapter *adapter)
3682{
3683        struct rss_info *rss = &adapter->rss_info;
3684        u8 rss_key[RSS_HASH_KEY_LEN];
3685        struct be_rx_obj *rxo;
3686        int rc, i, j;
3687
3688        for_all_rx_queues(adapter, rxo, i) {
3689                rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3690                                    sizeof(struct be_eth_rx_d));
3691                if (rc)
3692                        return rc;
3693        }
3694
3695        if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3696                rxo = default_rxo(adapter);
3697                rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3698                                       rx_frag_size, adapter->if_handle,
3699                                       false, &rxo->rss_id);
3700                if (rc)
3701                        return rc;
3702        }
3703
3704        for_all_rss_queues(adapter, rxo, i) {
3705                rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3706                                       rx_frag_size, adapter->if_handle,
3707                                       true, &rxo->rss_id);
3708                if (rc)
3709                        return rc;
3710        }
3711
3712        if (be_multi_rxq(adapter)) {
3713                for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3714                        for_all_rss_queues(adapter, rxo, i) {
3715                                if ((j + i) >= RSS_INDIR_TABLE_LEN)
3716                                        break;
3717                                rss->rsstable[j + i] = rxo->rss_id;
3718                                rss->rss_queue[j + i] = i;
3719                        }
3720                }
3721                rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3722                        RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3723
3724                if (!BEx_chip(adapter))
3725                        rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3726                                RSS_ENABLE_UDP_IPV6;
3727
3728                netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3729                rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3730                                       RSS_INDIR_TABLE_LEN, rss_key);
3731                if (rc) {
3732                        rss->rss_flags = RSS_ENABLE_NONE;
3733                        return rc;
3734                }
3735
3736                memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3737        } else {
3738                /* Disable RSS, if only default RX Q is created */
3739                rss->rss_flags = RSS_ENABLE_NONE;
3740        }
3741
3743        /* Post 1 less than RXQ-len to avoid head being equal to tail,
3744         * which is a queue empty condition
3745         */
3746        for_all_rx_queues(adapter, rxo, i)
3747                be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3748
3749        return 0;
3750}
3751
3752static int be_enable_if_filters(struct be_adapter *adapter)
3753{
3754        int status;
3755
3756        status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3757        if (status)
3758                return status;
3759
3760        /* Normally this condition is true as the ->dev_mac is zeroed.
3761         * But on BE3 VFs the initial MAC is pre-programmed by PF and
3762         * subsequent be_dev_mac_add() can fail (after fresh boot)
3763         */
3764        if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3765                int old_pmac_id = -1;
3766
3767                /* Remember old programmed MAC if any - can happen on BE3 VF */
3768                if (!is_zero_ether_addr(adapter->dev_mac))
3769                        old_pmac_id = adapter->pmac_id[0];
3770
3771                status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3772                if (status)
3773                        return status;
3774
3775                /* Delete the old programmed MAC as we successfully programmed
3776                 * a new MAC
3777                 */
3778                if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3779                        be_dev_mac_del(adapter, old_pmac_id);
3780
3781                ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3782        }
3783
3784        if (adapter->vlans_added)
3785                be_vid_config(adapter);
3786
3787        __be_set_rx_mode(adapter);
3788
3789        return 0;
3790}
3791
3792static int be_open(struct net_device *netdev)
3793{
3794        struct be_adapter *adapter = netdev_priv(netdev);
3795        struct be_eq_obj *eqo;
3796        struct be_rx_obj *rxo;
3797        struct be_tx_obj *txo;
3798        u8 link_status;
3799        int status, i;
3800
3801        status = be_rx_qs_create(adapter);
3802        if (status)
3803                goto err;
3804
3805        status = be_enable_if_filters(adapter);
3806        if (status)
3807                goto err;
3808
3809        status = be_irq_register(adapter);
3810        if (status)
3811                goto err;
3812
3813        for_all_rx_queues(adapter, rxo, i)
3814                be_cq_notify(adapter, rxo->cq.id, true, 0);
3815
3816        for_all_tx_queues(adapter, txo, i)
3817                be_cq_notify(adapter, txo->cq.id, true, 0);
3818
3819        be_async_mcc_enable(adapter);
3820
3821        for_all_evt_queues(adapter, eqo, i) {
3822                napi_enable(&eqo->napi);
3823                be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3824        }
3825        adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3826
3827        status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3828        if (!status)
3829                be_link_status_update(adapter, link_status);
3830
3831        netif_tx_start_all_queues(netdev);
3832        if (skyhawk_chip(adapter))
3833                udp_tunnel_get_rx_info(netdev);
3834
3835        return 0;
3836err:
3837        be_close(adapter->netdev);
3838        return -EIO;
3839}
3840
3841static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3842{
3843        u32 addr;
3844
3845        addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3846
3847        mac[5] = (u8)(addr & 0xFF);
3848        mac[4] = (u8)((addr >> 8) & 0xFF);
3849        mac[3] = (u8)((addr >> 16) & 0xFF);
3850        /* Use the OUI from the current MAC address */
3851        memcpy(mac, adapter->netdev->dev_addr, 3);
3852}
3853
3854/*
3855 * Generate a seed MAC address from the PF MAC Address using jhash.
3856 * MAC Address for VFs are assigned incrementally starting from the seed.
3857 * These addresses are programmed in the ASIC by the PF and the VF driver
3858 * queries for the MAC address during its probe.
3859 */
3860static int be_vf_eth_addr_config(struct be_adapter *adapter)
3861{
3862        u32 vf;
3863        int status = 0;
3864        u8 mac[ETH_ALEN];
3865        struct be_vf_cfg *vf_cfg;
3866
3867        be_vf_eth_addr_generate(adapter, mac);
3868
3869        for_all_vfs(adapter, vf_cfg, vf) {
3870                if (BEx_chip(adapter))
3871                        status = be_cmd_pmac_add(adapter, mac,
3872                                                 vf_cfg->if_handle,
3873                                                 &vf_cfg->pmac_id, vf + 1);
3874                else
3875                        status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3876                                                vf + 1);
3877
3878                if (status)
3879                        dev_err(&adapter->pdev->dev,
3880                                "Mac address assignment failed for VF %d\n",
3881                                vf);
3882                else
3883                        memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3884
3885                mac[5] += 1;
3886        }
3887        return status;
3888}
3889
3890static int be_vfs_mac_query(struct be_adapter *adapter)
3891{
3892        int status, vf;
3893        u8 mac[ETH_ALEN];
3894        struct be_vf_cfg *vf_cfg;
3895
3896        for_all_vfs(adapter, vf_cfg, vf) {
3897                status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3898                                               mac, vf_cfg->if_handle,
3899                                               false, vf+1);
3900                if (status)
3901                        return status;
3902                memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3903        }
3904        return 0;
3905}
3906
3907static void be_vf_clear(struct be_adapter *adapter)
3908{
3909        struct be_vf_cfg *vf_cfg;
3910        u32 vf;
3911
3912        if (pci_vfs_assigned(adapter->pdev)) {
3913                dev_warn(&adapter->pdev->dev,
3914                         "VFs are assigned to VMs: not disabling VFs\n");
3915                goto done;
3916        }
3917
3918        pci_disable_sriov(adapter->pdev);
3919
3920        for_all_vfs(adapter, vf_cfg, vf) {
3921                if (BEx_chip(adapter))
3922                        be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3923                                        vf_cfg->pmac_id, vf + 1);
3924                else
3925                        be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3926                                       vf + 1);
3927
3928                be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3929        }
3930
3931        if (BE3_chip(adapter))
3932                be_cmd_set_hsw_config(adapter, 0, 0,
3933                                      adapter->if_handle,
3934                                      PORT_FWD_TYPE_PASSTHRU, 0);
3935done:
3936        kfree(adapter->vf_cfg);
3937        adapter->num_vfs = 0;
3938        adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3939}
3940
3941static void be_clear_queues(struct be_adapter *adapter)
3942{
3943        be_mcc_queues_destroy(adapter);
3944        be_rx_cqs_destroy(adapter);
3945        be_tx_queues_destroy(adapter);
3946        be_evt_queues_destroy(adapter);
3947}
3948
3949static void be_cancel_worker(struct be_adapter *adapter)
3950{
3951        if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3952                cancel_delayed_work_sync(&adapter->work);
3953                adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3954        }
3955}
3956
3957static void be_cancel_err_detection(struct be_adapter *adapter)
3958{
3959        struct be_error_recovery *err_rec = &adapter->error_recovery;
3960
3961        if (!be_err_recovery_workq)
3962                return;
3963
3964        if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3965                cancel_delayed_work_sync(&err_rec->err_detection_work);
3966                adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3967        }
3968}
3969
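/* Convert the interface to tunnel mode and program the first VxLAN UDP
 * port from the adapter's port list, so that checksum/TSO/GSO offloads
 * for encapsulated traffic can be advertised on the netdev.
 */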
3970static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3971{
3972        struct net_device *netdev = adapter->netdev;
3973        struct device *dev = &adapter->pdev->dev;
3974        struct be_vxlan_port *vxlan_port;
3975        __be16 port;
3976        int status;
3977
3978        vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3979                                      struct be_vxlan_port, list);
3980        port = vxlan_port->port;
3981
3982        status = be_cmd_manage_iface(adapter, adapter->if_handle,
3983                                     OP_CONVERT_NORMAL_TO_TUNNEL);
3984        if (status) {
3985                dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3986                return status;
3987        }
3988        adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3989
3990        status = be_cmd_set_vxlan_port(adapter, port);
3991        if (status) {
3992                dev_warn(dev, "Failed to add VxLAN port\n");
3993                return status;
3994        }
3995        adapter->vxlan_port = port;
3996
3997        netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3998                                   NETIF_F_TSO | NETIF_F_TSO6 |
3999                                   NETIF_F_GSO_UDP_TUNNEL;
4000
4001        dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4002                 be16_to_cpu(port));
4003        return 0;
4004}
4005
4006static void be_disable_vxlan_offloads(struct be_adapter *adapter)
4007{
4008        struct net_device *netdev = adapter->netdev;
4009
4010        if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4011                be_cmd_manage_iface(adapter, adapter->if_handle,
4012                                    OP_CONVERT_TUNNEL_TO_NORMAL);
4013
4014        if (adapter->vxlan_port)
4015                be_cmd_set_vxlan_port(adapter, 0);
4016
4017        adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4018        adapter->vxlan_port = 0;
4019
4020        netdev->hw_enc_features = 0;
4021}
4022
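/* Carve the PF pool resources up between the PF and the requested VFs.
 * Hypothetical example: with num_vfs = 3 and res.max_rss_qs = 16, each VF
 * would be offered min(SH_VF_MAX_NIC_EQS, 16 / (3 + 1)) =
 * min(SH_VF_MAX_NIC_EQS, 4) RX queues, subject to the RSS-table and
 * IFACE-capability checks below.
 */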
4023static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4024                                struct be_resources *vft_res)
4025{
4026        struct be_resources res = adapter->pool_res;
4027        u32 vf_if_cap_flags = res.vf_if_cap_flags;
4028        struct be_resources res_mod = {0};
4029        u16 num_vf_qs = 1;
4030
4031        /* Distribute the queue resources among the PF and its VFs */
4032        if (num_vfs) {
4033                /* Divide the rx queues evenly among the VFs and the PF, capped
4034                 * at VF-EQ-count. Any remainder queues belong to the PF.
4035                 */
4036                num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4037                                res.max_rss_qs / (num_vfs + 1));
4038
4039                /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4040                 * RSS Tables per port. Provide RSS on VFs, only if number of
4041                 * VFs requested is less than its PF Pool's RSS Tables limit.
4042                 */
4043                if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4044                        num_vf_qs = 1;
4045        }
4046
4047        /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4048         * which are modifiable using SET_PROFILE_CONFIG cmd.
4049         */
4050        be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4051                                  RESOURCE_MODIFIABLE, 0);
4052
4053        /* If RSS IFACE capability flags are modifiable for a VF, set the
4054         * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4055         * more than 1 RSSQ is available for a VF.
4056         * Otherwise, provision only 1 queue pair for VF.
4057         */
4058        if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4059                vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4060                if (num_vf_qs > 1) {
4061                        vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4062                        if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4063                                vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4064                } else {
4065                        vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4066                                             BE_IF_FLAGS_DEFQ_RSS);
4067                }
4068        } else {
4069                num_vf_qs = 1;
4070        }
4071
4072        if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4073                vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4074                vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4075        }
4076
4077        vft_res->vf_if_cap_flags = vf_if_cap_flags;
4078        vft_res->max_rx_qs = num_vf_qs;
4079        vft_res->max_rss_qs = num_vf_qs;
4080        vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4081        vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4082
4083        /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4084         * among the PF and its VFs, if the fields are changeable
4085         */
4086        if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4087                vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4088
4089        if (res_mod.max_vlans == FIELD_MODIFIABLE)
4090                vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4091
4092        if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4093                vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4094
4095        if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4096                vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4097}
4098
4099static void be_if_destroy(struct be_adapter *adapter)
4100{
4101        be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4102
4103        kfree(adapter->pmac_id);
4104        adapter->pmac_id = NULL;
4105
4106        kfree(adapter->mc_list);
4107        adapter->mc_list = NULL;
4108
4109        kfree(adapter->uc_list);
4110        adapter->uc_list = NULL;
4111}
4112
4113static int be_clear(struct be_adapter *adapter)
4114{
4115        struct pci_dev *pdev = adapter->pdev;
4116        struct  be_resources vft_res = {0};
4117
4118        be_cancel_worker(adapter);
4119
4120        flush_workqueue(be_wq);
4121
4122        if (sriov_enabled(adapter))
4123                be_vf_clear(adapter);
4124
4125        /* Re-configure FW to distribute resources evenly across max-supported
4126         * number of VFs, only when VFs are not already enabled.
4127         */
4128        if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4129            !pci_vfs_assigned(pdev)) {
4130                be_calculate_vf_res(adapter,
4131                                    pci_sriov_get_totalvfs(pdev),
4132                                    &vft_res);
4133                be_cmd_set_sriov_config(adapter, adapter->pool_res,
4134                                        pci_sriov_get_totalvfs(pdev),
4135                                        &vft_res);
4136        }
4137
4138        be_disable_vxlan_offloads(adapter);
4139
4140        be_if_destroy(adapter);
4141
4142        be_clear_queues(adapter);
4143
4144        be_msix_disable(adapter);
4145        adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4146        return 0;
4147}
4148
4149static int be_vfs_if_create(struct be_adapter *adapter)
4150{
4151        struct be_resources res = {0};
4152        u32 cap_flags, en_flags, vf;
4153        struct be_vf_cfg *vf_cfg;
4154        int status;
4155
4156        /* If a FW profile exists, then cap_flags are updated */
4157        cap_flags = BE_VF_IF_EN_FLAGS;
4158
4159        for_all_vfs(adapter, vf_cfg, vf) {
4160                if (!BE3_chip(adapter)) {
4161                        status = be_cmd_get_profile_config(adapter, &res, NULL,
4162                                                           ACTIVE_PROFILE_TYPE,
4163                                                           RESOURCE_LIMITS,
4164                                                           vf + 1);
4165                        if (!status) {
4166                                cap_flags = res.if_cap_flags;
4167                                /* Prevent VFs from enabling VLAN promiscuous
4168                                 * mode
4169                                 */
4170                                cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4171                        }
4172                }
4173
4174                /* PF should enable IF flags during proxy if_create call */
4175                en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4176                status = be_cmd_if_create(adapter, cap_flags, en_flags,
4177                                          &vf_cfg->if_handle, vf + 1);
4178                if (status)
4179                        return status;
4180        }
4181
4182        return 0;
4183}
4184
4185static int be_vf_setup_init(struct be_adapter *adapter)
4186{
4187        struct be_vf_cfg *vf_cfg;
4188        int vf;
4189
4190        adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4191                                  GFP_KERNEL);
4192        if (!adapter->vf_cfg)
4193                return -ENOMEM;
4194
4195        for_all_vfs(adapter, vf_cfg, vf) {
4196                vf_cfg->if_handle = -1;
4197                vf_cfg->pmac_id = -1;
4198        }
4199        return 0;
4200}
4201
4202static int be_vf_setup(struct be_adapter *adapter)
4203{
4204        struct device *dev = &adapter->pdev->dev;
4205        struct be_vf_cfg *vf_cfg;
4206        int status, old_vfs, vf;
4207        bool spoofchk;
4208
4209        old_vfs = pci_num_vf(adapter->pdev);
4210
4211        status = be_vf_setup_init(adapter);
4212        if (status)
4213                goto err;
4214
4215        if (old_vfs) {
4216                for_all_vfs(adapter, vf_cfg, vf) {
4217                        status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4218                        if (status)
4219                                goto err;
4220                }
4221
4222                status = be_vfs_mac_query(adapter);
4223                if (status)
4224                        goto err;
4225        } else {
4226                status = be_vfs_if_create(adapter);
4227                if (status)
4228                        goto err;
4229
4230                status = be_vf_eth_addr_config(adapter);
4231                if (status)
4232                        goto err;
4233        }
4234
4235        for_all_vfs(adapter, vf_cfg, vf) {
4236                /* Allow VFs to program MAC/VLAN filters */
4237                status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4238                                                  vf + 1);
4239                if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4240                        status = be_cmd_set_fn_privileges(adapter,
4241                                                          vf_cfg->privileges |
4242                                                          BE_PRIV_FILTMGMT,
4243                                                          vf + 1);
4244                        if (!status) {
4245                                vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4246                                dev_info(dev, "VF%d has FILTMGMT privilege\n",
4247                                         vf);
4248                        }
4249                }
4250
4251                /* Allow full available bandwidth */
4252                if (!old_vfs)
4253                        be_cmd_config_qos(adapter, 0, 0, vf + 1);
4254
4255                status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4256                                               vf_cfg->if_handle, NULL,
4257                                               &spoofchk);
4258                if (!status)
4259                        vf_cfg->spoofchk = spoofchk;
4260
4261                if (!old_vfs) {
4262                        be_cmd_enable_vf(adapter, vf + 1);
4263                        be_cmd_set_logical_link_config(adapter,
4264                                                       IFLA_VF_LINK_STATE_AUTO,
4265                                                       vf+1);
4266                }
4267        }
4268
4269        if (!old_vfs) {
4270                status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4271                if (status) {
4272                        dev_err(dev, "SRIOV enable failed\n");
4273                        adapter->num_vfs = 0;
4274                        goto err;
4275                }
4276        }
4277
4278        if (BE3_chip(adapter)) {
4279                /* On BE3, enable VEB only when SRIOV is enabled */
4280                status = be_cmd_set_hsw_config(adapter, 0, 0,
4281                                               adapter->if_handle,
4282                                               PORT_FWD_TYPE_VEB, 0);
4283                if (status)
4284                        goto err;
4285        }
4286
4287        adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4288        return 0;
4289err:
4290        dev_err(dev, "VF setup failed\n");
4291        be_vf_clear(adapter);
4292        return status;
4293}
4294
4295/* Converting function_mode bits on BE3 to SH mc_type enums */
4296
4297static u8 be_convert_mc_type(u32 function_mode)
4298{
4299        if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4300                return vNIC1;
4301        else if (function_mode & QNQ_MODE)
4302                return FLEX10;
4303        else if (function_mode & VNIC_MODE)
4304                return vNIC2;
4305        else if (function_mode & UMC_ENABLED)
4306                return UMC;
4307        else
4308                return MC_NONE;
4309}
4310
4311/* On BE2/BE3, FW does not suggest the supported limits */
4312static void BEx_get_resources(struct be_adapter *adapter,
4313                              struct be_resources *res)
4314{
4315        bool use_sriov = adapter->num_vfs ? 1 : 0;
4316
4317        if (be_physfn(adapter))
4318                res->max_uc_mac = BE_UC_PMAC_COUNT;
4319        else
4320                res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4321
4322        adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4323
4324        if (be_is_mc(adapter)) {
4325                /* Assuming that there are 4 channels per port
4326                 * when multi-channel is enabled
4327                 */
4328                if (be_is_qnq_mode(adapter))
4329                        res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4330                else
4331                        /* In a non-qnq multichannel mode, the pvid
4332                         * takes up one vlan entry
4333                         */
4334                        res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4335        } else {
4336                res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4337        }
4338
4339        res->max_mcast_mac = BE_MAX_MC;
4340
4341        /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4342         * 2) Create multiple TX rings on a BE3-R multi-channel interface
4343         *    *only* if it is RSS-capable.
4344         */
4345        if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4346            be_virtfn(adapter) ||
4347            (be_is_mc(adapter) &&
4348             !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4349                res->max_tx_qs = 1;
4350        } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4351                struct be_resources super_nic_res = {0};
4352
4353                /* On a SuperNIC profile, the driver needs to use the
4354                 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4355                 */
4356                be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4357                                          ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4358                                          0);
4359                /* Some old versions of BE3 FW don't report max_tx_qs value */
4360                res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4361        } else {
4362                res->max_tx_qs = BE3_MAX_TX_QS;
4363        }
4364
4365        if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4366            !use_sriov && be_physfn(adapter))
4367                res->max_rss_qs = (adapter->be3_native) ?
4368                                           BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4369        res->max_rx_qs = res->max_rss_qs + 1;
4370
4371        if (be_physfn(adapter))
4372                res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4373                                        BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4374        else
4375                res->max_evt_qs = 1;
4376
4377        res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4378        res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4379        if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4380                res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4381}
4382
4383static void be_setup_init(struct be_adapter *adapter)
4384{
4385        adapter->vlan_prio_bmap = 0xff;
4386        adapter->phy.link_speed = -1;
4387        adapter->if_handle = -1;
4388        adapter->be3_native = false;
4389        adapter->if_flags = 0;
4390        adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4391        if (be_physfn(adapter))
4392                adapter->cmd_privileges = MAX_PRIVILEGES;
4393        else
4394                adapter->cmd_privileges = MIN_PRIVILEGES;
4395}
4396
4397/* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4398 * However, this HW limitation is not exposed to the host via any SLI cmd.
4399 * As a result, in the case of SRIOV and in particular multi-partition configs
4400 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4401 * for distribution between the VFs. This self-imposed limit will determine the
4402 * number of VFs for which RSS can be enabled.
4403 */
4404static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4405{
4406        struct be_port_resources port_res = {0};
4407        u8 rss_tables_on_port;
4408        u16 max_vfs = be_max_vfs(adapter);
4409
4410        be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4411                                  RESOURCE_LIMITS, 0);
4412
4413        rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4414
4415        /* Each PF Pool's RSS Tables limit =
4416         * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4417         */
4418        adapter->pool_res.max_rss_tables =
4419                max_vfs * rss_tables_on_port / port_res.max_vfs;
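            /* Illustrative example (hypothetical numbers): with 15 RSS tables
             * on the port, port_res.nic_pfs = 1 (so rss_tables_on_port = 14),
             * port_res.max_vfs = 64 and this PF's max_vfs = 32, the pool gets
             * 32 * 14 / 64 = 7 RSS tables.
             */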
4420}
4421
4422static int be_get_sriov_config(struct be_adapter *adapter)
4423{
4424        struct be_resources res = {0};
4425        int max_vfs, old_vfs;
4426
4427        be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4428                                  RESOURCE_LIMITS, 0);
4429
4430        /* Some old versions of BE3 FW don't report max_vfs value */
4431        if (BE3_chip(adapter) && !res.max_vfs) {
4432                max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4433                res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4434        }
4435
4436        adapter->pool_res = res;
4437
4438        /* If the VFs were not disabled during a previous unload of the driver,
4439         * then we cannot rely on the PF POOL limits for the TotalVFs value.
4440         * Instead use the TotalVFs value stored in the pci-dev struct.
4441         */
4442        old_vfs = pci_num_vf(adapter->pdev);
4443        if (old_vfs) {
4444                dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4445                         old_vfs);
4446
4447                adapter->pool_res.max_vfs =
4448                        pci_sriov_get_totalvfs(adapter->pdev);
4449                adapter->num_vfs = old_vfs;
4450        }
4451
4452        if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4453                be_calculate_pf_pool_rss_tables(adapter);
4454                dev_info(&adapter->pdev->dev,
4455                         "RSS can be enabled for all VFs if num_vfs <= %d\n",
4456                         be_max_pf_pool_rss_tables(adapter));
4457        }
4458        return 0;
4459}
4460
4461static void be_alloc_sriov_res(struct be_adapter *adapter)
4462{
4463        int old_vfs = pci_num_vf(adapter->pdev);
4464        struct  be_resources vft_res = {0};
4465        int status;
4466
4467        be_get_sriov_config(adapter);
4468
4469        if (!old_vfs)
4470                pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4471
4472        /* When the HW is in SRIOV capable configuration, the PF-pool
4473         * resources are given to PF during driver load, if there are no
4474         * old VFs. This facility is not available in BE3 FW.
4475         * Also, this is done by FW in Lancer chip.
4476         */
4477        if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4478                be_calculate_vf_res(adapter, 0, &vft_res);
4479                status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4480                                                 &vft_res);
4481                if (status)
4482                        dev_err(&adapter->pdev->dev,
4483                                "Failed to optimize SRIOV resources\n");
4484        }
4485}
4486
4487static int be_get_resources(struct be_adapter *adapter)
4488{
4489        struct device *dev = &adapter->pdev->dev;
4490        struct be_resources res = {0};
4491        int status;
4492
4493        /* For Lancer, SH, etc., read per-function resource limits from FW.
4494         * GET_FUNC_CONFIG returns per-function guaranteed limits.
4495         * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4496         */
4497        if (BEx_chip(adapter)) {
4498                BEx_get_resources(adapter, &res);
4499        } else {
4500                status = be_cmd_get_func_config(adapter, &res);
4501                if (status)
4502                        return status;
4503
4504                /* If a default RXQ must be created, we'll use up one RSSQ */
4505                if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4506                    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4507                        res.max_rss_qs -= 1;
4508        }
4509
4510        /* If RoCE is supported stash away half the EQs for RoCE */
4511        res.max_nic_evt_qs = be_roce_supported(adapter) ?
4512                                res.max_evt_qs / 2 : res.max_evt_qs;
4513        adapter->res = res;
4514
4515        /* If FW supports RSS default queue, then skip creating non-RSS
4516         * queue for non-IP traffic.
4517         */
4518        adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4519                                 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4520
4521        dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4522                 be_max_txqs(adapter), be_max_rxqs(adapter),
4523                 be_max_rss(adapter), be_max_nic_eqs(adapter),
4524                 be_max_vfs(adapter));
4525        dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4526                 be_max_uc(adapter), be_max_mc(adapter),
4527                 be_max_vlans(adapter));
4528
4529        /* Ensure RX and TX queues are created in pairs at init time */
4530        adapter->cfg_num_rx_irqs =
4531                                min_t(u16, netif_get_num_default_rss_queues(),
4532                                      be_max_qp_irqs(adapter));
4533        adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4534        return 0;
4535}
4536
4537static int be_get_config(struct be_adapter *adapter)
4538{
4539        int status, level;
4540        u16 profile_id;
4541
4542        status = be_cmd_get_cntl_attributes(adapter);
4543        if (status)
4544                return status;
4545
4546        status = be_cmd_query_fw_cfg(adapter);
4547        if (status)
4548                return status;
4549
4550        if (!lancer_chip(adapter) && be_physfn(adapter))
4551                be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4552
4553        if (BEx_chip(adapter)) {
4554                level = be_cmd_get_fw_log_level(adapter);
4555                adapter->msg_enable =
4556                        level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4557        }
4558
4559        be_cmd_get_acpi_wol_cap(adapter);
4560        pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4561        pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4562
4563        be_cmd_query_port_name(adapter);
4564
4565        if (be_physfn(adapter)) {
4566                status = be_cmd_get_active_profile(adapter, &profile_id);
4567                if (!status)
4568                        dev_info(&adapter->pdev->dev,
4569                                 "Using profile 0x%x\n", profile_id);
4570        }
4571
4572        return 0;
4573}
4574
4575static int be_mac_setup(struct be_adapter *adapter)
4576{
4577        u8 mac[ETH_ALEN];
4578        int status;
4579
4580        if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4581                status = be_cmd_get_perm_mac(adapter, mac);
4582                if (status)
4583                        return status;
4584
4585                memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4586                memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4587
4588                /* Initial MAC for BE3 VFs is already programmed by PF */
4589                if (BEx_chip(adapter) && be_virtfn(adapter))
4590                        memcpy(adapter->dev_mac, mac, ETH_ALEN);
4591        }
4592
4593        return 0;
4594}
4595
4596static void be_schedule_worker(struct be_adapter *adapter)
4597{
4598        queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4599        adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4600}
4601
4602static void be_destroy_err_recovery_workq(void)
4603{
4604        if (!be_err_recovery_workq)
4605                return;
4606
4607        flush_workqueue(be_err_recovery_workq);
4608        destroy_workqueue(be_err_recovery_workq);
4609        be_err_recovery_workq = NULL;
4610}
4611
4612static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4613{
4614        struct be_error_recovery *err_rec = &adapter->error_recovery;
4615
4616        if (!be_err_recovery_workq)
4617                return;
4618
4619        queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4620                           msecs_to_jiffies(delay));
4621        adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4622}
4623
4624static int be_setup_queues(struct be_adapter *adapter)
4625{
4626        struct net_device *netdev = adapter->netdev;
4627        int status;
4628
4629        status = be_evt_queues_create(adapter);
4630        if (status)
4631                goto err;
4632
4633        status = be_tx_qs_create(adapter);
4634        if (status)
4635                goto err;
4636
4637        status = be_rx_cqs_create(adapter);
4638        if (status)
4639                goto err;
4640
4641        status = be_mcc_queues_create(adapter);
4642        if (status)
4643                goto err;
4644
4645        status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4646        if (status)
4647                goto err;
4648
4649        status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4650        if (status)
4651                goto err;
4652
4653        return 0;
4654err:
4655        dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4656        return status;
4657}
4658
4659static int be_if_create(struct be_adapter *adapter)
4660{
4661        u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4662        u32 cap_flags = be_if_cap_flags(adapter);
4663        int status;
4664
4665        /* alloc required memory for other filtering fields */
4666        adapter->pmac_id = kcalloc(be_max_uc(adapter),
4667                                   sizeof(*adapter->pmac_id), GFP_KERNEL);
4668        if (!adapter->pmac_id)
4669                return -ENOMEM;
4670
4671        adapter->mc_list = kcalloc(be_max_mc(adapter),
4672                                   sizeof(*adapter->mc_list), GFP_KERNEL);
4673        if (!adapter->mc_list)
4674                return -ENOMEM;
4675
4676        adapter->uc_list = kcalloc(be_max_uc(adapter),
4677                                   sizeof(*adapter->uc_list), GFP_KERNEL);
4678        if (!adapter->uc_list)
4679                return -ENOMEM;
4680
4681        if (adapter->cfg_num_rx_irqs == 1)
4682                cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4683
4684        en_flags &= cap_flags;
4685        /* will enable all the needed filter flags in be_open() */
4686        status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4687                                  &adapter->if_handle, 0);
4688
4689        if (status)
4690                return status;
4691
4692        return 0;
4693}
4694
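    /* Re-create the interface and all queues with the current configuration:
     * quiesce TX and close the device, tear down the queues and IF, re-program
     * MSI-X unless vectors are shared with RoCE, then re-create the IF and
     * queues, restart the worker, clear stale promiscuous flags and re-open.
     */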
4695int be_update_queues(struct be_adapter *adapter)
4696{
4697        struct net_device *netdev = adapter->netdev;
4698        int status;
4699
4700        if (netif_running(netdev)) {
4701                /* be_tx_timeout() must not run concurrently with this
4702                 * function; synchronize with an already-running dev_watchdog
4703                 */
4704                netif_tx_lock_bh(netdev);
4705                /* device cannot transmit now, avoid dev_watchdog timeouts */
4706                netif_carrier_off(netdev);
4707                netif_tx_unlock_bh(netdev);
4708
4709                be_close(netdev);
4710        }
4711
4712        be_cancel_worker(adapter);
4713
4714        /* If any vectors have been shared with RoCE we cannot re-program
4715         * the MSIx table.
4716         */
4717        if (!adapter->num_msix_roce_vec)
4718                be_msix_disable(adapter);
4719
4720        be_clear_queues(adapter);
4721        status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4722        if (status)
4723                return status;
4724
4725        if (!msix_enabled(adapter)) {
4726                status = be_msix_enable(adapter);
4727                if (status)
4728                        return status;
4729        }
4730
4731        status = be_if_create(adapter);
4732        if (status)
4733                return status;
4734
4735        status = be_setup_queues(adapter);
4736        if (status)
4737                return status;
4738
4739        be_schedule_worker(adapter);
4740
4741        /* The IF was destroyed and re-created. We need to clear
4742         * all promiscuous flags valid for the destroyed IF.
4743         * Without this promisc mode is not restored during
4744         * be_open() because the driver thinks that it is
4745         * already enabled in HW.
4746         */
4747        adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4748
4749        if (netif_running(netdev))
4750                status = be_open(netdev);
4751
4752        return status;
4753}
4754
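    /* Parse the major number out of a dotted FW version string; for example
     * fw_major_num("4.2.324.0") returns 4, and 0 is returned if no leading
     * integer can be parsed.
     */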
4755static inline int fw_major_num(const char *fw_ver)
4756{
4757        int fw_major = 0, i;
4758
4759        i = sscanf(fw_ver, "%d.", &fw_major);
4760        if (i != 1)
4761                return 0;
4762
4763        return fw_major;
4764}
4765
4766/* If this is error recovery, FLR the PF.
4767 * Otherwise, don't FLR the PF if any VFs are already enabled.
4768 */
4769static bool be_reset_required(struct be_adapter *adapter)
4770{
4771        if (be_error_recovering(adapter))
4772                return true;
4773        else
4774                return pci_num_vf(adapter->pdev) == 0;
4775}
4776
4777/* Wait for the FW to be ready and perform the required initialization */
4778static int be_func_init(struct be_adapter *adapter)
4779{
4780        int status;
4781
4782        status = be_fw_wait_ready(adapter);
4783        if (status)
4784                return status;
4785
4786        /* FW is now ready; clear errors to allow cmds/doorbell */
4787        be_clear_error(adapter, BE_CLEAR_ALL);
4788
4789        if (be_reset_required(adapter)) {
4790                status = be_cmd_reset_function(adapter);
4791                if (status)
4792                        return status;
4793
4794                /* Wait for interrupts to quiesce after an FLR */
4795                msleep(100);
4796        }
4797
4798        /* Tell FW we're ready to fire cmds */
4799        status = be_cmd_fw_init(adapter);
4800        if (status)
4801                return status;
4802
4803        /* Allow interrupts for other ULPs running on NIC function */
4804        be_intr_set(adapter, true);
4805
4806        return 0;
4807}
4808
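    /* Bring the NIC function to an operational state: wait for FW readiness,
     * read the configuration and resource limits, enable MSI-X, create the
     * interface and queues, program the MAC, flow control and link state,
     * set up VFs if requested, and start the background worker.
     */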
4809static int be_setup(struct be_adapter *adapter)
4810{
4811        struct device *dev = &adapter->pdev->dev;
4812        int status;
4813
4814        status = be_func_init(adapter);
4815        if (status)
4816                return status;
4817
4818        be_setup_init(adapter);
4819
4820        if (!lancer_chip(adapter))
4821                be_cmd_req_native_mode(adapter);
4822
4823        /* Invoke this cmd first to get pf_num and vf_num, which are needed
4824         * for issuing profile-related cmds
4825         */
4826        if (!BEx_chip(adapter)) {
4827                status = be_cmd_get_func_config(adapter, NULL);
4828                if (status)
4829                        return status;
4830        }
4831
4832        status = be_get_config(adapter);
4833        if (status)
4834                goto err;
4835
4836        if (!BE2_chip(adapter) && be_physfn(adapter))
4837                be_alloc_sriov_res(adapter);
4838
4839        status = be_get_resources(adapter);
4840        if (status)
4841                goto err;
4842
4843        status = be_msix_enable(adapter);
4844        if (status)
4845                goto err;
4846
4847        /* will enable all the needed filter flags in be_open() */
4848        status = be_if_create(adapter);
4849        if (status)
4850                goto err;
4851
4852        /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4853        rtnl_lock();
4854        status = be_setup_queues(adapter);
4855        rtnl_unlock();
4856        if (status)
4857                goto err;
4858
4859        be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4860
4861        status = be_mac_setup(adapter);
4862        if (status)
4863                goto err;
4864
4865        be_cmd_get_fw_ver(adapter);
4866        dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4867
4868        if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4869                dev_err(dev, "Firmware on card is old (%s), IRQs may not work",
4870                        adapter->fw_ver);
4871                dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4872        }
4873
4874        status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4875                                         adapter->rx_fc);
4876        if (status)
4877                be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4878                                        &adapter->rx_fc);
4879
4880        dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4881                 adapter->tx_fc, adapter->rx_fc);
4882
4883        if (be_physfn(adapter))
4884                be_cmd_set_logical_link_config(adapter,
4885                                               IFLA_VF_LINK_STATE_AUTO, 0);
4886
4887        /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4888         * confusing a Linux bridge or OVS that it might be connected to.
4889         * Set the EVB to PASSTHRU mode, which effectively disables the EVB
4890         * when SRIOV is not enabled.
4891         */
4892        if (BE3_chip(adapter))
4893                be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4894                                      PORT_FWD_TYPE_PASSTHRU, 0);
4895
4896        if (adapter->num_vfs)
4897                be_vf_setup(adapter);
4898
4899        status = be_cmd_get_phy_info(adapter);
4900        if (!status && be_pause_supported(adapter))
4901                adapter->phy.fc_autoneg = 1;
4902
4903        if (be_physfn(adapter) && !lancer_chip(adapter))
4904                be_cmd_set_features(adapter);
4905
4906        be_schedule_worker(adapter);
4907        adapter->flags |= BE_FLAGS_SETUP_DONE;
4908        return 0;
4909err:
4910        be_clear(adapter);
4911        return status;
4912}
4913
4914#ifdef CONFIG_NET_POLL_CONTROLLER
4915static void be_netpoll(struct net_device *netdev)
4916{
4917        struct be_adapter *adapter = netdev_priv(netdev);
4918        struct be_eq_obj *eqo;
4919        int i;
4920
4921        for_all_evt_queues(adapter, eqo, i) {
4922                be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4923                napi_schedule(&eqo->napi);
4924        }
4925}
4926#endif
4927
4928int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4929{
4930        const struct firmware *fw;
4931        int status;
4932
4933        if (!netif_running(adapter->netdev)) {
4934                dev_err(&adapter->pdev->dev,
4935                        "Firmware load not allowed (interface is down)\n");
4936                return -ENETDOWN;
4937        }
4938
4939        status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4940        if (status)
4941                goto fw_exit;
4942
4943        dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4944
4945        if (lancer_chip(adapter))
4946                status = lancer_fw_download(adapter, fw);
4947        else
4948                status = be_fw_download(adapter, fw);
4949
4950        if (!status)
4951                be_cmd_get_fw_ver(adapter);
4952
4953fw_exit:
4954        release_firmware(fw);
4955        return status;
4956}
4957
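    /* Switch the embedded bridge between VEB and VEPA forwarding, e.g. via
     * "bridge link set dev <ifname> hwmode vepa". Requires SR-IOV to be
     * enabled; VEPA is not supported on BE3.
     */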
4958static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4959                                 u16 flags, struct netlink_ext_ack *extack)
4960{
4961        struct be_adapter *adapter = netdev_priv(dev);
4962        struct nlattr *attr, *br_spec;
4963        int rem;
4964        int status = 0;
4965        u16 mode = 0;
4966
4967        if (!sriov_enabled(adapter))
4968                return -EOPNOTSUPP;
4969
4970        br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4971        if (!br_spec)
4972                return -EINVAL;
4973
4974        nla_for_each_nested(attr, br_spec, rem) {
4975                if (nla_type(attr) != IFLA_BRIDGE_MODE)
4976                        continue;
4977
4978                if (nla_len(attr) < sizeof(mode))
4979                        return -EINVAL;
4980
4981                mode = nla_get_u16(attr);
4982                if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4983                        return -EOPNOTSUPP;
4984
4985                if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4986                        return -EINVAL;
4987
4988                status = be_cmd_set_hsw_config(adapter, 0, 0,
4989                                               adapter->if_handle,
4990                                               mode == BRIDGE_MODE_VEPA ?
4991                                               PORT_FWD_TYPE_VEPA :
4992                                               PORT_FWD_TYPE_VEB, 0);
4993                if (status)
4994                        goto err;
4995
4996                dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4997                         mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4998
4999                return status;
5000        }
5001err:
5002        dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5003                mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5004
5005        return status;
5006}
5007
5008static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5009                                 struct net_device *dev, u32 filter_mask,
5010                                 int nlflags)
5011{
5012        struct be_adapter *adapter = netdev_priv(dev);
5013        int status = 0;
5014        u8 hsw_mode;
5015
5016        /* BE and Lancer chips support VEB mode only */
5017        if (BEx_chip(adapter) || lancer_chip(adapter)) {
5018                /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5019                if (!pci_sriov_get_totalvfs(adapter->pdev))
5020                        return 0;
5021                hsw_mode = PORT_FWD_TYPE_VEB;
5022        } else {
5023                status = be_cmd_get_hsw_config(adapter, NULL, 0,
5024                                               adapter->if_handle, &hsw_mode,
5025                                               NULL);
5026                if (status)
5027                        return 0;
5028
5029                if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5030                        return 0;
5031        }
5032
5033        return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5034                                       hsw_mode == PORT_FWD_TYPE_VEPA ?
5035                                       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5036                                       0, 0, nlflags, filter_mask, NULL);
5037}
5038
5039static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5040                                         void (*func)(struct work_struct *))
5041{
5042        struct be_cmd_work *work;
5043
5044        work = kzalloc(sizeof(*work), GFP_ATOMIC);
5045        if (!work) {
5046                dev_err(&adapter->pdev->dev,
5047                        "be_work memory allocation failed\n");
5048                return NULL;
5049        }
5050
5051        INIT_WORK(&work->work, func);
5052        work->adapter = adapter;
5053        return work;
5054}
5055
5056/* VxLAN offload Notes:
5057 *
5058 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5059 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5060 * is expected to work across all types of IP tunnels once exported. Skyhawk
5061 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5062 * offloads in hw_enc_features only when a VxLAN port is added. If other (non-
5063 * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5064 * those other tunnels are unexported on the fly through ndo_features_check().
5065 *
5066 * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5067 * adds more than one port, disable offloads and re-enable them again when
5068 * there's only one port left. We maintain a list of ports for this purpose.
5069 */
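    /* Illustrative sequence: adding port 4789 enables offloads for it, adding
     * 4789 again only bumps its alias count, and adding a second distinct
     * port (say 8472) disables offloads until deletions leave a single
     * distinct port again.
     */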
5070static void be_work_add_vxlan_port(struct work_struct *work)
5071{
5072        struct be_cmd_work *cmd_work =
5073                                container_of(work, struct be_cmd_work, work);
5074        struct be_adapter *adapter = cmd_work->adapter;
5075        struct device *dev = &adapter->pdev->dev;
5076        __be16 port = cmd_work->info.vxlan_port;
5077        struct be_vxlan_port *vxlan_port;
5078        int status;
5079
5080        /* Bump up the alias count if it is an existing port */
5081        list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5082                if (vxlan_port->port == port) {
5083                        vxlan_port->port_aliases++;
5084                        goto done;
5085                }
5086        }
5087
5088        /* Add a new port to our list. We don't need a lock here since port
5089         * add/delete are done only in the context of a single-threaded work
5090         * queue (be_wq).
5091         */
5092        vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
5093        if (!vxlan_port)
5094                goto done;
5095
5096        vxlan_port->port = port;
5097        INIT_LIST_HEAD(&vxlan_port->list);
5098        list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
5099        adapter->vxlan_port_count++;
5100
5101        if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5102                dev_info(dev,
5103                         "Only one UDP port supported for VxLAN offloads\n");
5104                dev_info(dev, "Disabling VxLAN offloads\n");
5105                goto err;
5106        }
5107
5108        if (adapter->vxlan_port_count > 1)
5109                goto done;
5110
5111        status = be_enable_vxlan_offloads(adapter);
5112        if (!status)
5113                goto done;
5114
5115err:
5116        be_disable_vxlan_offloads(adapter);
5117done:
5118        kfree(cmd_work);
5119        return;
5120}
5121
5122static void be_work_del_vxlan_port(struct work_struct *work)
5123{
5124        struct be_cmd_work *cmd_work =
5125                                container_of(work, struct be_cmd_work, work);
5126        struct be_adapter *adapter = cmd_work->adapter;
5127        __be16 port = cmd_work->info.vxlan_port;
5128        struct be_vxlan_port *vxlan_port;
5129
5130        /* Nothing to be done if a port alias is being deleted */
5131        list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5132                if (vxlan_port->port == port) {
5133                        if (vxlan_port->port_aliases) {
5134                                vxlan_port->port_aliases--;
5135                                goto done;
5136                        }
5137                        break;
5138                }
5139        }
5140
5141        /* No port aliases left; delete the port from the list */
5142        list_del(&vxlan_port->list);
5143        adapter->vxlan_port_count--;
5144
5145        /* Disable VxLAN offload if this is the offloaded port */
5146        if (adapter->vxlan_port == vxlan_port->port) {
5147                WARN_ON(adapter->vxlan_port_count);
5148                be_disable_vxlan_offloads(adapter);
5149                dev_info(&adapter->pdev->dev,
5150                         "Disabled VxLAN offloads for UDP port %d\n",
5151                         be16_to_cpu(port));
5152                goto out;
5153        }
5154
5155        /* If only 1 port is left, re-enable VxLAN offload */
5156        if (adapter->vxlan_port_count == 1)
5157                be_enable_vxlan_offloads(adapter);
5158
5159out:
5160        kfree(vxlan_port);
5161done:
5162        kfree(cmd_work);
5163}
5164
5165static void be_cfg_vxlan_port(struct net_device *netdev,
5166                              struct udp_tunnel_info *ti,
5167                              void (*func)(struct work_struct *))
5168{
5169        struct be_adapter *adapter = netdev_priv(netdev);
5170        struct be_cmd_work *cmd_work;
5171
5172        if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5173                return;
5174
5175        if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5176                return;
5177
5178        cmd_work = be_alloc_work(adapter, func);
5179        if (cmd_work) {
5180                cmd_work->info.vxlan_port = ti->port;
5181                queue_work(be_wq, &cmd_work->work);
5182        }
5183}
5184
5185static void be_del_vxlan_port(struct net_device *netdev,
5186                              struct udp_tunnel_info *ti)
5187{
5188        be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5189}
5190
5191static void be_add_vxlan_port(struct net_device *netdev,
5192                              struct udp_tunnel_info *ti)
5193{
5194        be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5195}
5196
5197static netdev_features_t be_features_check(struct sk_buff *skb,
5198                                           struct net_device *dev,
5199                                           netdev_features_t features)
5200{
5201        struct be_adapter *adapter = netdev_priv(dev);
5202        u8 l4_hdr = 0;
5203
5204        if (skb_is_gso(skb)) {
5205                /* IPv6 TSO requests with extension hdrs are a problem
5206                 * for Lancer and BE3 HW. Disable TSO6 feature.
5207                 */
5208                if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5209                        features &= ~NETIF_F_TSO6;
5210
5211                /* Lancer cannot handle packets with an MSS less than 256.
5212                 * It also can't handle a TSO packet with a single segment.
5213                 * Disable GSO support in such cases.
5214                 */
5215                if (lancer_chip(adapter) &&
5216                    (skb_shinfo(skb)->gso_size < 256 ||
5217                     skb_shinfo(skb)->gso_segs == 1))
5218                        features &= ~NETIF_F_GSO_MASK;
5219        }
5220
5221        /* The code below restricts offload features for some tunneled and
5222         * Q-in-Q packets.
5223         * Offload features for normal (non tunnel) packets are unchanged.
5224         */
5225        features = vlan_features_check(skb, features);
5226        if (!skb->encapsulation ||
5227            !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5228                return features;
5229
5230        /* It's an encapsulated packet and VxLAN offloads are enabled. We
5231         * should disable tunnel offload features if it's not a VxLAN packet,
5232         * as tunnel offloads have been enabled only for VxLAN. This is done to
5233         * allow other tunneled traffic like GRE to work fine while VxLAN
5234         * offloads are configured in Skyhawk-R.
5235         */
5236        switch (vlan_get_protocol(skb)) {
5237        case htons(ETH_P_IP):
5238                l4_hdr = ip_hdr(skb)->protocol;
5239                break;
5240        case htons(ETH_P_IPV6):
5241                l4_hdr = ipv6_hdr(skb)->nexthdr;
5242                break;
5243        default:
5244                return features;
5245        }
5246
5247        if (l4_hdr != IPPROTO_UDP ||
5248            skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5249            skb->inner_protocol != htons(ETH_P_TEB) ||
5250            skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5251                sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5252            !adapter->vxlan_port ||
5253            udp_hdr(skb)->dest != adapter->vxlan_port)
5254                return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5255
5256        return features;
5257}
5258
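    /* Report a physical port id composed of the HBA port number (plus one)
     * followed by the controller serial-number words in reverse order.
     */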
5259static int be_get_phys_port_id(struct net_device *dev,
5260                               struct netdev_phys_item_id *ppid)
5261{
5262        int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5263        struct be_adapter *adapter = netdev_priv(dev);
5264        u8 *id;
5265
5266        if (MAX_PHYS_ITEM_ID_LEN < id_len)
5267                return -ENOSPC;
5268
5269        ppid->id[0] = adapter->hba_port_num + 1;
5270        id = &ppid->id[1];
5271        for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5272             i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5273                memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5274
5275        ppid->id_len = id_len;
5276
5277        return 0;
5278}
5279
5280static void be_set_rx_mode(struct net_device *dev)
5281{
5282        struct be_adapter *adapter = netdev_priv(dev);
5283        struct be_cmd_work *work;
5284
5285        work = be_alloc_work(adapter, be_work_set_rx_mode);
5286        if (work)
5287                queue_work(be_wq, &work->work);
5288}
5289
5290static const struct net_device_ops be_netdev_ops = {
5291        .ndo_open               = be_open,
5292        .ndo_stop               = be_close,
5293        .ndo_start_xmit         = be_xmit,
5294        .ndo_set_rx_mode        = be_set_rx_mode,
5295        .ndo_set_mac_address    = be_mac_addr_set,
5296        .ndo_get_stats64        = be_get_stats64,
5297        .ndo_validate_addr      = eth_validate_addr,
5298        .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5299        .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5300        .ndo_set_vf_mac         = be_set_vf_mac,
5301        .ndo_set_vf_vlan        = be_set_vf_vlan,
5302        .ndo_set_vf_rate        = be_set_vf_tx_rate,
5303        .ndo_get_vf_config      = be_get_vf_config,
5304        .ndo_set_vf_link_state  = be_set_vf_link_state,
5305        .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5306        .ndo_tx_timeout         = be_tx_timeout,
5307#ifdef CONFIG_NET_POLL_CONTROLLER
5308        .ndo_poll_controller    = be_netpoll,
5309#endif
5310        .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5311        .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5312        .ndo_udp_tunnel_add     = be_add_vxlan_port,
5313        .ndo_udp_tunnel_del     = be_del_vxlan_port,
5314        .ndo_features_check     = be_features_check,
5315        .ndo_get_phys_port_id   = be_get_phys_port_id,
5316};
5317
5318static void be_netdev_init(struct net_device *netdev)
5319{
5320        struct be_adapter *adapter = netdev_priv(netdev);
5321
5322        netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5323                NETIF_F_GSO_UDP_TUNNEL |
5324                NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5325                NETIF_F_HW_VLAN_CTAG_TX;
5326        if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5327                netdev->hw_features |= NETIF_F_RXHASH;
5328
5329        netdev->features |= netdev->hw_features |
5330                NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5331
5332        netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5333                NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5334
5335        netdev->priv_flags |= IFF_UNICAST_FLT;
5336
5337        netdev->flags |= IFF_MULTICAST;
5338
5339        netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5340
5341        netdev->netdev_ops = &be_netdev_ops;
5342
5343        netdev->ethtool_ops = &be_ethtool_ops;
5344
5345        /* MTU range: 256 - 9000 */
5346        netdev->min_mtu = BE_MIN_MTU;
5347        netdev->max_mtu = BE_MAX_MTU;
5348}
5349
5350static void be_cleanup(struct be_adapter *adapter)
5351{
5352        struct net_device *netdev = adapter->netdev;
5353
5354        rtnl_lock();
5355        netif_device_detach(netdev);
5356        if (netif_running(netdev))
5357                be_close(netdev);
5358        rtnl_unlock();
5359
5360        be_clear(adapter);
5361}
5362
5363static int be_resume(struct be_adapter *adapter)
5364{
5365        struct net_device *netdev = adapter->netdev;
5366        int status;
5367
5368        status = be_setup(adapter);
5369        if (status)
5370                return status;
5371
5372        rtnl_lock();
5373        if (netif_running(netdev))
5374                status = be_open(netdev);
5375        rtnl_unlock();
5376
5377        if (status)
5378                return status;
5379
5380        netif_device_attach(netdev);
5381
5382        return 0;
5383}
5384
5385static void be_soft_reset(struct be_adapter *adapter)
5386{
5387        u32 val;
5388
5389        dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5390        val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5391        val |= SLIPORT_SOFTRESET_SR_MASK;
5392        iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5393}
5394
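    /* Recovery is attempted only when POST reports a recoverable error with a
     * non-zero error code, the driver has been loaded longer than
     * ERR_RECOVERY_IDLE_TIME, the previous recovery (if any) was more than
     * ERR_RECOVERY_INTERVAL ago, and the error code differs from the last one
     * seen.
     */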
5395static bool be_err_is_recoverable(struct be_adapter *adapter)
5396{
5397        struct be_error_recovery *err_rec = &adapter->error_recovery;
5398        unsigned long initial_idle_time =
5399                msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5400        unsigned long recovery_interval =
5401                msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5402        u16 ue_err_code;
5403        u32 val;
5404
5405        val = be_POST_stage_get(adapter);
5406        if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5407                return false;
5408        ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5409        if (ue_err_code == 0)
5410                return false;
5411
5412        dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5413                ue_err_code);
5414
5415        if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5416                dev_err(&adapter->pdev->dev,
5417                        "Cannot recover within %lu sec from driver load\n",
5418                        jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5419                return false;
5420        }
5421
5422        if (err_rec->last_recovery_time && time_before_eq(
5423                jiffies - err_rec->last_recovery_time, recovery_interval)) {
5424                dev_err(&adapter->pdev->dev,
5425                        "Cannot recover within %lu sec from last recovery\n",
5426                        jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5427                return false;
5428        }
5429
5430        if (ue_err_code == err_rec->last_err_code) {
5431                dev_err(&adapter->pdev->dev,
5432                        "Cannot recover from a consecutive TPE error\n");
5433                return false;
5434        }
5435
5436        err_rec->last_recovery_time = jiffies;
5437        err_rec->last_err_code = ue_err_code;
5438        return true;
5439}
5440
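    /* One step of the TPE recovery state machine: DETECT confirms the
     * recoverable error, RESET (PF0 only) issues the chip soft reset, and
     * PRE_POLL hands off to re-initialization. Returns -EAGAIN while another
     * pass is needed after ->resched_delay msecs, 0 once recovery may proceed,
     * or a negative error on an unrecoverable condition.
     */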
5441static int be_tpe_recover(struct be_adapter *adapter)
5442{
5443        struct be_error_recovery *err_rec = &adapter->error_recovery;
5444        int status = -EAGAIN;
5445        u32 val;
5446
5447        switch (err_rec->recovery_state) {
5448        case ERR_RECOVERY_ST_NONE:
5449                err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5450                err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5451                break;
5452
5453        case ERR_RECOVERY_ST_DETECT:
5454                val = be_POST_stage_get(adapter);
5455                if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5456                    POST_STAGE_RECOVERABLE_ERR) {
5457                        dev_err(&adapter->pdev->dev,
5458                                "Unrecoverable HW error detected: 0x%x\n", val);
5459                        status = -EINVAL;
5460                        err_rec->resched_delay = 0;
5461                        break;
5462                }
5463
5464                dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5465
5466                /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5467                 * milliseconds before it checks for final error status in
5468                 * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
5469                 * If it does, then PF0 initiates a Soft Reset.
5470                 */
5471                if (adapter->pf_num == 0) {
5472                        err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5473                        err_rec->resched_delay = err_rec->ue_to_reset_time -
5474                                        ERR_RECOVERY_UE_DETECT_DURATION;
5475                        break;
5476                }
5477
5478                err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5479                err_rec->resched_delay = err_rec->ue_to_poll_time -
5480                                        ERR_RECOVERY_UE_DETECT_DURATION;
5481                break;
5482
5483        case ERR_RECOVERY_ST_RESET:
5484                if (!be_err_is_recoverable(adapter)) {
5485                        dev_err(&adapter->pdev->dev,
5486                                "Failed to meet recovery criteria\n");
5487                        status = -EIO;
5488                        err_rec->resched_delay = 0;
5489                        break;
5490                }
5491                be_soft_reset(adapter);
5492                err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5493                err_rec->resched_delay = err_rec->ue_to_poll_time -
5494                                        err_rec->ue_to_reset_time;
5495                break;
5496
5497        case ERR_RECOVERY_ST_PRE_POLL:
5498                err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5499                err_rec->resched_delay = 0;
5500                status = 0;                     /* done */
5501                break;
5502
5503        default:
5504                status = -EINVAL;
5505                err_rec->resched_delay = 0;
5506                break;
5507        }
5508
5509        return status;
5510}
5511
5512static int be_err_recover(struct be_adapter *adapter)
5513{
5514        int status;
5515
5516        if (!lancer_chip(adapter)) {
5517                if (!adapter->error_recovery.recovery_supported ||
5518                    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5519                        return -EIO;
5520                status = be_tpe_recover(adapter);
5521                if (status)
5522                        goto err;
5523        }
5524
5525        /* Wait for adapter to reach quiescent state before
5526         * destroying queues
5527         */
5528        status = be_fw_wait_ready(adapter);
5529        if (status)
5530                goto err;
5531
5532        adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5533
5534        be_cleanup(adapter);
5535
5536        status = be_resume(adapter);
5537        if (status)
5538                goto err;
5539
5540        adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5541
5542err:
5543        return status;
5544}
5545
5546static void be_err_detection_task(struct work_struct *work)
5547{
5548        struct be_error_recovery *err_rec =
5549                        container_of(work, struct be_error_recovery,
5550                                     err_detection_work.work);
5551        struct be_adapter *adapter =
5552                        container_of(err_rec, struct be_adapter,
5553                                     error_recovery);
5554        u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5555        struct device *dev = &adapter->pdev->dev;
5556        int recovery_status;
5557
5558        be_detect_error(adapter);
5559        if (!be_check_error(adapter, BE_ERROR_HW))
5560                goto reschedule_task;
5561
5562        recovery_status = be_err_recover(adapter);
5563        if (!recovery_status) {
5564                err_rec->recovery_retries = 0;
5565                err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5566                dev_info(dev, "Adapter recovery successful\n");
5567                goto reschedule_task;
5568        } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5569                /* BEx/SH recovery state machine */
5570                if (adapter->pf_num == 0 &&
5571                    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5572                        dev_err(&adapter->pdev->dev,
5573                                "Adapter recovery in progress\n");
5574                resched_delay = err_rec->resched_delay;
5575                goto reschedule_task;
5576        } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5577                /* For VFs, check every second whether the PF has
5578                 * allocated resources.
5579                 */
5580                dev_err(dev, "Re-trying adapter recovery\n");
5581                goto reschedule_task;
5582        } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5583                   ERR_RECOVERY_MAX_RETRY_COUNT) {
5584                /* In case of another error during recovery, it takes 30 sec
5585                 * for adapter to come out of error. Retry error recovery after
5586                 * this time interval.
5587                 */
5588                dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5589                resched_delay = ERR_RECOVERY_RETRY_DELAY;
5590                goto reschedule_task;
5591        } else {
5592                dev_err(dev, "Adapter recovery failed\n");
5593                dev_err(dev, "Please reboot server to recover\n");
5594        }
5595
5596        return;
5597
5598reschedule_task:
5599        be_schedule_err_detection(adapter, resched_delay);
5600}
5601
5602static void be_log_sfp_info(struct be_adapter *adapter)
5603{
5604        int status;
5605
5606        status = be_cmd_query_sfp_info(adapter);
5607        if (!status) {
5608                dev_err(&adapter->pdev->dev,
5609                        "Port %c: %s Vendor: %s part no: %s",
5610                        adapter->port_name,
5611                        be_misconfig_evt_port_state[adapter->phy_state],
5612                        adapter->phy.vendor_name,
5613                        adapter->phy.vendor_pn);
5614        }
5615        adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5616}
5617
5618static void be_worker(struct work_struct *work)
5619{
5620        struct be_adapter *adapter =
5621                container_of(work, struct be_adapter, work.work);
5622        struct be_rx_obj *rxo;
5623        int i;
5624
5625        if (be_physfn(adapter) &&
5626            MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5627                be_cmd_get_die_temperature(adapter);
5628
5629        /* when interrupts are not yet enabled, just reap any pending
5630         * mcc completions
5631         */
5632        if (!netif_running(adapter->netdev)) {
5633                be_process_mcc(adapter);
5634                goto reschedule;
5635        }
5636
5637        if (!adapter->stats_cmd_sent) {
5638                if (lancer_chip(adapter))
5639                        lancer_cmd_get_pport_stats(adapter,
5640                                                   &adapter->stats_cmd);
5641                else
5642                        be_cmd_get_stats(adapter, &adapter->stats_cmd);
5643        }
5644
5645        for_all_rx_queues(adapter, rxo, i) {
5646                /* Replenish RX-queues starved due to memory
5647                 * allocation failures.
5648                 */
5649                if (rxo->rx_post_starved)
5650                        be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5651        }
5652
5653        /* EQ-delay update for Skyhawk is done while notifying EQ */
5654        if (!skyhawk_chip(adapter))
5655                be_eqd_update(adapter, false);
5656
5657        if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5658                be_log_sfp_info(adapter);
5659
5660reschedule:
5661        adapter->work_counter++;
5662        queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5663}
5664
5665static void be_unmap_pci_bars(struct be_adapter *adapter)
5666{
5667        if (adapter->csr)
5668                pci_iounmap(adapter->pdev, adapter->csr);
5669        if (adapter->db)
5670                pci_iounmap(adapter->pdev, adapter->db);
5671        if (adapter->pcicfg && adapter->pcicfg_mapped)
5672                pci_iounmap(adapter->pdev, adapter->pcicfg);
5673}
5674
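    /* The doorbell registers live in BAR 0 on Lancer and on VFs, and in
     * BAR 4 on BEx/Skyhawk PFs.
     */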
5675static int db_bar(struct be_adapter *adapter)
5676{
5677        if (lancer_chip(adapter) || be_virtfn(adapter))
5678                return 0;
5679        else
5680                return 4;
5681}
5682
5683static int be_roce_map_pci_bars(struct be_adapter *adapter)
5684{
5685        if (skyhawk_chip(adapter)) {
5686                adapter->roce_db.size = 4096;
5687                adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5688                                                              db_bar(adapter));
5689                adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5690                                                               db_bar(adapter));
5691        }
5692        return 0;
5693}
5694
5695static int be_map_pci_bars(struct be_adapter *adapter)
5696{
5697        struct pci_dev *pdev = adapter->pdev;
5698        u8 __iomem *addr;
5699        u32 sli_intf;
5700
5701        pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5702        adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5703                                SLI_INTF_FAMILY_SHIFT;
5704        adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5705
5706        if (BEx_chip(adapter) && be_physfn(adapter)) {
5707                adapter->csr = pci_iomap(pdev, 2, 0);
5708                if (!adapter->csr)
5709                        return -ENOMEM;
5710        }
5711
5712        addr = pci_iomap(pdev, db_bar(adapter), 0);
5713        if (!addr)
5714                goto pci_map_err;
5715        adapter->db = addr;
5716
5717        if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5718                if (be_physfn(adapter)) {
5719                        /* PCICFG is the 2nd BAR in BE2 */
5720                        addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5721                        if (!addr)
5722                                goto pci_map_err;
5723                        adapter->pcicfg = addr;
5724                        adapter->pcicfg_mapped = true;
5725                } else {
5726                        adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5727                        adapter->pcicfg_mapped = false;
5728                }
5729        }
5730
5731        be_roce_map_pci_bars(adapter);
5732        return 0;
5733
5734pci_map_err:
5735        dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5736        be_unmap_pci_bars(adapter);
5737        return -ENOMEM;
5738}
5739
5740static void be_drv_cleanup(struct be_adapter *adapter)
5741{
5742        struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5743        struct device *dev = &adapter->pdev->dev;
5744
5745        if (mem->va)
5746                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5747
5748        mem = &adapter->rx_filter;
5749        if (mem->va)
5750                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5751
5752        mem = &adapter->stats_cmd;
5753        if (mem->va)
5754                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5755}
5756
5757/* Allocate and initialize various fields in be_adapter struct */
5758static int be_drv_init(struct be_adapter *adapter)
5759{
5760        struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5761        struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5762        struct be_dma_mem *rx_filter = &adapter->rx_filter;
5763        struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5764        struct device *dev = &adapter->pdev->dev;
5765        int status = 0;
5766
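            /* The MCC mailbox is kept 16-byte aligned: 16 extra bytes are
             * allocated and the CPU and DMA addresses are rounded up to the
             * next 16-byte boundary below.
             */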
5767        mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5768        mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5769                                                &mbox_mem_alloc->dma,
5770                                                GFP_KERNEL);
5771        if (!mbox_mem_alloc->va)
5772                return -ENOMEM;
5773
5774        mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5775        mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5776        mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5777
5778        rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5779        rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5780                                           &rx_filter->dma, GFP_KERNEL);
5781        if (!rx_filter->va) {
5782                status = -ENOMEM;
5783                goto free_mbox;
5784        }
5785
5786        if (lancer_chip(adapter))
5787                stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5788        else if (BE2_chip(adapter))
5789                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5790        else if (BE3_chip(adapter))
5791                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5792        else
5793                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5794        stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5795                                           &stats_cmd->dma, GFP_KERNEL);
5796        if (!stats_cmd->va) {
5797                status = -ENOMEM;
5798                goto free_rx_filter;
5799        }
5800
5801        mutex_init(&adapter->mbox_lock);
5802        mutex_init(&adapter->mcc_lock);
5803        mutex_init(&adapter->rx_filter_lock);
5804        spin_lock_init(&adapter->mcc_cq_lock);
5805        init_completion(&adapter->et_cmd_compl);
5806
5807        pci_save_state(adapter->pdev);
5808
5809        INIT_DELAYED_WORK(&adapter->work, be_worker);
5810
5811        adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5812        adapter->error_recovery.resched_delay = 0;
5813        INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5814                          be_err_detection_task);
5815
5816        adapter->rx_fc = true;
5817        adapter->tx_fc = true;
5818
5819        /* Must be a power of 2 or else MODULO will BUG_ON */
5820        adapter->be_get_temp_freq = 64;
5821
5822        INIT_LIST_HEAD(&adapter->vxlan_port_list);
5823        return 0;
5824
5825free_rx_filter:
5826        dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5827free_mbox:
5828        dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5829                          mbox_mem_alloc->dma);
5830        return status;
5831}
5832
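    /* PCI remove: detach RoCE, disable interrupts, stop error detection,
     * unregister the netdev and tear down the HW via be_clear(). The
     * function is reset only when no VFs are still assigned to guests.
     */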
5833static void be_remove(struct pci_dev *pdev)
5834{
5835        struct be_adapter *adapter = pci_get_drvdata(pdev);
5836
5837        if (!adapter)
5838                return;
5839
5840        be_roce_dev_remove(adapter);
5841        be_intr_set(adapter, false);
5842
5843        be_cancel_err_detection(adapter);
5844
5845        unregister_netdev(adapter->netdev);
5846
5847        be_clear(adapter);
5848
5849        if (!pci_vfs_assigned(adapter->pdev))
5850                be_cmd_reset_function(adapter);
5851
5852        /* tell FW we're done issuing cmds */
5853        be_cmd_fw_clean(adapter);
5854
5855        be_unmap_pci_bars(adapter);
5856        be_drv_cleanup(adapter);
5857
5858        pci_disable_pcie_error_reporting(pdev);
5859
5860        pci_release_regions(pdev);
5861        pci_disable_device(pdev);
5862
5863        free_netdev(adapter->netdev);
5864}
5865
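    /* hwmon "temp1_input" attribute: reports the most recent on-die
     * temperature (in millidegrees Celsius) obtained from the FW;
     * BE_INVALID_DIE_TEMP means no valid reading is available.
     */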
5866static ssize_t be_hwmon_show_temp(struct device *dev,
5867                                  struct device_attribute *dev_attr,
5868                                  char *buf)
5869{
5870        struct be_adapter *adapter = dev_get_drvdata(dev);
5871
5872        /* Unit: millidegree Celsius */
5873        if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5874                return -EIO;
5875        else
5876                return sprintf(buf, "%u\n",
5877                               adapter->hwmon_info.be_on_die_temp * 1000);
5878}
5879
5880static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5881                          be_hwmon_show_temp, NULL, 1);
5882
5883static struct attribute *be_hwmon_attrs[] = {
5884        &sensor_dev_attr_temp1_input.dev_attr.attr,
5885        NULL
5886};
5887
5888ATTRIBUTE_GROUPS(be_hwmon);
5889
5890static char *mc_name(struct be_adapter *adapter)
5891{
5892        char *str = ""; /* default */
5893
5894        switch (adapter->mc_type) {
5895        case UMC:
5896                str = "UMC";
5897                break;
5898        case FLEX10:
5899                str = "FLEX10";
5900                break;
5901        case vNIC1:
5902                str = "vNIC-1";
5903                break;
5904        case nPAR:
5905                str = "nPAR";
5906                break;
5907        case UFP:
5908                str = "UFP";
5909                break;
5910        case vNIC2:
5911                str = "vNIC-2";
5912                break;
5913        default:
5914                str = "";
5915        }
5916
5917        return str;
5918}
5919
5920static inline char *func_name(struct be_adapter *adapter)
5921{
5922        return be_physfn(adapter) ? "PF" : "VF";
5923}
5924
5925static inline char *nic_name(struct pci_dev *pdev)
5926{
5927        switch (pdev->device) {
5928        case OC_DEVICE_ID1:
5929                return OC_NAME;
5930        case OC_DEVICE_ID2:
5931                return OC_NAME_BE;
5932        case OC_DEVICE_ID3:
5933        case OC_DEVICE_ID4:
5934                return OC_NAME_LANCER;
5935        case BE_DEVICE_ID2:
5936                return BE3_NAME;
5937        case OC_DEVICE_ID5:
5938        case OC_DEVICE_ID6:
5939                return OC_NAME_SH;
5940        default:
5941                return BE_NAME;
5942        }
5943}
5944
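    /* PCI probe: enable the device, claim its regions, allocate the netdev,
     * set the DMA mask, map the BARs, initialize driver state (be_drv_init),
     * bring up the HW (be_setup), then register the netdev and the RoCE
     * interface. Errors unwind in reverse order via the labels at the end
     * of the function.
     */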
5945static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5946{
5947        struct be_adapter *adapter;
5948        struct net_device *netdev;
5949        int status = 0;
5950
5951        dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5952
5953        status = pci_enable_device(pdev);
5954        if (status)
5955                goto do_none;
5956
5957        status = pci_request_regions(pdev, DRV_NAME);
5958        if (status)
5959                goto disable_dev;
5960        pci_set_master(pdev);
5961
5962        netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5963        if (!netdev) {
5964                status = -ENOMEM;
5965                goto rel_reg;
5966        }
5967        adapter = netdev_priv(netdev);
5968        adapter->pdev = pdev;
5969        pci_set_drvdata(pdev, adapter);
5970        adapter->netdev = netdev;
5971        SET_NETDEV_DEV(netdev, &pdev->dev);
5972
5973        status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5974        if (!status) {
5975                netdev->features |= NETIF_F_HIGHDMA;
5976        } else {
5977                status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5978                if (status) {
5979                        dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5980                        goto free_netdev;
5981                }
5982        }
5983
5984        status = pci_enable_pcie_error_reporting(pdev);
5985        if (!status)
5986                dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5987
5988        status = be_map_pci_bars(adapter);
5989        if (status)
5990                goto free_netdev;
5991
5992        status = be_drv_init(adapter);
5993        if (status)
5994                goto unmap_bars;
5995
5996        status = be_setup(adapter);
5997        if (status)
5998                goto drv_cleanup;
5999
6000        be_netdev_init(netdev);
6001        status = register_netdev(netdev);
6002        if (status != 0)
6003                goto unsetup;
6004
6005        be_roce_dev_add(adapter);
6006
6007        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6008        adapter->error_recovery.probe_time = jiffies;
6009
6010        /* On-die temperature is not supported on VFs. */
6011        if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
6012                adapter->hwmon_info.hwmon_dev =
6013                        devm_hwmon_device_register_with_groups(&pdev->dev,
6014                                                               DRV_NAME,
6015                                                               adapter,
6016                                                               be_hwmon_groups);
6017                adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
6018        }
6019
6020        dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
6021                 func_name(adapter), mc_name(adapter), adapter->port_name);
6022
6023        return 0;
6024
6025unsetup:
6026        be_clear(adapter);
6027drv_cleanup:
6028        be_drv_cleanup(adapter);
6029unmap_bars:
6030        be_unmap_pci_bars(adapter);
6031free_netdev:
6032        free_netdev(netdev);
6033rel_reg:
6034        pci_release_regions(pdev);
6035disable_dev:
6036        pci_disable_device(pdev);
6037do_none:
6038        dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
6039        return status;
6040}
6041
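    /* Legacy PCI power-management hooks: suspend quiesces the interface via
     * be_cleanup() and powers the device down; resume re-enables the device
     * and rebuilds the queues through be_resume().
     */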
6042static int be_suspend(struct pci_dev *pdev, pm_message_t state)
6043{
6044        struct be_adapter *adapter = pci_get_drvdata(pdev);
6045
6046        be_intr_set(adapter, false);
6047        be_cancel_err_detection(adapter);
6048
6049        be_cleanup(adapter);
6050
6051        pci_save_state(pdev);
6052        pci_disable_device(pdev);
6053        pci_set_power_state(pdev, pci_choose_state(pdev, state));
6054        return 0;
6055}
6056
6057static int be_pci_resume(struct pci_dev *pdev)
6058{
6059        struct be_adapter *adapter = pci_get_drvdata(pdev);
6060        int status = 0;
6061
6062        status = pci_enable_device(pdev);
6063        if (status)
6064                return status;
6065
6066        pci_restore_state(pdev);
6067
6068        status = be_resume(adapter);
6069        if (status)
6070                return status;
6071
6072        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6073
6074        return 0;
6075}
6076
6077/*
6078 * An FLR will stop BE from DMAing any data.
6079 */
6080static void be_shutdown(struct pci_dev *pdev)
6081{
6082        struct be_adapter *adapter = pci_get_drvdata(pdev);
6083
6084        if (!adapter)
6085                return;
6086
6087        be_roce_dev_shutdown(adapter);
6088        cancel_delayed_work_sync(&adapter->work);
6089        be_cancel_err_detection(adapter);
6090
6091        netif_device_detach(adapter->netdev);
6092
6093        be_cmd_reset_function(adapter);
6094
6095        pci_disable_device(pdev);
6096}
6097
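    /* EEH (PCI error recovery) callbacks: error_detected quiesces the
     * adapter and requests a slot reset, slot_reset re-enables the device
     * and waits for the FW to become ready, and resume restores normal
     * operation and restarts error detection.
     */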
6098static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6099                                            pci_channel_state_t state)
6100{
6101        struct be_adapter *adapter = pci_get_drvdata(pdev);
6102
6103        dev_err(&adapter->pdev->dev, "EEH error detected\n");
6104
6105        be_roce_dev_remove(adapter);
6106
6107        if (!be_check_error(adapter, BE_ERROR_EEH)) {
6108                be_set_error(adapter, BE_ERROR_EEH);
6109
6110                be_cancel_err_detection(adapter);
6111
6112                be_cleanup(adapter);
6113        }
6114
6115        if (state == pci_channel_io_perm_failure)
6116                return PCI_ERS_RESULT_DISCONNECT;
6117
6118        pci_disable_device(pdev);
6119
6120        /* The error could cause the FW to trigger a flash debug dump.
6121         * Resetting the card while a flash dump is in progress can
6122         * prevent it from recovering, so wait for the dump to finish.
6123         * Only the first function needs to wait, since the dump happens
6124         * once per adapter.
6125         */
6126        if (pdev->devfn == 0)
6127                ssleep(30);
6128
6129        return PCI_ERS_RESULT_NEED_RESET;
6130}
6131
6132static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6133{
6134        struct be_adapter *adapter = pci_get_drvdata(pdev);
6135        int status;
6136
6137        dev_info(&adapter->pdev->dev, "EEH reset\n");
6138
6139        status = pci_enable_device(pdev);
6140        if (status)
6141                return PCI_ERS_RESULT_DISCONNECT;
6142
6143        pci_set_master(pdev);
6144        pci_restore_state(pdev);
6145
6146        /* Check if card is ok and fw is ready */
6147        dev_info(&adapter->pdev->dev,
6148                 "Waiting for FW to be ready after EEH reset\n");
6149        status = be_fw_wait_ready(adapter);
6150        if (status)
6151                return PCI_ERS_RESULT_DISCONNECT;
6152
6153        be_clear_error(adapter, BE_CLEAR_ALL);
6154        return PCI_ERS_RESULT_RECOVERED;
6155}
6156
6157static void be_eeh_resume(struct pci_dev *pdev)
6158{
6159        int status = 0;
6160        struct be_adapter *adapter = pci_get_drvdata(pdev);
6161
6162        dev_info(&adapter->pdev->dev, "EEH resume\n");
6163
6164        pci_save_state(pdev);
6165
6166        status = be_resume(adapter);
6167        if (status)
6168                goto err;
6169
6170        be_roce_dev_add(adapter);
6171
6172        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6173        return;
6174err:
6175        dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6176}
6177
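    /* Called by the PCI core when the administrator changes the VF count
     * through sysfs, typically with something like:
     *
     *   echo 4 > /sys/bus/pci/devices/<BDF>/sriov_numvfs
     *
     * (the path is shown for illustration; <BDF> is the device address).
     * Writing 0 disables the VFs.
     */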
6178static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6179{
6180        struct be_adapter *adapter = pci_get_drvdata(pdev);
6181        struct be_resources vft_res = {0};
6182        int status;
6183
6184        if (!num_vfs)
6185                be_vf_clear(adapter);
6186
6187        adapter->num_vfs = num_vfs;
6188
6189        if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6190                dev_warn(&pdev->dev,
6191                         "Cannot disable VFs while they are assigned\n");
6192                return -EBUSY;
6193        }
6194
6195        /* When the HW is in an SRIOV-capable configuration, the PF-pool
6196         * resources are distributed equally across the maximum number of VFs.
6197         * The user may request that only a subset of the max VFs be enabled.
6198         * Based on num_vfs, redistribute the resources across num_vfs so that
6199         * each VF gets access to a larger share of resources.
6200         * This facility is not available in BE3 FW.
6201         * On Lancer, the FW does this redistribution itself.
6202         */
6203        if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6204                be_calculate_vf_res(adapter, adapter->num_vfs,
6205                                    &vft_res);
6206                status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6207                                                 adapter->num_vfs, &vft_res);
6208                if (status)
6209                        dev_err(&pdev->dev,
6210                                "Failed to optimize SR-IOV resources\n");
6211        }
6212
6213        status = be_get_resources(adapter);
6214        if (status)
6215                return be_cmd_status(status);
6216
6217        /* Updating the real_num_tx/rx queues requires rtnl_lock() to be held */
6218        rtnl_lock();
6219        status = be_update_queues(adapter);
6220        rtnl_unlock();
6221        if (status)
6222                return be_cmd_status(status);
6223
6224        if (adapter->num_vfs)
6225                status = be_vf_setup(adapter);
6226
6227        if (!status)
6228                return adapter->num_vfs;
6229
6230        return 0;
6231}
6232
6233static const struct pci_error_handlers be_eeh_handlers = {
6234        .error_detected = be_eeh_err_detected,
6235        .slot_reset = be_eeh_reset,
6236        .resume = be_eeh_resume,
6237};
6238
6239static struct pci_driver be_driver = {
6240        .name = DRV_NAME,
6241        .id_table = be_dev_ids,
6242        .probe = be_probe,
6243        .remove = be_remove,
6244        .suspend = be_suspend,
6245        .resume = be_pci_resume,
6246        .shutdown = be_shutdown,
6247        .sriov_configure = be_pci_sriov_configure,
6248        .err_handler = &be_eeh_handlers
6249};
6250
6251static int __init be_init_module(void)
6252{
6253        int status;
6254
6255        if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6256            rx_frag_size != 2048) {
6257                printk(KERN_WARNING DRV_NAME
6258                        " : Module param rx_frag_size must be 2048/4096/8192."
6259                        " Using 2048\n");
6260                rx_frag_size = 2048;
6261        }
6262
6263        if (num_vfs > 0) {
6264                pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6265                pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6266        }
6267
6268        be_wq = create_singlethread_workqueue("be_wq");
6269        if (!be_wq) {
6270                pr_warn(DRV_NAME " : workqueue creation failed\n");
6271                return -ENOMEM;
6272        }
6273
6274        be_err_recovery_workq =
6275                create_singlethread_workqueue("be_err_recover");
6276        if (!be_err_recovery_workq)
6277                pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6278
6279        status = pci_register_driver(&be_driver);
6280        if (status) {
6281                destroy_workqueue(be_wq);
6282                be_destroy_err_recovery_workq();
6283        }
6284        return status;
6285}
6286module_init(be_init_module);
6287
6288static void __exit be_exit_module(void)
6289{
6290        pci_unregister_driver(&be_driver);
6291
6292        be_destroy_err_recovery_workq();
6293
6294        if (be_wq)
6295                destroy_workqueue(be_wq);
6296}
6297module_exit(be_exit_module);
6298