linux/drivers/net/ethernet/emulex/benet/be_main.c
   1/*
   2 * Copyright (C) 2005 - 2016 Broadcom
   3 * All rights reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License version 2
   7 * as published by the Free Software Foundation.  The full GNU General
   8 * Public License is included in this distribution in the file called COPYING.
   9 *
  10 * Contact Information:
  11 * linux-drivers@emulex.com
  12 *
  13 * Emulex
  14 * 3333 Susan Street
  15 * Costa Mesa, CA 92626
  16 */
  17
  18#include <linux/prefetch.h>
  19#include <linux/module.h>
  20#include "be.h"
  21#include "be_cmds.h"
  22#include <asm/div64.h>
  23#include <linux/aer.h>
  24#include <linux/if_bridge.h>
  25#include <net/busy_poll.h>
  26#include <net/vxlan.h>
  27
  28MODULE_VERSION(DRV_VER);
  29MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
  30MODULE_AUTHOR("Emulex Corporation");
  31MODULE_LICENSE("GPL");
  32
  33/* num_vfs module param is obsolete.
  34 * Use sysfs method to enable/disable VFs.
  35 */
  36static unsigned int num_vfs;
  37module_param(num_vfs, uint, S_IRUGO);
  38MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
  39
  40static ushort rx_frag_size = 2048;
  41module_param(rx_frag_size, ushort, S_IRUGO);
  42MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
  43
  44/* Per-module error detection/recovery workq shared across all functions.
  45 * Each function schedules its own work request on this shared workq.
  46 */
  47static struct workqueue_struct *be_err_recovery_workq;
  48
  49static const struct pci_device_id be_dev_ids[] = {
  50        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
  51        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
  52        { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
  53        { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
  54        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
  55        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
  56        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
  57        { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
  58        { 0 }
  59};
  60MODULE_DEVICE_TABLE(pci, be_dev_ids);
  61
   62/* Workqueue used by all functions for deferring cmd calls to the adapter */
  63static struct workqueue_struct *be_wq;
  64
  65/* UE Status Low CSR */
  66static const char * const ue_status_low_desc[] = {
  67        "CEV",
  68        "CTX",
  69        "DBUF",
  70        "ERX",
  71        "Host",
  72        "MPU",
  73        "NDMA",
  74        "PTC ",
  75        "RDMA ",
  76        "RXF ",
  77        "RXIPS ",
  78        "RXULP0 ",
  79        "RXULP1 ",
  80        "RXULP2 ",
  81        "TIM ",
  82        "TPOST ",
  83        "TPRE ",
  84        "TXIPS ",
  85        "TXULP0 ",
  86        "TXULP1 ",
  87        "UC ",
  88        "WDMA ",
  89        "TXULP2 ",
  90        "HOST1 ",
  91        "P0_OB_LINK ",
  92        "P1_OB_LINK ",
  93        "HOST_GPIO ",
  94        "MBOX ",
  95        "ERX2 ",
  96        "SPARE ",
  97        "JTAG ",
  98        "MPU_INTPEND "
  99};
 100
 101/* UE Status High CSR */
 102static const char * const ue_status_hi_desc[] = {
 103        "LPCMEMHOST",
 104        "MGMT_MAC",
 105        "PCS0ONLINE",
 106        "MPU_IRAM",
 107        "PCS1ONLINE",
 108        "PCTL0",
 109        "PCTL1",
 110        "PMEM",
 111        "RR",
 112        "TXPB",
 113        "RXPP",
 114        "XAUI",
 115        "TXP",
 116        "ARM",
 117        "IPC",
 118        "HOST2",
 119        "HOST3",
 120        "HOST4",
 121        "HOST5",
 122        "HOST6",
 123        "HOST7",
 124        "ECRC",
 125        "Poison TLP",
 126        "NETC",
 127        "PERIPH",
 128        "LLTXULP",
 129        "D2P",
 130        "RCON",
 131        "LDMA",
 132        "LLTXP",
 133        "LLTXPB",
 134        "Unknown"
 135};
 136
 137#define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
 138                                 BE_IF_FLAGS_BROADCAST | \
 139                                 BE_IF_FLAGS_MULTICAST | \
 140                                 BE_IF_FLAGS_PASS_L3L4_ERRORS)
 141
 142static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
 143{
 144        struct be_dma_mem *mem = &q->dma_mem;
 145
 146        if (mem->va) {
 147                dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
 148                                  mem->dma);
 149                mem->va = NULL;
 150        }
 151}
 152
 153static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
 154                          u16 len, u16 entry_size)
 155{
 156        struct be_dma_mem *mem = &q->dma_mem;
 157
 158        memset(q, 0, sizeof(*q));
 159        q->len = len;
 160        q->entry_size = entry_size;
 161        mem->size = len * entry_size;
 162        mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
 163                                      GFP_KERNEL);
 164        if (!mem->va)
 165                return -ENOMEM;
 166        return 0;
 167}
 168
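/* Toggle host interrupts via the HOSTINTR bit of the membar control register
 * in PCI config space. be_intr_set() below prefers the FW command and falls
 * back to this direct register write only if that command fails.
 */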
 169static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
 170{
 171        u32 reg, enabled;
 172
 173        pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
 174                              &reg);
 175        enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 176
 177        if (!enabled && enable)
 178                reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 179        else if (enabled && !enable)
 180                reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 181        else
 182                return;
 183
 184        pci_write_config_dword(adapter->pdev,
 185                               PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
 186}
 187
 188static void be_intr_set(struct be_adapter *adapter, bool enable)
 189{
 190        int status = 0;
 191
  192        /* On Lancer, interrupts can't be controlled via this register */
 193        if (lancer_chip(adapter))
 194                return;
 195
 196        if (be_check_error(adapter, BE_ERROR_EEH))
 197                return;
 198
 199        status = be_cmd_intr_set(adapter, enable);
 200        if (status)
 201                be_reg_intr_set(adapter, enable);
 202}
 203
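/* The *_notify() helpers below ring doorbell registers: each 32-bit doorbell
 * value packs the ring id together with the number of entries posted or
 * processed. The wmb() ensures the queue entries are visible in memory before
 * the doorbell write reaches the adapter.
 */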
 204static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
 205{
 206        u32 val = 0;
 207
 208        if (be_check_error(adapter, BE_ERROR_HW))
 209                return;
 210
 211        val |= qid & DB_RQ_RING_ID_MASK;
 212        val |= posted << DB_RQ_NUM_POSTED_SHIFT;
 213
 214        wmb();
 215        iowrite32(val, adapter->db + DB_RQ_OFFSET);
 216}
 217
 218static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
 219                          u16 posted)
 220{
 221        u32 val = 0;
 222
 223        if (be_check_error(adapter, BE_ERROR_HW))
 224                return;
 225
 226        val |= txo->q.id & DB_TXULP_RING_ID_MASK;
 227        val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
 228
 229        wmb();
 230        iowrite32(val, adapter->db + txo->db_offset);
 231}
 232
 233static void be_eq_notify(struct be_adapter *adapter, u16 qid,
 234                         bool arm, bool clear_int, u16 num_popped,
 235                         u32 eq_delay_mult_enc)
 236{
 237        u32 val = 0;
 238
 239        val |= qid & DB_EQ_RING_ID_MASK;
 240        val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
 241
 242        if (be_check_error(adapter, BE_ERROR_HW))
 243                return;
 244
 245        if (arm)
 246                val |= 1 << DB_EQ_REARM_SHIFT;
 247        if (clear_int)
 248                val |= 1 << DB_EQ_CLR_SHIFT;
 249        val |= 1 << DB_EQ_EVNT_SHIFT;
 250        val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
 251        val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
 252        iowrite32(val, adapter->db + DB_EQ_OFFSET);
 253}
 254
 255void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
 256{
 257        u32 val = 0;
 258
 259        val |= qid & DB_CQ_RING_ID_MASK;
 260        val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
 261                        DB_CQ_RING_ID_EXT_MASK_SHIFT);
 262
 263        if (be_check_error(adapter, BE_ERROR_HW))
 264                return;
 265
 266        if (arm)
 267                val |= 1 << DB_CQ_REARM_SHIFT;
 268        val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
 269        iowrite32(val, adapter->db + DB_CQ_OFFSET);
 270}
 271
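/* adapter->pmac_id[0] tracks the MAC filter programmed for the primary MAC,
 * while pmac_id[i + 1] tracks the filter used by the i-th uc-list entry.
 * The helpers below avoid adding or deleting a filter that is already shared
 * with the uc-list.
 */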
 272static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
 273{
 274        int i;
 275
 276        /* Check if mac has already been added as part of uc-list */
 277        for (i = 0; i < adapter->uc_macs; i++) {
 278                if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
 279                        /* mac already added, skip addition */
 280                        adapter->pmac_id[0] = adapter->pmac_id[i + 1];
 281                        return 0;
 282                }
 283        }
 284
 285        return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
 286                               &adapter->pmac_id[0], 0);
 287}
 288
 289static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
 290{
 291        int i;
 292
 293        /* Skip deletion if the programmed mac is
 294         * being used in uc-list
 295         */
 296        for (i = 0; i < adapter->uc_macs; i++) {
 297                if (adapter->pmac_id[i + 1] == pmac_id)
 298                        return;
 299        }
 300        be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
 301}
 302
 303static int be_mac_addr_set(struct net_device *netdev, void *p)
 304{
 305        struct be_adapter *adapter = netdev_priv(netdev);
 306        struct device *dev = &adapter->pdev->dev;
 307        struct sockaddr *addr = p;
 308        int status;
 309        u8 mac[ETH_ALEN];
 310        u32 old_pmac_id = adapter->pmac_id[0];
 311
 312        if (!is_valid_ether_addr(addr->sa_data))
 313                return -EADDRNOTAVAIL;
 314
  315        /* Proceed further only if the user-provided MAC is different
  316         * from the active MAC
  317         */
 318        if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
 319                return 0;
 320
  321        /* BE3 VFs without FILTMGMT privilege are not allowed to set their
  322         * MAC address
 323         */
 324        if (BEx_chip(adapter) && be_virtfn(adapter) &&
 325            !check_privilege(adapter, BE_PRIV_FILTMGMT))
 326                return -EPERM;
 327
 328        /* if device is not running, copy MAC to netdev->dev_addr */
 329        if (!netif_running(netdev))
 330                goto done;
 331
 332        /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
 333         * privilege or if PF did not provision the new MAC address.
 334         * On BE3, this cmd will always fail if the VF doesn't have the
 335         * FILTMGMT privilege. This failure is OK, only if the PF programmed
 336         * the MAC for the VF.
 337         */
 338        mutex_lock(&adapter->rx_filter_lock);
 339        status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
 340        if (!status) {
 341
 342                /* Delete the old programmed MAC. This call may fail if the
 343                 * old MAC was already deleted by the PF driver.
 344                 */
 345                if (adapter->pmac_id[0] != old_pmac_id)
 346                        be_dev_mac_del(adapter, old_pmac_id);
 347        }
 348
 349        mutex_unlock(&adapter->rx_filter_lock);
 350        /* Decide if the new MAC is successfully activated only after
 351         * querying the FW
 352         */
 353        status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
 354                                       adapter->if_handle, true, 0);
 355        if (status)
 356                goto err;
 357
 358        /* The MAC change did not happen, either due to lack of privilege
  359         * or because the PF didn't pre-provision it.
 360         */
 361        if (!ether_addr_equal(addr->sa_data, mac)) {
 362                status = -EPERM;
 363                goto err;
 364        }
 365
 366        /* Remember currently programmed MAC */
 367        ether_addr_copy(adapter->dev_mac, addr->sa_data);
 368done:
 369        ether_addr_copy(netdev->dev_addr, addr->sa_data);
 370        dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
 371        return 0;
 372err:
 373        dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
 374        return status;
 375}
 376
 377/* BE2 supports only v0 cmd */
 378static void *hw_stats_from_cmd(struct be_adapter *adapter)
 379{
 380        if (BE2_chip(adapter)) {
 381                struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
 382
 383                return &cmd->hw_stats;
 384        } else if (BE3_chip(adapter)) {
 385                struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
 386
 387                return &cmd->hw_stats;
 388        } else {
 389                struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
 390
 391                return &cmd->hw_stats;
 392        }
 393}
 394
 395/* BE2 supports only v0 cmd */
 396static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
 397{
 398        if (BE2_chip(adapter)) {
 399                struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 400
 401                return &hw_stats->erx;
 402        } else if (BE3_chip(adapter)) {
 403                struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 404
 405                return &hw_stats->erx;
 406        } else {
 407                struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 408
 409                return &hw_stats->erx;
 410        }
 411}
 412
 413static void populate_be_v0_stats(struct be_adapter *adapter)
 414{
 415        struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
 416        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 417        struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
 418        struct be_port_rxf_stats_v0 *port_stats =
 419                                        &rxf_stats->port[adapter->port_num];
 420        struct be_drv_stats *drvs = &adapter->drv_stats;
 421
 422        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 423        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 424        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 425        drvs->rx_control_frames = port_stats->rx_control_frames;
 426        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 427        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 428        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 429        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 430        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 431        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 432        drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
 433        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 434        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 435        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 436        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 437        drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
 438        drvs->rx_dropped_header_too_small =
 439                port_stats->rx_dropped_header_too_small;
 440        drvs->rx_address_filtered =
 441                                        port_stats->rx_address_filtered +
 442                                        port_stats->rx_vlan_filtered;
 443        drvs->rx_alignment_symbol_errors =
 444                port_stats->rx_alignment_symbol_errors;
 445
 446        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 447        drvs->tx_controlframes = port_stats->tx_controlframes;
 448
 449        if (adapter->port_num)
 450                drvs->jabber_events = rxf_stats->port1_jabber_events;
 451        else
 452                drvs->jabber_events = rxf_stats->port0_jabber_events;
 453        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 454        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 455        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 456        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 457        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 458        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 459        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 460}
 461
 462static void populate_be_v1_stats(struct be_adapter *adapter)
 463{
 464        struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
 465        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 466        struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
 467        struct be_port_rxf_stats_v1 *port_stats =
 468                                        &rxf_stats->port[adapter->port_num];
 469        struct be_drv_stats *drvs = &adapter->drv_stats;
 470
 471        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 472        drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 473        drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 474        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 475        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 476        drvs->rx_control_frames = port_stats->rx_control_frames;
 477        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 478        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 479        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 480        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 481        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 482        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 483        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 484        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 485        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 486        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 487        drvs->rx_dropped_header_too_small =
 488                port_stats->rx_dropped_header_too_small;
 489        drvs->rx_input_fifo_overflow_drop =
 490                port_stats->rx_input_fifo_overflow_drop;
 491        drvs->rx_address_filtered = port_stats->rx_address_filtered;
 492        drvs->rx_alignment_symbol_errors =
 493                port_stats->rx_alignment_symbol_errors;
 494        drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 495        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 496        drvs->tx_controlframes = port_stats->tx_controlframes;
 497        drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 498        drvs->jabber_events = port_stats->jabber_events;
 499        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 500        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 501        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 502        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 503        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 504        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 505        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 506}
 507
 508static void populate_be_v2_stats(struct be_adapter *adapter)
 509{
 510        struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
 511        struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
 512        struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
 513        struct be_port_rxf_stats_v2 *port_stats =
 514                                        &rxf_stats->port[adapter->port_num];
 515        struct be_drv_stats *drvs = &adapter->drv_stats;
 516
 517        be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
 518        drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
 519        drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 520        drvs->rx_pause_frames = port_stats->rx_pause_frames;
 521        drvs->rx_crc_errors = port_stats->rx_crc_errors;
 522        drvs->rx_control_frames = port_stats->rx_control_frames;
 523        drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
 524        drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
 525        drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
 526        drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
 527        drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
 528        drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
 529        drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
 530        drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
 531        drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
 532        drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
 533        drvs->rx_dropped_header_too_small =
 534                port_stats->rx_dropped_header_too_small;
 535        drvs->rx_input_fifo_overflow_drop =
 536                port_stats->rx_input_fifo_overflow_drop;
 537        drvs->rx_address_filtered = port_stats->rx_address_filtered;
 538        drvs->rx_alignment_symbol_errors =
 539                port_stats->rx_alignment_symbol_errors;
 540        drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
 541        drvs->tx_pauseframes = port_stats->tx_pauseframes;
 542        drvs->tx_controlframes = port_stats->tx_controlframes;
 543        drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
 544        drvs->jabber_events = port_stats->jabber_events;
 545        drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
 546        drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
 547        drvs->forwarded_packets = rxf_stats->forwarded_packets;
 548        drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
 549        drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
 550        drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
 551        adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
 552        if (be_roce_supported(adapter)) {
 553                drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
 554                drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
 555                drvs->rx_roce_frames = port_stats->roce_frames_received;
 556                drvs->roce_drops_crc = port_stats->roce_drops_crc;
 557                drvs->roce_drops_payload_len =
 558                        port_stats->roce_drops_payload_len;
 559        }
 560}
 561
 562static void populate_lancer_stats(struct be_adapter *adapter)
 563{
 564        struct be_drv_stats *drvs = &adapter->drv_stats;
 565        struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
 566
 567        be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
 568        drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
 569        drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
 570        drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
 571        drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
 572        drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
 573        drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
 574        drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
 575        drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
 576        drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
 577        drvs->rx_dropped_tcp_length =
 578                                pport_stats->rx_dropped_invalid_tcp_length;
 579        drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
 580        drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
 581        drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
 582        drvs->rx_dropped_header_too_small =
 583                                pport_stats->rx_dropped_header_too_small;
 584        drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 585        drvs->rx_address_filtered =
 586                                        pport_stats->rx_address_filtered +
 587                                        pport_stats->rx_vlan_filtered;
 588        drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
 589        drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
 590        drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
 591        drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
 592        drvs->jabber_events = pport_stats->rx_jabbers;
 593        drvs->forwarded_packets = pport_stats->num_forwards_lo;
 594        drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
 595        drvs->rx_drops_too_many_frags =
 596                                pport_stats->rx_drops_too_many_frags_lo;
 597}
 598
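/* Accumulate a 16-bit HW counter, which wraps at 65535, into a 32-bit driver
 * counter: a new sample smaller than the accumulator's low 16 bits indicates
 * a wrap, so 65536 is added to compensate.
 */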
 599static void accumulate_16bit_val(u32 *acc, u16 val)
 600{
 601#define lo(x)                   (x & 0xFFFF)
 602#define hi(x)                   (x & 0xFFFF0000)
 603        bool wrapped = val < lo(*acc);
 604        u32 newacc = hi(*acc) + val;
 605
 606        if (wrapped)
 607                newacc += 65536;
 608        ACCESS_ONCE(*acc) = newacc;
 609}
 610
 611static void populate_erx_stats(struct be_adapter *adapter,
 612                               struct be_rx_obj *rxo, u32 erx_stat)
 613{
 614        if (!BEx_chip(adapter))
 615                rx_stats(rxo)->rx_drops_no_frags = erx_stat;
 616        else
  617                /* the erx HW counter below can wrap around after 65535;
  618                 * the driver accumulates it into a 32-bit value
  619                 */
 620                accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
 621                                     (u16)erx_stat);
 622}
 623
 624void be_parse_stats(struct be_adapter *adapter)
 625{
 626        struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
 627        struct be_rx_obj *rxo;
 628        int i;
 629        u32 erx_stat;
 630
 631        if (lancer_chip(adapter)) {
 632                populate_lancer_stats(adapter);
 633        } else {
 634                if (BE2_chip(adapter))
 635                        populate_be_v0_stats(adapter);
 636                else if (BE3_chip(adapter))
 637                        /* for BE3 */
 638                        populate_be_v1_stats(adapter);
 639                else
 640                        populate_be_v2_stats(adapter);
 641
 642                /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
 643                for_all_rx_queues(adapter, rxo, i) {
 644                        erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
 645                        populate_erx_stats(adapter, rxo, erx_stat);
 646                }
 647        }
 648}
 649
 650static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
 651                                                struct rtnl_link_stats64 *stats)
 652{
 653        struct be_adapter *adapter = netdev_priv(netdev);
 654        struct be_drv_stats *drvs = &adapter->drv_stats;
 655        struct be_rx_obj *rxo;
 656        struct be_tx_obj *txo;
 657        u64 pkts, bytes;
 658        unsigned int start;
 659        int i;
 660
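        /* Fetch the per-queue 64-bit counters under the u64_stats seqcount
         * retry loops so that 32-bit hosts see a consistent pkts/bytes pair.
         */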
 661        for_all_rx_queues(adapter, rxo, i) {
 662                const struct be_rx_stats *rx_stats = rx_stats(rxo);
 663
 664                do {
 665                        start = u64_stats_fetch_begin_irq(&rx_stats->sync);
 666                        pkts = rx_stats(rxo)->rx_pkts;
 667                        bytes = rx_stats(rxo)->rx_bytes;
 668                } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
 669                stats->rx_packets += pkts;
 670                stats->rx_bytes += bytes;
 671                stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
 672                stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
 673                                        rx_stats(rxo)->rx_drops_no_frags;
 674        }
 675
 676        for_all_tx_queues(adapter, txo, i) {
 677                const struct be_tx_stats *tx_stats = tx_stats(txo);
 678
 679                do {
 680                        start = u64_stats_fetch_begin_irq(&tx_stats->sync);
 681                        pkts = tx_stats(txo)->tx_pkts;
 682                        bytes = tx_stats(txo)->tx_bytes;
 683                } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
 684                stats->tx_packets += pkts;
 685                stats->tx_bytes += bytes;
 686        }
 687
 688        /* bad pkts received */
 689        stats->rx_errors = drvs->rx_crc_errors +
 690                drvs->rx_alignment_symbol_errors +
 691                drvs->rx_in_range_errors +
 692                drvs->rx_out_range_errors +
 693                drvs->rx_frame_too_long +
 694                drvs->rx_dropped_too_small +
 695                drvs->rx_dropped_too_short +
 696                drvs->rx_dropped_header_too_small +
 697                drvs->rx_dropped_tcp_length +
 698                drvs->rx_dropped_runt;
 699
 700        /* detailed rx errors */
 701        stats->rx_length_errors = drvs->rx_in_range_errors +
 702                drvs->rx_out_range_errors +
 703                drvs->rx_frame_too_long;
 704
 705        stats->rx_crc_errors = drvs->rx_crc_errors;
 706
 707        /* frame alignment errors */
 708        stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
 709
 710        /* receiver fifo overrun */
  711        /* drops_no_pbuf is not per i/f, it's per BE card */
 712        stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
 713                                drvs->rx_input_fifo_overflow_drop +
 714                                drvs->rx_drops_no_pbuf;
 715        return stats;
 716}
 717
 718void be_link_status_update(struct be_adapter *adapter, u8 link_status)
 719{
 720        struct net_device *netdev = adapter->netdev;
 721
 722        if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
 723                netif_carrier_off(netdev);
 724                adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
 725        }
 726
 727        if (link_status)
 728                netif_carrier_on(netdev);
 729        else
 730                netif_carrier_off(netdev);
 731
 732        netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
 733}
 734
 735static int be_gso_hdr_len(struct sk_buff *skb)
 736{
 737        if (skb->encapsulation)
 738                return skb_inner_transport_offset(skb) +
 739                       inner_tcp_hdrlen(skb);
 740        return skb_transport_offset(skb) + tcp_hdrlen(skb);
 741}
 742
 743static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
 744{
 745        struct be_tx_stats *stats = tx_stats(txo);
 746        u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
 747        /* Account for headers which get duplicated in TSO pkt */
 748        u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
 749
 750        u64_stats_update_begin(&stats->sync);
 751        stats->tx_reqs++;
 752        stats->tx_bytes += skb->len + dup_hdr_len;
 753        stats->tx_pkts += tx_pkts;
 754        if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
 755                stats->tx_vxlan_offload_pkts += tx_pkts;
 756        u64_stats_update_end(&stats->sync);
 757}
 758
 759/* Returns number of WRBs needed for the skb */
 760static u32 skb_wrb_cnt(struct sk_buff *skb)
 761{
 762        /* +1 for the header wrb */
 763        return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
 764}
 765
 766static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
 767{
 768        wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
 769        wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
 770        wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
 771        wrb->rsvd0 = 0;
 772}
 773
 774/* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
 775 * to avoid the swap and shift/mask operations in wrb_fill().
 776 */
 777static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
 778{
 779        wrb->frag_pa_hi = 0;
 780        wrb->frag_pa_lo = 0;
 781        wrb->frag_len = 0;
 782        wrb->rsvd0 = 0;
 783}
 784
 785static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
 786                                     struct sk_buff *skb)
 787{
 788        u8 vlan_prio;
 789        u16 vlan_tag;
 790
 791        vlan_tag = skb_vlan_tag_get(skb);
 792        vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
 793        /* If vlan priority provided by OS is NOT in available bmap */
 794        if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
 795                vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
 796                                adapter->recommended_prio_bits;
 797
 798        return vlan_tag;
 799}
 800
 801/* Used only for IP tunnel packets */
 802static u16 skb_inner_ip_proto(struct sk_buff *skb)
 803{
 804        return (inner_ip_hdr(skb)->version == 4) ?
 805                inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
 806}
 807
 808static u16 skb_ip_proto(struct sk_buff *skb)
 809{
 810        return (ip_hdr(skb)->version == 4) ?
 811                ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
 812}
 813
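/* The TX queue is treated as full once it can no longer accommodate another
 * worst-case fragmented skb (BE_MAX_TX_FRAG_COUNT WRBs).
 */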
 814static inline bool be_is_txq_full(struct be_tx_obj *txo)
 815{
 816        return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
 817}
 818
 819static inline bool be_can_txq_wake(struct be_tx_obj *txo)
 820{
 821        return atomic_read(&txo->q.used) < txo->q.len / 2;
 822}
 823
 824static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
 825{
 826        return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
 827}
 828
 829static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
 830                                       struct sk_buff *skb,
 831                                       struct be_wrb_params *wrb_params)
 832{
 833        u16 proto;
 834
 835        if (skb_is_gso(skb)) {
 836                BE_WRB_F_SET(wrb_params->features, LSO, 1);
 837                wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
 838                if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
 839                        BE_WRB_F_SET(wrb_params->features, LSO6, 1);
 840        } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 841                if (skb->encapsulation) {
 842                        BE_WRB_F_SET(wrb_params->features, IPCS, 1);
 843                        proto = skb_inner_ip_proto(skb);
 844                } else {
 845                        proto = skb_ip_proto(skb);
 846                }
 847                if (proto == IPPROTO_TCP)
 848                        BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
 849                else if (proto == IPPROTO_UDP)
 850                        BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
 851        }
 852
 853        if (skb_vlan_tag_present(skb)) {
 854                BE_WRB_F_SET(wrb_params->features, VLAN, 1);
 855                wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
 856        }
 857
 858        BE_WRB_F_SET(wrb_params->features, CRC, 1);
 859}
 860
 861static void wrb_fill_hdr(struct be_adapter *adapter,
 862                         struct be_eth_hdr_wrb *hdr,
 863                         struct be_wrb_params *wrb_params,
 864                         struct sk_buff *skb)
 865{
 866        memset(hdr, 0, sizeof(*hdr));
 867
 868        SET_TX_WRB_HDR_BITS(crc, hdr,
 869                            BE_WRB_F_GET(wrb_params->features, CRC));
 870        SET_TX_WRB_HDR_BITS(ipcs, hdr,
 871                            BE_WRB_F_GET(wrb_params->features, IPCS));
 872        SET_TX_WRB_HDR_BITS(tcpcs, hdr,
 873                            BE_WRB_F_GET(wrb_params->features, TCPCS));
 874        SET_TX_WRB_HDR_BITS(udpcs, hdr,
 875                            BE_WRB_F_GET(wrb_params->features, UDPCS));
 876
 877        SET_TX_WRB_HDR_BITS(lso, hdr,
 878                            BE_WRB_F_GET(wrb_params->features, LSO));
 879        SET_TX_WRB_HDR_BITS(lso6, hdr,
 880                            BE_WRB_F_GET(wrb_params->features, LSO6));
 881        SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
 882
  883        /* The hack to skip HW VLAN tagging needs evt = 1, compl = 0. When
  884         * this hack is not needed, the evt bit is set while ringing the DB.
 885         */
 886        SET_TX_WRB_HDR_BITS(event, hdr,
 887                            BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
 888        SET_TX_WRB_HDR_BITS(vlan, hdr,
 889                            BE_WRB_F_GET(wrb_params->features, VLAN));
 890        SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
 891
 892        SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
 893        SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
 894        SET_TX_WRB_HDR_BITS(mgmt, hdr,
 895                            BE_WRB_F_GET(wrb_params->features, OS2BMC));
 896}
 897
 898static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
 899                          bool unmap_single)
 900{
 901        dma_addr_t dma;
 902        u32 frag_len = le32_to_cpu(wrb->frag_len);
  903
 905        dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
 906                (u64)le32_to_cpu(wrb->frag_pa_lo);
 907        if (frag_len) {
 908                if (unmap_single)
 909                        dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
 910                else
 911                        dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
 912        }
 913}
 914
 915/* Grab a WRB header for xmit */
 916static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
 917{
 918        u32 head = txo->q.head;
 919
 920        queue_head_inc(&txo->q);
 921        return head;
 922}
 923
 924/* Set up the WRB header for xmit */
 925static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
 926                                struct be_tx_obj *txo,
 927                                struct be_wrb_params *wrb_params,
 928                                struct sk_buff *skb, u16 head)
 929{
 930        u32 num_frags = skb_wrb_cnt(skb);
 931        struct be_queue_info *txq = &txo->q;
 932        struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
 933
 934        wrb_fill_hdr(adapter, hdr, wrb_params, skb);
 935        be_dws_cpu_to_le(hdr, sizeof(*hdr));
 936
 937        BUG_ON(txo->sent_skb_list[head]);
 938        txo->sent_skb_list[head] = skb;
 939        txo->last_req_hdr = head;
 940        atomic_add(num_frags, &txq->used);
 941        txo->last_req_wrb_cnt = num_frags;
 942        txo->pend_wrb_cnt += num_frags;
 943}
 944
 945/* Setup a WRB fragment (buffer descriptor) for xmit */
 946static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
 947                                 int len)
 948{
 949        struct be_eth_wrb *wrb;
 950        struct be_queue_info *txq = &txo->q;
 951
 952        wrb = queue_head_node(txq);
 953        wrb_fill(wrb, busaddr, len);
 954        queue_head_inc(txq);
 955}
 956
 957/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
 958 * was invoked. The producer index is restored to the previous packet and the
 959 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
 960 */
 961static void be_xmit_restore(struct be_adapter *adapter,
 962                            struct be_tx_obj *txo, u32 head, bool map_single,
 963                            u32 copied)
 964{
 965        struct device *dev;
 966        struct be_eth_wrb *wrb;
 967        struct be_queue_info *txq = &txo->q;
 968
 969        dev = &adapter->pdev->dev;
 970        txq->head = head;
 971
 972        /* skip the first wrb (hdr); it's not mapped */
 973        queue_head_inc(txq);
 974        while (copied) {
 975                wrb = queue_head_node(txq);
 976                unmap_tx_frag(dev, wrb, map_single);
 977                map_single = false;
 978                copied -= le32_to_cpu(wrb->frag_len);
 979                queue_head_inc(txq);
 980        }
 981
 982        txq->head = head;
 983}
 984
 985/* Enqueue the given packet for transmit. This routine allocates WRBs for the
 986 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
 987 * of WRBs used up by the packet.
 988 */
 989static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
 990                           struct sk_buff *skb,
 991                           struct be_wrb_params *wrb_params)
 992{
 993        u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
 994        struct device *dev = &adapter->pdev->dev;
 995        struct be_queue_info *txq = &txo->q;
 996        bool map_single = false;
 997        u32 head = txq->head;
 998        dma_addr_t busaddr;
 999        int len;
1000
1001        head = be_tx_get_wrb_hdr(txo);
1002
1003        if (skb->len > skb->data_len) {
1004                len = skb_headlen(skb);
1005
1006                busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1007                if (dma_mapping_error(dev, busaddr))
1008                        goto dma_err;
1009                map_single = true;
1010                be_tx_setup_wrb_frag(txo, busaddr, len);
1011                copied += len;
1012        }
1013
1014        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1015                const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1016                len = skb_frag_size(frag);
1017
1018                busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1019                if (dma_mapping_error(dev, busaddr))
1020                        goto dma_err;
1021                be_tx_setup_wrb_frag(txo, busaddr, len);
1022                copied += len;
1023        }
1024
1025        be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1026
1027        be_tx_stats_update(txo, skb);
1028        return wrb_cnt;
1029
1030dma_err:
1031        adapter->drv_stats.dma_map_errors++;
1032        be_xmit_restore(adapter, txo, head, map_single, copied);
1033        return 0;
1034}
1035
1036static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1037{
1038        return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1039}
1040
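/* Insert the VLAN tag(s) into the packet data itself instead of relying on
 * HW tagging: the inner tag comes from the skb (or from the pvid when the
 * QnQ async event has been received), and an outer tag is added for qnq_vid;
 * VLAN_SKIP_HW then tells the FW not to insert another tag.
 */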
1041static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1042                                             struct sk_buff *skb,
1043                                             struct be_wrb_params
1044                                             *wrb_params)
1045{
1046        u16 vlan_tag = 0;
1047
1048        skb = skb_share_check(skb, GFP_ATOMIC);
1049        if (unlikely(!skb))
1050                return skb;
1051
1052        if (skb_vlan_tag_present(skb))
1053                vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1054
1055        if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1056                if (!vlan_tag)
1057                        vlan_tag = adapter->pvid;
 1058                /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W
 1059                 * to skip VLAN insertion
1060                 */
1061                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1062        }
1063
1064        if (vlan_tag) {
1065                skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1066                                                vlan_tag);
1067                if (unlikely(!skb))
1068                        return skb;
1069                skb->vlan_tci = 0;
1070        }
1071
1072        /* Insert the outer VLAN, if any */
1073        if (adapter->qnq_vid) {
1074                vlan_tag = adapter->qnq_vid;
1075                skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1076                                                vlan_tag);
1077                if (unlikely(!skb))
1078                        return skb;
1079                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1080        }
1081
1082        return skb;
1083}
1084
1085static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1086{
1087        struct ethhdr *eh = (struct ethhdr *)skb->data;
1088        u16 offset = ETH_HLEN;
1089
1090        if (eh->h_proto == htons(ETH_P_IPV6)) {
1091                struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1092
1093                offset += sizeof(struct ipv6hdr);
1094                if (ip6h->nexthdr != NEXTHDR_TCP &&
1095                    ip6h->nexthdr != NEXTHDR_UDP) {
1096                        struct ipv6_opt_hdr *ehdr =
1097                                (struct ipv6_opt_hdr *)(skb->data + offset);
1098
1099                        /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1100                        if (ehdr->hdrlen == 0xff)
1101                                return true;
1102                }
1103        }
1104        return false;
1105}
1106
1107static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1108{
1109        return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1110}
1111
1112static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1113{
1114        return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1115}
1116
1117static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1118                                                  struct sk_buff *skb,
1119                                                  struct be_wrb_params
1120                                                  *wrb_params)
1121{
1122        struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1123        unsigned int eth_hdr_len;
1124        struct iphdr *ip;
1125
1126        /* For padded packets, BE HW modifies tot_len field in IP header
 1127         * incorrectly when VLAN tag is inserted by HW.
1128         * For padded packets, Lancer computes incorrect checksum.
1129         */
1130        eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1131                                                VLAN_ETH_HLEN : ETH_HLEN;
1132        if (skb->len <= 60 &&
1133            (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1134            is_ipv4_pkt(skb)) {
1135                ip = (struct iphdr *)ip_hdr(skb);
1136                pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1137        }
1138
1139        /* If vlan tag is already inlined in the packet, skip HW VLAN
1140         * tagging in pvid-tagging mode
1141         */
1142        if (be_pvid_tagging_enabled(adapter) &&
1143            veh->h_vlan_proto == htons(ETH_P_8021Q))
1144                BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1145
1146        /* HW has a bug wherein it will calculate CSUM for VLAN
 1147         * pkts even though CSUM offload is disabled.
 1148         * Manually insert the VLAN in the pkt.
1149         */
1150        if (skb->ip_summed != CHECKSUM_PARTIAL &&
1151            skb_vlan_tag_present(skb)) {
1152                skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1153                if (unlikely(!skb))
1154                        goto err;
1155        }
1156
 1157        /* HW may lock up when VLAN HW tagging is requested on
1158         * certain ipv6 packets. Drop such pkts if the HW workaround to
1159         * skip HW tagging is not enabled by FW.
1160         */
1161        if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1162                     (adapter->pvid || adapter->qnq_vid) &&
1163                     !qnq_async_evt_rcvd(adapter)))
1164                goto tx_drop;
1165
 1166        /* Manual VLAN tag insertion to prevent ASIC lockup when the
 1167         * ASIC inserts a VLAN tag into certain ipv6 packets.
 1168         * Insert VLAN tags in the driver, and set the event,
 1169         * completion and vlan bits accordingly
 1170         * in the Tx WRB.
1171         */
1172        if (be_ipv6_tx_stall_chk(adapter, skb) &&
1173            be_vlan_tag_tx_chk(adapter, skb)) {
1174                skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1175                if (unlikely(!skb))
1176                        goto err;
1177        }
1178
1179        return skb;
1180tx_drop:
1181        dev_kfree_skb_any(skb);
1182err:
1183        return NULL;
1184}
1185
1186static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1187                                           struct sk_buff *skb,
1188                                           struct be_wrb_params *wrb_params)
1189{
1190        int err;
1191
1192        /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
 1193         * packets that are 32 bytes or less may cause a transmit stall
 1194         * on that port. The workaround is to pad such packets
 1195         * (len <= 32 bytes) to a minimum length of 36 bytes.
1196         */
1197        if (skb->len <= 32) {
1198                if (skb_put_padto(skb, 36))
1199                        return NULL;
1200        }
1201
1202        if (BEx_chip(adapter) || lancer_chip(adapter)) {
1203                skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1204                if (!skb)
1205                        return NULL;
1206        }
1207
1208        /* The stack can send us skbs with length greater than
1209         * what the HW can handle. Trim the extra bytes.
1210         */
1211        WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1212        err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1213        WARN_ON(err);
1214
1215        return skb;
1216}
1217
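/* Notify the HW of all WRBs pending on this TX queue: mark the last header
 * WRB eventable so a completion is raised, pad with a dummy WRB on non-Lancer
 * chips when the pending count is odd (bumping num_wrb in the last header to
 * match), and ring the TX doorbell.
 */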
1218static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1219{
1220        struct be_queue_info *txq = &txo->q;
1221        struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1222
1223        /* Mark the last request eventable if it hasn't been marked already */
1224        if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1225                hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1226
 1227        /* compose a dummy wrb if there is an odd number of wrbs to notify */
1228        if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1229                wrb_fill_dummy(queue_head_node(txq));
1230                queue_head_inc(txq);
1231                atomic_inc(&txq->used);
1232                txo->pend_wrb_cnt++;
1233                hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1234                                           TX_HDR_WRB_NUM_SHIFT);
1235                hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1236                                          TX_HDR_WRB_NUM_SHIFT);
1237        }
1238        be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1239        txo->pend_wrb_cnt = 0;
1240}
1241
1242/* OS2BMC related */
1243
1244#define DHCP_CLIENT_PORT        68
1245#define DHCP_SERVER_PORT        67
1246#define NET_BIOS_PORT1          137
1247#define NET_BIOS_PORT2          138
1248#define DHCPV6_RAS_PORT         547
1249
1250#define is_mc_allowed_on_bmc(adapter, eh)       \
1251        (!is_multicast_filt_enabled(adapter) && \
1252         is_multicast_ether_addr(eh->h_dest) && \
1253         !is_broadcast_ether_addr(eh->h_dest))
1254
1255#define is_bc_allowed_on_bmc(adapter, eh)       \
1256        (!is_broadcast_filt_enabled(adapter) && \
1257         is_broadcast_ether_addr(eh->h_dest))
1258
1259#define is_arp_allowed_on_bmc(adapter, skb)     \
1260        (is_arp(skb) && is_arp_filt_enabled(adapter))
1261
1262#define is_broadcast_packet(eh, adapter)        \
1263                (is_multicast_ether_addr(eh->h_dest) && \
1264                !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1265
1266#define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1267
1268#define is_arp_filt_enabled(adapter)    \
1269                (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1270
1271#define is_dhcp_client_filt_enabled(adapter)    \
1272                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1273
1274#define is_dhcp_srvr_filt_enabled(adapter)      \
1275                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1276
1277#define is_nbios_filt_enabled(adapter)  \
1278                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1279
1280#define is_ipv6_na_filt_enabled(adapter)        \
1281                (adapter->bmc_filt_mask &       \
1282                        BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1283
1284#define is_ipv6_ra_filt_enabled(adapter)        \
1285                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1286
1287#define is_ipv6_ras_filt_enabled(adapter)       \
1288                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1289
1290#define is_broadcast_filt_enabled(adapter)      \
1291                (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1292
1293#define is_multicast_filt_enabled(adapter)      \
1294                (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1295
1296static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1297                               struct sk_buff **skb)
1298{
1299        struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1300        bool os2bmc = false;
1301
1302        if (!be_is_os2bmc_enabled(adapter))
1303                goto done;
1304
1305        if (!is_multicast_ether_addr(eh->h_dest))
1306                goto done;
1307
1308        if (is_mc_allowed_on_bmc(adapter, eh) ||
1309            is_bc_allowed_on_bmc(adapter, eh) ||
1310            is_arp_allowed_on_bmc(adapter, (*skb))) {
1311                os2bmc = true;
1312                goto done;
1313        }
1314
1315        if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1316                struct ipv6hdr *hdr = ipv6_hdr((*skb));
1317                u8 nexthdr = hdr->nexthdr;
1318
1319                if (nexthdr == IPPROTO_ICMPV6) {
1320                        struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1321
1322                        switch (icmp6->icmp6_type) {
1323                        case NDISC_ROUTER_ADVERTISEMENT:
1324                                os2bmc = is_ipv6_ra_filt_enabled(adapter);
1325                                goto done;
1326                        case NDISC_NEIGHBOUR_ADVERTISEMENT:
1327                                os2bmc = is_ipv6_na_filt_enabled(adapter);
1328                                goto done;
1329                        default:
1330                                break;
1331                        }
1332                }
1333        }
1334
1335        if (is_udp_pkt((*skb))) {
1336                struct udphdr *udp = udp_hdr((*skb));
1337
1338                switch (ntohs(udp->dest)) {
1339                case DHCP_CLIENT_PORT:
1340                        os2bmc = is_dhcp_client_filt_enabled(adapter);
1341                        goto done;
1342                case DHCP_SERVER_PORT:
1343                        os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1344                        goto done;
1345                case NET_BIOS_PORT1:
1346                case NET_BIOS_PORT2:
1347                        os2bmc = is_nbios_filt_enabled(adapter);
1348                        goto done;
1349                case DHCPV6_RAS_PORT:
1350                        os2bmc = is_ipv6_ras_filt_enabled(adapter);
1351                        goto done;
1352                default:
1353                        break;
1354                }
1355        }
1356done:
 1357        /* For packets over a vlan that are destined to the BMC,
 1358         * the asic expects the vlan tag to be inline in the packet.
1359         */
1360        if (os2bmc)
1361                *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1362
1363        return os2bmc;
1364}
1365
1366static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1367{
1368        struct be_adapter *adapter = netdev_priv(netdev);
1369        u16 q_idx = skb_get_queue_mapping(skb);
1370        struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1371        struct be_wrb_params wrb_params = { 0 };
1372        bool flush = !skb->xmit_more;
1373        u16 wrb_cnt;
1374
1375        skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1376        if (unlikely(!skb))
1377                goto drop;
1378
1379        be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1380
1381        wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1382        if (unlikely(!wrb_cnt)) {
1383                dev_kfree_skb_any(skb);
1384                goto drop;
1385        }
1386
1387        /* If os2bmc is enabled and the pkt is destined to the BMC, enqueue
1388         * the pkt a 2nd time with the mgmt bit set; skb_get() takes an extra
1389         * reference as the same skb is then owned by two sets of TX WRBs. */
1390        if (be_send_pkt_to_bmc(adapter, &skb)) {
1391                BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1392                wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1393                if (unlikely(!wrb_cnt))
1394                        goto drop;
1395                else
1396                        skb_get(skb);
1397        }
1398
1399        if (be_is_txq_full(txo)) {
1400                netif_stop_subqueue(netdev, q_idx);
1401                tx_stats(txo)->tx_stops++;
1402        }
1403
1404        if (flush || __netif_subqueue_stopped(netdev, q_idx))
1405                be_xmit_flush(adapter, txo);
1406
1407        return NETDEV_TX_OK;
1408drop:
1409        tx_stats(txo)->tx_drv_drops++;
1410        /* Flush the already enqueued tx requests */
1411        if (flush && txo->pend_wrb_cnt)
1412                be_xmit_flush(adapter, txo);
1413
1414        return NETDEV_TX_OK;
1415}
1416
1417static inline bool be_in_all_promisc(struct be_adapter *adapter)
1418{
1419        return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1420                        BE_IF_FLAGS_ALL_PROMISCUOUS;
1421}
1422
1423static int be_set_vlan_promisc(struct be_adapter *adapter)
1424{
1425        struct device *dev = &adapter->pdev->dev;
1426        int status;
1427
1428        if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1429                return 0;
1430
1431        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1432        if (!status) {
1433                dev_info(dev, "Enabled VLAN promiscuous mode\n");
1434                adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1435        } else {
1436                dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1437        }
1438        return status;
1439}
1440
1441static int be_clear_vlan_promisc(struct be_adapter *adapter)
1442{
1443        struct device *dev = &adapter->pdev->dev;
1444        int status;
1445
1446        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1447        if (!status) {
1448                dev_info(dev, "Disabling VLAN promiscuous mode\n");
1449                adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1450        }
1451        return status;
1452}
1453
1454/*
1455 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1456 * If the user configures more, place BE in vlan promiscuous mode.
1457 */
1458static int be_vid_config(struct be_adapter *adapter)
1459{
1460        struct device *dev = &adapter->pdev->dev;
1461        u16 vids[BE_NUM_VLANS_SUPPORTED];
1462        u16 num = 0, i = 0;
1463        int status = 0;
1464
1465        /* No need to change the VLAN state if the I/F is in promiscuous mode */
1466        if (adapter->netdev->flags & IFF_PROMISC)
1467                return 0;
1468
1469        if (adapter->vlans_added > be_max_vlans(adapter))
1470                return be_set_vlan_promisc(adapter);
1471
1472        if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1473                status = be_clear_vlan_promisc(adapter);
1474                if (status)
1475                        return status;
1476        }
1477        /* Construct VLAN Table to give to HW */
1478        for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1479                vids[num++] = cpu_to_le16(i);
1480
1481        status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1482        if (status) {
1483                dev_err(dev, "Setting HW VLAN filtering failed\n");
1484                /* Set to VLAN promisc mode as setting VLAN filter failed */
1485                if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1486                    addl_status(status) ==
1487                                MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1488                        return be_set_vlan_promisc(adapter);
1489        }
1490        return status;
1491}
1492
1493static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1494{
1495        struct be_adapter *adapter = netdev_priv(netdev);
1496        int status = 0;
1497
1498        mutex_lock(&adapter->rx_filter_lock);
1499
1500        /* Packets with VID 0 are always received by Lancer by default */
1501        if (lancer_chip(adapter) && vid == 0)
1502                goto done;
1503
1504        if (test_bit(vid, adapter->vids))
1505                goto done;
1506
1507        set_bit(vid, adapter->vids);
1508        adapter->vlans_added++;
1509
1510        status = be_vid_config(adapter);
1511done:
1512        mutex_unlock(&adapter->rx_filter_lock);
1513        return status;
1514}
1515
1516static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1517{
1518        struct be_adapter *adapter = netdev_priv(netdev);
1519        int status = 0;
1520
1521        mutex_lock(&adapter->rx_filter_lock);
1522
1523        /* Packets with VID 0 are always received by Lancer by default */
1524        if (lancer_chip(adapter) && vid == 0)
1525                goto done;
1526
1527        if (!test_bit(vid, adapter->vids))
1528                goto done;
1529
1530        clear_bit(vid, adapter->vids);
1531        adapter->vlans_added--;
1532
1533        status = be_vid_config(adapter);
1534done:
1535        mutex_unlock(&adapter->rx_filter_lock);
1536        return status;
1537}
1538
1539static void be_set_all_promisc(struct be_adapter *adapter)
1540{
1541        be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1542        adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1543}
1544
1545static void be_set_mc_promisc(struct be_adapter *adapter)
1546{
1547        int status;
1548
1549        if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1550                return;
1551
1552        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1553        if (!status)
1554                adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1555}
1556
1557static void be_set_uc_promisc(struct be_adapter *adapter)
1558{
1559        int status;
1560
1561        if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1562                return;
1563
1564        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1565        if (!status)
1566                adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1567}
1568
1569static void be_clear_uc_promisc(struct be_adapter *adapter)
1570{
1571        int status;
1572
1573        if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1574                return;
1575
1576        status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1577        if (!status)
1578                adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1579}
1580
1581/* The two functions below are the sync/unsync callbacks passed to
1582 * __dev_mc_sync()/__dev_uc_sync(). A single callback handles both cases; it
1583 * does not actually add/remove addresses but merely flags that the uc/mc
1584 * list has changed. The entire uc/mc list is programmed in be_set_rx_mode().
1585 */
1586static int be_uc_list_update(struct net_device *netdev,
1587                             const unsigned char *addr)
1588{
1589        struct be_adapter *adapter = netdev_priv(netdev);
1590
1591        adapter->update_uc_list = true;
1592        return 0;
1593}
1594
1595static int be_mc_list_update(struct net_device *netdev,
1596                             const unsigned char *addr)
1597{
1598        struct be_adapter *adapter = netdev_priv(netdev);
1599
1600        adapter->update_mc_list = true;
1601        return 0;
1602}
1603
1604static void be_set_mc_list(struct be_adapter *adapter)
1605{
1606        struct net_device *netdev = adapter->netdev;
1607        struct netdev_hw_addr *ha;
1608        bool mc_promisc = false;
1609        int status;
1610
1611        netif_addr_lock_bh(netdev);
1612        __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1613
1614        if (netdev->flags & IFF_PROMISC) {
1615                adapter->update_mc_list = false;
1616        } else if (netdev->flags & IFF_ALLMULTI ||
1617                   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1618                /* Enable multicast promisc if the number of addresses
1619                 * configured exceeds what we support
1620                 */
1621                mc_promisc = true;
1622                adapter->update_mc_list = false;
1623        } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1624                /* Update mc-list unconditionally if the iface was previously
1625                 * in mc-promisc mode and now is out of that mode.
1626                 */
1627                adapter->update_mc_list = true;
1628        }
1629
1630        if (adapter->update_mc_list) {
1631                int i = 0;
1632
1633                /* cache the mc-list in adapter */
1634                netdev_for_each_mc_addr(ha, netdev) {
1635                        ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1636                        i++;
1637                }
1638                adapter->mc_count = netdev_mc_count(netdev);
1639        }
1640        netif_addr_unlock_bh(netdev);
1641
1642        if (mc_promisc) {
1643                be_set_mc_promisc(adapter);
1644        } else if (adapter->update_mc_list) {
1645                status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1646                if (!status)
1647                        adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1648                else
1649                        be_set_mc_promisc(adapter);
1650
1651                adapter->update_mc_list = false;
1652        }
1653}
1654
1655static void be_clear_mc_list(struct be_adapter *adapter)
1656{
1657        struct net_device *netdev = adapter->netdev;
1658
1659        __dev_mc_unsync(netdev, NULL);
1660        be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1661        adapter->mc_count = 0;
1662}
1663
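/* If the uc-list entry is the interface's own MAC (dev_mac), reuse the
 * pmac_id already programmed for it instead of adding a duplicate
 * MAC-filter entry in the FW.
 */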
1664static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1665{
1666        if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1667                adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1668                return 0;
1669        }
1670
1671        return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1672                               adapter->if_handle,
1673                               &adapter->pmac_id[uc_idx + 1], 0);
1674}
1675
1676static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1677{
1678        if (pmac_id == adapter->pmac_id[0])
1679                return;
1680
1681        be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1682}
1683
1684static void be_set_uc_list(struct be_adapter *adapter)
1685{
1686        struct net_device *netdev = adapter->netdev;
1687        struct netdev_hw_addr *ha;
1688        bool uc_promisc = false;
1689        int curr_uc_macs = 0, i;
1690
1691        netif_addr_lock_bh(netdev);
1692        __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1693
1694        if (netdev->flags & IFF_PROMISC) {
1695                adapter->update_uc_list = false;
1696        } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1697                uc_promisc = true;
1698                adapter->update_uc_list = false;
1699        }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1700                /* Update uc-list unconditionally if the iface was previously
1701                 * in uc-promisc mode and now is out of that mode.
1702                 */
1703                adapter->update_uc_list = true;
1704        }
1705
1706        if (adapter->update_uc_list) {
1707                /* cache the uc-list in adapter array */
1708                i = 0;
1709                netdev_for_each_uc_addr(ha, netdev) {
1710                        ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1711                        i++;
1712                }
1713                curr_uc_macs = netdev_uc_count(netdev);
1714        }
1715        netif_addr_unlock_bh(netdev);
1716
1717        if (uc_promisc) {
1718                be_set_uc_promisc(adapter);
1719        } else if (adapter->update_uc_list) {
1720                be_clear_uc_promisc(adapter);
1721
1722                for (i = 0; i < adapter->uc_macs; i++)
1723                        be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1724
1725                for (i = 0; i < curr_uc_macs; i++)
1726                        be_uc_mac_add(adapter, i);
1727                adapter->uc_macs = curr_uc_macs;
1728                adapter->update_uc_list = false;
1729        }
1730}
1731
1732static void be_clear_uc_list(struct be_adapter *adapter)
1733{
1734        struct net_device *netdev = adapter->netdev;
1735        int i;
1736
1737        __dev_uc_unsync(netdev, NULL);
1738        for (i = 0; i < adapter->uc_macs; i++)
1739                be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1740
1741        adapter->uc_macs = 0;
1742}
1743
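/* Re-program the RX filters (promiscuous, VLAN, unicast and multicast lists)
 * to match the current netdev state; called from the be_work_set_rx_mode()
 * worker.
 */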
1744static void __be_set_rx_mode(struct be_adapter *adapter)
1745{
1746        struct net_device *netdev = adapter->netdev;
1747
1748        mutex_lock(&adapter->rx_filter_lock);
1749
1750        if (netdev->flags & IFF_PROMISC) {
1751                if (!be_in_all_promisc(adapter))
1752                        be_set_all_promisc(adapter);
1753        } else if (be_in_all_promisc(adapter)) {
1754                /* We need to re-program the vlan-list or clear
1755                 * vlan-promisc mode (if needed) when the interface
1756                 * comes out of promisc mode.
1757                 */
1758                be_vid_config(adapter);
1759        }
1760
1761        be_set_uc_list(adapter);
1762        be_set_mc_list(adapter);
1763
1764        mutex_unlock(&adapter->rx_filter_lock);
1765}
1766
1767static void be_work_set_rx_mode(struct work_struct *work)
1768{
1769        struct be_cmd_work *cmd_work =
1770                                container_of(work, struct be_cmd_work, work);
1771
1772        __be_set_rx_mode(cmd_work->adapter);
1773        kfree(cmd_work);
1774}
1775
1776static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1777{
1778        struct be_adapter *adapter = netdev_priv(netdev);
1779        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1780        int status;
1781
1782        if (!sriov_enabled(adapter))
1783                return -EPERM;
1784
1785        if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1786                return -EINVAL;
1787
1788        /* Proceed further only if the user-provided MAC differs
1789         * from the currently active MAC
1790         */
1791        if (ether_addr_equal(mac, vf_cfg->mac_addr))
1792                return 0;
1793
1794        if (BEx_chip(adapter)) {
1795                be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1796                                vf + 1);
1797
1798                status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1799                                         &vf_cfg->pmac_id, vf + 1);
1800        } else {
1801                status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1802                                        vf + 1);
1803        }
1804
1805        if (status) {
1806                dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1807                        mac, vf, status);
1808                return be_cmd_status(status);
1809        }
1810
1811        ether_addr_copy(vf_cfg->mac_addr, mac);
1812
1813        return 0;
1814}
1815
1816static int be_get_vf_config(struct net_device *netdev, int vf,
1817                            struct ifla_vf_info *vi)
1818{
1819        struct be_adapter *adapter = netdev_priv(netdev);
1820        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1821
1822        if (!sriov_enabled(adapter))
1823                return -EPERM;
1824
1825        if (vf >= adapter->num_vfs)
1826                return -EINVAL;
1827
1828        vi->vf = vf;
1829        vi->max_tx_rate = vf_cfg->tx_rate;
1830        vi->min_tx_rate = 0;
1831        vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1832        vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1833        memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1834        vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1835        vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1836
1837        return 0;
1838}
1839
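/* Enable Transparent VLAN Tagging (TVT) for a VF: program the VLAN in the
 * switch (hsw config), clear any guest-programmed VLAN filters and revoke
 * the VF's FILTMGMT privilege so it cannot override the tag.
 */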
1840static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1841{
1842        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1843        u16 vids[BE_NUM_VLANS_SUPPORTED];
1844        int vf_if_id = vf_cfg->if_handle;
1845        int status;
1846
1847        /* Enable Transparent VLAN Tagging */
1848        status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1849        if (status)
1850                return status;
1851
1852        /* With TVT enabled, clear any VLAN filters pre-programmed on the VF */
1853        vids[0] = 0;
1854        status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1855        if (!status)
1856                dev_info(&adapter->pdev->dev,
1857                         "Cleared guest VLANs on VF%d", vf);
1858
1859        /* After TVT is enabled, disallow the VF from programming VLAN filters */
1860        if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1861                status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1862                                                  ~BE_PRIV_FILTMGMT, vf + 1);
1863                if (!status)
1864                        vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1865        }
1866        return 0;
1867}
1868
1869static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1870{
1871        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1872        struct device *dev = &adapter->pdev->dev;
1873        int status;
1874
1875        /* Reset Transparent VLAN Tagging. */
1876        status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1877                                       vf_cfg->if_handle, 0, 0);
1878        if (status)
1879                return status;
1880
1881        /* Allow VFs to program VLAN filtering */
1882        if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1883                status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1884                                                  BE_PRIV_FILTMGMT, vf + 1);
1885                if (!status) {
1886                        vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1887                        dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1888                }
1889        }
1890
1891        dev_info(dev,
1892                 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1893        return 0;
1894}
1895
1896static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1897                          __be16 vlan_proto)
1898{
1899        struct be_adapter *adapter = netdev_priv(netdev);
1900        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1901        int status;
1902
1903        if (!sriov_enabled(adapter))
1904                return -EPERM;
1905
1906        if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1907                return -EINVAL;
1908
1909        if (vlan_proto != htons(ETH_P_8021Q))
1910                return -EPROTONOSUPPORT;
1911
1912        if (vlan || qos) {
1913                vlan |= qos << VLAN_PRIO_SHIFT;
1914                status = be_set_vf_tvt(adapter, vf, vlan);
1915        } else {
1916                status = be_clear_vf_tvt(adapter, vf);
1917        }
1918
1919        if (status) {
1920                dev_err(&adapter->pdev->dev,
1921                        "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1922                        status);
1923                return be_cmd_status(status);
1924        }
1925
1926        vf_cfg->vlan_tag = vlan;
1927        return 0;
1928}
1929
1930static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1931                             int min_tx_rate, int max_tx_rate)
1932{
1933        struct be_adapter *adapter = netdev_priv(netdev);
1934        struct device *dev = &adapter->pdev->dev;
1935        int percent_rate, status = 0;
1936        u16 link_speed = 0;
1937        u8 link_status;
1938
1939        if (!sriov_enabled(adapter))
1940                return -EPERM;
1941
1942        if (vf >= adapter->num_vfs)
1943                return -EINVAL;
1944
1945        if (min_tx_rate)
1946                return -EINVAL;
1947
1948        if (!max_tx_rate)
1949                goto config_qos;
1950
1951        status = be_cmd_link_status_query(adapter, &link_speed,
1952                                          &link_status, 0);
1953        if (status)
1954                goto err;
1955
1956        if (!link_status) {
1957                dev_err(dev, "TX-rate setting not allowed when link is down\n");
1958                status = -ENETDOWN;
1959                goto err;
1960        }
1961
1962        if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1963                dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1964                        link_speed);
1965                status = -EINVAL;
1966                goto err;
1967        }
1968
1969        /* On Skyhawk the QoS setting is programmed only as a % of link speed */
1970        percent_rate = link_speed / 100;
1971        if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1972                dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1973                        percent_rate);
1974                status = -EINVAL;
1975                goto err;
1976        }
1977
1978config_qos:
1979        status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1980        if (status)
1981                goto err;
1982
1983        adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1984        return 0;
1985
1986err:
1987        dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1988                max_tx_rate, vf);
1989        return be_cmd_status(status);
1990}
1991
1992static int be_set_vf_link_state(struct net_device *netdev, int vf,
1993                                int link_state)
1994{
1995        struct be_adapter *adapter = netdev_priv(netdev);
1996        int status;
1997
1998        if (!sriov_enabled(adapter))
1999                return -EPERM;
2000
2001        if (vf >= adapter->num_vfs)
2002                return -EINVAL;
2003
2004        status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2005        if (status) {
2006                dev_err(&adapter->pdev->dev,
2007                        "Link state change on VF %d failed: %#x\n", vf, status);
2008                return be_cmd_status(status);
2009        }
2010
2011        adapter->vf_cfg[vf].plink_tracking = link_state;
2012
2013        return 0;
2014}
2015
2016static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2017{
2018        struct be_adapter *adapter = netdev_priv(netdev);
2019        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2020        u8 spoofchk;
2021        int status;
2022
2023        if (!sriov_enabled(adapter))
2024                return -EPERM;
2025
2026        if (vf >= adapter->num_vfs)
2027                return -EINVAL;
2028
2029        if (BEx_chip(adapter))
2030                return -EOPNOTSUPP;
2031
2032        if (enable == vf_cfg->spoofchk)
2033                return 0;
2034
2035        spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2036
2037        status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2038                                       0, spoofchk);
2039        if (status) {
2040                dev_err(&adapter->pdev->dev,
2041                        "Spoofchk change on VF %d failed: %#x\n", vf, status);
2042                return be_cmd_status(status);
2043        }
2044
2045        vf_cfg->spoofchk = enable;
2046        return 0;
2047}
2048
2049static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2050                          ulong now)
2051{
2052        aic->rx_pkts_prev = rx_pkts;
2053        aic->tx_reqs_prev = tx_pkts;
2054        aic->jiffies = now;
2055}
2056
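/* Adaptive interrupt coalescing: sample the RX/TX packet counters of the
 * queues on this EQ, compute the packet rate since the previous sample and
 * derive a new event-queue delay, clamped to the [min_eqd, max_eqd] range
 * configured for this EQ.
 */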
2057static int be_get_new_eqd(struct be_eq_obj *eqo)
2058{
2059        struct be_adapter *adapter = eqo->adapter;
2060        int eqd, start;
2061        struct be_aic_obj *aic;
2062        struct be_rx_obj *rxo;
2063        struct be_tx_obj *txo;
2064        u64 rx_pkts = 0, tx_pkts = 0;
2065        ulong now;
2066        u32 pps, delta;
2067        int i;
2068
2069        aic = &adapter->aic_obj[eqo->idx];
2070        if (!aic->enable) {
2071                if (aic->jiffies)
2072                        aic->jiffies = 0;
2073                eqd = aic->et_eqd;
2074                return eqd;
2075        }
2076
2077        for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2078                do {
2079                        start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2080                        rx_pkts += rxo->stats.rx_pkts;
2081                } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2082        }
2083
2084        for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2085                do {
2086                        start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2087                        tx_pkts += txo->stats.tx_reqs;
2088                } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2089        }
2090
2091        /* Skip if the counters wrapped around or this is the first calculation */
2092        now = jiffies;
2093        if (!aic->jiffies || time_before(now, aic->jiffies) ||
2094            rx_pkts < aic->rx_pkts_prev ||
2095            tx_pkts < aic->tx_reqs_prev) {
2096                be_aic_update(aic, rx_pkts, tx_pkts, now);
2097                return aic->prev_eqd;
2098        }
2099
2100        delta = jiffies_to_msecs(now - aic->jiffies);
2101        if (delta == 0)
2102                return aic->prev_eqd;
2103
2104        pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2105                (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2106        eqd = (pps / 15000) << 2;
2107
2108        if (eqd < 8)
2109                eqd = 0;
2110        eqd = min_t(u32, eqd, aic->max_eqd);
2111        eqd = max_t(u32, eqd, aic->min_eqd);
2112
2113        be_aic_update(aic, rx_pkts, tx_pkts, now);
2114
2115        return eqd;
2116}
2117
2118/* For Skyhawk-R only */
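/* Map the adaptive eqd value from be_get_new_eqd() to one of the coarse
 * R2I delay-multiplier encodings (R2I_DLY_ENC_0..3) that Skyhawk-R uses
 * in place of an exact delay value.
 */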
2119static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2120{
2121        struct be_adapter *adapter = eqo->adapter;
2122        struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2123        ulong now = jiffies;
2124        int eqd;
2125        u32 mult_enc;
2126
2127        if (!aic->enable)
2128                return 0;
2129
2130        if (jiffies_to_msecs(now - aic->jiffies) < 1)
2131                eqd = aic->prev_eqd;
2132        else
2133                eqd = be_get_new_eqd(eqo);
2134
2135        if (eqd > 100)
2136                mult_enc = R2I_DLY_ENC_1;
2137        else if (eqd > 60)
2138                mult_enc = R2I_DLY_ENC_2;
2139        else if (eqd > 20)
2140                mult_enc = R2I_DLY_ENC_3;
2141        else
2142                mult_enc = R2I_DLY_ENC_0;
2143
2144        aic->prev_eqd = eqd;
2145
2146        return mult_enc;
2147}
2148
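/* Recompute the delay for every event queue and push any values that changed
 * (or all of them, if force_update) to the FW via be_cmd_modify_eqd().
 */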
2149void be_eqd_update(struct be_adapter *adapter, bool force_update)
2150{
2151        struct be_set_eqd set_eqd[MAX_EVT_QS];
2152        struct be_aic_obj *aic;
2153        struct be_eq_obj *eqo;
2154        int i, num = 0, eqd;
2155
2156        for_all_evt_queues(adapter, eqo, i) {
2157                aic = &adapter->aic_obj[eqo->idx];
2158                eqd = be_get_new_eqd(eqo);
2159                if (force_update || eqd != aic->prev_eqd) {
2160                        set_eqd[num].delay_multiplier = (eqd * 65)/100;
2161                        set_eqd[num].eq_id = eqo->q.id;
2162                        aic->prev_eqd = eqd;
2163                        num++;
2164                }
2165        }
2166
2167        if (num)
2168                be_cmd_modify_eqd(adapter, set_eqd, num);
2169}
2170
2171static void be_rx_stats_update(struct be_rx_obj *rxo,
2172                               struct be_rx_compl_info *rxcp)
2173{
2174        struct be_rx_stats *stats = rx_stats(rxo);
2175
2176        u64_stats_update_begin(&stats->sync);
2177        stats->rx_compl++;
2178        stats->rx_bytes += rxcp->pkt_size;
2179        stats->rx_pkts++;
2180        if (rxcp->tunneled)
2181                stats->rx_vxlan_offload_pkts++;
2182        if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2183                stats->rx_mcast_pkts++;
2184        if (rxcp->err)
2185                stats->rx_compl_err++;
2186        u64_stats_update_end(&stats->sync);
2187}
2188
2189static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2190{
2191        /* L4 checksum is not reliable for non-TCP/UDP packets.
2192         * Also ignore ipcksm for IPv6 pkts.
2193         */
2194        return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2195                (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2196}
2197
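/* Pop the page_info entry at the RXQ tail. The backing big page is
 * DMA-unmapped only when this fragment is the last one carved from it;
 * otherwise the fragment is just synced for CPU access.
 */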
2198static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2199{
2200        struct be_adapter *adapter = rxo->adapter;
2201        struct be_rx_page_info *rx_page_info;
2202        struct be_queue_info *rxq = &rxo->q;
2203        u32 frag_idx = rxq->tail;
2204
2205        rx_page_info = &rxo->page_info_tbl[frag_idx];
2206        BUG_ON(!rx_page_info->page);
2207
2208        if (rx_page_info->last_frag) {
2209                dma_unmap_page(&adapter->pdev->dev,
2210                               dma_unmap_addr(rx_page_info, bus),
2211                               adapter->big_page_size, DMA_FROM_DEVICE);
2212                rx_page_info->last_frag = false;
2213        } else {
2214                dma_sync_single_for_cpu(&adapter->pdev->dev,
2215                                        dma_unmap_addr(rx_page_info, bus),
2216                                        rx_frag_size, DMA_FROM_DEVICE);
2217        }
2218
2219        queue_tail_inc(rxq);
2220        atomic_dec(&rxq->used);
2221        return rx_page_info;
2222}
2223
2224/* Throw away the data in the Rx completion */
2225static void be_rx_compl_discard(struct be_rx_obj *rxo,
2226                                struct be_rx_compl_info *rxcp)
2227{
2228        struct be_rx_page_info *page_info;
2229        u16 i, num_rcvd = rxcp->num_rcvd;
2230
2231        for (i = 0; i < num_rcvd; i++) {
2232                page_info = get_rx_page_info(rxo);
2233                put_page(page_info->page);
2234                memset(page_info, 0, sizeof(*page_info));
2235        }
2236}
2237
2238/*
2239 * skb_fill_rx_data forms a complete skb for an ether frame
2240 * indicated by rxcp.
2241 */
2242static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2243                             struct be_rx_compl_info *rxcp)
2244{
2245        struct be_rx_page_info *page_info;
2246        u16 i, j;
2247        u16 hdr_len, curr_frag_len, remaining;
2248        u8 *start;
2249
2250        page_info = get_rx_page_info(rxo);
2251        start = page_address(page_info->page) + page_info->page_offset;
2252        prefetch(start);
2253
2254        /* Copy data in the first descriptor of this completion */
2255        curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2256
2257        skb->len = curr_frag_len;
2258        if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2259                memcpy(skb->data, start, curr_frag_len);
2260                /* Complete packet has now been moved to data */
2261                put_page(page_info->page);
2262                skb->data_len = 0;
2263                skb->tail += curr_frag_len;
2264        } else {
2265                hdr_len = ETH_HLEN;
2266                memcpy(skb->data, start, hdr_len);
2267                skb_shinfo(skb)->nr_frags = 1;
2268                skb_frag_set_page(skb, 0, page_info->page);
2269                skb_shinfo(skb)->frags[0].page_offset =
2270                                        page_info->page_offset + hdr_len;
2271                skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2272                                  curr_frag_len - hdr_len);
2273                skb->data_len = curr_frag_len - hdr_len;
2274                skb->truesize += rx_frag_size;
2275                skb->tail += hdr_len;
2276        }
2277        page_info->page = NULL;
2278
2279        if (rxcp->pkt_size <= rx_frag_size) {
2280                BUG_ON(rxcp->num_rcvd != 1);
2281                return;
2282        }
2283
2284        /* More frags present for this completion */
2285        remaining = rxcp->pkt_size - curr_frag_len;
2286        for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2287                page_info = get_rx_page_info(rxo);
2288                curr_frag_len = min(remaining, rx_frag_size);
2289
2290                /* Coalesce all frags from the same physical page in one slot */
2291                if (page_info->page_offset == 0) {
2292                        /* Fresh page */
2293                        j++;
2294                        skb_frag_set_page(skb, j, page_info->page);
2295                        skb_shinfo(skb)->frags[j].page_offset =
2296                                                        page_info->page_offset;
2297                        skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2298                        skb_shinfo(skb)->nr_frags++;
2299                } else {
2300                        put_page(page_info->page);
2301                }
2302
2303                skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2304                skb->len += curr_frag_len;
2305                skb->data_len += curr_frag_len;
2306                skb->truesize += rx_frag_size;
2307                remaining -= curr_frag_len;
2308                page_info->page = NULL;
2309        }
2310        BUG_ON(j > MAX_SKB_FRAGS);
2311}
2312
2313/* Process the RX completion indicated by rxcp when GRO is disabled */
2314static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2315                                struct be_rx_compl_info *rxcp)
2316{
2317        struct be_adapter *adapter = rxo->adapter;
2318        struct net_device *netdev = adapter->netdev;
2319        struct sk_buff *skb;
2320
2321        skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2322        if (unlikely(!skb)) {
2323                rx_stats(rxo)->rx_drops_no_skbs++;
2324                be_rx_compl_discard(rxo, rxcp);
2325                return;
2326        }
2327
2328        skb_fill_rx_data(rxo, skb, rxcp);
2329
2330        if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2331                skb->ip_summed = CHECKSUM_UNNECESSARY;
2332        else
2333                skb_checksum_none_assert(skb);
2334
2335        skb->protocol = eth_type_trans(skb, netdev);
2336        skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2337        if (netdev->features & NETIF_F_RXHASH)
2338                skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2339
2340        skb->csum_level = rxcp->tunneled;
2341        skb_mark_napi_id(skb, napi);
2342
2343        if (rxcp->vlanf)
2344                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2345
2346        netif_receive_skb(skb);
2347}
2348
2349/* Process the RX completion indicated by rxcp when GRO is enabled */
2350static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2351                                    struct napi_struct *napi,
2352                                    struct be_rx_compl_info *rxcp)
2353{
2354        struct be_adapter *adapter = rxo->adapter;
2355        struct be_rx_page_info *page_info;
2356        struct sk_buff *skb = NULL;
2357        u16 remaining, curr_frag_len;
2358        u16 i, j;
2359
2360        skb = napi_get_frags(napi);
2361        if (!skb) {
2362                be_rx_compl_discard(rxo, rxcp);
2363                return;
2364        }
2365
2366        remaining = rxcp->pkt_size;
2367        for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2368                page_info = get_rx_page_info(rxo);
2369
2370                curr_frag_len = min(remaining, rx_frag_size);
2371
2372                /* Coalesce all frags from the same physical page in one slot */
2373                if (i == 0 || page_info->page_offset == 0) {
2374                        /* First frag or Fresh page */
2375                        j++;
2376                        skb_frag_set_page(skb, j, page_info->page);
2377                        skb_shinfo(skb)->frags[j].page_offset =
2378                                                        page_info->page_offset;
2379                        skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2380                } else {
2381                        put_page(page_info->page);
2382                }
2383                skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2384                skb->truesize += rx_frag_size;
2385                remaining -= curr_frag_len;
2386                memset(page_info, 0, sizeof(*page_info));
2387        }
2388        BUG_ON(j > MAX_SKB_FRAGS);
2389
2390        skb_shinfo(skb)->nr_frags = j + 1;
2391        skb->len = rxcp->pkt_size;
2392        skb->data_len = rxcp->pkt_size;
2393        skb->ip_summed = CHECKSUM_UNNECESSARY;
2394        skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2395        if (adapter->netdev->features & NETIF_F_RXHASH)
2396                skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2397
2398        skb->csum_level = rxcp->tunneled;
2399
2400        if (rxcp->vlanf)
2401                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2402
2403        napi_gro_frags(napi);
2404}
2405
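/* Extract the fields of a v1 (BE3-native) RX completion into the
 * chip-independent be_rx_compl_info used by the RX path.
 */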
2406static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2407                                 struct be_rx_compl_info *rxcp)
2408{
2409        rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2410        rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2411        rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2412        rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2413        rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2414        rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2415        rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2416        rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2417        rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2418        rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2419        rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2420        if (rxcp->vlanf) {
2421                rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2422                rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2423        }
2424        rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2425        rxcp->tunneled =
2426                GET_RX_COMPL_V1_BITS(tunneled, compl);
2427}
2428
2429static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2430                                 struct be_rx_compl_info *rxcp)
2431{
2432        rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2433        rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2434        rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2435        rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2436        rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2437        rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2438        rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2439        rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2440        rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2441        rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2442        rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2443        if (rxcp->vlanf) {
2444                rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2445                rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2446        }
2447        rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2448        rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2449}
2450
2451static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2452{
2453        struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2454        struct be_rx_compl_info *rxcp = &rxo->rxcp;
2455        struct be_adapter *adapter = rxo->adapter;
2456
2457        /* For checking the valid bit it is OK to use either definition, as the
2458         * valid bit is at the same position in both v0 and v1 Rx compl */
2459        if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2460                return NULL;
2461
2462        rmb();
2463        be_dws_le_to_cpu(compl, sizeof(*compl));
2464
2465        if (adapter->be3_native)
2466                be_parse_rx_compl_v1(compl, rxcp);
2467        else
2468                be_parse_rx_compl_v0(compl, rxcp);
2469
2470        if (rxcp->ip_frag)
2471                rxcp->l4_csum = 0;
2472
2473        if (rxcp->vlanf) {
2474                /* In QNQ modes, if qnq bit is not set, then the packet was
2475                 * tagged only with the transparent outer vlan-tag and must
2476                 * not be treated as a vlan packet by host
2477                 */
2478                if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2479                        rxcp->vlanf = 0;
2480
2481                if (!lancer_chip(adapter))
2482                        rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2483
2484                if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2485                    !test_bit(rxcp->vlan_tag, adapter->vids))
2486                        rxcp->vlanf = 0;
2487        }
2488
2489        /* As the compl has been parsed, reset it; we won't touch it again */
2490        compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2491
2492        queue_tail_inc(&rxo->cq);
2493        return rxcp;
2494}
2495
2496static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2497{
2498        u32 order = get_order(size);
2499
2500        if (order > 0)
2501                gfp |= __GFP_COMP;
2502        return  alloc_pages(gfp, order);
2503}
2504
2505/*
2506 * Allocate a page, split it into fragments of size rx_frag_size and post
2507 * them as receive buffers to BE
2508 */
2509static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2510{
2511        struct be_adapter *adapter = rxo->adapter;
2512        struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2513        struct be_queue_info *rxq = &rxo->q;
2514        struct page *pagep = NULL;
2515        struct device *dev = &adapter->pdev->dev;
2516        struct be_eth_rx_d *rxd;
2517        u64 page_dmaaddr = 0, frag_dmaaddr;
2518        u32 posted, page_offset = 0, notify = 0;
2519
2520        page_info = &rxo->page_info_tbl[rxq->head];
2521        for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2522                if (!pagep) {
2523                        pagep = be_alloc_pages(adapter->big_page_size, gfp);
2524                        if (unlikely(!pagep)) {
2525                                rx_stats(rxo)->rx_post_fail++;
2526                                break;
2527                        }
2528                        page_dmaaddr = dma_map_page(dev, pagep, 0,
2529                                                    adapter->big_page_size,
2530                                                    DMA_FROM_DEVICE);
2531                        if (dma_mapping_error(dev, page_dmaaddr)) {
2532                                put_page(pagep);
2533                                pagep = NULL;
2534                                adapter->drv_stats.dma_map_errors++;
2535                                break;
2536                        }
2537                        page_offset = 0;
2538                } else {
2539                        get_page(pagep);
2540                        page_offset += rx_frag_size;
2541                }
2542                page_info->page_offset = page_offset;
2543                page_info->page = pagep;
2544
2545                rxd = queue_head_node(rxq);
2546                frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2547                rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2548                rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2549
2550                /* Any space left in the current big page for another frag? */
2551                if ((page_offset + rx_frag_size + rx_frag_size) >
2552                                        adapter->big_page_size) {
2553                        pagep = NULL;
2554                        page_info->last_frag = true;
2555                        dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2556                } else {
2557                        dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2558                }
2559
2560                prev_page_info = page_info;
2561                queue_head_inc(rxq);
2562                page_info = &rxo->page_info_tbl[rxq->head];
2563        }
2564
2565        /* Mark the last frag of a page when we break out of the above loop
2566         * with no more slots available in the RXQ
2567         */
2568        if (pagep) {
2569                prev_page_info->last_frag = true;
2570                dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2571        }
2572
2573        if (posted) {
2574                atomic_add(posted, &rxq->used);
2575                if (rxo->rx_post_starved)
2576                        rxo->rx_post_starved = false;
2577                do {
2578                        notify = min(MAX_NUM_POST_ERX_DB, posted);
2579                        be_rxq_notify(adapter, rxq->id, notify);
2580                        posted -= notify;
2581                } while (posted);
2582        } else if (atomic_read(&rxq->used) == 0) {
2583                /* Let be_worker replenish when memory is available */
2584                rxo->rx_post_starved = true;
2585        }
2586}
2587
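/* Return the next valid TX completion from the CQ (or NULL), convert it to
 * CPU endianness and invalidate the entry so it is not seen again.
 */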
2588static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2589{
2590        struct be_queue_info *tx_cq = &txo->cq;
2591        struct be_tx_compl_info *txcp = &txo->txcp;
2592        struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2593
2594        if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2595                return NULL;
2596
2597        /* Ensure load ordering of valid bit dword and other dwords below */
2598        rmb();
2599        be_dws_le_to_cpu(compl, sizeof(*compl));
2600
2601        txcp->status = GET_TX_COMPL_BITS(status, compl);
2602        txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2603
2604        compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2605        queue_tail_inc(tx_cq);
2606        return txcp;
2607}
2608
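/* Unmap the TX fragments and free the skbs for all WRBs up to and including
 * last_index; returns the number of WRBs reclaimed so the caller can credit
 * them back to the TXQ.
 */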
2609static u16 be_tx_compl_process(struct be_adapter *adapter,
2610                               struct be_tx_obj *txo, u16 last_index)
2611{
2612        struct sk_buff **sent_skbs = txo->sent_skb_list;
2613        struct be_queue_info *txq = &txo->q;
2614        struct sk_buff *skb = NULL;
2615        bool unmap_skb_hdr = false;
2616        struct be_eth_wrb *wrb;
2617        u16 num_wrbs = 0;
2618        u32 frag_index;
2619
2620        do {
2621                if (sent_skbs[txq->tail]) {
2622                        /* Free skb from prev req */
2623                        if (skb)
2624                                dev_consume_skb_any(skb);
2625                        skb = sent_skbs[txq->tail];
2626                        sent_skbs[txq->tail] = NULL;
2627                        queue_tail_inc(txq);  /* skip hdr wrb */
2628                        num_wrbs++;
2629                        unmap_skb_hdr = true;
2630                }
2631                wrb = queue_tail_node(txq);
2632                frag_index = txq->tail;
2633                unmap_tx_frag(&adapter->pdev->dev, wrb,
2634                              (unmap_skb_hdr && skb_headlen(skb)));
2635                unmap_skb_hdr = false;
2636                queue_tail_inc(txq);
2637                num_wrbs++;
2638        } while (frag_index != last_index);
2639        dev_consume_skb_any(skb);
2640
2641        return num_wrbs;
2642}
2643
2644/* Return the number of events in the event queue */
2645static inline int events_get(struct be_eq_obj *eqo)
2646{
2647        struct be_eq_entry *eqe;
2648        int num = 0;
2649
2650        do {
2651                eqe = queue_tail_node(&eqo->q);
2652                if (eqe->evt == 0)
2653                        break;
2654
2655                rmb();
2656                eqe->evt = 0;
2657                num++;
2658                queue_tail_inc(&eqo->q);
2659        } while (true);
2660
2661        return num;
2662}
2663
2664/* Leaves the EQ in disarmed state */
2665static void be_eq_clean(struct be_eq_obj *eqo)
2666{
2667        int num = events_get(eqo);
2668
2669        be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2670}
2671
2672/* Free posted rx buffers that were not used */
2673static void be_rxq_clean(struct be_rx_obj *rxo)
2674{
2675        struct be_queue_info *rxq = &rxo->q;
2676        struct be_rx_page_info *page_info;
2677
2678        while (atomic_read(&rxq->used) > 0) {
2679                page_info = get_rx_page_info(rxo);
2680                put_page(page_info->page);
2681                memset(page_info, 0, sizeof(*page_info));
2682        }
2683        BUG_ON(atomic_read(&rxq->used));
2684        rxq->tail = 0;
2685        rxq->head = 0;
2686}
2687
2688static void be_rx_cq_clean(struct be_rx_obj *rxo)
2689{
2690        struct be_queue_info *rx_cq = &rxo->cq;
2691        struct be_rx_compl_info *rxcp;
2692        struct be_adapter *adapter = rxo->adapter;
2693        int flush_wait = 0;
2694
2695        /* Consume pending rx completions.
2696         * Wait for the flush completion (identified by zero num_rcvd)
2697         * to arrive. Notify CQ even when there are no more CQ entries
2698         * for HW to flush partially coalesced CQ entries.
2699         * In Lancer, there is no need to wait for flush compl.
2700         */
2701        for (;;) {
2702                rxcp = be_rx_compl_get(rxo);
2703                if (!rxcp) {
2704                        if (lancer_chip(adapter))
2705                                break;
2706
2707                        if (flush_wait++ > 50 ||
2708                            be_check_error(adapter,
2709                                           BE_ERROR_HW)) {
2710                                dev_warn(&adapter->pdev->dev,
2711                                         "did not receive flush compl\n");
2712                                break;
2713                        }
2714                        be_cq_notify(adapter, rx_cq->id, true, 0);
2715                        mdelay(1);
2716                } else {
2717                        be_rx_compl_discard(rxo, rxcp);
2718                        be_cq_notify(adapter, rx_cq->id, false, 1);
2719                        if (rxcp->num_rcvd == 0)
2720                                break;
2721                }
2722        }
2723
2724        /* After cleanup, leave the CQ in unarmed state */
2725        be_cq_notify(adapter, rx_cq->id, false, 0);
2726}
2727
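/* Drain all outstanding TX completions, waiting until the HW has been silent
 * for 10ms, then reclaim any WRBs that were enqueued but never notified to HW.
 */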
2728static void be_tx_compl_clean(struct be_adapter *adapter)
2729{
2730        struct device *dev = &adapter->pdev->dev;
2731        u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2732        struct be_tx_compl_info *txcp;
2733        struct be_queue_info *txq;
2734        u32 end_idx, notified_idx;
2735        struct be_tx_obj *txo;
2736        int i, pending_txqs;
2737
2738        /* Stop polling for compls when HW has been silent for 10ms */
2739        do {
2740                pending_txqs = adapter->num_tx_qs;
2741
2742                for_all_tx_queues(adapter, txo, i) {
2743                        cmpl = 0;
2744                        num_wrbs = 0;
2745                        txq = &txo->q;
2746                        while ((txcp = be_tx_compl_get(txo))) {
2747                                num_wrbs +=
2748                                        be_tx_compl_process(adapter, txo,
2749                                                            txcp->end_index);
2750                                cmpl++;
2751                        }
2752                        if (cmpl) {
2753                                be_cq_notify(adapter, txo->cq.id, false, cmpl);
2754                                atomic_sub(num_wrbs, &txq->used);
2755                                timeo = 0;
2756                        }
2757                        if (!be_is_tx_compl_pending(txo))
2758                                pending_txqs--;
2759                }
2760
2761                if (pending_txqs == 0 || ++timeo > 10 ||
2762                    be_check_error(adapter, BE_ERROR_HW))
2763                        break;
2764
2765                mdelay(1);
2766        } while (true);
2767
2768        /* Free enqueued TX that was never notified to HW */
2769        for_all_tx_queues(adapter, txo, i) {
2770                txq = &txo->q;
2771
2772                if (atomic_read(&txq->used)) {
2773                        dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2774                                 i, atomic_read(&txq->used));
2775                        notified_idx = txq->tail;
2776                        end_idx = txq->tail;
2777                        index_adv(&end_idx, atomic_read(&txq->used) - 1,
2778                                  txq->len);
2779                        /* Use the tx-compl process logic to handle requests
2780                         * that were not sent to the HW.
2781                         */
2782                        num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2783                        atomic_sub(num_wrbs, &txq->used);
2784                        BUG_ON(atomic_read(&txq->used));
2785                        txo->pend_wrb_cnt = 0;
2786                        /* Since hw was never notified of these requests,
2787                         * reset TXQ indices
2788                         */
2789                        txq->head = notified_idx;
2790                        txq->tail = notified_idx;
2791                }
2792        }
2793}
2794
2795static void be_evt_queues_destroy(struct be_adapter *adapter)
2796{
2797        struct be_eq_obj *eqo;
2798        int i;
2799
2800        for_all_evt_queues(adapter, eqo, i) {
2801                if (eqo->q.created) {
2802                        be_eq_clean(eqo);
2803                        be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2804                        netif_napi_del(&eqo->napi);
2805                        free_cpumask_var(eqo->affinity_mask);
2806                }
2807                be_queue_free(adapter, &eqo->q);
2808        }
2809}
2810
2811static int be_evt_queues_create(struct be_adapter *adapter)
2812{
2813        struct be_queue_info *eq;
2814        struct be_eq_obj *eqo;
2815        struct be_aic_obj *aic;
2816        int i, rc;
2817
2818        /* need enough EQs to service both RX and TX queues */
2819        adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2820                                    max(adapter->cfg_num_rx_irqs,
2821                                        adapter->cfg_num_tx_irqs));
2822
2823        for_all_evt_queues(adapter, eqo, i) {
2824                int numa_node = dev_to_node(&adapter->pdev->dev);
2825
2826                aic = &adapter->aic_obj[i];
2827                eqo->adapter = adapter;
2828                eqo->idx = i;
2829                aic->max_eqd = BE_MAX_EQD;
2830                aic->enable = true;
2831
2832                eq = &eqo->q;
2833                rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2834                                    sizeof(struct be_eq_entry));
2835                if (rc)
2836                        return rc;
2837
2838                rc = be_cmd_eq_create(adapter, eqo);
2839                if (rc)
2840                        return rc;
2841
2842                if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2843                        return -ENOMEM;
2844                cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2845                                eqo->affinity_mask);
2846                netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2847                               BE_NAPI_WEIGHT);
2848        }
2849        return 0;
2850}
2851
2852static void be_mcc_queues_destroy(struct be_adapter *adapter)
2853{
2854        struct be_queue_info *q;
2855
2856        q = &adapter->mcc_obj.q;
2857        if (q->created)
2858                be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2859        be_queue_free(adapter, q);
2860
2861        q = &adapter->mcc_obj.cq;
2862        if (q->created)
2863                be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2864        be_queue_free(adapter, q);
2865}
2866
2867/* Must be called only after TX qs are created as MCC shares TX EQ */
2868static int be_mcc_queues_create(struct be_adapter *adapter)
2869{
2870        struct be_queue_info *q, *cq;
2871
2872        cq = &adapter->mcc_obj.cq;
2873        if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2874                           sizeof(struct be_mcc_compl)))
2875                goto err;
2876
2877        /* Use the default EQ for MCC completions */
2878        if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2879                goto mcc_cq_free;
2880
2881        q = &adapter->mcc_obj.q;
2882        if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2883                goto mcc_cq_destroy;
2884
2885        if (be_cmd_mccq_create(adapter, q, cq))
2886                goto mcc_q_free;
2887
2888        return 0;
2889
2890mcc_q_free:
2891        be_queue_free(adapter, q);
2892mcc_cq_destroy:
2893        be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2894mcc_cq_free:
2895        be_queue_free(adapter, cq);
2896err:
2897        return -1;
2898}
2899
2900static void be_tx_queues_destroy(struct be_adapter *adapter)
2901{
2902        struct be_queue_info *q;
2903        struct be_tx_obj *txo;
2904        u8 i;
2905
2906        for_all_tx_queues(adapter, txo, i) {
2907                q = &txo->q;
2908                if (q->created)
2909                        be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2910                be_queue_free(adapter, q);
2911
2912                q = &txo->cq;
2913                if (q->created)
2914                        be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2915                be_queue_free(adapter, q);
2916        }
2917}
2918
2919static int be_tx_qs_create(struct be_adapter *adapter)
2920{
2921        struct be_queue_info *cq;
2922        struct be_tx_obj *txo;
2923        struct be_eq_obj *eqo;
2924        int status, i;
2925
2926        adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2927
2928        for_all_tx_queues(adapter, txo, i) {
2929                cq = &txo->cq;
2930                status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2931                                        sizeof(struct be_eth_tx_compl));
2932                if (status)
2933                        return status;
2934
2935                u64_stats_init(&txo->stats.sync);
2936                u64_stats_init(&txo->stats.sync_compl);
2937
2938                /* If num_evt_qs is less than num_tx_qs, then more than
2939                 * one txq share an eq
2940                 */
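                    /* e.g. with 4 EQs and 8 TX queues, txq0 and txq4 both land
                     * on eq0 via the modulo below.
                     */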
2941                eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2942                status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2943                if (status)
2944                        return status;
2945
2946                status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2947                                        sizeof(struct be_eth_wrb));
2948                if (status)
2949                        return status;
2950
2951                status = be_cmd_txq_create(adapter, txo);
2952                if (status)
2953                        return status;
2954
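                    /* Steer transmits with XPS to the CPUs that service this
                     * queue's EQ, so TX completions are processed locally.
                     */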
2955                netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2956                                    eqo->idx);
2957        }
2958
2959        dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2960                 adapter->num_tx_qs);
2961        return 0;
2962}
2963
2964static void be_rx_cqs_destroy(struct be_adapter *adapter)
2965{
2966        struct be_queue_info *q;
2967        struct be_rx_obj *rxo;
2968        int i;
2969
2970        for_all_rx_queues(adapter, rxo, i) {
2971                q = &rxo->cq;
2972                if (q->created)
2973                        be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2974                be_queue_free(adapter, q);
2975        }
2976}
2977
2978static int be_rx_cqs_create(struct be_adapter *adapter)
2979{
2980        struct be_queue_info *eq, *cq;
2981        struct be_rx_obj *rxo;
2982        int rc, i;
2983
2984        adapter->num_rss_qs =
2985                        min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2986
2987        /* We'll use RSS only if at least 2 RSS rings are supported. */
2988        if (adapter->num_rss_qs < 2)
2989                adapter->num_rss_qs = 0;
2990
2991        adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2992
2993        /* When the interface is not capable of RSS rings (and there is no
2994         * need to create a default RXQ) we'll still need one RXQ
2995         */
2996        if (adapter->num_rx_qs == 0)
2997                adapter->num_rx_qs = 1;
2998
2999        adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3000        for_all_rx_queues(adapter, rxo, i) {
3001                rxo->adapter = adapter;
3002                cq = &rxo->cq;
3003                rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3004                                    sizeof(struct be_eth_rx_compl));
3005                if (rc)
3006                        return rc;
3007
3008                u64_stats_init(&rxo->stats.sync);
3009                eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3010                rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3011                if (rc)
3012                        return rc;
3013        }
3014
3015        dev_info(&adapter->pdev->dev,
3016                 "created %d RX queue(s)\n", adapter->num_rx_qs);
3017        return 0;
3018}
3019
3020static irqreturn_t be_intx(int irq, void *dev)
3021{
3022        struct be_eq_obj *eqo = dev;
3023        struct be_adapter *adapter = eqo->adapter;
3024        int num_evts = 0;
3025
3026        /* IRQ is not expected when NAPI is scheduled as the EQ
3027         * will not be armed.
3028         * But, this can happen on Lancer INTx where it takes
3029         * a while to de-assert INTx or in BE2 where occasionally
3030         * an interrupt may be raised even when EQ is unarmed.
3031         * If NAPI is already scheduled, then counting & notifying
3032         * events will orphan them.
3033         */
3034        if (napi_schedule_prep(&eqo->napi)) {
3035                num_evts = events_get(eqo);
3036                __napi_schedule(&eqo->napi);
3037                if (num_evts)
3038                        eqo->spurious_intr = 0;
3039        }
3040        be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3041
3042        /* Return IRQ_HANDLED only for the first spurious intr
3043         * after a valid intr to stop the kernel from branding
3044         * this irq as a bad one!
3045         */
3046        if (num_evts || eqo->spurious_intr++ == 0)
3047                return IRQ_HANDLED;
3048        else
3049                return IRQ_NONE;
3050}
3051
3052static irqreturn_t be_msix(int irq, void *dev)
3053{
3054        struct be_eq_obj *eqo = dev;
3055
3056        be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3057        napi_schedule(&eqo->napi);
3058        return IRQ_HANDLED;
3059}
3060
3061static inline bool do_gro(struct be_rx_compl_info *rxcp)
3062{
3063        return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3064}
3065
3066static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3067                         int budget, int polling)
3068{
3069        struct be_adapter *adapter = rxo->adapter;
3070        struct be_queue_info *rx_cq = &rxo->cq;
3071        struct be_rx_compl_info *rxcp;
3072        u32 work_done;
3073        u32 frags_consumed = 0;
3074
3075        for (work_done = 0; work_done < budget; work_done++) {
3076                rxcp = be_rx_compl_get(rxo);
3077                if (!rxcp)
3078                        break;
3079
3080                /* Is it a flush compl that has no data */
3081                if (unlikely(rxcp->num_rcvd == 0))
3082                        goto loop_continue;
3083
3084                /* Discard compl with partial DMA Lancer B0 */
3085                if (unlikely(!rxcp->pkt_size)) {
3086                        be_rx_compl_discard(rxo, rxcp);
3087                        goto loop_continue;
3088                }
3089
3090                /* On BE drop pkts that arrive due to imperfect filtering in
3091                 * promiscuous mode on some SKUs
3092                 */
3093                if (unlikely(rxcp->port != adapter->port_num &&
3094                             !lancer_chip(adapter))) {
3095                        be_rx_compl_discard(rxo, rxcp);
3096                        goto loop_continue;
3097                }
3098
3099                /* Don't do gro when we're busy_polling */
3100                if (do_gro(rxcp) && polling != BUSY_POLLING)
3101                        be_rx_compl_process_gro(rxo, napi, rxcp);
3102                else
3103                        be_rx_compl_process(rxo, napi, rxcp);
3104
3105loop_continue:
3106                frags_consumed += rxcp->num_rcvd;
3107                be_rx_stats_update(rxo, rxcp);
3108        }
3109
3110        if (work_done) {
3111                be_cq_notify(adapter, rx_cq->id, true, work_done);
3112
3113                /* When an rx-obj gets into post_starved state, just
3114                 * let be_worker do the posting.
3115                 */
3116                if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3117                    !rxo->rx_post_starved)
3118                        be_post_rx_frags(rxo, GFP_ATOMIC,
3119                                         max_t(u32, MAX_RX_POST,
3120                                               frags_consumed));
3121        }
3122
3123        return work_done;
3124}
3125
3126static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3127{
3128        switch (status) {
3129        case BE_TX_COMP_HDR_PARSE_ERR:
3130                tx_stats(txo)->tx_hdr_parse_err++;
3131                break;
3132        case BE_TX_COMP_NDMA_ERR:
3133                tx_stats(txo)->tx_dma_err++;
3134                break;
3135        case BE_TX_COMP_ACL_ERR:
3136                tx_stats(txo)->tx_spoof_check_err++;
3137                break;
3138        }
3139}
3140
3141static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3142{
3143        switch (status) {
3144        case LANCER_TX_COMP_LSO_ERR:
3145                tx_stats(txo)->tx_tso_err++;
3146                break;
3147        case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3148        case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3149                tx_stats(txo)->tx_spoof_check_err++;
3150                break;
3151        case LANCER_TX_COMP_QINQ_ERR:
3152                tx_stats(txo)->tx_qinq_err++;
3153                break;
3154        case LANCER_TX_COMP_PARITY_ERR:
3155                tx_stats(txo)->tx_internal_parity_err++;
3156                break;
3157        case LANCER_TX_COMP_DMA_ERR:
3158                tx_stats(txo)->tx_dma_err++;
3159                break;
3160        }
3161}
3162
3163static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3164                          int idx)
3165{
3166        int num_wrbs = 0, work_done = 0;
3167        struct be_tx_compl_info *txcp;
3168
3169        while ((txcp = be_tx_compl_get(txo))) {
3170                num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3171                work_done++;
3172
3173                if (txcp->status) {
3174                        if (lancer_chip(adapter))
3175                                lancer_update_tx_err(txo, txcp->status);
3176                        else
3177                                be_update_tx_err(txo, txcp->status);
3178                }
3179        }
3180
3181        if (work_done) {
3182                be_cq_notify(adapter, txo->cq.id, true, work_done);
3183                atomic_sub(num_wrbs, &txo->q.used);
3184
3185                /* As Tx wrbs have been freed up, wake up netdev queue
3186                 * if it was stopped due to lack of tx wrbs.  */
3187                if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3188                    be_can_txq_wake(txo)) {
3189                        netif_wake_subqueue(adapter->netdev, idx);
3190                }
3191
3192                u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3193                tx_stats(txo)->tx_compl += work_done;
3194                u64_stats_update_end(&tx_stats(txo)->sync_compl);
3195        }
3196}
3197
3198#ifdef CONFIG_NET_RX_BUSY_POLL
3199static inline bool be_lock_napi(struct be_eq_obj *eqo)
3200{
3201        bool status = true;
3202
3203        spin_lock(&eqo->lock); /* BH is already disabled */
3204        if (eqo->state & BE_EQ_LOCKED) {
3205                WARN_ON(eqo->state & BE_EQ_NAPI);
3206                eqo->state |= BE_EQ_NAPI_YIELD;
3207                status = false;
3208        } else {
3209                eqo->state = BE_EQ_NAPI;
3210        }
3211        spin_unlock(&eqo->lock);
3212        return status;
3213}
3214
3215static inline void be_unlock_napi(struct be_eq_obj *eqo)
3216{
3217        spin_lock(&eqo->lock); /* BH is already disabled */
3218
3219        WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3220        eqo->state = BE_EQ_IDLE;
3221
3222        spin_unlock(&eqo->lock);
3223}
3224
3225static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3226{
3227        bool status = true;
3228
3229        spin_lock_bh(&eqo->lock);
3230        if (eqo->state & BE_EQ_LOCKED) {
3231                eqo->state |= BE_EQ_POLL_YIELD;
3232                status = false;
3233        } else {
3234                eqo->state |= BE_EQ_POLL;
3235        }
3236        spin_unlock_bh(&eqo->lock);
3237        return status;
3238}
3239
3240static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3241{
3242        spin_lock_bh(&eqo->lock);
3243
3244        WARN_ON(eqo->state & (BE_EQ_NAPI));
3245        eqo->state = BE_EQ_IDLE;
3246
3247        spin_unlock_bh(&eqo->lock);
3248}
3249
3250static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3251{
3252        spin_lock_init(&eqo->lock);
3253        eqo->state = BE_EQ_IDLE;
3254}
3255
3256static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3257{
3258        local_bh_disable();
3259
3260        /* It's enough to just acquire napi lock on the eqo to stop
3261         * be_busy_poll() from processing any queues.
3262         */
3263        while (!be_lock_napi(eqo))
3264                mdelay(1);
3265
3266        local_bh_enable();
3267}
3268
3269#else /* CONFIG_NET_RX_BUSY_POLL */
3270
3271static inline bool be_lock_napi(struct be_eq_obj *eqo)
3272{
3273        return true;
3274}
3275
3276static inline void be_unlock_napi(struct be_eq_obj *eqo)
3277{
3278}
3279
3280static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3281{
3282        return false;
3283}
3284
3285static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3286{
3287}
3288
3289static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3290{
3291}
3292
3293static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3294{
3295}
3296#endif /* CONFIG_NET_RX_BUSY_POLL */
3297
3298int be_poll(struct napi_struct *napi, int budget)
3299{
3300        struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3301        struct be_adapter *adapter = eqo->adapter;
3302        int max_work = 0, work, i, num_evts;
3303        struct be_rx_obj *rxo;
3304        struct be_tx_obj *txo;
3305        u32 mult_enc = 0;
3306
3307        num_evts = events_get(eqo);
3308
3309        for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3310                be_process_tx(adapter, txo, i);
3311
3312        if (be_lock_napi(eqo)) {
3313                /* This loop will iterate twice for EQ0 in which
3314                 * completions of the last RXQ (default one) are also processed
3315                 * For other EQs the loop iterates only once
3316                 */
3317                for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3318                        work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3319                        max_work = max(work, max_work);
3320                }
3321                be_unlock_napi(eqo);
3322        } else {
3323                max_work = budget;
3324        }
3325
3326        if (is_mcc_eqo(eqo))
3327                be_process_mcc(adapter);
3328
3329        if (max_work < budget) {
3330                napi_complete(napi);
3331
3332                /* Skyhawk's EQ_DB has a provision to set the re-arm-to-interrupt
3333                 * delay via a delay multiplier encoding value
3334                 */
3335                if (skyhawk_chip(adapter))
3336                        mult_enc = be_get_eq_delay_mult_enc(eqo);
3337
3338                be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3339                             mult_enc);
3340        } else {
3341                /* As we'll continue in polling mode, count and clear events */
3342                be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3343        }
3344        return max_work;
3345}
3346
3347#ifdef CONFIG_NET_RX_BUSY_POLL
3348static int be_busy_poll(struct napi_struct *napi)
3349{
3350        struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3351        struct be_adapter *adapter = eqo->adapter;
3352        struct be_rx_obj *rxo;
3353        int i, work = 0;
3354
3355        if (!be_lock_busy_poll(eqo))
3356                return LL_FLUSH_BUSY;
3357
3358        for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3359                work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3360                if (work)
3361                        break;
3362        }
3363
3364        be_unlock_busy_poll(eqo);
3365        return work;
3366}
3367#endif
3368
3369void be_detect_error(struct be_adapter *adapter)
3370{
3371        u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3372        u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3373        u32 i;
3374        struct device *dev = &adapter->pdev->dev;
3375
3376        if (be_check_error(adapter, BE_ERROR_HW))
3377                return;
3378
3379        if (lancer_chip(adapter)) {
3380                sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3381                if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3382                        be_set_error(adapter, BE_ERROR_UE);
3383                        sliport_err1 = ioread32(adapter->db +
3384                                                SLIPORT_ERROR1_OFFSET);
3385                        sliport_err2 = ioread32(adapter->db +
3386                                                SLIPORT_ERROR2_OFFSET);
3387                        /* Do not log error messages if it's a FW reset */
3388                        if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3389                            sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3390                                dev_info(dev, "Firmware update in progress\n");
3391                        } else {
3392                                dev_err(dev, "Error detected in the card\n");
3393                                dev_err(dev, "ERR: sliport status 0x%x\n",
3394                                        sliport_status);
3395                                dev_err(dev, "ERR: sliport error1 0x%x\n",
3396                                        sliport_err1);
3397                                dev_err(dev, "ERR: sliport error2 0x%x\n",
3398                                        sliport_err2);
3399                        }
3400                }
3401        } else {
3402                ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3403                ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3404                ue_lo_mask = ioread32(adapter->pcicfg +
3405                                      PCICFG_UE_STATUS_LOW_MASK);
3406                ue_hi_mask = ioread32(adapter->pcicfg +
3407                                      PCICFG_UE_STATUS_HI_MASK);
3408
3409                ue_lo = (ue_lo & ~ue_lo_mask);
3410                ue_hi = (ue_hi & ~ue_hi_mask);
3411
3412                /* On certain platforms BE hardware can indicate spurious UEs.
3413                 * Hence the hw_error flag is not set on UE detection; in case of
3414                 * a real UE the HW is allowed to stop working completely.
3415                 */
3416
3417                if (ue_lo || ue_hi) {
3418                        dev_err(dev, "Error detected in the adapter\n");
3419                        if (skyhawk_chip(adapter))
3420                                be_set_error(adapter, BE_ERROR_UE);
3421
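                            /* Walk the unmasked UE bits and log the name of the
                             * offending hardware block for each bit that is set.
                             */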
3422                        for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3423                                if (ue_lo & 1)
3424                                        dev_err(dev, "UE: %s bit set\n",
3425                                                ue_status_low_desc[i]);
3426                        }
3427                        for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3428                                if (ue_hi & 1)
3429                                        dev_err(dev, "UE: %s bit set\n",
3430                                                ue_status_hi_desc[i]);
3431                        }
3432                }
3433        }
3434}
3435
3436static void be_msix_disable(struct be_adapter *adapter)
3437{
3438        if (msix_enabled(adapter)) {
3439                pci_disable_msix(adapter->pdev);
3440                adapter->num_msix_vec = 0;
3441                adapter->num_msix_roce_vec = 0;
3442        }
3443}
3444
3445static int be_msix_enable(struct be_adapter *adapter)
3446{
3447        unsigned int i, max_roce_eqs;
3448        struct device *dev = &adapter->pdev->dev;
3449        int num_vec;
3450
3451        /* If RoCE is supported, program the max number of vectors that
3452         * could be used for NIC and RoCE, else, just program the number
3453         * we'll use initially.
3454         */
3455        if (be_roce_supported(adapter)) {
3456                max_roce_eqs =
3457                        be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3458                max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3459                num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3460        } else {
3461                num_vec = max(adapter->cfg_num_rx_irqs,
3462                              adapter->cfg_num_tx_irqs);
3463        }
3464
3465        for (i = 0; i < num_vec; i++)
3466                adapter->msix_entries[i].entry = i;
3467
3468        num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3469                                        MIN_MSIX_VECTORS, num_vec);
3470        if (num_vec < 0)
3471                goto fail;
3472
3473        if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
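                    /* Split the granted vectors roughly in half: RoCE gets
                     * num_vec / 2 and the NIC keeps the remainder.
                     */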
3474                adapter->num_msix_roce_vec = num_vec / 2;
3475                dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3476                         adapter->num_msix_roce_vec);
3477        }
3478
3479        adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3480
3481        dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3482                 adapter->num_msix_vec);
3483        return 0;
3484
3485fail:
3486        dev_warn(dev, "MSIx enable failed\n");
3487
3488        /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3489        if (be_virtfn(adapter))
3490                return num_vec;
3491        return 0;
3492}
3493
3494static inline int be_msix_vec_get(struct be_adapter *adapter,
3495                                  struct be_eq_obj *eqo)
3496{
3497        return adapter->msix_entries[eqo->msix_idx].vector;
3498}
3499
3500static int be_msix_register(struct be_adapter *adapter)
3501{
3502        struct net_device *netdev = adapter->netdev;
3503        struct be_eq_obj *eqo;
3504        int status, i, vec;
3505
3506        for_all_evt_queues(adapter, eqo, i) {
3507                sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3508                vec = be_msix_vec_get(adapter, eqo);
3509                status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3510                if (status)
3511                        goto err_msix;
3512
3513                irq_set_affinity_hint(vec, eqo->affinity_mask);
3514        }
3515
3516        return 0;
3517err_msix:
3518        for (i--; i >= 0; i--) {
3519                eqo = &adapter->eq_obj[i];
3520                free_irq(be_msix_vec_get(adapter, eqo), eqo);
3521        }
3522        dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3523                 status);
3524        be_msix_disable(adapter);
3525        return status;
3526}
3527
3528static int be_irq_register(struct be_adapter *adapter)
3529{
3530        struct net_device *netdev = adapter->netdev;
3531        int status;
3532
3533        if (msix_enabled(adapter)) {
3534                status = be_msix_register(adapter);
3535                if (status == 0)
3536                        goto done;
3537                /* INTx is not supported for VF */
3538                if (be_virtfn(adapter))
3539                        return status;
3540        }
3541
3542        /* INTx: only the first EQ is used */
3543        netdev->irq = adapter->pdev->irq;
3544        status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3545                             &adapter->eq_obj[0]);
3546        if (status) {
3547                dev_err(&adapter->pdev->dev,
3548                        "INTx request IRQ failed - err %d\n", status);
3549                return status;
3550        }
3551done:
3552        adapter->isr_registered = true;
3553        return 0;
3554}
3555
3556static void be_irq_unregister(struct be_adapter *adapter)
3557{
3558        struct net_device *netdev = adapter->netdev;
3559        struct be_eq_obj *eqo;
3560        int i, vec;
3561
3562        if (!adapter->isr_registered)
3563                return;
3564
3565        /* INTx */
3566        if (!msix_enabled(adapter)) {
3567                free_irq(netdev->irq, &adapter->eq_obj[0]);
3568                goto done;
3569        }
3570
3571        /* MSIx */
3572        for_all_evt_queues(adapter, eqo, i) {
3573                vec = be_msix_vec_get(adapter, eqo);
3574                irq_set_affinity_hint(vec, NULL);
3575                free_irq(vec, eqo);
3576        }
3577
3578done:
3579        adapter->isr_registered = false;
3580}
3581
3582static void be_rx_qs_destroy(struct be_adapter *adapter)
3583{
3584        struct rss_info *rss = &adapter->rss_info;
3585        struct be_queue_info *q;
3586        struct be_rx_obj *rxo;
3587        int i;
3588
3589        for_all_rx_queues(adapter, rxo, i) {
3590                q = &rxo->q;
3591                if (q->created) {
3592                        /* If RXQs are destroyed while in an "out of buffer"
3593                         * state, there is a possibility of an HW stall on
3594                         * Lancer. So, post 64 buffers to each queue to relieve
3595                         * the "out of buffer" condition.
3596                         * Make sure there's space in the RXQ before posting.
3597                         */
3598                        if (lancer_chip(adapter)) {
3599                                be_rx_cq_clean(rxo);
3600                                if (atomic_read(&q->used) == 0)
3601                                        be_post_rx_frags(rxo, GFP_KERNEL,
3602                                                         MAX_RX_POST);
3603                        }
3604
3605                        be_cmd_rxq_destroy(adapter, q);
3606                        be_rx_cq_clean(rxo);
3607                        be_rxq_clean(rxo);
3608                }
3609                be_queue_free(adapter, q);
3610        }
3611
3612        if (rss->rss_flags) {
3613                rss->rss_flags = RSS_ENABLE_NONE;
3614                be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3615                                  128, rss->rss_hkey);
3616        }
3617}
3618
3619static void be_disable_if_filters(struct be_adapter *adapter)
3620{
3621        /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3622        if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3623            check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3624                be_dev_mac_del(adapter, adapter->pmac_id[0]);
3625                eth_zero_addr(adapter->dev_mac);
3626        }
3627
3628        be_clear_uc_list(adapter);
3629        be_clear_mc_list(adapter);
3630
3631        /* The IFACE flags are enabled in the open path and cleared
3632         * in the close path. When a VF gets detached from the host and
3633         * assigned to a VM the following happens:
3634         *      - VF's IFACE flags get cleared in the detach path
3635         *      - IFACE create is issued by the VF in the attach path
3636         * Due to a bug in the BE3/Skyhawk-R FW
3637         * (Lancer FW doesn't have the bug), the IFACE capability flags
3638         * specified along with the IFACE create cmd issued by a VF are not
3639         * honoured by FW.  As a consequence, if a *new* driver
3640         * (that enables/disables IFACE flags in open/close)
3641         * is loaded in the host and an *old* driver is used by a VM/VF,
3642         * the IFACE gets created *without* the needed flags.
3643         * To avoid this, disable RX-filter flags only for Lancer.
3644         */
3645        if (lancer_chip(adapter)) {
3646                be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3647                adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3648        }
3649}
3650
3651static int be_close(struct net_device *netdev)
3652{
3653        struct be_adapter *adapter = netdev_priv(netdev);
3654        struct be_eq_obj *eqo;
3655        int i;
3656
3657        /* This protection is needed as be_close() may be called even when the
3658         * adapter is in a cleared state (after an EEH permanent failure)
3659         */
3660        if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3661                return 0;
3662
3663        /* Before attempting cleanup ensure all the pending cmds in the
3664         * config_wq have finished execution
3665         */
3666        flush_workqueue(be_wq);
3667
3668        be_disable_if_filters(adapter);
3669
3670        if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3671                for_all_evt_queues(adapter, eqo, i) {
3672                        napi_disable(&eqo->napi);
3673                        be_disable_busy_poll(eqo);
3674                }
3675                adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3676        }
3677
3678        be_async_mcc_disable(adapter);
3679
3680        /* Wait for all pending tx completions to arrive so that
3681         * all tx skbs are freed.
3682         */
3683        netif_tx_disable(netdev);
3684        be_tx_compl_clean(adapter);
3685
3686        be_rx_qs_destroy(adapter);
3687
3688        for_all_evt_queues(adapter, eqo, i) {
3689                if (msix_enabled(adapter))
3690                        synchronize_irq(be_msix_vec_get(adapter, eqo));
3691                else
3692                        synchronize_irq(netdev->irq);
3693                be_eq_clean(eqo);
3694        }
3695
3696        be_irq_unregister(adapter);
3697
3698        return 0;
3699}
3700
3701static int be_rx_qs_create(struct be_adapter *adapter)
3702{
3703        struct rss_info *rss = &adapter->rss_info;
3704        u8 rss_key[RSS_HASH_KEY_LEN];
3705        struct be_rx_obj *rxo;
3706        int rc, i, j;
3707
3708        for_all_rx_queues(adapter, rxo, i) {
3709                rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3710                                    sizeof(struct be_eth_rx_d));
3711                if (rc)
3712                        return rc;
3713        }
3714
3715        if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3716                rxo = default_rxo(adapter);
3717                rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3718                                       rx_frag_size, adapter->if_handle,
3719                                       false, &rxo->rss_id);
3720                if (rc)
3721                        return rc;
3722        }
3723
3724        for_all_rss_queues(adapter, rxo, i) {
3725                rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3726                                       rx_frag_size, adapter->if_handle,
3727                                       true, &rxo->rss_id);
3728                if (rc)
3729                        return rc;
3730        }
3731
3732        if (be_multi_rxq(adapter)) {
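                    /* Stripe the RSS queue ids round-robin until all
                     * RSS_INDIR_TABLE_LEN indirection table entries are filled.
                     */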
3733                for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3734                        for_all_rss_queues(adapter, rxo, i) {
3735                                if ((j + i) >= RSS_INDIR_TABLE_LEN)
3736                                        break;
3737                                rss->rsstable[j + i] = rxo->rss_id;
3738                                rss->rss_queue[j + i] = i;
3739                        }
3740                }
3741                rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3742                        RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3743
3744                if (!BEx_chip(adapter))
3745                        rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3746                                RSS_ENABLE_UDP_IPV6;
3747
3748                netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3749                rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3750                                       RSS_INDIR_TABLE_LEN, rss_key);
3751                if (rc) {
3752                        rss->rss_flags = RSS_ENABLE_NONE;
3753                        return rc;
3754                }
3755
3756                memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3757        } else {
3758                /* Disable RSS, if only default RX Q is created */
3759                rss->rss_flags = RSS_ENABLE_NONE;
3760        }
3761
3762
3763        /* Post 1 less than RXQ-len to avoid head being equal to tail,
3764         * which is a queue empty condition
3765         */
3766        for_all_rx_queues(adapter, rxo, i)
3767                be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3768
3769        return 0;
3770}
3771
3772static int be_enable_if_filters(struct be_adapter *adapter)
3773{
3774        int status;
3775
3776        status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3777        if (status)
3778                return status;
3779
3780        /* Normally this condition is true, as ->dev_mac is zeroed.
3781         * But on BE3 VFs the initial MAC is pre-programmed by PF and
3782         * subsequent be_dev_mac_add() can fail (after fresh boot)
3783         */
3784        if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3785                int old_pmac_id = -1;
3786
3787                /* Remember old programmed MAC if any - can happen on BE3 VF */
3788                if (!is_zero_ether_addr(adapter->dev_mac))
3789                        old_pmac_id = adapter->pmac_id[0];
3790
3791                status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3792                if (status)
3793                        return status;
3794
3795                /* Delete the old programmed MAC as we successfully programmed
3796                 * a new MAC
3797                 */
3798                if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3799                        be_dev_mac_del(adapter, old_pmac_id);
3800
3801                ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3802        }
3803
3804        if (adapter->vlans_added)
3805                be_vid_config(adapter);
3806
3807        __be_set_rx_mode(adapter);
3808
3809        return 0;
3810}
3811
3812static int be_open(struct net_device *netdev)
3813{
3814        struct be_adapter *adapter = netdev_priv(netdev);
3815        struct be_eq_obj *eqo;
3816        struct be_rx_obj *rxo;
3817        struct be_tx_obj *txo;
3818        u8 link_status;
3819        int status, i;
3820
3821        status = be_rx_qs_create(adapter);
3822        if (status)
3823                goto err;
3824
3825        status = be_enable_if_filters(adapter);
3826        if (status)
3827                goto err;
3828
3829        status = be_irq_register(adapter);
3830        if (status)
3831                goto err;
3832
3833        for_all_rx_queues(adapter, rxo, i)
3834                be_cq_notify(adapter, rxo->cq.id, true, 0);
3835
3836        for_all_tx_queues(adapter, txo, i)
3837                be_cq_notify(adapter, txo->cq.id, true, 0);
3838
3839        be_async_mcc_enable(adapter);
3840
3841        for_all_evt_queues(adapter, eqo, i) {
3842                napi_enable(&eqo->napi);
3843                be_enable_busy_poll(eqo);
3844                be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3845        }
3846        adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3847
3848        status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3849        if (!status)
3850                be_link_status_update(adapter, link_status);
3851
3852        netif_tx_start_all_queues(netdev);
3853        if (skyhawk_chip(adapter))
3854                udp_tunnel_get_rx_info(netdev);
3855
3856        return 0;
3857err:
3858        be_close(adapter->netdev);
3859        return -EIO;
3860}
3861
3862static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3863{
3864        u32 addr;
3865
3866        addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3867
3868        mac[5] = (u8)(addr & 0xFF);
3869        mac[4] = (u8)((addr >> 8) & 0xFF);
3870        mac[3] = (u8)((addr >> 16) & 0xFF);
3871        /* Use the OUI from the current MAC address */
3872        memcpy(mac, adapter->netdev->dev_addr, 3);
3873}
3874
3875/*
3876 * Generate a seed MAC address from the PF MAC address using jhash.
3877 * MAC addresses for VFs are assigned incrementally starting from the seed.
3878 * These addresses are programmed in the ASIC by the PF and the VF driver
3879 * queries for the MAC address during its probe.
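 * For example, if the seed's last octet is 0x10, the first VF is programmed
 * with last octet 0x10, the next with 0x11, and so on (u8 arithmetic, so it
 * wraps).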
3880 */
3881static int be_vf_eth_addr_config(struct be_adapter *adapter)
3882{
3883        u32 vf;
3884        int status = 0;
3885        u8 mac[ETH_ALEN];
3886        struct be_vf_cfg *vf_cfg;
3887
3888        be_vf_eth_addr_generate(adapter, mac);
3889
3890        for_all_vfs(adapter, vf_cfg, vf) {
3891                if (BEx_chip(adapter))
3892                        status = be_cmd_pmac_add(adapter, mac,
3893                                                 vf_cfg->if_handle,
3894                                                 &vf_cfg->pmac_id, vf + 1);
3895                else
3896                        status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3897                                                vf + 1);
3898
3899                if (status)
3900                        dev_err(&adapter->pdev->dev,
3901                                "Mac address assignment failed for VF %d\n",
3902                                vf);
3903                else
3904                        memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3905
3906                mac[5] += 1;
3907        }
3908        return status;
3909}
3910
3911static int be_vfs_mac_query(struct be_adapter *adapter)
3912{
3913        int status, vf;
3914        u8 mac[ETH_ALEN];
3915        struct be_vf_cfg *vf_cfg;
3916
3917        for_all_vfs(adapter, vf_cfg, vf) {
3918                status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3919                                               mac, vf_cfg->if_handle,
3920                                               false, vf+1);
3921                if (status)
3922                        return status;
3923                memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3924        }
3925        return 0;
3926}
3927
3928static void be_vf_clear(struct be_adapter *adapter)
3929{
3930        struct be_vf_cfg *vf_cfg;
3931        u32 vf;
3932
3933        if (pci_vfs_assigned(adapter->pdev)) {
3934                dev_warn(&adapter->pdev->dev,
3935                         "VFs are assigned to VMs: not disabling VFs\n");
3936                goto done;
3937        }
3938
3939        pci_disable_sriov(adapter->pdev);
3940
3941        for_all_vfs(adapter, vf_cfg, vf) {
3942                if (BEx_chip(adapter))
3943                        be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3944                                        vf_cfg->pmac_id, vf + 1);
3945                else
3946                        be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3947                                       vf + 1);
3948
3949                be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3950        }
3951
3952        if (BE3_chip(adapter))
3953                be_cmd_set_hsw_config(adapter, 0, 0,
3954                                      adapter->if_handle,
3955                                      PORT_FWD_TYPE_PASSTHRU, 0);
3956done:
3957        kfree(adapter->vf_cfg);
3958        adapter->num_vfs = 0;
3959        adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3960}
3961
3962static void be_clear_queues(struct be_adapter *adapter)
3963{
3964        be_mcc_queues_destroy(adapter);
3965        be_rx_cqs_destroy(adapter);
3966        be_tx_queues_destroy(adapter);
3967        be_evt_queues_destroy(adapter);
3968}
3969
3970static void be_cancel_worker(struct be_adapter *adapter)
3971{
3972        if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3973                cancel_delayed_work_sync(&adapter->work);
3974                adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3975        }
3976}
3977
3978static void be_cancel_err_detection(struct be_adapter *adapter)
3979{
3980        struct be_error_recovery *err_rec = &adapter->error_recovery;
3981
3982        if (!be_err_recovery_workq)
3983                return;
3984
3985        if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3986                cancel_delayed_work_sync(&err_rec->err_detection_work);
3987                adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3988        }
3989}
3990
3991static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3992{
3993        struct net_device *netdev = adapter->netdev;
3994
3995        if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3996                be_cmd_manage_iface(adapter, adapter->if_handle,
3997                                    OP_CONVERT_TUNNEL_TO_NORMAL);
3998
3999        if (adapter->vxlan_port)
4000                be_cmd_set_vxlan_port(adapter, 0);
4001
4002        adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4003        adapter->vxlan_port = 0;
4004
4005        netdev->hw_enc_features = 0;
4006        netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
4007        netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
4008}
4009
4010static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4011                                struct be_resources *vft_res)
4012{
4013        struct be_resources res = adapter->pool_res;
4014        u32 vf_if_cap_flags = res.vf_if_cap_flags;
4015        struct be_resources res_mod = {0};
4016        u16 num_vf_qs = 1;
4017
4018        /* Distribute the queue resources among the PF and its VFs */
4019        if (num_vfs) {
4020                /* Divide the rx queues evenly among the VFs and the PF, capped
4021                 * at VF-EQ-count. Any remainder queues belong to the PF.
4022                 */
4023                num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4024                                res.max_rss_qs / (num_vfs + 1));
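                    /* e.g. a pool of 32 RSS queues shared with 7 VFs gives
                     * each function 32 / 8 = 4 queues, subject to the
                     * SH_VF_MAX_NIC_EQS cap.
                     */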
4025
4026                /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4027                 * RSS Tables per port. Provide RSS on VFs only if the number of
4028                 * VFs requested is less than its PF Pool's RSS Tables limit.
4029                 */
4030                if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4031                        num_vf_qs = 1;
4032        }
4033
4034        /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4035         * which are modifiable using SET_PROFILE_CONFIG cmd.
4036         */
4037        be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4038                                  RESOURCE_MODIFIABLE, 0);
4039
4040        /* If RSS IFACE capability flags are modifiable for a VF, set the
4041         * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4042         * more than 1 RSSQ is available for a VF.
4043         * Otherwise, provision only 1 queue pair for VF.
4044         */
4045        if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4046                vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4047                if (num_vf_qs > 1) {
4048                        vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4049                        if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4050                                vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4051                } else {
4052                        vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4053                                             BE_IF_FLAGS_DEFQ_RSS);
4054                }
4055        } else {
4056                num_vf_qs = 1;
4057        }
4058
4059        if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4060                vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4061                vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4062        }
4063
4064        vft_res->vf_if_cap_flags = vf_if_cap_flags;
4065        vft_res->max_rx_qs = num_vf_qs;
4066        vft_res->max_rss_qs = num_vf_qs;
4067        vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4068        vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4069
4070        /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4071         * among the PF and its VFs, if the fields are changeable
4072         */
4073        if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4074                vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4075
4076        if (res_mod.max_vlans == FIELD_MODIFIABLE)
4077                vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4078
4079        if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4080                vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4081
4082        if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4083                vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4084}
4085
4086static void be_if_destroy(struct be_adapter *adapter)
4087{
4088        be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4089
4090        kfree(adapter->pmac_id);
4091        adapter->pmac_id = NULL;
4092
4093        kfree(adapter->mc_list);
4094        adapter->mc_list = NULL;
4095
4096        kfree(adapter->uc_list);
4097        adapter->uc_list = NULL;
4098}
4099
4100static int be_clear(struct be_adapter *adapter)
4101{
4102        struct pci_dev *pdev = adapter->pdev;
4103        struct  be_resources vft_res = {0};
4104
4105        be_cancel_worker(adapter);
4106
4107        flush_workqueue(be_wq);
4108
4109        if (sriov_enabled(adapter))
4110                be_vf_clear(adapter);
4111
4112        /* Re-configure FW to distribute resources evenly across max-supported
4113         * number of VFs, only when VFs are not already enabled.
4114         */
4115        if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4116            !pci_vfs_assigned(pdev)) {
4117                be_calculate_vf_res(adapter,
4118                                    pci_sriov_get_totalvfs(pdev),
4119                                    &vft_res);
4120                be_cmd_set_sriov_config(adapter, adapter->pool_res,
4121                                        pci_sriov_get_totalvfs(pdev),
4122                                        &vft_res);
4123        }
4124
4125        be_disable_vxlan_offloads(adapter);
4126
4127        be_if_destroy(adapter);
4128
4129        be_clear_queues(adapter);
4130
4131        be_msix_disable(adapter);
4132        adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4133        return 0;
4134}
4135
4136static int be_vfs_if_create(struct be_adapter *adapter)
4137{
4138        struct be_resources res = {0};
4139        u32 cap_flags, en_flags, vf;
4140        struct be_vf_cfg *vf_cfg;
4141        int status;
4142
4143        /* If a FW profile exists, then cap_flags are updated */
4144        cap_flags = BE_VF_IF_EN_FLAGS;
4145
4146        for_all_vfs(adapter, vf_cfg, vf) {
4147                if (!BE3_chip(adapter)) {
4148                        status = be_cmd_get_profile_config(adapter, &res, NULL,
4149                                                           ACTIVE_PROFILE_TYPE,
4150                                                           RESOURCE_LIMITS,
4151                                                           vf + 1);
4152                        if (!status) {
4153                                cap_flags = res.if_cap_flags;
4154                                /* Prevent VFs from enabling VLAN promiscuous
4155                                 * mode
4156                                 */
4157                                cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4158                        }
4159                }
4160
4161                /* PF should enable IF flags during proxy if_create call */
4162                en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4163                status = be_cmd_if_create(adapter, cap_flags, en_flags,
4164                                          &vf_cfg->if_handle, vf + 1);
4165                if (status)
4166                        return status;
4167        }
4168
4169        return 0;
4170}
4171
4172static int be_vf_setup_init(struct be_adapter *adapter)
4173{
4174        struct be_vf_cfg *vf_cfg;
4175        int vf;
4176
4177        adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4178                                  GFP_KERNEL);
4179        if (!adapter->vf_cfg)
4180                return -ENOMEM;
4181
4182        for_all_vfs(adapter, vf_cfg, vf) {
4183                vf_cfg->if_handle = -1;
4184                vf_cfg->pmac_id = -1;
4185        }
4186        return 0;
4187}
4188
4189static int be_vf_setup(struct be_adapter *adapter)
4190{
4191        struct device *dev = &adapter->pdev->dev;
4192        struct be_vf_cfg *vf_cfg;
4193        int status, old_vfs, vf;
4194        bool spoofchk;
4195
4196        old_vfs = pci_num_vf(adapter->pdev);
4197
4198        status = be_vf_setup_init(adapter);
4199        if (status)
4200                goto err;
4201
4202        if (old_vfs) {
4203                for_all_vfs(adapter, vf_cfg, vf) {
4204                        status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4205                        if (status)
4206                                goto err;
4207                }
4208
4209                status = be_vfs_mac_query(adapter);
4210                if (status)
4211                        goto err;
4212        } else {
4213                status = be_vfs_if_create(adapter);
4214                if (status)
4215                        goto err;
4216
4217                status = be_vf_eth_addr_config(adapter);
4218                if (status)
4219                        goto err;
4220        }
4221
4222        for_all_vfs(adapter, vf_cfg, vf) {
4223                /* Allow VFs to program MAC/VLAN filters */
4224                status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4225                                                  vf + 1);
4226                if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4227                        status = be_cmd_set_fn_privileges(adapter,
4228                                                          vf_cfg->privileges |
4229                                                          BE_PRIV_FILTMGMT,
4230                                                          vf + 1);
4231                        if (!status) {
4232                                vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4233                                dev_info(dev, "VF%d has FILTMGMT privilege\n",
4234                                         vf);
4235                        }
4236                }
4237
4238                /* Allow full available bandwidth */
4239                if (!old_vfs)
4240                        be_cmd_config_qos(adapter, 0, 0, vf + 1);
4241
4242                status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4243                                               vf_cfg->if_handle, NULL,
4244                                               &spoofchk);
4245                if (!status)
4246                        vf_cfg->spoofchk = spoofchk;
4247
4248                if (!old_vfs) {
4249                        be_cmd_enable_vf(adapter, vf + 1);
4250                        be_cmd_set_logical_link_config(adapter,
4251                                                       IFLA_VF_LINK_STATE_AUTO,
4252                                                       vf+1);
4253                }
4254        }
4255
4256        if (!old_vfs) {
4257                status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4258                if (status) {
4259                        dev_err(dev, "SRIOV enable failed\n");
4260                        adapter->num_vfs = 0;
4261                        goto err;
4262                }
4263        }
4264
4265        if (BE3_chip(adapter)) {
4266                /* On BE3, enable VEB only when SRIOV is enabled */
4267                status = be_cmd_set_hsw_config(adapter, 0, 0,
4268                                               adapter->if_handle,
4269                                               PORT_FWD_TYPE_VEB, 0);
4270                if (status)
4271                        goto err;
4272        }
4273
4274        adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4275        return 0;
4276err:
4277        dev_err(dev, "VF setup failed\n");
4278        be_vf_clear(adapter);
4279        return status;
4280}
4281
4282/* Converting function_mode bits on BE3 to SH mc_type enums */
4283
4284static u8 be_convert_mc_type(u32 function_mode)
4285{
4286        if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4287                return vNIC1;
4288        else if (function_mode & QNQ_MODE)
4289                return FLEX10;
4290        else if (function_mode & VNIC_MODE)
4291                return vNIC2;
4292        else if (function_mode & UMC_ENABLED)
4293                return UMC;
4294        else
4295                return MC_NONE;
4296}
4297
4298/* On BE2/BE3 the FW does not report the supported resource limits */
4299static void BEx_get_resources(struct be_adapter *adapter,
4300                              struct be_resources *res)
4301{
4302        bool use_sriov = adapter->num_vfs ? 1 : 0;
4303
4304        if (be_physfn(adapter))
4305                res->max_uc_mac = BE_UC_PMAC_COUNT;
4306        else
4307                res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4308
4309        adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4310
4311        if (be_is_mc(adapter)) {
4312                /* Assuming that there are 4 channels per port,
4313                 * when multi-channel is enabled
4314                 */
4315                if (be_is_qnq_mode(adapter))
4316                        res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4317                else
4318                        /* In a non-qnq multichannel mode, the pvid
4319                         * takes up one vlan entry
4320                         */
4321                        res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4322        } else {
4323                res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4324        }
4325
4326        res->max_mcast_mac = BE_MAX_MC;
4327
4328        /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4329         * 2) Create multiple TX rings on a BE3-R multi-channel interface
4330         *    *only* if it is RSS-capable.
4331         */
4332        if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4333            be_virtfn(adapter) ||
4334            (be_is_mc(adapter) &&
4335             !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4336                res->max_tx_qs = 1;
4337        } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4338                struct be_resources super_nic_res = {0};
4339
4340                /* On a SuperNIC profile, the driver needs to use the
4341                 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4342                 */
4343                be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4344                                          ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4345                                          0);
4346                /* Some old versions of BE3 FW don't report max_tx_qs value */
4347                res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4348        } else {
4349                res->max_tx_qs = BE3_MAX_TX_QS;
4350        }
4351
4352        if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4353            !use_sriov && be_physfn(adapter))
4354                res->max_rss_qs = (adapter->be3_native) ?
4355                                           BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4356        res->max_rx_qs = res->max_rss_qs + 1;
4357
4358        if (be_physfn(adapter))
4359                res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4360                                        BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4361        else
4362                res->max_evt_qs = 1;
4363
4364        res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4365        res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4366        if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4367                res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4368}
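/* Illustrative arithmetic for the VLAN entitlement above, assuming
 * BE_NUM_VLANS_SUPPORTED is 64 (see be.h for the actual value):
 *   QnQ multi-channel:     64 / 8     = 8 VLAN filter entries
 *   non-QnQ multi-channel: 64 / 4 - 1 = 15 entries (one used by the pvid)
 *   single-channel:        all 64 entries
 */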
4369
4370static void be_setup_init(struct be_adapter *adapter)
4371{
4372        adapter->vlan_prio_bmap = 0xff;
4373        adapter->phy.link_speed = -1;
4374        adapter->if_handle = -1;
4375        adapter->be3_native = false;
4376        adapter->if_flags = 0;
4377        adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4378        if (be_physfn(adapter))
4379                adapter->cmd_privileges = MAX_PRIVILEGES;
4380        else
4381                adapter->cmd_privileges = MIN_PRIVILEGES;
4382}
4383
4384/* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4385 * However, this HW limitation is not exposed to the host via any SLI cmd.
4386 * As a result, in SRIOV and in particular multi-partition configs, the
4387 * driver needs to calculate a proportional share of RSS Tables per PF-pool
4388 * for distribution between the VFs. This self-imposed limit determines the
4389 * number of VFs for which RSS can be enabled.
4390 */
4391static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4392{
4393        struct be_port_resources port_res = {0};
4394        u8 rss_tables_on_port;
4395        u16 max_vfs = be_max_vfs(adapter);
4396
4397        be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4398                                  RESOURCE_LIMITS, 0);
4399
4400        rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4401
4402        /* Each PF Pool's RSS Tables limit =
4403         * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4404         */
4405        adapter->pool_res.max_rss_tables =
4406                max_vfs * rss_tables_on_port / port_res.max_vfs;
4407}
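/* Worked example of the proportional share above (values are assumed, not
 * read from FW): with MAX_PORT_RSS_TABLES = 15 and one NIC PF on the port
 * (port_res.nic_pfs = 1), rss_tables_on_port = 14. If this PF's pool allows
 * 32 VFs out of port_res.max_vfs = 64 on the port, then
 * pool_res.max_rss_tables = 32 * 14 / 64 = 7.
 */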
4408
4409static int be_get_sriov_config(struct be_adapter *adapter)
4410{
4411        struct be_resources res = {0};
4412        int max_vfs, old_vfs;
4413
4414        be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4415                                  RESOURCE_LIMITS, 0);
4416
4417        /* Some old versions of BE3 FW don't report max_vfs value */
4418        if (BE3_chip(adapter) && !res.max_vfs) {
4419                max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4420                res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4421        }
4422
4423        adapter->pool_res = res;
4424
4425        /* If, during a previous unload of the driver, the VFs were not
4426         * disabled, we cannot rely on the PF-pool limits for the TotalVFs
4427         * value. Instead use the TotalVFs value stored in the pci-dev struct.
4428         */
4429        old_vfs = pci_num_vf(adapter->pdev);
4430        if (old_vfs) {
4431                dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4432                         old_vfs);
4433
4434                adapter->pool_res.max_vfs =
4435                        pci_sriov_get_totalvfs(adapter->pdev);
4436                adapter->num_vfs = old_vfs;
4437        }
4438
4439        if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4440                be_calculate_pf_pool_rss_tables(adapter);
4441                dev_info(&adapter->pdev->dev,
4442                         "RSS can be enabled for all VFs if num_vfs <= %d\n",
4443                         be_max_pf_pool_rss_tables(adapter));
4444        }
4445        return 0;
4446}
4447
4448static void be_alloc_sriov_res(struct be_adapter *adapter)
4449{
4450        int old_vfs = pci_num_vf(adapter->pdev);
4451        struct  be_resources vft_res = {0};
4452        int status;
4453
4454        be_get_sriov_config(adapter);
4455
4456        if (!old_vfs)
4457                pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4458
4459        /* When the HW is in an SRIOV-capable configuration, the PF-pool
4460         * resources are given to the PF during driver load, if there are no
4461         * old VFs. This facility is not available in BE3 FW.
4462         * On Lancer, this is done by the FW itself.
4463         */
4464        if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4465                be_calculate_vf_res(adapter, 0, &vft_res);
4466                status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4467                                                 &vft_res);
4468                if (status)
4469                        dev_err(&adapter->pdev->dev,
4470                                "Failed to optimize SRIOV resources\n");
4471        }
4472}
4473
4474static int be_get_resources(struct be_adapter *adapter)
4475{
4476        struct device *dev = &adapter->pdev->dev;
4477        struct be_resources res = {0};
4478        int status;
4479
4480        /* For Lancer, SH etc. read per-function resource limits from FW.
4481         * GET_FUNC_CONFIG returns per-function guaranteed limits.
4482         * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4483         */
4484        if (BEx_chip(adapter)) {
4485                BEx_get_resources(adapter, &res);
4486        } else {
4487                status = be_cmd_get_func_config(adapter, &res);
4488                if (status)
4489                        return status;
4490
4491                /* If a default RXQ must be created, we'll use up one RSSQ */
4492                if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4493                    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4494                        res.max_rss_qs -= 1;
4495        }
4496
4497        /* If RoCE is supported, stash away half the EQs for RoCE */
4498        res.max_nic_evt_qs = be_roce_supported(adapter) ?
4499                                res.max_evt_qs / 2 : res.max_evt_qs;
4500        adapter->res = res;
4501
4502        /* If FW supports RSS default queue, then skip creating non-RSS
4503         * queue for non-IP traffic.
4504         */
4505        adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4506                                 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4507
4508        dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4509                 be_max_txqs(adapter), be_max_rxqs(adapter),
4510                 be_max_rss(adapter), be_max_nic_eqs(adapter),
4511                 be_max_vfs(adapter));
4512        dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4513                 be_max_uc(adapter), be_max_mc(adapter),
4514                 be_max_vlans(adapter));
4515
4516        /* Ensure RX and TX queues are created in pairs at init time */
4517        adapter->cfg_num_rx_irqs =
4518                                min_t(u16, netif_get_num_default_rss_queues(),
4519                                      be_max_qp_irqs(adapter));
4520        adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4521        return 0;
4522}
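/* Example of the RX/TX IRQ pairing above: netif_get_num_default_rss_queues()
 * returns at most 8, bounded by the number of online CPUs (kernel default at
 * the time of writing). So on an 8-CPU host with be_max_qp_irqs() reporting
 * 16, the driver starts with 8 RX and 8 TX IRQs; if only 4 QP IRQs are
 * available, both are capped at 4.
 */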
4523
4524static int be_get_config(struct be_adapter *adapter)
4525{
4526        int status, level;
4527        u16 profile_id;
4528
4529        status = be_cmd_get_cntl_attributes(adapter);
4530        if (status)
4531                return status;
4532
4533        status = be_cmd_query_fw_cfg(adapter);
4534        if (status)
4535                return status;
4536
4537        if (!lancer_chip(adapter) && be_physfn(adapter))
4538                be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4539
4540        if (BEx_chip(adapter)) {
4541                level = be_cmd_get_fw_log_level(adapter);
4542                adapter->msg_enable =
4543                        level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4544        }
4545
4546        be_cmd_get_acpi_wol_cap(adapter);
4547        pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4548        pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4549
4550        be_cmd_query_port_name(adapter);
4551
4552        if (be_physfn(adapter)) {
4553                status = be_cmd_get_active_profile(adapter, &profile_id);
4554                if (!status)
4555                        dev_info(&adapter->pdev->dev,
4556                                 "Using profile 0x%x\n", profile_id);
4557        }
4558
4559        return 0;
4560}
4561
4562static int be_mac_setup(struct be_adapter *adapter)
4563{
4564        u8 mac[ETH_ALEN];
4565        int status;
4566
4567        if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4568                status = be_cmd_get_perm_mac(adapter, mac);
4569                if (status)
4570                        return status;
4571
4572                memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4573                memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4574
4575                /* Initial MAC for BE3 VFs is already programmed by PF */
4576                if (BEx_chip(adapter) && be_virtfn(adapter))
4577                        memcpy(adapter->dev_mac, mac, ETH_ALEN);
4578        }
4579
4580        return 0;
4581}
4582
4583static void be_schedule_worker(struct be_adapter *adapter)
4584{
4585        queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4586        adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4587}
4588
4589static void be_destroy_err_recovery_workq(void)
4590{
4591        if (!be_err_recovery_workq)
4592                return;
4593
4594        flush_workqueue(be_err_recovery_workq);
4595        destroy_workqueue(be_err_recovery_workq);
4596        be_err_recovery_workq = NULL;
4597}
4598
4599static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4600{
4601        struct be_error_recovery *err_rec = &adapter->error_recovery;
4602
4603        if (!be_err_recovery_workq)
4604                return;
4605
4606        queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4607                           msecs_to_jiffies(delay));
4608        adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4609}
4610
4611static int be_setup_queues(struct be_adapter *adapter)
4612{
4613        struct net_device *netdev = adapter->netdev;
4614        int status;
4615
4616        status = be_evt_queues_create(adapter);
4617        if (status)
4618                goto err;
4619
4620        status = be_tx_qs_create(adapter);
4621        if (status)
4622                goto err;
4623
4624        status = be_rx_cqs_create(adapter);
4625        if (status)
4626                goto err;
4627
4628        status = be_mcc_queues_create(adapter);
4629        if (status)
4630                goto err;
4631
4632        status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4633        if (status)
4634                goto err;
4635
4636        status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4637        if (status)
4638                goto err;
4639
4640        return 0;
4641err:
4642        dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4643        return status;
4644}
4645
4646static int be_if_create(struct be_adapter *adapter)
4647{
4648        u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4649        u32 cap_flags = be_if_cap_flags(adapter);
4650        int status;
4651
4652        /* alloc required memory for other filtering fields */
4653        adapter->pmac_id = kcalloc(be_max_uc(adapter),
4654                                   sizeof(*adapter->pmac_id), GFP_KERNEL);
4655        if (!adapter->pmac_id)
4656                return -ENOMEM;
4657
4658        adapter->mc_list = kcalloc(be_max_mc(adapter),
4659                                   sizeof(*adapter->mc_list), GFP_KERNEL);
4660        if (!adapter->mc_list)
4661                return -ENOMEM;
4662
4663        adapter->uc_list = kcalloc(be_max_uc(adapter),
4664                                   sizeof(*adapter->uc_list), GFP_KERNEL);
4665        if (!adapter->uc_list)
4666                return -ENOMEM;
4667
4668        if (adapter->cfg_num_rx_irqs == 1)
4669                cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4670
4671        en_flags &= cap_flags;
4672        /* will enable all the needed filter flags in be_open() */
4673        status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4674                                  &adapter->if_handle, 0);
4675
4676        if (status)
4677                return status;
4678
4679        return 0;
4680}
4681
4682int be_update_queues(struct be_adapter *adapter)
4683{
4684        struct net_device *netdev = adapter->netdev;
4685        int status;
4686
4687        if (netif_running(netdev))
4688                be_close(netdev);
4689
4690        be_cancel_worker(adapter);
4691
4692        /* If any vectors have been shared with RoCE we cannot re-program
4693         * the MSIx table.
4694         */
4695        if (!adapter->num_msix_roce_vec)
4696                be_msix_disable(adapter);
4697
4698        be_clear_queues(adapter);
4699        status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4700        if (status)
4701                return status;
4702
4703        if (!msix_enabled(adapter)) {
4704                status = be_msix_enable(adapter);
4705                if (status)
4706                        return status;
4707        }
4708
4709        status = be_if_create(adapter);
4710        if (status)
4711                return status;
4712
4713        status = be_setup_queues(adapter);
4714        if (status)
4715                return status;
4716
4717        be_schedule_worker(adapter);
4718
4719        if (netif_running(netdev))
4720                status = be_open(netdev);
4721
4722        return status;
4723}
4724
4725static inline int fw_major_num(const char *fw_ver)
4726{
4727        int fw_major = 0, i;
4728
4729        i = sscanf(fw_ver, "%d.", &fw_major);
4730        if (i != 1)
4731                return 0;
4732
4733        return fw_major;
4734}
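/* Usage example: fw_major_num("4.9.416.0") returns 4. A version string that
 * does not begin with digits fails the sscanf() and yields 0, which the BE2
 * check below treats as old/unknown firmware.
 */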
4735
4736/* If error recovery is in progress, FLR the PF.
4737 * Otherwise, don't FLR the PF if any VFs are already enabled.
4738 */
4739static bool be_reset_required(struct be_adapter *adapter)
4740{
4741        if (be_error_recovering(adapter))
4742                return true;
4743        else
4744                return pci_num_vf(adapter->pdev) == 0;
4745}
4746
4747/* Wait for the FW to be ready and perform the required initialization */
4748static int be_func_init(struct be_adapter *adapter)
4749{
4750        int status;
4751
4752        status = be_fw_wait_ready(adapter);
4753        if (status)
4754                return status;
4755
4756        /* FW is now ready; clear errors to allow cmds/doorbell */
4757        be_clear_error(adapter, BE_CLEAR_ALL);
4758
4759        if (be_reset_required(adapter)) {
4760                status = be_cmd_reset_function(adapter);
4761                if (status)
4762                        return status;
4763
4764                /* Wait for interrupts to quiesce after an FLR */
4765                msleep(100);
4766        }
4767
4768        /* Tell FW we're ready to fire cmds */
4769        status = be_cmd_fw_init(adapter);
4770        if (status)
4771                return status;
4772
4773        /* Allow interrupts for other ULPs running on NIC function */
4774        be_intr_set(adapter, true);
4775
4776        return 0;
4777}
4778
4779static int be_setup(struct be_adapter *adapter)
4780{
4781        struct device *dev = &adapter->pdev->dev;
4782        int status;
4783
4784        status = be_func_init(adapter);
4785        if (status)
4786                return status;
4787
4788        be_setup_init(adapter);
4789
4790        if (!lancer_chip(adapter))
4791                be_cmd_req_native_mode(adapter);
4792
4793        /* invoke this cmd first to get pf_num and vf_num which are needed
4794         * for issuing profile related cmds
4795         */
4796        if (!BEx_chip(adapter)) {
4797                status = be_cmd_get_func_config(adapter, NULL);
4798                if (status)
4799                        return status;
4800        }
4801
4802        status = be_get_config(adapter);
4803        if (status)
4804                goto err;
4805
4806        if (!BE2_chip(adapter) && be_physfn(adapter))
4807                be_alloc_sriov_res(adapter);
4808
4809        status = be_get_resources(adapter);
4810        if (status)
4811                goto err;
4812
4813        status = be_msix_enable(adapter);
4814        if (status)
4815                goto err;
4816
4817        /* will enable all the needed filter flags in be_open() */
4818        status = be_if_create(adapter);
4819        if (status)
4820                goto err;
4821
4822        /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4823        rtnl_lock();
4824        status = be_setup_queues(adapter);
4825        rtnl_unlock();
4826        if (status)
4827                goto err;
4828
4829        be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4830
4831        status = be_mac_setup(adapter);
4832        if (status)
4833                goto err;
4834
4835        be_cmd_get_fw_ver(adapter);
4836        dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4837
4838        if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4839                dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4840                        adapter->fw_ver);
4841                dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4842        }
4843
4844        status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4845                                         adapter->rx_fc);
4846        if (status)
4847                be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4848                                        &adapter->rx_fc);
4849
4850        dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4851                 adapter->tx_fc, adapter->rx_fc);
4852
4853        if (be_physfn(adapter))
4854                be_cmd_set_logical_link_config(adapter,
4855                                               IFLA_VF_LINK_STATE_AUTO, 0);
4856
4857        /* The BE3 EVB echoes broadcast/multicast packets back to the PF's
4858         * vport, confusing any linux bridge or OVS it might be connected to.
4859         * When SRIOV is not enabled, set the EVB to PASSTHRU mode, which
4860         * effectively disables the EVB.
4861         */
4862        if (BE3_chip(adapter))
4863                be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4864                                      PORT_FWD_TYPE_PASSTHRU, 0);
4865
4866        if (adapter->num_vfs)
4867                be_vf_setup(adapter);
4868
4869        status = be_cmd_get_phy_info(adapter);
4870        if (!status && be_pause_supported(adapter))
4871                adapter->phy.fc_autoneg = 1;
4872
4873        if (be_physfn(adapter) && !lancer_chip(adapter))
4874                be_cmd_set_features(adapter);
4875
4876        be_schedule_worker(adapter);
4877        adapter->flags |= BE_FLAGS_SETUP_DONE;
4878        return 0;
4879err:
4880        be_clear(adapter);
4881        return status;
4882}
4883
4884#ifdef CONFIG_NET_POLL_CONTROLLER
4885static void be_netpoll(struct net_device *netdev)
4886{
4887        struct be_adapter *adapter = netdev_priv(netdev);
4888        struct be_eq_obj *eqo;
4889        int i;
4890
4891        for_all_evt_queues(adapter, eqo, i) {
4892                be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4893                napi_schedule(&eqo->napi);
4894        }
4895}
4896#endif
4897
4898int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4899{
4900        const struct firmware *fw;
4901        int status;
4902
4903        if (!netif_running(adapter->netdev)) {
4904                dev_err(&adapter->pdev->dev,
4905                        "Firmware load not allowed (interface is down)\n");
4906                return -ENETDOWN;
4907        }
4908
4909        status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4910        if (status)
4911                goto fw_exit;
4912
4913        dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4914
4915        if (lancer_chip(adapter))
4916                status = lancer_fw_download(adapter, fw);
4917        else
4918                status = be_fw_download(adapter, fw);
4919
4920        if (!status)
4921                be_cmd_get_fw_ver(adapter);
4922
4923fw_exit:
4924        release_firmware(fw);
4925        return status;
4926}
4927
4928static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4929                                 u16 flags)
4930{
4931        struct be_adapter *adapter = netdev_priv(dev);
4932        struct nlattr *attr, *br_spec;
4933        int rem;
4934        int status = 0;
4935        u16 mode = 0;
4936
4937        if (!sriov_enabled(adapter))
4938                return -EOPNOTSUPP;
4939
4940        br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4941        if (!br_spec)
4942                return -EINVAL;
4943
4944        nla_for_each_nested(attr, br_spec, rem) {
4945                if (nla_type(attr) != IFLA_BRIDGE_MODE)
4946                        continue;
4947
4948                if (nla_len(attr) < sizeof(mode))
4949                        return -EINVAL;
4950
4951                mode = nla_get_u16(attr);
4952                if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4953                        return -EOPNOTSUPP;
4954
4955                if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4956                        return -EINVAL;
4957
4958                status = be_cmd_set_hsw_config(adapter, 0, 0,
4959                                               adapter->if_handle,
4960                                               mode == BRIDGE_MODE_VEPA ?
4961                                               PORT_FWD_TYPE_VEPA :
4962                                               PORT_FWD_TYPE_VEB, 0);
4963                if (status)
4964                        goto err;
4965
4966                dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4967                         mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4968
4969                return status;
4970        }
4971err:
4972        dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4973                mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4974
4975        return status;
4976}
4977
4978static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4979                                 struct net_device *dev, u32 filter_mask,
4980                                 int nlflags)
4981{
4982        struct be_adapter *adapter = netdev_priv(dev);
4983        int status = 0;
4984        u8 hsw_mode;
4985
4986        /* BE and Lancer chips support VEB mode only */
4987        if (BEx_chip(adapter) || lancer_chip(adapter)) {
4988                /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4989                if (!pci_sriov_get_totalvfs(adapter->pdev))
4990                        return 0;
4991                hsw_mode = PORT_FWD_TYPE_VEB;
4992        } else {
4993                status = be_cmd_get_hsw_config(adapter, NULL, 0,
4994                                               adapter->if_handle, &hsw_mode,
4995                                               NULL);
4996                if (status)
4997                        return 0;
4998
4999                if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5000                        return 0;
5001        }
5002
5003        return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5004                                       hsw_mode == PORT_FWD_TYPE_VEPA ?
5005                                       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5006                                       0, 0, nlflags, filter_mask, NULL);
5007}
5008
5009static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5010                                         void (*func)(struct work_struct *))
5011{
5012        struct be_cmd_work *work;
5013
5014        work = kzalloc(sizeof(*work), GFP_ATOMIC);
5015        if (!work) {
5016                dev_err(&adapter->pdev->dev,
5017                        "be_work memory allocation failed\n");
5018                return NULL;
5019        }
5020
5021        INIT_WORK(&work->work, func);
5022        work->adapter = adapter;
5023        return work;
5024}
5025
5026/* VxLAN offload Notes:
5027 *
5028 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5029 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5030 * is expected to work across all types of IP tunnels once exported. Skyhawk
5031 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5032 * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5033 * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5034 * those other tunnels are unexported on the fly through ndo_features_check().
5035 *
5036 * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5037 * adds more than one port, disable offloads and don't re-enable them
5038 * until all the tunnels are removed.
5039 */
5040static void be_work_add_vxlan_port(struct work_struct *work)
5041{
5042        struct be_cmd_work *cmd_work =
5043                                container_of(work, struct be_cmd_work, work);
5044        struct be_adapter *adapter = cmd_work->adapter;
5045        struct net_device *netdev = adapter->netdev;
5046        struct device *dev = &adapter->pdev->dev;
5047        __be16 port = cmd_work->info.vxlan_port;
5048        int status;
5049
5050        if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5051                adapter->vxlan_port_aliases++;
5052                goto done;
5053        }
5054
5055        if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5056                dev_info(dev,
5057                         "Only one UDP port supported for VxLAN offloads\n");
5058                dev_info(dev, "Disabling VxLAN offloads\n");
5059                adapter->vxlan_port_count++;
5060                goto err;
5061        }
5062
5063        if (adapter->vxlan_port_count++ >= 1)
5064                goto done;
5065
5066        status = be_cmd_manage_iface(adapter, adapter->if_handle,
5067                                     OP_CONVERT_NORMAL_TO_TUNNEL);
5068        if (status) {
5069                dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5070                goto err;
5071        }
5072
5073        status = be_cmd_set_vxlan_port(adapter, port);
5074        if (status) {
5075                dev_warn(dev, "Failed to add VxLAN port\n");
5076                goto err;
5077        }
5078        adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5079        adapter->vxlan_port = port;
5080
5081        netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5082                                   NETIF_F_TSO | NETIF_F_TSO6 |
5083                                   NETIF_F_GSO_UDP_TUNNEL;
5084        netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5085        netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5086
5087        dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5088                 be16_to_cpu(port));
5089        goto done;
5090err:
5091        be_disable_vxlan_offloads(adapter);
5092done:
5093        kfree(cmd_work);
5094}
5095
5096static void be_work_del_vxlan_port(struct work_struct *work)
5097{
5098        struct be_cmd_work *cmd_work =
5099                                container_of(work, struct be_cmd_work, work);
5100        struct be_adapter *adapter = cmd_work->adapter;
5101        __be16 port = cmd_work->info.vxlan_port;
5102
5103        if (adapter->vxlan_port != port)
5104                goto done;
5105
5106        if (adapter->vxlan_port_aliases) {
5107                adapter->vxlan_port_aliases--;
5108                goto out;
5109        }
5110
5111        be_disable_vxlan_offloads(adapter);
5112
5113        dev_info(&adapter->pdev->dev,
5114                 "Disabled VxLAN offloads for UDP port %d\n",
5115                 be16_to_cpu(port));
5116done:
5117        adapter->vxlan_port_count--;
5118out:
5119        kfree(cmd_work);
5120}
5121
5122static void be_cfg_vxlan_port(struct net_device *netdev,
5123                              struct udp_tunnel_info *ti,
5124                              void (*func)(struct work_struct *))
5125{
5126        struct be_adapter *adapter = netdev_priv(netdev);
5127        struct be_cmd_work *cmd_work;
5128
5129        if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5130                return;
5131
5132        if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5133                return;
5134
5135        cmd_work = be_alloc_work(adapter, func);
5136        if (cmd_work) {
5137                cmd_work->info.vxlan_port = ti->port;
5138                queue_work(be_wq, &cmd_work->work);
5139        }
5140}
5141
5142static void be_del_vxlan_port(struct net_device *netdev,
5143                              struct udp_tunnel_info *ti)
5144{
5145        be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5146}
5147
5148static void be_add_vxlan_port(struct net_device *netdev,
5149                              struct udp_tunnel_info *ti)
5150{
5151        be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5152}
5153
5154static netdev_features_t be_features_check(struct sk_buff *skb,
5155                                           struct net_device *dev,
5156                                           netdev_features_t features)
5157{
5158        struct be_adapter *adapter = netdev_priv(dev);
5159        u8 l4_hdr = 0;
5160
5161        /* The code below restricts offload features for some tunneled packets.
5162         * Offload features for normal (non tunnel) packets are unchanged.
5163         */
5164        if (!skb->encapsulation ||
5165            !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5166                return features;
5167
5168        /* It's an encapsulated packet and VxLAN offloads are enabled. We
5169         * should disable tunnel offload features if it's not a VxLAN packet,
5170         * as tunnel offloads have been enabled only for VxLAN. This is done to
5171 * allow other tunneled traffic like GRE to work fine while VxLAN
5172         * offloads are configured in Skyhawk-R.
5173         */
5174        switch (vlan_get_protocol(skb)) {
5175        case htons(ETH_P_IP):
5176                l4_hdr = ip_hdr(skb)->protocol;
5177                break;
5178        case htons(ETH_P_IPV6):
5179                l4_hdr = ipv6_hdr(skb)->nexthdr;
5180                break;
5181        default:
5182                return features;
5183        }
5184
5185        if (l4_hdr != IPPROTO_UDP ||
5186            skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5187            skb->inner_protocol != htons(ETH_P_TEB) ||
5188            skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5189                sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5190            !adapter->vxlan_port ||
5191            udp_hdr(skb)->dest != adapter->vxlan_port)
5192                return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5193
5194        return features;
5195}
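/* Example of the check above: a GSO skb encapsulated in VxLAN whose outer UDP
 * destination matches adapter->vxlan_port keeps its checksum/GSO offloads.
 * A GRE-encapsulated skb, or a VxLAN skb sent to any other UDP port, has
 * NETIF_F_CSUM_MASK and NETIF_F_GSO_MASK stripped and falls back to the
 * software path.
 */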
5196
5197static int be_get_phys_port_id(struct net_device *dev,
5198                               struct netdev_phys_item_id *ppid)
5199{
5200        int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5201        struct be_adapter *adapter = netdev_priv(dev);
5202        u8 *id;
5203
5204        if (MAX_PHYS_ITEM_ID_LEN < id_len)
5205                return -ENOSPC;
5206
5207        ppid->id[0] = adapter->hba_port_num + 1;
5208        id = &ppid->id[1];
5209        for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5210             i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5211                memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5212
5213        ppid->id_len = id_len;
5214
5215        return 0;
5216}
5217
5218static void be_set_rx_mode(struct net_device *dev)
5219{
5220        struct be_adapter *adapter = netdev_priv(dev);
5221        struct be_cmd_work *work;
5222
5223        work = be_alloc_work(adapter, be_work_set_rx_mode);
5224        if (work)
5225                queue_work(be_wq, &work->work);
5226}
5227
5228static const struct net_device_ops be_netdev_ops = {
5229        .ndo_open               = be_open,
5230        .ndo_stop               = be_close,
5231        .ndo_start_xmit         = be_xmit,
5232        .ndo_set_rx_mode        = be_set_rx_mode,
5233        .ndo_set_mac_address    = be_mac_addr_set,
5234        .ndo_get_stats64        = be_get_stats64,
5235        .ndo_validate_addr      = eth_validate_addr,
5236        .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5237        .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5238        .ndo_set_vf_mac         = be_set_vf_mac,
5239        .ndo_set_vf_vlan        = be_set_vf_vlan,
5240        .ndo_set_vf_rate        = be_set_vf_tx_rate,
5241        .ndo_get_vf_config      = be_get_vf_config,
5242        .ndo_set_vf_link_state  = be_set_vf_link_state,
5243        .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5244#ifdef CONFIG_NET_POLL_CONTROLLER
5245        .ndo_poll_controller    = be_netpoll,
5246#endif
5247        .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5248        .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5249#ifdef CONFIG_NET_RX_BUSY_POLL
5250        .ndo_busy_poll          = be_busy_poll,
5251#endif
5252        .ndo_udp_tunnel_add     = be_add_vxlan_port,
5253        .ndo_udp_tunnel_del     = be_del_vxlan_port,
5254        .ndo_features_check     = be_features_check,
5255        .ndo_get_phys_port_id   = be_get_phys_port_id,
5256};
5257
5258static void be_netdev_init(struct net_device *netdev)
5259{
5260        struct be_adapter *adapter = netdev_priv(netdev);
5261
5262        netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5263                NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5264                NETIF_F_HW_VLAN_CTAG_TX;
5265        if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5266                netdev->hw_features |= NETIF_F_RXHASH;
5267
5268        netdev->features |= netdev->hw_features |
5269                NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5270
5271        netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5272                NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5273
5274        netdev->priv_flags |= IFF_UNICAST_FLT;
5275
5276        netdev->flags |= IFF_MULTICAST;
5277
5278        netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5279
5280        netdev->netdev_ops = &be_netdev_ops;
5281
5282        netdev->ethtool_ops = &be_ethtool_ops;
5283
5284        /* MTU range: 256 - 9000 */
5285        netdev->min_mtu = BE_MIN_MTU;
5286        netdev->max_mtu = BE_MAX_MTU;
5287}
5288
5289static void be_cleanup(struct be_adapter *adapter)
5290{
5291        struct net_device *netdev = adapter->netdev;
5292
5293        rtnl_lock();
5294        netif_device_detach(netdev);
5295        if (netif_running(netdev))
5296                be_close(netdev);
5297        rtnl_unlock();
5298
5299        be_clear(adapter);
5300}
5301
5302static int be_resume(struct be_adapter *adapter)
5303{
5304        struct net_device *netdev = adapter->netdev;
5305        int status;
5306
5307        status = be_setup(adapter);
5308        if (status)
5309                return status;
5310
5311        rtnl_lock();
5312        if (netif_running(netdev))
5313                status = be_open(netdev);
5314        rtnl_unlock();
5315
5316        if (status)
5317                return status;
5318
5319        netif_device_attach(netdev);
5320
5321        return 0;
5322}
5323
5324static void be_soft_reset(struct be_adapter *adapter)
5325{
5326        u32 val;
5327
5328        dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5329        val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5330        val |= SLIPORT_SOFTRESET_SR_MASK;
5331        iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5332}
5333
5334static bool be_err_is_recoverable(struct be_adapter *adapter)
5335{
5336        struct be_error_recovery *err_rec = &adapter->error_recovery;
5337        unsigned long initial_idle_time =
5338                msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5339        unsigned long recovery_interval =
5340                msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5341        u16 ue_err_code;
5342        u32 val;
5343
5344        val = be_POST_stage_get(adapter);
5345        if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5346                return false;
5347        ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5348        if (ue_err_code == 0)
5349                return false;
5350
5351        dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5352                ue_err_code);
5353
5354        if (jiffies - err_rec->probe_time <= initial_idle_time) {
5355                dev_err(&adapter->pdev->dev,
5356                        "Cannot recover within %lu sec from driver load\n",
5357                        jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5358                return false;
5359        }
5360
5361        if (err_rec->last_recovery_time &&
5362            (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5363                dev_err(&adapter->pdev->dev,
5364                        "Cannot recover within %lu sec from last recovery\n",
5365                        jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5366                return false;
5367        }
5368
5369        if (ue_err_code == err_rec->last_err_code) {
5370                dev_err(&adapter->pdev->dev,
5371                        "Cannot recover from a consecutive TPE error\n");
5372                return false;
5373        }
5374
5375        err_rec->last_recovery_time = jiffies;
5376        err_rec->last_err_code = ue_err_code;
5377        return true;
5378}
5379
5380static int be_tpe_recover(struct be_adapter *adapter)
5381{
5382        struct be_error_recovery *err_rec = &adapter->error_recovery;
5383        int status = -EAGAIN;
5384        u32 val;
5385
5386        switch (err_rec->recovery_state) {
5387        case ERR_RECOVERY_ST_NONE:
5388                err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5389                err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5390                break;
5391
5392        case ERR_RECOVERY_ST_DETECT:
5393                val = be_POST_stage_get(adapter);
5394                if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5395                    POST_STAGE_RECOVERABLE_ERR) {
5396                        dev_err(&adapter->pdev->dev,
5397                                "Unrecoverable HW error detected: 0x%x\n", val);
5398                        status = -EINVAL;
5399                        err_rec->resched_delay = 0;
5400                        break;
5401                }
5402
5403                dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5404
5405                /* Only PF0 initiates a Chip Soft Reset. But PF0 must wait UE2SR
5406                 * milliseconds before it checks the final error status in
5407                 * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5408                 * If they are, PF0 initiates a Soft Reset.
5409                 */
5410                if (adapter->pf_num == 0) {
5411                        err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5412                        err_rec->resched_delay = err_rec->ue_to_reset_time -
5413                                        ERR_RECOVERY_UE_DETECT_DURATION;
5414                        break;
5415                }
5416
5417                err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5418                err_rec->resched_delay = err_rec->ue_to_poll_time -
5419                                        ERR_RECOVERY_UE_DETECT_DURATION;
5420                break;
5421
5422        case ERR_RECOVERY_ST_RESET:
5423                if (!be_err_is_recoverable(adapter)) {
5424                        dev_err(&adapter->pdev->dev,
5425                                "Failed to meet recovery criteria\n");
5426                        status = -EIO;
5427                        err_rec->resched_delay = 0;
5428                        break;
5429                }
5430                be_soft_reset(adapter);
5431                err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5432                err_rec->resched_delay = err_rec->ue_to_poll_time -
5433                                        err_rec->ue_to_reset_time;
5434                break;
5435
5436        case ERR_RECOVERY_ST_PRE_POLL:
5437                err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5438                err_rec->resched_delay = 0;
5439                status = 0;                     /* done */
5440                break;
5441
5442        default:
5443                status = -EINVAL;
5444                err_rec->resched_delay = 0;
5445                break;
5446        }
5447
5448        return status;
5449}
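/* Recovery state walk implied above (delays come from err_rec and are
 * adapter-specific): PF0 moves NONE -> DETECT (after UE_DETECT_DURATION) ->
 * RESET (at ue_to_reset_time, where it issues the chip soft reset) ->
 * PRE_POLL (at ue_to_poll_time) -> REINIT. All other PFs skip the RESET
 * state and move from DETECT straight to PRE_POLL.
 */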
5450
5451static int be_err_recover(struct be_adapter *adapter)
5452{
5453        int status;
5454
5455        if (!lancer_chip(adapter)) {
5456                if (!adapter->error_recovery.recovery_supported ||
5457                    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5458                        return -EIO;
5459                status = be_tpe_recover(adapter);
5460                if (status)
5461                        goto err;
5462        }
5463
5464        /* Wait for adapter to reach quiescent state before
5465         * destroying queues
5466         */
5467        status = be_fw_wait_ready(adapter);
5468        if (status)
5469                goto err;
5470
5471        adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5472
5473        be_cleanup(adapter);
5474
5475        status = be_resume(adapter);
5476        if (status)
5477                goto err;
5478
5479        adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5480
5481err:
5482        return status;
5483}
5484
5485static void be_err_detection_task(struct work_struct *work)
5486{
5487        struct be_error_recovery *err_rec =
5488                        container_of(work, struct be_error_recovery,
5489                                     err_detection_work.work);
5490        struct be_adapter *adapter =
5491                        container_of(err_rec, struct be_adapter,
5492                                     error_recovery);
5493        u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5494        struct device *dev = &adapter->pdev->dev;
5495        int recovery_status;
5496
5497        be_detect_error(adapter);
5498        if (!be_check_error(adapter, BE_ERROR_HW))
5499                goto reschedule_task;
5500
5501        recovery_status = be_err_recover(adapter);
5502        if (!recovery_status) {
5503                err_rec->recovery_retries = 0;
5504                err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5505                dev_info(dev, "Adapter recovery successful\n");
5506                goto reschedule_task;
5507        } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5508                /* BEx/SH recovery state machine */
5509                if (adapter->pf_num == 0 &&
5510                    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5511                        dev_err(&adapter->pdev->dev,
5512                                "Adapter recovery in progress\n");
5513                resched_delay = err_rec->resched_delay;
5514                goto reschedule_task;
5515        } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5516                /* For VFs, check every second whether the PF has
5517                 * allocated resources.
5518                 */
5519                dev_err(dev, "Re-trying adapter recovery\n");
5520                goto reschedule_task;
5521        } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5522                   ERR_RECOVERY_MAX_RETRY_COUNT) {
5523                /* In case of another error during recovery, it takes 30 sec
5524                 * for the adapter to come out of the error state. Retry error
5525                 * recovery after this time interval.
5526                 */
5527                dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5528                resched_delay = ERR_RECOVERY_RETRY_DELAY;
5529                goto reschedule_task;
5530        } else {
5531                dev_err(dev, "Adapter recovery failed\n");
5532                dev_err(dev, "Please reboot server to recover\n");
5533        }
5534
5535        return;
5536
5537reschedule_task:
5538        be_schedule_err_detection(adapter, resched_delay);
5539}
5540
5541static void be_log_sfp_info(struct be_adapter *adapter)
5542{
5543        int status;
5544
5545        status = be_cmd_query_sfp_info(adapter);
5546        if (!status) {
5547                dev_err(&adapter->pdev->dev,
5548                        "Port %c: %s Vendor: %s part no: %s",
5549                        adapter->port_name,
5550                        be_misconfig_evt_port_state[adapter->phy_state],
5551                        adapter->phy.vendor_name,
5552                        adapter->phy.vendor_pn);
5553        }
5554        adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5555}
5556
5557static void be_worker(struct work_struct *work)
5558{
5559        struct be_adapter *adapter =
5560                container_of(work, struct be_adapter, work.work);
5561        struct be_rx_obj *rxo;
5562        int i;
5563
5564        if (be_physfn(adapter) &&
5565            MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5566                be_cmd_get_die_temperature(adapter);
5567
5568        /* when interrupts are not yet enabled, just reap any pending
5569         * mcc completions
5570         */
5571        if (!netif_running(adapter->netdev)) {
5572                local_bh_disable();
5573                be_process_mcc(adapter);
5574                local_bh_enable();
5575                goto reschedule;
5576        }
5577
5578        if (!adapter->stats_cmd_sent) {
5579                if (lancer_chip(adapter))
5580                        lancer_cmd_get_pport_stats(adapter,
5581                                                   &adapter->stats_cmd);
5582                else
5583                        be_cmd_get_stats(adapter, &adapter->stats_cmd);
5584        }
5585
5586        for_all_rx_queues(adapter, rxo, i) {
5587                /* Replenish RX-queues starved due to memory
5588                 * allocation failures.
5589                 */
5590                if (rxo->rx_post_starved)
5591                        be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5592        }
5593
5594        /* EQ-delay update for Skyhawk is done while notifying EQ */
5595        if (!skyhawk_chip(adapter))
5596                be_eqd_update(adapter, false);
5597
5598        if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5599                be_log_sfp_info(adapter);
5600
5601reschedule:
5602        adapter->work_counter++;
5603        queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5604}
5605
5606static void be_unmap_pci_bars(struct be_adapter *adapter)
5607{
5608        if (adapter->csr)
5609                pci_iounmap(adapter->pdev, adapter->csr);
5610        if (adapter->db)
5611                pci_iounmap(adapter->pdev, adapter->db);
5612        if (adapter->pcicfg && adapter->pcicfg_mapped)
5613                pci_iounmap(adapter->pdev, adapter->pcicfg);
5614}
5615
5616static int db_bar(struct be_adapter *adapter)
5617{
5618        if (lancer_chip(adapter) || be_virtfn(adapter))
5619                return 0;
5620        else
5621                return 4;
5622}
5623
5624static int be_roce_map_pci_bars(struct be_adapter *adapter)
5625{
5626        if (skyhawk_chip(adapter)) {
5627                adapter->roce_db.size = 4096;
5628                adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5629                                                              db_bar(adapter));
5630                adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5631                                                               db_bar(adapter));
5632        }
5633        return 0;
5634}
5635
5636static int be_map_pci_bars(struct be_adapter *adapter)
5637{
5638        struct pci_dev *pdev = adapter->pdev;
5639        u8 __iomem *addr;
5640        u32 sli_intf;
5641
5642        pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5643        adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5644                                SLI_INTF_FAMILY_SHIFT;
5645        adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5646
5647        if (BEx_chip(adapter) && be_physfn(adapter)) {
5648                adapter->csr = pci_iomap(pdev, 2, 0);
5649                if (!adapter->csr)
5650                        return -ENOMEM;
5651        }
5652
5653        addr = pci_iomap(pdev, db_bar(adapter), 0);
5654        if (!addr)
5655                goto pci_map_err;
5656        adapter->db = addr;
5657
5658        if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5659                if (be_physfn(adapter)) {
5660                        /* PCICFG is the 2nd BAR in BE2 */
5661                        addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5662                        if (!addr)
5663                                goto pci_map_err;
5664                        adapter->pcicfg = addr;
5665                        adapter->pcicfg_mapped = true;
5666                } else {
5667                        adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5668                        adapter->pcicfg_mapped = false;
5669                }
5670        }
5671
5672        be_roce_map_pci_bars(adapter);
5673        return 0;
5674
5675pci_map_err:
5676        dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5677        be_unmap_pci_bars(adapter);
5678        return -ENOMEM;
5679}
5680
5681static void be_drv_cleanup(struct be_adapter *adapter)
5682{
5683        struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5684        struct device *dev = &adapter->pdev->dev;
5685
5686        if (mem->va)
5687                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5688
5689        mem = &adapter->rx_filter;
5690        if (mem->va)
5691                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5692
5693        mem = &adapter->stats_cmd;
5694        if (mem->va)
5695                dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5696}
5697
5698/* Allocate and initialize various fields in be_adapter struct */
5699static int be_drv_init(struct be_adapter *adapter)
5700{
5701        struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5702        struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5703        struct be_dma_mem *rx_filter = &adapter->rx_filter;
5704        struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5705        struct device *dev = &adapter->pdev->dev;
5706        int status = 0;
5707
5708        mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5709        mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5710                                                 &mbox_mem_alloc->dma,
5711                                                 GFP_KERNEL);
5712        if (!mbox_mem_alloc->va)
5713                return -ENOMEM;
5714
5715        mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5716        mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5717        mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5718
5719        rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5720        rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5721                                            &rx_filter->dma, GFP_KERNEL);
5722        if (!rx_filter->va) {
5723                status = -ENOMEM;
5724                goto free_mbox;
5725        }
5726
5727        if (lancer_chip(adapter))
5728                stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5729        else if (BE2_chip(adapter))
5730                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5731        else if (BE3_chip(adapter))
5732                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5733        else
5734                stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5735        stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5736                                            &stats_cmd->dma, GFP_KERNEL);
5737        if (!stats_cmd->va) {
5738                status = -ENOMEM;
5739                goto free_rx_filter;
5740        }
5741
5742        mutex_init(&adapter->mbox_lock);
5743        mutex_init(&adapter->mcc_lock);
5744        mutex_init(&adapter->rx_filter_lock);
5745        spin_lock_init(&adapter->mcc_cq_lock);
5746        init_completion(&adapter->et_cmd_compl);
5747
5748        pci_save_state(adapter->pdev);
5749
5750        INIT_DELAYED_WORK(&adapter->work, be_worker);
5751
5752        adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5753        adapter->error_recovery.resched_delay = 0;
5754        INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5755                          be_err_detection_task);
5756
5757        adapter->rx_fc = true;
5758        adapter->tx_fc = true;
5759
5760        /* Must be a power of 2 or else MODULO will BUG_ON */
5761        adapter->be_get_temp_freq = 64;
5762
5763        return 0;
5764
5765free_rx_filter:
5766        dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5767free_mbox:
5768        dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5769                          mbox_mem_alloc->dma);
5770        return status;
5771}
5772
5773static void be_remove(struct pci_dev *pdev)
5774{
5775        struct be_adapter *adapter = pci_get_drvdata(pdev);
5776
5777        if (!adapter)
5778                return;
5779
5780        be_roce_dev_remove(adapter);
5781        be_intr_set(adapter, false);
5782
5783        be_cancel_err_detection(adapter);
5784
5785        unregister_netdev(adapter->netdev);
5786
5787        be_clear(adapter);
5788
5789        if (!pci_vfs_assigned(adapter->pdev))
5790                be_cmd_reset_function(adapter);
5791
5792        /* tell fw we're done with firing cmds */
5793        be_cmd_fw_clean(adapter);
5794
5795        be_unmap_pci_bars(adapter);
5796        be_drv_cleanup(adapter);
5797
5798        pci_disable_pcie_error_reporting(pdev);
5799
5800        pci_release_regions(pdev);
5801        pci_disable_device(pdev);
5802
5803        free_netdev(adapter->netdev);
5804}
5805
5806static ssize_t be_hwmon_show_temp(struct device *dev,
5807                                  struct device_attribute *dev_attr,
5808                                  char *buf)
5809{
5810        struct be_adapter *adapter = dev_get_drvdata(dev);
5811
5812        /* Unit: millidegree Celsius */
5813        if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5814                return -EIO;
5815        else
5816                return sprintf(buf, "%u\n",
5817                               adapter->hwmon_info.be_on_die_temp * 1000);
5818}
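/* Example: an on-die reading of 58 degrees C is exposed to hwmon as the
 * string "58000" (millidegrees). While be_on_die_temp is still
 * BE_INVALID_DIE_TEMP (no valid temperature read yet), reads fail with -EIO.
 */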
5819
5820static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5821                          be_hwmon_show_temp, NULL, 1);
5822
5823static struct attribute *be_hwmon_attrs[] = {
5824        &sensor_dev_attr_temp1_input.dev_attr.attr,
5825        NULL
5826};
5827
5828ATTRIBUTE_GROUPS(be_hwmon);
5829
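    /* Human-readable name of the adapter's multi-channel mode; used in the
     * banner printed at the end of be_probe().
     */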
5830static char *mc_name(struct be_adapter *adapter)
5831{
5832        char *str = ""; /* default */
5833
5834        switch (adapter->mc_type) {
5835        case UMC:
5836                str = "UMC";
5837                break;
5838        case FLEX10:
5839                str = "FLEX10";
5840                break;
5841        case vNIC1:
5842                str = "vNIC-1";
5843                break;
5844        case nPAR:
5845                str = "nPAR";
5846                break;
5847        case UFP:
5848                str = "UFP";
5849                break;
5850        case vNIC2:
5851                str = "vNIC-2";
5852                break;
5853        default:
5854                str = "";
5855        }
5856
5857        return str;
5858}
5859
5860static inline char *func_name(struct be_adapter *adapter)
5861{
5862        return be_physfn(adapter) ? "PF" : "VF";
5863}
5864
5865static inline char *nic_name(struct pci_dev *pdev)
5866{
5867        switch (pdev->device) {
5868        case OC_DEVICE_ID1:
5869                return OC_NAME;
5870        case OC_DEVICE_ID2:
5871                return OC_NAME_BE;
5872        case OC_DEVICE_ID3:
5873        case OC_DEVICE_ID4:
5874                return OC_NAME_LANCER;
5875        case BE_DEVICE_ID2:
5876                return BE3_NAME;
5877        case OC_DEVICE_ID5:
5878        case OC_DEVICE_ID6:
5879                return OC_NAME_SH;
5880        default:
5881                return BE_NAME;
5882        }
5883}
5884
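    /* PCI probe: enable the device and BARs, allocate the netdev and driver
     * state, bring the adapter up via be_setup(), register the netdev and
     * then start error detection and (on PFs) hwmon.
     */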
5885static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5886{
5887        struct be_adapter *adapter;
5888        struct net_device *netdev;
5889        int status = 0;
5890
5891        dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5892
5893        status = pci_enable_device(pdev);
5894        if (status)
5895                goto do_none;
5896
5897        status = pci_request_regions(pdev, DRV_NAME);
5898        if (status)
5899                goto disable_dev;
5900        pci_set_master(pdev);
5901
5902        netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5903        if (!netdev) {
5904                status = -ENOMEM;
5905                goto rel_reg;
5906        }
5907        adapter = netdev_priv(netdev);
5908        adapter->pdev = pdev;
5909        pci_set_drvdata(pdev, adapter);
5910        adapter->netdev = netdev;
5911        SET_NETDEV_DEV(netdev, &pdev->dev);
5912
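            /* Prefer 64-bit DMA addressing (and advertise NETIF_F_HIGHDMA);
             * fall back to a 32-bit mask if the platform cannot support it.
             */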
5913        status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5914        if (!status) {
5915                netdev->features |= NETIF_F_HIGHDMA;
5916        } else {
5917                status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5918                if (status) {
5919                        dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5920                        goto free_netdev;
5921                }
5922        }
5923
5924        status = pci_enable_pcie_error_reporting(pdev);
5925        if (!status)
5926                dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5927
5928        status = be_map_pci_bars(adapter);
5929        if (status)
5930                goto free_netdev;
5931
5932        status = be_drv_init(adapter);
5933        if (status)
5934                goto unmap_bars;
5935
5936        status = be_setup(adapter);
5937        if (status)
5938                goto drv_cleanup;
5939
5940        be_netdev_init(netdev);
5941        status = register_netdev(netdev);
5942        if (status != 0)
5943                goto unsetup;
5944
5945        be_roce_dev_add(adapter);
5946
5947        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5948        adapter->error_recovery.probe_time = jiffies;
5949
5950        /* On-die temperature is not supported on VFs. */
5951        if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5952                adapter->hwmon_info.hwmon_dev =
5953                        devm_hwmon_device_register_with_groups(&pdev->dev,
5954                                                               DRV_NAME,
5955                                                               adapter,
5956                                                               be_hwmon_groups);
5957                adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5958        }
5959
5960        dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5961                 func_name(adapter), mc_name(adapter), adapter->port_name);
5962
5963        return 0;
5964
5965unsetup:
5966        be_clear(adapter);
5967drv_cleanup:
5968        be_drv_cleanup(adapter);
5969unmap_bars:
5970        be_unmap_pci_bars(adapter);
5971free_netdev:
5972        free_netdev(netdev);
5973rel_reg:
5974        pci_release_regions(pdev);
5975disable_dev:
5976        pci_disable_device(pdev);
5977do_none:
5978        dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5979        return status;
5980}
5981
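    /* Legacy PCI power-management hooks: quiesce the adapter and save PCI
     * state on suspend; re-enable the device and restore it on resume.
     */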
5982static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5983{
5984        struct be_adapter *adapter = pci_get_drvdata(pdev);
5985
5986        be_intr_set(adapter, false);
5987        be_cancel_err_detection(adapter);
5988
5989        be_cleanup(adapter);
5990
5991        pci_save_state(pdev);
5992        pci_disable_device(pdev);
5993        pci_set_power_state(pdev, pci_choose_state(pdev, state));
5994        return 0;
5995}
5996
5997static int be_pci_resume(struct pci_dev *pdev)
5998{
5999        struct be_adapter *adapter = pci_get_drvdata(pdev);
6000        int status = 0;
6001
6002        status = pci_enable_device(pdev);
6003        if (status)
6004                return status;
6005
6006        pci_restore_state(pdev);
6007
6008        status = be_resume(adapter);
6009        if (status)
6010                return status;
6011
6012        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6013
6014        return 0;
6015}
6016
6017/*
6018 * A Function Level Reset (FLR) stops the adapter from DMAing any data.
6019 */
6020static void be_shutdown(struct pci_dev *pdev)
6021{
6022        struct be_adapter *adapter = pci_get_drvdata(pdev);
6023
6024        if (!adapter)
6025                return;
6026
6027        be_roce_dev_shutdown(adapter);
6028        cancel_delayed_work_sync(&adapter->work);
6029        be_cancel_err_detection(adapter);
6030
6031        netif_device_detach(adapter->netdev);
6032
6033        be_cmd_reset_function(adapter);
6034
6035        pci_disable_device(pdev);
6036}
6037
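    /* PCI (EEH/AER) error handling: error_detected() quiesces the function,
     * slot_reset() re-enables it after the platform resets the slot and
     * waits for FW readiness, and resume() restores normal operation.
     */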
6038static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6039                                            pci_channel_state_t state)
6040{
6041        struct be_adapter *adapter = pci_get_drvdata(pdev);
6042
6043        dev_err(&adapter->pdev->dev, "EEH error detected\n");
6044
6045        be_roce_dev_remove(adapter);
6046
6047        if (!be_check_error(adapter, BE_ERROR_EEH)) {
6048                be_set_error(adapter, BE_ERROR_EEH);
6049
6050                be_cancel_err_detection(adapter);
6051
6052                be_cleanup(adapter);
6053        }
6054
6055        if (state == pci_channel_io_perm_failure)
6056                return PCI_ERS_RESULT_DISCONNECT;
6057
6058        pci_disable_device(pdev);
6059
6060        /* The error could cause the FW to trigger a flash debug dump.
6061         * Resetting the card while a flash dump is in progress can
6062         * prevent it from recovering, so wait for the dump to finish.
6063         * Only the first function waits, since the wait is needed just
6064         * once per adapter.
6065         */
6066        if (pdev->devfn == 0)
6067                ssleep(30);
6068
6069        return PCI_ERS_RESULT_NEED_RESET;
6070}
6071
6072static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6073{
6074        struct be_adapter *adapter = pci_get_drvdata(pdev);
6075        int status;
6076
6077        dev_info(&adapter->pdev->dev, "EEH reset\n");
6078
6079        status = pci_enable_device(pdev);
6080        if (status)
6081                return PCI_ERS_RESULT_DISCONNECT;
6082
6083        pci_set_master(pdev);
6084        pci_restore_state(pdev);
6085
6086        /* Check if card is ok and fw is ready */
6087        dev_info(&adapter->pdev->dev,
6088                 "Waiting for FW to be ready after EEH reset\n");
6089        status = be_fw_wait_ready(adapter);
6090        if (status)
6091                return PCI_ERS_RESULT_DISCONNECT;
6092
6093        pci_cleanup_aer_uncorrect_error_status(pdev);
6094        be_clear_error(adapter, BE_CLEAR_ALL);
6095        return PCI_ERS_RESULT_RECOVERED;
6096}
6097
6098static void be_eeh_resume(struct pci_dev *pdev)
6099{
6100        int status = 0;
6101        struct be_adapter *adapter = pci_get_drvdata(pdev);
6102
6103        dev_info(&adapter->pdev->dev, "EEH resume\n");
6104
6105        pci_save_state(pdev);
6106
6107        status = be_resume(adapter);
6108        if (status)
6109                goto err;
6110
6111        be_roce_dev_add(adapter);
6112
6113        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6114        return;
6115err:
6116        dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6117}
6118
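    /* sriov_configure hook: invoked when the admin writes the desired VF
     * count to the PF's sriov_numvfs sysfs file (writing 0 disables VFs),
     * e.g. (illustrative device path):
     *   echo 4 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs
     */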
6119static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6120{
6121        struct be_adapter *adapter = pci_get_drvdata(pdev);
6122        struct be_resources vft_res = {0};
6123        int status;
6124
6125        if (!num_vfs)
6126                be_vf_clear(adapter);
6127
6128        adapter->num_vfs = num_vfs;
6129
6130        if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6131                dev_warn(&pdev->dev,
6132                         "Cannot disable VFs while they are assigned\n");
6133                return -EBUSY;
6134        }
6135
6136        /* When the HW is in an SR-IOV capable configuration, the PF-pool
6137         * resources are distributed equally across the maximum number of
6138         * VFs. The user may request that only a subset of the max-vfs be
6139         * enabled. Based on num_vfs, redistribute the resources across
6140         * num_vfs so that each VF gets access to a larger share of the
6141         * resources. This facility is not available in BE3 FW.
6142         * On Lancer, the FW itself performs this redistribution.
6143         */
6144        if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6145                be_calculate_vf_res(adapter, adapter->num_vfs,
6146                                    &vft_res);
6147                status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6148                                                 adapter->num_vfs, &vft_res);
6149                if (status)
6150                        dev_err(&pdev->dev,
6151                                "Failed to optimize SR-IOV resources\n");
6152        }
6153
6154        status = be_get_resources(adapter);
6155        if (status)
6156                return be_cmd_status(status);
6157
6158        /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6159        rtnl_lock();
6160        status = be_update_queues(adapter);
6161        rtnl_unlock();
6162        if (status)
6163                return be_cmd_status(status);
6164
6165        if (adapter->num_vfs)
6166                status = be_vf_setup(adapter);
6167
6168        if (!status)
6169                return adapter->num_vfs;
6170
6171        return 0;
6172}
6173
6174static const struct pci_error_handlers be_eeh_handlers = {
6175        .error_detected = be_eeh_err_detected,
6176        .slot_reset = be_eeh_reset,
6177        .resume = be_eeh_resume,
6178};
6179
6180static struct pci_driver be_driver = {
6181        .name = DRV_NAME,
6182        .id_table = be_dev_ids,
6183        .probe = be_probe,
6184        .remove = be_remove,
6185        .suspend = be_suspend,
6186        .resume = be_pci_resume,
6187        .shutdown = be_shutdown,
6188        .sriov_configure = be_pci_sriov_configure,
6189        .err_handler = &be_eeh_handlers
6190};
6191
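    /* Module init: validate the rx_frag_size parameter, create the shared
     * command and error-recovery workqueues, then register the PCI driver.
     */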
6192static int __init be_init_module(void)
6193{
6194        int status;
6195
6196        if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6197            rx_frag_size != 2048) {
6198                pr_warn(DRV_NAME
6199                        " : Module param rx_frag_size must be 2048/4096/8192."
6200                        " Using 2048\n");
6201                rx_frag_size = 2048;
6202        }
6203
6204        if (num_vfs > 0) {
6205                pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6206                pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6207        }
6208
6209        be_wq = create_singlethread_workqueue("be_wq");
6210        if (!be_wq) {
6211                pr_warn(DRV_NAME " : workqueue creation failed\n");
6212                return -ENOMEM;
6213        }
6214
6215        be_err_recovery_workq =
6216                create_singlethread_workqueue("be_err_recover");
6217        if (!be_err_recovery_workq)
6218                pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6219
6220        status = pci_register_driver(&be_driver);
6221        if (status) {
6222                destroy_workqueue(be_wq);
6223                be_destroy_err_recovery_workq();
6224        }
6225        return status;
6226}
6227module_init(be_init_module);
6228
6229static void __exit be_exit_module(void)
6230{
6231        pci_unregister_driver(&be_driver);
6232
6233        be_destroy_err_recovery_workq();
6234
6235        if (be_wq)
6236                destroy_workqueue(be_wq);
6237}
6238module_exit(be_exit_module);
6239