linux/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
   1/*
   2 * This file is part of the Chelsio T4 Ethernet driver for Linux.
   3 *
   4 * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
   5 *
   6 * This software is available to you under a choice of one of two
   7 * licenses.  You may choose to be licensed under the terms of the GNU
   8 * General Public License (GPL) Version 2, available from the file
   9 * COPYING in the main directory of this source tree, or the
  10 * OpenIB.org BSD license below:
  11 *
  12 *     Redistribution and use in source and binary forms, with or
  13 *     without modification, are permitted provided that the following
  14 *     conditions are met:
  15 *
  16 *      - Redistributions of source code must retain the above
  17 *        copyright notice, this list of conditions and the following
  18 *        disclaimer.
  19 *
  20 *      - Redistributions in binary form must reproduce the above
  21 *        copyright notice, this list of conditions and the following
  22 *        disclaimer in the documentation and/or other materials
  23 *        provided with the distribution.
  24 *
  25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  32 * SOFTWARE.
  33 */
  34
  35#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  36
  37#include <linux/bitmap.h>
  38#include <linux/crc32.h>
  39#include <linux/ctype.h>
  40#include <linux/debugfs.h>
  41#include <linux/err.h>
  42#include <linux/etherdevice.h>
  43#include <linux/firmware.h>
  44#include <linux/if.h>
  45#include <linux/if_vlan.h>
  46#include <linux/init.h>
  47#include <linux/log2.h>
  48#include <linux/mdio.h>
  49#include <linux/module.h>
  50#include <linux/moduleparam.h>
  51#include <linux/mutex.h>
  52#include <linux/netdevice.h>
  53#include <linux/pci.h>
  54#include <linux/aer.h>
  55#include <linux/rtnetlink.h>
  56#include <linux/sched.h>
  57#include <linux/seq_file.h>
  58#include <linux/sockios.h>
  59#include <linux/vmalloc.h>
  60#include <linux/workqueue.h>
  61#include <net/neighbour.h>
  62#include <net/netevent.h>
  63#include <net/addrconf.h>
  64#include <net/bonding.h>
  65#include <linux/uaccess.h>
  66#include <linux/crash_dump.h>
  67#include <net/udp_tunnel.h>
  68#include <net/xfrm.h>
  69
  70#include "cxgb4.h"
  71#include "cxgb4_filter.h"
  72#include "t4_regs.h"
  73#include "t4_values.h"
  74#include "t4_msg.h"
  75#include "t4fw_api.h"
  76#include "t4fw_version.h"
  77#include "cxgb4_dcb.h"
  78#include "srq.h"
  79#include "cxgb4_debugfs.h"
  80#include "clip_tbl.h"
  81#include "l2t.h"
  82#include "smt.h"
  83#include "sched.h"
  84#include "cxgb4_tc_u32.h"
  85#include "cxgb4_tc_flower.h"
  86#include "cxgb4_tc_mqprio.h"
  87#include "cxgb4_tc_matchall.h"
  88#include "cxgb4_ptp.h"
  89#include "cxgb4_cudbg.h"
  90
  91char cxgb4_driver_name[] = KBUILD_MODNAME;
  92
  93#define DRV_DESC "Chelsio T4/T5/T6 Network Driver"
  94
  95#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
  96                         NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
  97                         NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
  98
  99/* Macros needed to support the PCI Device ID Table ...
 100 */
 101#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
 102        static const struct pci_device_id cxgb4_pci_tbl[] = {
 103#define CXGB4_UNIFIED_PF 0x4
 104
 105#define CH_PCI_DEVICE_ID_FUNCTION CXGB4_UNIFIED_PF
 106
 107/* Include PCI Device IDs for both PF4 and PF0-3 so our PCI probe() routine is
 108 * called for both.
 109 */
 110#define CH_PCI_DEVICE_ID_FUNCTION2 0x0
 111
 112#define CH_PCI_ID_TABLE_ENTRY(devid) \
 113                {PCI_VDEVICE(CHELSIO, (devid)), CXGB4_UNIFIED_PF}
 114
 115#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END \
 116                { 0, } \
 117        }
 118
 119#include "t4_pci_id_tbl.h"
 120
 121#define FW4_FNAME "cxgb4/t4fw.bin"
 122#define FW5_FNAME "cxgb4/t5fw.bin"
 123#define FW6_FNAME "cxgb4/t6fw.bin"
 124#define FW4_CFNAME "cxgb4/t4-config.txt"
 125#define FW5_CFNAME "cxgb4/t5-config.txt"
 126#define FW6_CFNAME "cxgb4/t6-config.txt"
 127#define PHY_AQ1202_FIRMWARE "cxgb4/aq1202_fw.cld"
 128#define PHY_BCM84834_FIRMWARE "cxgb4/bcm8483.bin"
 129#define PHY_AQ1202_DEVICEID 0x4409
 130#define PHY_BCM84834_DEVICEID 0x4486
 131
 132MODULE_DESCRIPTION(DRV_DESC);
 133MODULE_AUTHOR("Chelsio Communications");
 134MODULE_LICENSE("Dual BSD/GPL");
 135MODULE_DEVICE_TABLE(pci, cxgb4_pci_tbl);
 136MODULE_FIRMWARE(FW4_FNAME);
 137MODULE_FIRMWARE(FW5_FNAME);
 138MODULE_FIRMWARE(FW6_FNAME);
 139
 140/*
 141 * The driver uses the best interrupt scheme available on a platform in the
 142 * order MSI-X, MSI, legacy INTx interrupts.  This parameter determines which
 143 * of these schemes the driver may consider as follows:
 144 *
 145 * msi = 2: choose from among all three options
 146 * msi = 1: only consider MSI and INTx interrupts
 147 * msi = 0: force INTx interrupts
 148 */
 149static int msi = 2;
 150
 151module_param(msi, int, 0644);
 152MODULE_PARM_DESC(msi, "whether to use INTx (0), MSI (1) or MSI-X (2)");
 153
 154/*
 155 * Normally we tell the chip to deliver Ingress Packets into our DMA buffers
 156 * offset by 2 bytes in order to have the IP headers line up on 4-byte
 157 * boundaries.  This is a requirement for many architectures which will throw
 158 * a machine check fault if an attempt is made to access one of the 4-byte IP
 159 * header fields on a non-4-byte boundary.  And it's a major performance issue
 160 * even on some architectures which allow it like some implementations of the
 161 * x86 ISA.  However, some architectures don't mind this and for some very
 162 * edge-case performance sensitive applications (like forwarding large volumes
 163 * of small packets), setting this DMA offset to 0 will decrease the number of
 164 * PCI-E Bus transfers enough to measurably affect performance.
 165 */
 166static int rx_dma_offset = 2;
 167
  168/* TX Queue select used to determine which algorithm to use for selecting a
  169 * TX queue. Select between the kernel-provided function (select_queue=0) or
  170 * the driver's cxgb_select_queue() function (select_queue=1).
 171 *
 172 * Default: select_queue=0
 173 */
 174static int select_queue;
 175module_param(select_queue, int, 0644);
 176MODULE_PARM_DESC(select_queue,
 177                 "Select between kernel provided method of selecting or driver method of selecting TX queue. Default is kernel method.");
 178
 179static struct dentry *cxgb4_debugfs_root;
 180
 181LIST_HEAD(adapter_list);
 182DEFINE_MUTEX(uld_mutex);
 183
 184static int cfg_queues(struct adapter *adap);
 185
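     /* Log the current link state (speed, duplex and pause settings) of a
      * port's net device.
      */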
 186static void link_report(struct net_device *dev)
 187{
 188        if (!netif_carrier_ok(dev))
 189                netdev_info(dev, "link down\n");
 190        else {
 191                static const char *fc[] = { "no", "Rx", "Tx", "Tx/Rx" };
 192
 193                const char *s;
 194                const struct port_info *p = netdev_priv(dev);
 195
 196                switch (p->link_cfg.speed) {
 197                case 100:
 198                        s = "100Mbps";
 199                        break;
 200                case 1000:
 201                        s = "1Gbps";
 202                        break;
 203                case 10000:
 204                        s = "10Gbps";
 205                        break;
 206                case 25000:
 207                        s = "25Gbps";
 208                        break;
 209                case 40000:
 210                        s = "40Gbps";
 211                        break;
 212                case 50000:
 213                        s = "50Gbps";
 214                        break;
 215                case 100000:
 216                        s = "100Gbps";
 217                        break;
 218                default:
 219                        pr_info("%s: unsupported speed: %d\n",
 220                                dev->name, p->link_cfg.speed);
 221                        return;
 222                }
 223
 224                netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s,
 225                            fc[p->link_cfg.fc]);
 226        }
 227}
 228
 229#ifdef CONFIG_CHELSIO_T4_DCB
 230/* Set up/tear down Data Center Bridging Priority mapping for a net device. */
 231static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable)
 232{
 233        struct port_info *pi = netdev_priv(dev);
 234        struct adapter *adap = pi->adapter;
 235        struct sge_eth_txq *txq = &adap->sge.ethtxq[pi->first_qset];
 236        int i;
 237
 238        /* We use a simple mapping of Port TX Queue Index to DCB
 239         * Priority when we're enabling DCB.
 240         */
 241        for (i = 0; i < pi->nqsets; i++, txq++) {
 242                u32 name, value;
 243                int err;
 244
 245                name = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
 246                        FW_PARAMS_PARAM_X_V(
 247                                FW_PARAMS_PARAM_DMAQ_EQ_DCBPRIO_ETH) |
 248                        FW_PARAMS_PARAM_YZ_V(txq->q.cntxt_id));
 249                value = enable ? i : 0xffffffff;
 250
 251                /* Since we can be called while atomic (from "interrupt
  252         * level") we need to issue the Set Parameters Command
 253                 * without sleeping (timeout < 0).
 254                 */
 255                err = t4_set_params_timeout(adap, adap->mbox, adap->pf, 0, 1,
 256                                            &name, &value,
 257                                            -FW_CMD_MAX_TIMEOUT);
 258
 259                if (err)
 260                        dev_err(adap->pdev_dev,
 261                                "Can't %s DCB Priority on port %d, TX Queue %d: err=%d\n",
 262                                enable ? "set" : "unset", pi->port_id, i, -err);
 263                else
 264                        txq->dcb_prio = enable ? value : 0;
 265        }
 266}
 267
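     /* Return whether Data Center Bridging is currently enabled and
      * negotiated (or host-managed) on this port.
      */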
 268int cxgb4_dcb_enabled(const struct net_device *dev)
 269{
 270        struct port_info *pi = netdev_priv(dev);
 271
 272        if (!pi->dcb.enabled)
 273                return 0;
 274
 275        return ((pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED) ||
 276                (pi->dcb.state == CXGB4_DCB_STATE_HOST));
 277}
 278#endif /* CONFIG_CHELSIO_T4_DCB */
 279
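     /* Update the carrier state of a port's net device and log the change
      * when the hardware reports a new link status.
      */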
 280void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat)
 281{
 282        struct net_device *dev = adapter->port[port_id];
 283
 284        /* Skip changes from disabled ports. */
 285        if (netif_running(dev) && link_stat != netif_carrier_ok(dev)) {
 286                if (link_stat)
 287                        netif_carrier_on(dev);
 288                else {
 289#ifdef CONFIG_CHELSIO_T4_DCB
 290                        if (cxgb4_dcb_enabled(dev)) {
 291                                cxgb4_dcb_reset(dev);
 292                                dcb_tx_queue_prio_enable(dev, false);
 293                        }
 294#endif /* CONFIG_CHELSIO_T4_DCB */
 295                        netif_carrier_off(dev);
 296                }
 297
 298                link_report(dev);
 299        }
 300}
 301
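     /* Log insertion/removal of a port's transceiver module and note whether
      * the link parameters need to be reapplied.
      */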
 302void t4_os_portmod_changed(struct adapter *adap, int port_id)
 303{
 304        static const char *mod_str[] = {
 305                NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
 306        };
 307
 308        struct net_device *dev = adap->port[port_id];
 309        struct port_info *pi = netdev_priv(dev);
 310
 311        if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
 312                netdev_info(dev, "port module unplugged\n");
 313        else if (pi->mod_type < ARRAY_SIZE(mod_str))
 314                netdev_info(dev, "%s module inserted\n", mod_str[pi->mod_type]);
 315        else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
 316                netdev_info(dev, "%s: unsupported port module inserted\n",
 317                            dev->name);
 318        else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
 319                netdev_info(dev, "%s: unknown port module inserted\n",
 320                            dev->name);
 321        else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
 322                netdev_info(dev, "%s: transceiver module error\n", dev->name);
 323        else
 324                netdev_info(dev, "%s: unknown module type %d inserted\n",
 325                            dev->name, pi->mod_type);
 326
 327        /* If the interface is running, then we'll need any "sticky" Link
 328         * Parameters redone with a new Transceiver Module.
 329         */
 330        pi->link_cfg.redo_l1cfg = netif_running(dev);
 331}
 332
 333int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */
 334module_param(dbfifo_int_thresh, int, 0644);
 335MODULE_PARM_DESC(dbfifo_int_thresh, "doorbell fifo interrupt threshold");
 336
 337/*
 338 * usecs to sleep while draining the dbfifo
 339 */
 340static int dbfifo_drain_delay = 1000;
 341module_param(dbfifo_drain_delay, int, 0644);
 342MODULE_PARM_DESC(dbfifo_drain_delay,
 343                 "usecs to sleep while draining the dbfifo");
 344
 345static inline int cxgb4_set_addr_hash(struct port_info *pi)
 346{
 347        struct adapter *adap = pi->adapter;
 348        u64 vec = 0;
 349        bool ucast = false;
 350        struct hash_mac_addr *entry;
 351
 352        /* Calculate the hash vector for the updated list and program it */
 353        list_for_each_entry(entry, &adap->mac_hlist, list) {
 354                ucast |= is_unicast_ether_addr(entry->addr);
 355                vec |= (1ULL << hash_mac_addr(entry->addr));
 356        }
 357        return t4_set_addr_hash(adap, adap->mbox, pi->viid, ucast,
 358                                vec, false);
 359}
 360
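     /* __dev_uc_sync()/__dev_mc_sync() callback: add @mac_addr as an exact
      * MPS filter for this VI, falling back to the hash filter region when
      * no exact-match entries are left.
      */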
 361static int cxgb4_mac_sync(struct net_device *netdev, const u8 *mac_addr)
 362{
 363        struct port_info *pi = netdev_priv(netdev);
 364        struct adapter *adap = pi->adapter;
 365        int ret;
 366        u64 mhash = 0;
 367        u64 uhash = 0;
  368        /* idx stores the index of allocated filters; its size should be
  369         * modified based on the number of MAC addresses that we
  370         * allocate filters for
 371         */
 372
 373        u16 idx[1] = {};
 374        bool free = false;
 375        bool ucast = is_unicast_ether_addr(mac_addr);
 376        const u8 *maclist[1] = {mac_addr};
 377        struct hash_mac_addr *new_entry;
 378
 379        ret = cxgb4_alloc_mac_filt(adap, pi->viid, free, 1, maclist,
 380                                   idx, ucast ? &uhash : &mhash, false);
 381        if (ret < 0)
 382                goto out;
  383        /* if hash != 0, then add the addr to the hash addr list so
  384         * that in the end we can calculate the hash for the whole
  385         * list and program it
 386         */
 387        if (uhash || mhash) {
 388                new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
 389                if (!new_entry)
 390                        return -ENOMEM;
 391                ether_addr_copy(new_entry->addr, mac_addr);
 392                list_add_tail(&new_entry->list, &adap->mac_hlist);
 393                ret = cxgb4_set_addr_hash(pi);
 394        }
 395out:
 396        return ret < 0 ? ret : 0;
 397}
 398
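     /* __dev_uc_sync()/__dev_mc_sync() callback: remove @mac_addr, either by
      * dropping it from the hash address list (and reprogramming the hash
      * vector) or by freeing its exact-match MPS filter.
      */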
 399static int cxgb4_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
 400{
 401        struct port_info *pi = netdev_priv(netdev);
 402        struct adapter *adap = pi->adapter;
 403        int ret;
 404        const u8 *maclist[1] = {mac_addr};
 405        struct hash_mac_addr *entry, *tmp;
 406
 407        /* If the MAC address to be removed is in the hash addr
 408         * list, delete it from the list and update hash vector
 409         */
 410        list_for_each_entry_safe(entry, tmp, &adap->mac_hlist, list) {
 411                if (ether_addr_equal(entry->addr, mac_addr)) {
 412                        list_del(&entry->list);
 413                        kfree(entry);
 414                        return cxgb4_set_addr_hash(pi);
 415                }
 416        }
 417
 418        ret = cxgb4_free_mac_filt(adap, pi->viid, 1, maclist, false);
 419        return ret < 0 ? -EINVAL : 0;
 420}
 421
 422/*
  423 * Set Rx properties of a port, such as promiscuity, address filters, and MTU.
 424 * If @mtu is -1 it is left unchanged.
 425 */
 426static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
 427{
 428        struct port_info *pi = netdev_priv(dev);
 429        struct adapter *adapter = pi->adapter;
 430
 431        __dev_uc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync);
 432        __dev_mc_sync(dev, cxgb4_mac_sync, cxgb4_mac_unsync);
 433
 434        return t4_set_rxmode(adapter, adapter->mbox, pi->viid, mtu,
 435                             (dev->flags & IFF_PROMISC) ? 1 : 0,
 436                             (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, -1,
 437                             sleep_ok);
 438}
 439
 440/**
 441 *      cxgb4_change_mac - Update match filter for a MAC address.
 442 *      @pi: the port_info
 443 *      @viid: the VI id
 444 *      @tcam_idx: TCAM index of existing filter for old value of MAC address,
 445 *                 or -1
 446 *      @addr: the new MAC address value
 447 *      @persist: whether a new MAC allocation should be persistent
  448 *      @smt_idx: the destination to store the new SMT index
 449 *
 450 *      Modifies an MPS filter and sets it to the new MAC address if
 451 *      @tcam_idx >= 0, or adds the MAC address to a new filter if
 452 *      @tcam_idx < 0. In the latter case the address is added persistently
 453 *      if @persist is %true.
  454 *      Addresses are programmed to the hash region if the TCAM runs out of entries.
 455 *
 456 */
 457int cxgb4_change_mac(struct port_info *pi, unsigned int viid,
 458                     int *tcam_idx, const u8 *addr, bool persist,
 459                     u8 *smt_idx)
 460{
 461        struct adapter *adapter = pi->adapter;
 462        struct hash_mac_addr *entry, *new_entry;
 463        int ret;
 464
 465        ret = t4_change_mac(adapter, adapter->mbox, viid,
 466                            *tcam_idx, addr, persist, smt_idx);
  467        /* We ran out of TCAM entries. Try programming the hash region. */
 468        if (ret == -ENOMEM) {
 469                /* If the MAC address to be updated is in the hash addr
 470                 * list, update it from the list
 471                 */
 472                list_for_each_entry(entry, &adapter->mac_hlist, list) {
 473                        if (entry->iface_mac) {
 474                                ether_addr_copy(entry->addr, addr);
 475                                goto set_hash;
 476                        }
 477                }
 478                new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
 479                if (!new_entry)
 480                        return -ENOMEM;
 481                ether_addr_copy(new_entry->addr, addr);
 482                new_entry->iface_mac = true;
 483                list_add_tail(&new_entry->list, &adapter->mac_hlist);
 484set_hash:
 485                ret = cxgb4_set_addr_hash(pi);
 486        } else if (ret >= 0) {
 487                *tcam_idx = ret;
 488                ret = 0;
 489        }
 490
 491        return ret;
 492}
 493
 494/*
 495 *      link_start - enable a port
 496 *      @dev: the port to enable
 497 *
 498 *      Performs the MAC and PHY actions needed to enable a port.
 499 */
 500static int link_start(struct net_device *dev)
 501{
 502        int ret;
 503        struct port_info *pi = netdev_priv(dev);
 504        unsigned int mb = pi->adapter->pf;
 505
 506        /*
  507         * We do not set address filters and promiscuity here; the stack does
  508         * that step explicitly.
 509         */
 510        ret = t4_set_rxmode(pi->adapter, mb, pi->viid, dev->mtu, -1, -1, -1,
 511                            !!(dev->features & NETIF_F_HW_VLAN_CTAG_RX), true);
 512        if (ret == 0)
 513                ret = cxgb4_update_mac_filt(pi, pi->viid, &pi->xact_addr_filt,
 514                                            dev->dev_addr, true, &pi->smt_idx);
 515        if (ret == 0)
 516                ret = t4_link_l1cfg(pi->adapter, mb, pi->tx_chan,
 517                                    &pi->link_cfg);
 518        if (ret == 0) {
 519                local_bh_disable();
 520                ret = t4_enable_pi_params(pi->adapter, mb, pi, true,
 521                                          true, CXGB4_DCB_ENABLED);
 522                local_bh_enable();
 523        }
 524
 525        return ret;
 526}
 527
 528#ifdef CONFIG_CHELSIO_T4_DCB
 529/* Handle a Data Center Bridging update message from the firmware. */
 530static void dcb_rpl(struct adapter *adap, const struct fw_port_cmd *pcmd)
 531{
 532        int port = FW_PORT_CMD_PORTID_G(ntohl(pcmd->op_to_portid));
 533        struct net_device *dev = adap->port[adap->chan_map[port]];
 534        int old_dcb_enabled = cxgb4_dcb_enabled(dev);
 535        int new_dcb_enabled;
 536
 537        cxgb4_dcb_handle_fw_update(adap, pcmd);
 538        new_dcb_enabled = cxgb4_dcb_enabled(dev);
 539
 540        /* If the DCB has become enabled or disabled on the port then we're
 541         * going to need to set up/tear down DCB Priority parameters for the
 542         * TX Queues associated with the port.
 543         */
 544        if (new_dcb_enabled != old_dcb_enabled)
 545                dcb_tx_queue_prio_enable(dev, new_dcb_enabled);
 546}
 547#endif /* CONFIG_CHELSIO_T4_DCB */
 548
 549/* Response queue handler for the FW event queue.
 550 */
 551static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
 552                          const struct pkt_gl *gl)
 553{
 554        u8 opcode = ((const struct rss_header *)rsp)->opcode;
 555
 556        rsp++;                                          /* skip RSS header */
 557
 558        /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
 559         */
 560        if (unlikely(opcode == CPL_FW4_MSG &&
 561           ((const struct cpl_fw4_msg *)rsp)->type == FW_TYPE_RSSCPL)) {
 562                rsp++;
 563                opcode = ((const struct rss_header *)rsp)->opcode;
 564                rsp++;
 565                if (opcode != CPL_SGE_EGR_UPDATE) {
 566                        dev_err(q->adap->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n"
 567                                , opcode);
 568                        goto out;
 569                }
 570        }
 571
 572        if (likely(opcode == CPL_SGE_EGR_UPDATE)) {
 573                const struct cpl_sge_egr_update *p = (void *)rsp;
 574                unsigned int qid = EGR_QID_G(ntohl(p->opcode_qid));
 575                struct sge_txq *txq;
 576
 577                txq = q->adap->sge.egr_map[qid - q->adap->sge.egr_start];
 578                txq->restarts++;
 579                if (txq->q_type == CXGB4_TXQ_ETH) {
 580                        struct sge_eth_txq *eq;
 581
 582                        eq = container_of(txq, struct sge_eth_txq, q);
 583                        t4_sge_eth_txq_egress_update(q->adap, eq, -1);
 584                } else {
 585                        struct sge_uld_txq *oq;
 586
 587                        oq = container_of(txq, struct sge_uld_txq, q);
 588                        tasklet_schedule(&oq->qresume_tsk);
 589                }
 590        } else if (opcode == CPL_FW6_MSG || opcode == CPL_FW4_MSG) {
 591                const struct cpl_fw6_msg *p = (void *)rsp;
 592
 593#ifdef CONFIG_CHELSIO_T4_DCB
 594                const struct fw_port_cmd *pcmd = (const void *)p->data;
 595                unsigned int cmd = FW_CMD_OP_G(ntohl(pcmd->op_to_portid));
 596                unsigned int action =
 597                        FW_PORT_CMD_ACTION_G(ntohl(pcmd->action_to_len16));
 598
 599                if (cmd == FW_PORT_CMD &&
 600                    (action == FW_PORT_ACTION_GET_PORT_INFO ||
 601                     action == FW_PORT_ACTION_GET_PORT_INFO32)) {
 602                        int port = FW_PORT_CMD_PORTID_G(
 603                                        be32_to_cpu(pcmd->op_to_portid));
 604                        struct net_device *dev;
 605                        int dcbxdis, state_input;
 606
 607                        dev = q->adap->port[q->adap->chan_map[port]];
 608                        dcbxdis = (action == FW_PORT_ACTION_GET_PORT_INFO
 609                          ? !!(pcmd->u.info.dcbxdis_pkd & FW_PORT_CMD_DCBXDIS_F)
 610                          : !!(be32_to_cpu(pcmd->u.info32.lstatus32_to_cbllen32)
 611                               & FW_PORT_CMD_DCBXDIS32_F));
 612                        state_input = (dcbxdis
 613                                       ? CXGB4_DCB_INPUT_FW_DISABLED
 614                                       : CXGB4_DCB_INPUT_FW_ENABLED);
 615
 616                        cxgb4_dcb_state_fsm(dev, state_input);
 617                }
 618
 619                if (cmd == FW_PORT_CMD &&
 620                    action == FW_PORT_ACTION_L2_DCB_CFG)
 621                        dcb_rpl(q->adap, pcmd);
 622                else
 623#endif
 624                        if (p->type == 0)
 625                                t4_handle_fw_rpl(q->adap, p->data);
 626        } else if (opcode == CPL_L2T_WRITE_RPL) {
 627                const struct cpl_l2t_write_rpl *p = (void *)rsp;
 628
 629                do_l2t_write_rpl(q->adap, p);
 630        } else if (opcode == CPL_SMT_WRITE_RPL) {
 631                const struct cpl_smt_write_rpl *p = (void *)rsp;
 632
 633                do_smt_write_rpl(q->adap, p);
 634        } else if (opcode == CPL_SET_TCB_RPL) {
 635                const struct cpl_set_tcb_rpl *p = (void *)rsp;
 636
 637                filter_rpl(q->adap, p);
 638        } else if (opcode == CPL_ACT_OPEN_RPL) {
 639                const struct cpl_act_open_rpl *p = (void *)rsp;
 640
 641                hash_filter_rpl(q->adap, p);
 642        } else if (opcode == CPL_ABORT_RPL_RSS) {
 643                const struct cpl_abort_rpl_rss *p = (void *)rsp;
 644
 645                hash_del_filter_rpl(q->adap, p);
 646        } else if (opcode == CPL_SRQ_TABLE_RPL) {
 647                const struct cpl_srq_table_rpl *p = (void *)rsp;
 648
 649                do_srq_table_rpl(q->adap, p);
 650        } else
 651                dev_err(q->adap->pdev_dev,
 652                        "unexpected CPL %#x on FW event queue\n", opcode);
 653out:
 654        return 0;
 655}
 656
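     /* Disable whichever of MSI-X or MSI is currently in use on the adapter. */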
 657static void disable_msi(struct adapter *adapter)
 658{
 659        if (adapter->flags & CXGB4_USING_MSIX) {
 660                pci_disable_msix(adapter->pdev);
 661                adapter->flags &= ~CXGB4_USING_MSIX;
 662        } else if (adapter->flags & CXGB4_USING_MSI) {
 663                pci_disable_msi(adapter->pdev);
 664                adapter->flags &= ~CXGB4_USING_MSI;
 665        }
 666}
 667
 668/*
 669 * Interrupt handler for non-data events used with MSI-X.
 670 */
 671static irqreturn_t t4_nondata_intr(int irq, void *cookie)
 672{
 673        struct adapter *adap = cookie;
 674        u32 v = t4_read_reg(adap, MYPF_REG(PL_PF_INT_CAUSE_A));
 675
 676        if (v & PFSW_F) {
 677                adap->swintr = 1;
 678                t4_write_reg(adap, MYPF_REG(PL_PF_INT_CAUSE_A), v);
 679        }
 680        if (adap->flags & CXGB4_MASTER_PF)
 681                t4_slow_intr_handler(adap);
 682        return IRQ_HANDLED;
 683}
 684
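     /* Allocate an affinity mask for MSI-X vector @vec and point it at a CPU
      * chosen by spreading @idx across the device's local NUMA node.
      */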
 685int cxgb4_set_msix_aff(struct adapter *adap, unsigned short vec,
 686                       cpumask_var_t *aff_mask, int idx)
 687{
 688        int rv;
 689
 690        if (!zalloc_cpumask_var(aff_mask, GFP_KERNEL)) {
 691                dev_err(adap->pdev_dev, "alloc_cpumask_var failed\n");
 692                return -ENOMEM;
 693        }
 694
 695        cpumask_set_cpu(cpumask_local_spread(idx, dev_to_node(adap->pdev_dev)),
 696                        *aff_mask);
 697
 698        rv = irq_set_affinity_hint(vec, *aff_mask);
 699        if (rv)
 700                dev_warn(adap->pdev_dev,
 701                         "irq_set_affinity_hint %u failed %d\n",
 702                         vec, rv);
 703
 704        return 0;
 705}
 706
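     /* Undo cxgb4_set_msix_aff(): drop the affinity hint and free the mask. */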
 707void cxgb4_clear_msix_aff(unsigned short vec, cpumask_var_t aff_mask)
 708{
 709        irq_set_affinity_hint(vec, NULL);
 710        free_cpumask_var(aff_mask);
 711}
 712
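     /* Request the firmware event queue IRQ and one IRQ per Ethernet Rx
      * queue, setting CPU affinity for each Rx queue vector.  Unwinds all
      * requested IRQs on failure.
      */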
 713static int request_msix_queue_irqs(struct adapter *adap)
 714{
 715        struct sge *s = &adap->sge;
 716        struct msix_info *minfo;
 717        int err, ethqidx;
 718
 719        if (s->fwevtq_msix_idx < 0)
 720                return -ENOMEM;
 721
 722        err = request_irq(adap->msix_info[s->fwevtq_msix_idx].vec,
 723                          t4_sge_intr_msix, 0,
 724                          adap->msix_info[s->fwevtq_msix_idx].desc,
 725                          &s->fw_evtq);
 726        if (err)
 727                return err;
 728
 729        for_each_ethrxq(s, ethqidx) {
 730                minfo = s->ethrxq[ethqidx].msix;
 731                err = request_irq(minfo->vec,
 732                                  t4_sge_intr_msix, 0,
 733                                  minfo->desc,
 734                                  &s->ethrxq[ethqidx].rspq);
 735                if (err)
 736                        goto unwind;
 737
 738                cxgb4_set_msix_aff(adap, minfo->vec,
 739                                   &minfo->aff_mask, ethqidx);
 740        }
 741        return 0;
 742
 743unwind:
 744        while (--ethqidx >= 0) {
 745                minfo = s->ethrxq[ethqidx].msix;
 746                cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask);
 747                free_irq(minfo->vec, &s->ethrxq[ethqidx].rspq);
 748        }
 749        free_irq(adap->msix_info[s->fwevtq_msix_idx].vec, &s->fw_evtq);
 750        return err;
 751}
 752
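     /* Release the firmware event queue IRQ and all Ethernet Rx queue IRQs
      * together with their affinity masks.
      */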
 753static void free_msix_queue_irqs(struct adapter *adap)
 754{
 755        struct sge *s = &adap->sge;
 756        struct msix_info *minfo;
 757        int i;
 758
 759        free_irq(adap->msix_info[s->fwevtq_msix_idx].vec, &s->fw_evtq);
 760        for_each_ethrxq(s, i) {
 761                minfo = s->ethrxq[i].msix;
 762                cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask);
 763                free_irq(minfo->vec, &s->ethrxq[i].rspq);
 764        }
 765}
 766
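     /* Query whether the firmware can place pagepods in eDRAM and, if it
      * reports support, enable the feature.
      */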
 767static int setup_ppod_edram(struct adapter *adap)
 768{
 769        unsigned int param, val;
 770        int ret;
 771
  772        /* The driver sends a FW_PARAMS_PARAM_DEV_PPOD_EDRAM read command to
  773         * check whether the firmware supports the ppod edram feature. If
  774         * the firmware returns 1, the driver can enable the feature by
  775         * sending a FW_PARAMS_PARAM_DEV_PPOD_EDRAM write command with
  776         * value 1.
 777         */
 778        param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
 779                FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_PPOD_EDRAM));
 780
 781        ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
 782        if (ret < 0) {
 783                dev_warn(adap->pdev_dev,
 784                         "querying PPOD_EDRAM support failed: %d\n",
 785                         ret);
 786                return -1;
 787        }
 788
 789        if (val != 1)
 790                return -1;
 791
 792        ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
 793        if (ret < 0) {
 794                dev_err(adap->pdev_dev,
 795                        "setting PPOD_EDRAM failed: %d\n", ret);
 796                return -1;
 797        }
 798        return 0;
 799}
 800
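     /* Ask the firmware to enable the high-priority filter region; older
      * firmware that lacks support simply rejects the request.
      */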
 801static void adap_config_hpfilter(struct adapter *adapter)
 802{
 803        u32 param, val = 0;
 804        int ret;
 805
  806        /* Enable the HP filter region. Older firmware will fail this
  807         * request, and that is fine.
 808         */
 809        param = FW_PARAM_DEV(HPFILTER_REGION_SUPPORT);
 810        ret = t4_set_params(adapter, adapter->mbox, adapter->pf, 0,
 811                            1, &param, &val);
 812
  813        /* An error means the FW doesn't know about HP filter support;
  814         * it's not a problem, so don't return an error.
 815         */
 816        if (ret < 0)
 817                dev_err(adapter->pdev_dev,
 818                        "HP filter region isn't supported by FW\n");
 819}
 820
 821/**
 822 *      cxgb4_write_rss - write the RSS table for a given port
 823 *      @pi: the port
 824 *      @queues: array of queue indices for RSS
 825 *
 826 *      Sets up the portion of the HW RSS table for the port's VI to distribute
 827 *      packets to the Rx queues in @queues.
  828 *      Should never be called before setting up the SGE Ethernet Rx queues.
 829 */
 830int cxgb4_write_rss(const struct port_info *pi, const u16 *queues)
 831{
 832        u16 *rss;
 833        int i, err;
 834        struct adapter *adapter = pi->adapter;
 835        const struct sge_eth_rxq *rxq;
 836
 837        rxq = &adapter->sge.ethrxq[pi->first_qset];
 838        rss = kmalloc_array(pi->rss_size, sizeof(u16), GFP_KERNEL);
 839        if (!rss)
 840                return -ENOMEM;
 841
 842        /* map the queue indices to queue ids */
 843        for (i = 0; i < pi->rss_size; i++, queues++)
 844                rss[i] = rxq[*queues].rspq.abs_id;
 845
 846        err = t4_config_rss_range(adapter, adapter->pf, pi->viid, 0,
 847                                  pi->rss_size, rss, pi->rss_size);
 848        /* If Tunnel All Lookup isn't specified in the global RSS
 849         * Configuration, then we need to specify a default Ingress
 850         * Queue for any ingress packets which aren't hashed.  We'll
 851         * use our first ingress queue ...
 852         */
 853        if (!err)
 854                err = t4_config_vi_rss(adapter, adapter->mbox, pi->viid,
 855                                       FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN_F |
 856                                       FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN_F |
 857                                       FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN_F |
 858                                       FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN_F |
 859                                       FW_RSS_VI_CONFIG_CMD_UDPEN_F,
 860                                       rss[0]);
 861        kfree(rss);
 862        return err;
 863}
 864
 865/**
 866 *      setup_rss - configure RSS
 867 *      @adap: the adapter
 868 *
 869 *      Sets up RSS for each port.
 870 */
 871static int setup_rss(struct adapter *adap)
 872{
 873        int i, j, err;
 874
 875        for_each_port(adap, i) {
 876                const struct port_info *pi = adap2pinfo(adap, i);
 877
 878                /* Fill default values with equal distribution */
 879                for (j = 0; j < pi->rss_size; j++)
 880                        pi->rss[j] = j % pi->nqsets;
 881
 882                err = cxgb4_write_rss(pi, pi->rss);
 883                if (err)
 884                        return err;
 885        }
 886        return 0;
 887}
 888
 889/*
 890 * Return the channel of the ingress queue with the given qid.
 891 */
 892static unsigned int rxq_to_chan(const struct sge *p, unsigned int qid)
 893{
 894        qid -= p->ingr_start;
 895        return netdev2pinfo(p->ingr_map[qid]->netdev)->tx_chan;
 896}
 897
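     /* Stop NAPI processing on a response queue. */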
 898void cxgb4_quiesce_rx(struct sge_rspq *q)
 899{
 900        if (q->handler)
 901                napi_disable(&q->napi);
 902}
 903
 904/*
 905 * Wait until all NAPI handlers are descheduled.
 906 */
 907static void quiesce_rx(struct adapter *adap)
 908{
 909        int i;
 910
 911        for (i = 0; i < adap->sge.ingr_sz; i++) {
 912                struct sge_rspq *q = adap->sge.ingr_map[i];
 913
 914                if (!q)
 915                        continue;
 916
 917                cxgb4_quiesce_rx(q);
 918        }
 919}
 920
 921/* Disable interrupt and napi handler */
 922static void disable_interrupts(struct adapter *adap)
 923{
 924        struct sge *s = &adap->sge;
 925
 926        if (adap->flags & CXGB4_FULL_INIT_DONE) {
 927                t4_intr_disable(adap);
 928                if (adap->flags & CXGB4_USING_MSIX) {
 929                        free_msix_queue_irqs(adap);
 930                        free_irq(adap->msix_info[s->nd_msix_idx].vec,
 931                                 adap);
 932                } else {
 933                        free_irq(adap->pdev->irq, adap);
 934                }
 935                quiesce_rx(adap);
 936        }
 937}
 938
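     /* Restart NAPI processing on a response queue and re-arm its interrupt
      * hold-off timer.
      */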
 939void cxgb4_enable_rx(struct adapter *adap, struct sge_rspq *q)
 940{
 941        if (q->handler)
 942                napi_enable(&q->napi);
 943
 944        /* 0-increment GTS to start the timer and enable interrupts */
 945        t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A),
 946                     SEINTARM_V(q->intr_params) |
 947                     INGRESSQID_V(q->cntxt_id));
 948}
 949
 950/*
 951 * Enable NAPI scheduling and interrupt generation for all Rx queues.
 952 */
 953static void enable_rx(struct adapter *adap)
 954{
 955        int i;
 956
 957        for (i = 0; i < adap->sge.ingr_sz; i++) {
 958                struct sge_rspq *q = adap->sge.ingr_map[i];
 959
 960                if (!q)
 961                        continue;
 962
 963                cxgb4_enable_rx(adap, q);
 964        }
 965}
 966
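     /* Reserve an MSI-X vector for the adapter's non-data (firmware and
      * error) interrupt; nothing to do when MSI-X isn't in use.
      */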
 967static int setup_non_data_intr(struct adapter *adap)
 968{
 969        int msix;
 970
 971        adap->sge.nd_msix_idx = -1;
 972        if (!(adap->flags & CXGB4_USING_MSIX))
 973                return 0;
 974
 975        /* Request MSI-X vector for non-data interrupt */
 976        msix = cxgb4_get_msix_idx_from_bmap(adap);
 977        if (msix < 0)
 978                return -ENOMEM;
 979
 980        snprintf(adap->msix_info[msix].desc,
 981                 sizeof(adap->msix_info[msix].desc),
 982                 "%s", adap->port[0]->name);
 983
 984        adap->sge.nd_msix_idx = msix;
 985        return 0;
 986}
 987
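     /* Allocate the firmware event queue and, with MSI-X, reserve a vector
      * for it; otherwise allocate the INTx/MSI interrupt queue first.
      */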
 988static int setup_fw_sge_queues(struct adapter *adap)
 989{
 990        struct sge *s = &adap->sge;
 991        int msix, err = 0;
 992
 993        bitmap_zero(s->starving_fl, s->egr_sz);
 994        bitmap_zero(s->txq_maperr, s->egr_sz);
 995
 996        if (adap->flags & CXGB4_USING_MSIX) {
 997                s->fwevtq_msix_idx = -1;
 998                msix = cxgb4_get_msix_idx_from_bmap(adap);
 999                if (msix < 0)
1000                        return -ENOMEM;
1001
1002                snprintf(adap->msix_info[msix].desc,
1003                         sizeof(adap->msix_info[msix].desc),
1004                         "%s-FWeventq", adap->port[0]->name);
1005        } else {
1006                err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
1007                                       NULL, NULL, NULL, -1);
1008                if (err)
1009                        return err;
1010                msix = -((int)s->intrq.abs_id + 1);
1011        }
1012
1013        err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
1014                               msix, NULL, fwevtq_handler, NULL, -1);
1015        if (err && msix >= 0)
1016                cxgb4_free_msix_idx_in_bmap(adap, msix);
1017
1018        s->fwevtq_msix_idx = msix;
1019        return err;
1020}
1021
1022/**
1023 *      setup_sge_queues - configure SGE Tx/Rx/response queues
1024 *      @adap: the adapter
1025 *
1026 *      Determines how many sets of SGE queues to use and initializes them.
1027 *      We support multiple queue sets per port if we have MSI-X, otherwise
1028 *      just one queue set per port.
1029 */
1030static int setup_sge_queues(struct adapter *adap)
1031{
1032        struct sge_uld_rxq_info *rxq_info = NULL;
1033        struct sge *s = &adap->sge;
1034        unsigned int cmplqid = 0;
1035        int err, i, j, msix = 0;
1036
1037        if (is_uld(adap))
1038                rxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA];
1039
1040        if (!(adap->flags & CXGB4_USING_MSIX))
1041                msix = -((int)s->intrq.abs_id + 1);
1042
1043        for_each_port(adap, i) {
1044                struct net_device *dev = adap->port[i];
1045                struct port_info *pi = netdev_priv(dev);
1046                struct sge_eth_rxq *q = &s->ethrxq[pi->first_qset];
1047                struct sge_eth_txq *t = &s->ethtxq[pi->first_qset];
1048
1049                for (j = 0; j < pi->nqsets; j++, q++) {
1050                        if (msix >= 0) {
1051                                msix = cxgb4_get_msix_idx_from_bmap(adap);
1052                                if (msix < 0) {
1053                                        err = msix;
1054                                        goto freeout;
1055                                }
1056
1057                                snprintf(adap->msix_info[msix].desc,
1058                                         sizeof(adap->msix_info[msix].desc),
1059                                         "%s-Rx%d", dev->name, j);
1060                                q->msix = &adap->msix_info[msix];
1061                        }
1062
1063                        err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
1064                                               msix, &q->fl,
1065                                               t4_ethrx_handler,
1066                                               NULL,
1067                                               t4_get_tp_ch_map(adap,
1068                                                                pi->tx_chan));
1069                        if (err)
1070                                goto freeout;
1071                        q->rspq.idx = j;
1072                        memset(&q->stats, 0, sizeof(q->stats));
1073                }
1074
1075                q = &s->ethrxq[pi->first_qset];
1076                for (j = 0; j < pi->nqsets; j++, t++, q++) {
1077                        err = t4_sge_alloc_eth_txq(adap, t, dev,
1078                                        netdev_get_tx_queue(dev, j),
1079                                        q->rspq.cntxt_id,
1080                                        !!(adap->flags & CXGB4_SGE_DBQ_TIMER));
1081                        if (err)
1082                                goto freeout;
1083                }
1084        }
1085
1086        for_each_port(adap, i) {
1087                /* Note that cmplqid below is 0 if we don't
1088                 * have RDMA queues, and that's the right value.
1089                 */
1090                if (rxq_info)
1091                        cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id;
1092
1093                err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i],
1094                                            s->fw_evtq.cntxt_id, cmplqid);
1095                if (err)
1096                        goto freeout;
1097        }
1098
1099        if (!is_t4(adap->params.chip)) {
1100                err = t4_sge_alloc_eth_txq(adap, &s->ptptxq, adap->port[0],
1101                                           netdev_get_tx_queue(adap->port[0], 0)
1102                                           , s->fw_evtq.cntxt_id, false);
1103                if (err)
1104                        goto freeout;
1105        }
1106
1107        t4_write_reg(adap, is_t4(adap->params.chip) ?
1108                                MPS_TRC_RSS_CONTROL_A :
1109                                MPS_T5_TRC_RSS_CONTROL_A,
1110                     RSSCONTROL_V(netdev2pinfo(adap->port[0])->tx_chan) |
1111                     QUEUENUMBER_V(s->ethrxq[0].rspq.abs_id));
1112        return 0;
1113freeout:
1114        dev_err(adap->pdev_dev, "Can't allocate queues, err=%d\n", -err);
1115        t4_free_sge_resources(adap);
1116        return err;
1117}
1118
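     /* ndo_select_queue handler: pick a Tx queue using the DCB priority, the
      * configured traffic classes, the recorded Rx queue, or the kernel's
      * default selection, depending on configuration.
      */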
1119static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
1120                             struct net_device *sb_dev)
1121{
1122        int txq;
1123
1124#ifdef CONFIG_CHELSIO_T4_DCB
1125        /* If a Data Center Bridging has been successfully negotiated on this
1126         * link then we'll use the skb's priority to map it to a TX Queue.
1127         * The skb's priority is determined via the VLAN Tag Priority Code
1128         * Point field.
1129         */
1130        if (cxgb4_dcb_enabled(dev) && !is_kdump_kernel()) {
1131                u16 vlan_tci;
1132                int err;
1133
1134                err = vlan_get_tag(skb, &vlan_tci);
1135                if (unlikely(err)) {
1136                        if (net_ratelimit())
1137                                netdev_warn(dev,
1138                                            "TX Packet without VLAN Tag on DCB Link\n");
1139                        txq = 0;
1140                } else {
1141                        txq = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
1142#ifdef CONFIG_CHELSIO_T4_FCOE
1143                        if (skb->protocol == htons(ETH_P_FCOE))
1144                                txq = skb->priority & 0x7;
1145#endif /* CONFIG_CHELSIO_T4_FCOE */
1146                }
1147                return txq;
1148        }
1149#endif /* CONFIG_CHELSIO_T4_DCB */
1150
1151        if (dev->num_tc) {
1152                struct port_info *pi = netdev2pinfo(dev);
1153                u8 ver, proto;
1154
1155                ver = ip_hdr(skb)->version;
1156                proto = (ver == 6) ? ipv6_hdr(skb)->nexthdr :
1157                                     ip_hdr(skb)->protocol;
1158
1159                /* Send unsupported traffic pattern to normal NIC queues. */
1160                txq = netdev_pick_tx(dev, skb, sb_dev);
1161                if (xfrm_offload(skb) || is_ptp_enabled(skb, dev) ||
1162                    skb->encapsulation ||
1163                    (proto != IPPROTO_TCP && proto != IPPROTO_UDP))
1164                        txq = txq % pi->nqsets;
1165
1166                return txq;
1167        }
1168
1169        if (select_queue) {
1170                txq = (skb_rx_queue_recorded(skb)
1171                        ? skb_get_rx_queue(skb)
1172                        : smp_processor_id());
1173
1174                while (unlikely(txq >= dev->real_num_tx_queues))
1175                        txq -= dev->real_num_tx_queues;
1176
1177                return txq;
1178        }
1179
1180        return netdev_pick_tx(dev, skb, NULL) % dev->real_num_tx_queues;
1181}
1182
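     /* Return the index of the SGE hold-off timer value closest to @time
      * (in usecs).
      */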
1183static int closest_timer(const struct sge *s, int time)
1184{
1185        int i, delta, match = 0, min_delta = INT_MAX;
1186
1187        for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
1188                delta = time - s->timer_val[i];
1189                if (delta < 0)
1190                        delta = -delta;
1191                if (delta < min_delta) {
1192                        min_delta = delta;
1193                        match = i;
1194                }
1195        }
1196        return match;
1197}
1198
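     /* Return the index of the SGE interrupt packet-count threshold closest
      * to @thres.
      */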
1199static int closest_thres(const struct sge *s, int thres)
1200{
1201        int i, delta, match = 0, min_delta = INT_MAX;
1202
1203        for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
1204                delta = thres - s->counter_val[i];
1205                if (delta < 0)
1206                        delta = -delta;
1207                if (delta < min_delta) {
1208                        min_delta = delta;
1209                        match = i;
1210                }
1211        }
1212        return match;
1213}
1214
1215/**
1216 *      cxgb4_set_rspq_intr_params - set a queue's interrupt holdoff parameters
1217 *      @q: the Rx queue
1218 *      @us: the hold-off time in us, or 0 to disable timer
1219 *      @cnt: the hold-off packet count, or 0 to disable counter
1220 *
1221 *      Sets an Rx queue's interrupt hold-off time and packet count.  At least
1222 *      one of the two needs to be enabled for the queue to generate interrupts.
1223 */
1224int cxgb4_set_rspq_intr_params(struct sge_rspq *q,
1225                               unsigned int us, unsigned int cnt)
1226{
1227        struct adapter *adap = q->adap;
1228
1229        if ((us | cnt) == 0)
1230                cnt = 1;
1231
1232        if (cnt) {
1233                int err;
1234                u32 v, new_idx;
1235
1236                new_idx = closest_thres(&adap->sge, cnt);
1237                if (q->desc && q->pktcnt_idx != new_idx) {
1238                        /* the queue has already been created, update it */
1239                        v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
1240                            FW_PARAMS_PARAM_X_V(
1241                                        FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1242                            FW_PARAMS_PARAM_YZ_V(q->cntxt_id);
1243                        err = t4_set_params(adap, adap->mbox, adap->pf, 0, 1,
1244                                            &v, &new_idx);
1245                        if (err)
1246                                return err;
1247                }
1248                q->pktcnt_idx = new_idx;
1249        }
1250
1251        us = us == 0 ? 6 : closest_timer(&adap->sge, us);
1252        q->intr_params = QINTR_TIMER_IDX_V(us) | QINTR_CNT_EN_V(cnt > 0);
1253        return 0;
1254}
1255
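     /* ndo_set_features handler: only hardware VLAN Rx tag stripping is
      * handled here, by updating the VI's Rx mode.
      */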
1256static int cxgb_set_features(struct net_device *dev, netdev_features_t features)
1257{
1258        const struct port_info *pi = netdev_priv(dev);
1259        netdev_features_t changed = dev->features ^ features;
1260        int err;
1261
1262        if (!(changed & NETIF_F_HW_VLAN_CTAG_RX))
1263                return 0;
1264
1265        err = t4_set_rxmode(pi->adapter, pi->adapter->pf, pi->viid, -1,
1266                            -1, -1, -1,
1267                            !!(features & NETIF_F_HW_VLAN_CTAG_RX), true);
1268        if (unlikely(err))
1269                dev->features = features ^ NETIF_F_HW_VLAN_CTAG_RX;
1270        return err;
1271}
1272
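     /* Populate the adapter's debugfs directory, if one was created. */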
1273static int setup_debugfs(struct adapter *adap)
1274{
1275        if (IS_ERR_OR_NULL(adap->debugfs_root))
1276                return -1;
1277
1278#ifdef CONFIG_DEBUG_FS
1279        t4_setup_debugfs(adap);
1280#endif
1281        return 0;
1282}
1283
1284/*
1285 * upper-layer driver support
1286 */
1287
1288/*
1289 * Allocate an active-open TID and set it to the supplied value.
1290 */
1291int cxgb4_alloc_atid(struct tid_info *t, void *data)
1292{
1293        int atid = -1;
1294
1295        spin_lock_bh(&t->atid_lock);
1296        if (t->afree) {
1297                union aopen_entry *p = t->afree;
1298
1299                atid = (p - t->atid_tab) + t->atid_base;
1300                t->afree = p->next;
1301                p->data = data;
1302                t->atids_in_use++;
1303        }
1304        spin_unlock_bh(&t->atid_lock);
1305        return atid;
1306}
1307EXPORT_SYMBOL(cxgb4_alloc_atid);
1308
1309/*
1310 * Release an active-open TID.
1311 */
1312void cxgb4_free_atid(struct tid_info *t, unsigned int atid)
1313{
1314        union aopen_entry *p = &t->atid_tab[atid - t->atid_base];
1315
1316        spin_lock_bh(&t->atid_lock);
1317        p->next = t->afree;
1318        t->afree = p;
1319        t->atids_in_use--;
1320        spin_unlock_bh(&t->atid_lock);
1321}
1322EXPORT_SYMBOL(cxgb4_free_atid);
1323
1324/*
1325 * Allocate a server TID and set it to the supplied value.
1326 */
1327int cxgb4_alloc_stid(struct tid_info *t, int family, void *data)
1328{
1329        int stid;
1330
1331        spin_lock_bh(&t->stid_lock);
1332        if (family == PF_INET) {
1333                stid = find_first_zero_bit(t->stid_bmap, t->nstids);
1334                if (stid < t->nstids)
1335                        __set_bit(stid, t->stid_bmap);
1336                else
1337                        stid = -1;
1338        } else {
1339                stid = bitmap_find_free_region(t->stid_bmap, t->nstids, 1);
1340                if (stid < 0)
1341                        stid = -1;
1342        }
1343        if (stid >= 0) {
1344                t->stid_tab[stid].data = data;
1345                stid += t->stid_base;
 1346                /* IPv6 requires a max of 520 bits or 16 cells in the TCAM.
1347                 * This is equivalent to 4 TIDs. With CLIP enabled it
1348                 * needs 2 TIDs.
1349                 */
1350                if (family == PF_INET6) {
1351                        t->stids_in_use += 2;
1352                        t->v6_stids_in_use += 2;
1353                } else {
1354                        t->stids_in_use++;
1355                }
1356        }
1357        spin_unlock_bh(&t->stid_lock);
1358        return stid;
1359}
1360EXPORT_SYMBOL(cxgb4_alloc_stid);
1361
1362/* Allocate a server filter TID and set it to the supplied value.
1363 */
1364int cxgb4_alloc_sftid(struct tid_info *t, int family, void *data)
1365{
1366        int stid;
1367
1368        spin_lock_bh(&t->stid_lock);
1369        if (family == PF_INET) {
1370                stid = find_next_zero_bit(t->stid_bmap,
1371                                t->nstids + t->nsftids, t->nstids);
1372                if (stid < (t->nstids + t->nsftids))
1373                        __set_bit(stid, t->stid_bmap);
1374                else
1375                        stid = -1;
1376        } else {
1377                stid = -1;
1378        }
1379        if (stid >= 0) {
1380                t->stid_tab[stid].data = data;
1381                stid -= t->nstids;
1382                stid += t->sftid_base;
1383                t->sftids_in_use++;
1384        }
1385        spin_unlock_bh(&t->stid_lock);
1386        return stid;
1387}
1388EXPORT_SYMBOL(cxgb4_alloc_sftid);
1389
1390/* Release a server TID.
1391 */
1392void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family)
1393{
1394        /* Is it a server filter TID? */
1395        if (t->nsftids && (stid >= t->sftid_base)) {
1396                stid -= t->sftid_base;
1397                stid += t->nstids;
1398        } else {
1399                stid -= t->stid_base;
1400        }
1401
1402        spin_lock_bh(&t->stid_lock);
1403        if (family == PF_INET)
1404                __clear_bit(stid, t->stid_bmap);
1405        else
1406                bitmap_release_region(t->stid_bmap, stid, 1);
1407        t->stid_tab[stid].data = NULL;
1408        if (stid < t->nstids) {
1409                if (family == PF_INET6) {
1410                        t->stids_in_use -= 2;
1411                        t->v6_stids_in_use -= 2;
1412                } else {
1413                        t->stids_in_use--;
1414                }
1415        } else {
1416                t->sftids_in_use--;
1417        }
1418
1419        spin_unlock_bh(&t->stid_lock);
1420}
1421EXPORT_SYMBOL(cxgb4_free_stid);
1422
1423/*
1424 * Populate a TID_RELEASE WR.  Caller must properly size the skb.
1425 */
1426static void mk_tid_release(struct sk_buff *skb, unsigned int chan,
1427                           unsigned int tid)
1428{
1429        struct cpl_tid_release *req;
1430
1431        set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
1432        req = __skb_put(skb, sizeof(*req));
1433        INIT_TP_WR(req, tid);
1434        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
1435}
1436
1437/*
1438 * Queue a TID release request and if necessary schedule a work queue to
1439 * process it.
1440 */
1441static void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan,
1442                                    unsigned int tid)
1443{
1444        struct adapter *adap = container_of(t, struct adapter, tids);
1445        void **p = &t->tid_tab[tid - t->tid_base];
1446
1447        spin_lock_bh(&adap->tid_release_lock);
1448        *p = adap->tid_release_head;
1449        /* Low 2 bits encode the Tx channel number */
1450        adap->tid_release_head = (void **)((uintptr_t)p | chan);
1451        if (!adap->tid_release_task_busy) {
1452                adap->tid_release_task_busy = true;
1453                queue_work(adap->workq, &adap->tid_release_task);
1454        }
1455        spin_unlock_bh(&adap->tid_release_lock);
1456}
1457
1458/*
1459 * Process the list of pending TID release requests.
1460 */
1461static void process_tid_release_list(struct work_struct *work)
1462{
1463        struct sk_buff *skb;
1464        struct adapter *adap;
1465
1466        adap = container_of(work, struct adapter, tid_release_task);
1467
1468        spin_lock_bh(&adap->tid_release_lock);
1469        while (adap->tid_release_head) {
1470                void **p = adap->tid_release_head;
1471                unsigned int chan = (uintptr_t)p & 3;
1472                p = (void *)p - chan;
1473
1474                adap->tid_release_head = *p;
1475                *p = NULL;
1476                spin_unlock_bh(&adap->tid_release_lock);
1477
1478                while (!(skb = alloc_skb(sizeof(struct cpl_tid_release),
1479                                         GFP_KERNEL)))
1480                        schedule_timeout_uninterruptible(1);
1481
1482                mk_tid_release(skb, chan, p - adap->tids.tid_tab);
1483                t4_ofld_send(adap, skb);
1484                spin_lock_bh(&adap->tid_release_lock);
1485        }
1486        adap->tid_release_task_busy = false;
1487        spin_unlock_bh(&adap->tid_release_lock);
1488}
1489
1490/*
1491 * Release a TID and inform HW.  If we are unable to allocate the release
1492 * message we defer to a work queue.
1493 */
1494void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid,
1495                      unsigned short family)
1496{
1497        struct adapter *adap = container_of(t, struct adapter, tids);
1498        struct sk_buff *skb;
1499
1500        WARN_ON(tid_out_of_range(&adap->tids, tid));
1501
1502        if (t->tid_tab[tid - adap->tids.tid_base]) {
1503                t->tid_tab[tid - adap->tids.tid_base] = NULL;
1504                atomic_dec(&t->conns_in_use);
1505                if (t->hash_base && (tid >= t->hash_base)) {
1506                        if (family == AF_INET6)
1507                                atomic_sub(2, &t->hash_tids_in_use);
1508                        else
1509                                atomic_dec(&t->hash_tids_in_use);
1510                } else {
1511                        if (family == AF_INET6)
1512                                atomic_sub(2, &t->tids_in_use);
1513                        else
1514                                atomic_dec(&t->tids_in_use);
1515                }
1516        }
1517
1518        skb = alloc_skb(sizeof(struct cpl_tid_release), GFP_ATOMIC);
1519        if (likely(skb)) {
1520                mk_tid_release(skb, chan, tid);
1521                t4_ofld_send(adap, skb);
1522        } else
1523                cxgb4_queue_tid_release(t, chan, tid);
1524}
1525EXPORT_SYMBOL(cxgb4_remove_tid);
1526
1527/*
1528 * Allocate and initialize the TID tables.  Returns 0 on success.
1529 */
1530static int tid_init(struct tid_info *t)
1531{
1532        struct adapter *adap = container_of(t, struct adapter, tids);
1533        unsigned int max_ftids = t->nftids + t->nsftids;
1534        unsigned int natids = t->natids;
1535        unsigned int hpftid_bmap_size;
1536        unsigned int eotid_bmap_size;
1537        unsigned int stid_bmap_size;
1538        unsigned int ftid_bmap_size;
1539        size_t size;
1540
1541        stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids);
1542        ftid_bmap_size = BITS_TO_LONGS(t->nftids);
1543        hpftid_bmap_size = BITS_TO_LONGS(t->nhpftids);
1544        eotid_bmap_size = BITS_TO_LONGS(t->neotids);
1545        size = t->ntids * sizeof(*t->tid_tab) +
1546               natids * sizeof(*t->atid_tab) +
1547               t->nstids * sizeof(*t->stid_tab) +
1548               t->nsftids * sizeof(*t->stid_tab) +
1549               stid_bmap_size * sizeof(long) +
1550               t->nhpftids * sizeof(*t->hpftid_tab) +
1551               hpftid_bmap_size * sizeof(long) +
1552               max_ftids * sizeof(*t->ftid_tab) +
1553               ftid_bmap_size * sizeof(long) +
1554               t->neotids * sizeof(*t->eotid_tab) +
1555               eotid_bmap_size * sizeof(long);
1556
1557        t->tid_tab = kvzalloc(size, GFP_KERNEL);
1558        if (!t->tid_tab)
1559                return -ENOMEM;
1560
1561        t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
1562        t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
1563        t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids];
1564        t->hpftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size];
1565        t->hpftid_bmap = (unsigned long *)&t->hpftid_tab[t->nhpftids];
1566        t->ftid_tab = (struct filter_entry *)&t->hpftid_bmap[hpftid_bmap_size];
1567        t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids];
1568        t->eotid_tab = (struct eotid_entry *)&t->ftid_bmap[ftid_bmap_size];
1569        t->eotid_bmap = (unsigned long *)&t->eotid_tab[t->neotids];
1570        spin_lock_init(&t->stid_lock);
1571        spin_lock_init(&t->atid_lock);
1572        spin_lock_init(&t->ftid_lock);
1573
1574        t->stids_in_use = 0;
1575        t->v6_stids_in_use = 0;
1576        t->sftids_in_use = 0;
1577        t->afree = NULL;
1578        t->atids_in_use = 0;
1579        atomic_set(&t->tids_in_use, 0);
1580        atomic_set(&t->conns_in_use, 0);
1581        atomic_set(&t->hash_tids_in_use, 0);
1582
1583        /* Set up the free list for atid_tab and clear the stid bitmap. */
1584        if (natids) {
1585                while (--natids)
1586                        t->atid_tab[natids - 1].next = &t->atid_tab[natids];
1587                t->afree = t->atid_tab;
1588        }
1589
1590        if (is_offload(adap)) {
1591                bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
1592                /* Reserve stid 0 for T4/T5 adapters */
1593                if (!t->stid_base &&
1594                    CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
1595                        __set_bit(0, t->stid_bmap);
1596
1597                if (t->neotids)
1598                        bitmap_zero(t->eotid_bmap, t->neotids);
1599        }
1600
1601        if (t->nhpftids)
1602                bitmap_zero(t->hpftid_bmap, t->nhpftids);
1603        bitmap_zero(t->ftid_bmap, t->nftids);
1604        return 0;
1605}
1606
1607/**
1608 *      cxgb4_create_server - create an IP server
1609 *      @dev: the device
1610 *      @stid: the server TID
1611 *      @sip: local IP address to bind server to
1612 *      @sport: the server's TCP port
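 *      @vlan: VLAN tag (currently unused)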
1613 *      @queue: queue to direct messages from this server to
1614 *
1615 *      Create an IP server for the given port and address.
1616 *      Returns <0 on error and one of the %NET_XMIT_* values on success.
1617 */
1618int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
1619                        __be32 sip, __be16 sport, __be16 vlan,
1620                        unsigned int queue)
1621{
1622        unsigned int chan;
1623        struct sk_buff *skb;
1624        struct adapter *adap;
1625        struct cpl_pass_open_req *req;
1626        int ret;
1627
1628        skb = alloc_skb(sizeof(*req), GFP_KERNEL);
1629        if (!skb)
1630                return -ENOMEM;
1631
1632        adap = netdev2adap(dev);
1633        req = __skb_put(skb, sizeof(*req));
1634        INIT_TP_WR(req, 0);
1635        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
1636        req->local_port = sport;
1637        req->peer_port = htons(0);
1638        req->local_ip = sip;
1639        req->peer_ip = htonl(0);
1640        chan = rxq_to_chan(&adap->sge, queue);
1641        req->opt0 = cpu_to_be64(TX_CHAN_V(chan));
1642        req->opt1 = cpu_to_be64(CONN_POLICY_V(CPL_CONN_POLICY_ASK) |
1643                                SYN_RSS_ENABLE_F | SYN_RSS_QUEUE_V(queue));
1644        ret = t4_mgmt_tx(adap, skb);
1645        return net_xmit_eval(ret);
1646}
1647EXPORT_SYMBOL(cxgb4_create_server);
1648
1649/**
 *      cxgb4_create_server6 - create an IPv6 server
1650 *      @dev: the device
1651 *      @stid: the server TID
1652 *      @sip: local IPv6 address to bind server to
1653 *      @sport: the server's TCP port
1654 *      @queue: queue to direct messages from this server to
1655 *
1656 *      Create an IPv6 server for the given port and address.
1657 *      Returns <0 on error and one of the %NET_XMIT_* values on success.
1658 */
1659int cxgb4_create_server6(const struct net_device *dev, unsigned int stid,
1660                         const struct in6_addr *sip, __be16 sport,
1661                         unsigned int queue)
1662{
1663        unsigned int chan;
1664        struct sk_buff *skb;
1665        struct adapter *adap;
1666        struct cpl_pass_open_req6 *req;
1667        int ret;
1668
1669        skb = alloc_skb(sizeof(*req), GFP_KERNEL);
1670        if (!skb)
1671                return -ENOMEM;
1672
1673        adap = netdev2adap(dev);
1674        req = __skb_put(skb, sizeof(*req));
1675        INIT_TP_WR(req, 0);
1676        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, stid));
1677        req->local_port = sport;
1678        req->peer_port = htons(0);
1679        req->local_ip_hi = *(__be64 *)(sip->s6_addr);
1680        req->local_ip_lo = *(__be64 *)(sip->s6_addr + 8);
1681        req->peer_ip_hi = cpu_to_be64(0);
1682        req->peer_ip_lo = cpu_to_be64(0);
1683        chan = rxq_to_chan(&adap->sge, queue);
1684        req->opt0 = cpu_to_be64(TX_CHAN_V(chan));
1685        req->opt1 = cpu_to_be64(CONN_POLICY_V(CPL_CONN_POLICY_ASK) |
1686                                SYN_RSS_ENABLE_F | SYN_RSS_QUEUE_V(queue));
1687        ret = t4_mgmt_tx(adap, skb);
1688        return net_xmit_eval(ret);
1689}
1690EXPORT_SYMBOL(cxgb4_create_server6);
1691
1692int cxgb4_remove_server(const struct net_device *dev, unsigned int stid,
1693                        unsigned int queue, bool ipv6)
1694{
1695        struct sk_buff *skb;
1696        struct adapter *adap;
1697        struct cpl_close_listsvr_req *req;
1698        int ret;
1699
1700        adap = netdev2adap(dev);
1701
1702        skb = alloc_skb(sizeof(*req), GFP_KERNEL);
1703        if (!skb)
1704                return -ENOMEM;
1705
1706        req = __skb_put(skb, sizeof(*req));
1707        INIT_TP_WR(req, 0);
1708        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, stid));
1709        req->reply_ctrl = htons(NO_REPLY_V(0) | (ipv6 ? LISTSVR_IPV6_V(1) :
1710                                LISTSVR_IPV6_V(0)) | QUEUENO_V(queue));
1711        ret = t4_mgmt_tx(adap, skb);
1712        return net_xmit_eval(ret);
1713}
1714EXPORT_SYMBOL(cxgb4_remove_server);
1715
1716/**
1717 *      cxgb4_best_mtu - find the entry in the MTU table closest to an MTU
1718 *      @mtus: the HW MTU table
1719 *      @mtu: the target MTU
1720 *      @idx: index of selected entry in the MTU table
1721 *
1722 *      Returns the index and the value in the HW MTU table that is closest to
1723 *      but does not exceed @mtu, unless @mtu is smaller than any value in the
1724 *      table, in which case that smallest available value is selected.
1725 */
1726unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu,
1727                            unsigned int *idx)
1728{
1729        unsigned int i = 0;
1730
1731        while (i < NMTUS - 1 && mtus[i + 1] <= mtu)
1732                ++i;
1733        if (idx)
1734                *idx = i;
1735        return mtus[i];
1736}
1737EXPORT_SYMBOL(cxgb4_best_mtu);
1738
1739/**
1740 *     cxgb4_best_aligned_mtu - find best MTU, [hopefully] data size aligned
1741 *     @mtus: the HW MTU table
1742 *     @header_size: Header Size
1743 *     @data_size_max: maximum Data Segment Size
1744 *     @data_size_align: desired Data Segment Size Alignment (2^N)
1745 *     @mtu_idxp: HW MTU Table Index return value pointer (possibly NULL)
1746 *
1747 *     Similar to cxgb4_best_mtu() but instead of searching the Hardware
1748 *     MTU Table based solely on a Maximum MTU parameter, we break that
1749 *     parameter up into a Header Size and Maximum Data Segment Size, and
1750 *     provide a desired Data Segment Size Alignment.  If we find an MTU in
1751 *     the Hardware MTU Table which will result in a Data Segment Size with
1752 *     the requested alignment _and_ that MTU isn't "too far" from the
1753 *     closest MTU, then we'll return that rather than the closest MTU.
1754 */
1755unsigned int cxgb4_best_aligned_mtu(const unsigned short *mtus,
1756                                    unsigned short header_size,
1757                                    unsigned short data_size_max,
1758                                    unsigned short data_size_align,
1759                                    unsigned int *mtu_idxp)
1760{
1761        unsigned short max_mtu = header_size + data_size_max;
1762        unsigned short data_size_align_mask = data_size_align - 1;
1763        int mtu_idx, aligned_mtu_idx;
1764
1765        /* Scan the MTU Table till we find an MTU which is larger than our
1766         * Maximum MTU or we reach the end of the table.  Along the way,
1767         * record the last MTU found, if any, which will result in a Data
1768         * Segment Length matching the requested alignment.
1769         */
1770        for (mtu_idx = 0, aligned_mtu_idx = -1; mtu_idx < NMTUS; mtu_idx++) {
1771                unsigned short data_size = mtus[mtu_idx] - header_size;
1772
1773                /* If this MTU minus the Header Size would result in a
1774                 * Data Segment Size of the desired alignment, remember it.
1775                 */
1776                if ((data_size & data_size_align_mask) == 0)
1777                        aligned_mtu_idx = mtu_idx;
1778
1779                /* If we're not at the end of the Hardware MTU Table and the
1780                 * next element is larger than our Maximum MTU, drop out of
1781                 * the loop.
1782                 */
1783                if (mtu_idx + 1 < NMTUS && mtus[mtu_idx + 1] > max_mtu)
1784                        break;
1785        }
1786
1787        /* If we fell out of the loop because we ran to the end of the table,
1788         * then we just have to use the last [largest] entry.
1789         */
1790        if (mtu_idx == NMTUS)
1791                mtu_idx--;
1792
1793        /* If we found an MTU which resulted in the requested Data Segment
1794         * Length alignment and that's "not far" from the largest MTU which is
1795         * less than or equal to the maximum MTU, then use that.
1796         */
1797        if (aligned_mtu_idx >= 0 &&
1798            mtu_idx - aligned_mtu_idx <= 1)
1799                mtu_idx = aligned_mtu_idx;
1800
1801        /* If the caller has passed in an MTU Index pointer, pass the
1802         * MTU Index back.  Return the MTU value.
1803         */
1804        if (mtu_idxp)
1805                *mtu_idxp = mtu_idx;
1806        return mtus[mtu_idx];
1807}
1808EXPORT_SYMBOL(cxgb4_best_aligned_mtu);
1809
1810/**
1811 *      cxgb4_port_chan - get the HW channel of a port
1812 *      @dev: the net device for the port
1813 *
1814 *      Return the HW Tx channel of the given port.
1815 */
1816unsigned int cxgb4_port_chan(const struct net_device *dev)
1817{
1818        return netdev2pinfo(dev)->tx_chan;
1819}
1820EXPORT_SYMBOL(cxgb4_port_chan);
1821
1822/**
1823 *      cxgb4_port_e2cchan - get the HW c-channel of a port
1824 *      @dev: the net device for the port
1825 *
1826 *      Return the HW RX c-channel of the given port.
1827 */
1828unsigned int cxgb4_port_e2cchan(const struct net_device *dev)
1829{
1830        return netdev2pinfo(dev)->rx_cchan;
1831}
1832EXPORT_SYMBOL(cxgb4_port_e2cchan);
1833
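/**
 *      cxgb4_dbfifo_count - read the doorbell FIFO occupancy
 *      @dev: the net device for the port
 *      @lpfifo: non-zero to return the low-priority count, 0 for high-priority
 *
 *      Return the number of outstanding doorbells in the selected FIFO,
 *      using the T4 or T5+ register layout as appropriate.
 */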
1834unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo)
1835{
1836        struct adapter *adap = netdev2adap(dev);
1837        u32 v1, v2, lp_count, hp_count;
1838
1839        v1 = t4_read_reg(adap, SGE_DBFIFO_STATUS_A);
1840        v2 = t4_read_reg(adap, SGE_DBFIFO_STATUS2_A);
1841        if (is_t4(adap->params.chip)) {
1842                lp_count = LP_COUNT_G(v1);
1843                hp_count = HP_COUNT_G(v1);
1844        } else {
1845                lp_count = LP_COUNT_T5_G(v1);
1846                hp_count = HP_COUNT_T5_G(v2);
1847        }
1848        return lpfifo ? lp_count : hp_count;
1849}
1850EXPORT_SYMBOL(cxgb4_dbfifo_count);
1851
1852/**
1853 *      cxgb4_port_viid - get the VI id of a port
1854 *      @dev: the net device for the port
1855 *
1856 *      Return the VI id of the given port.
1857 */
1858unsigned int cxgb4_port_viid(const struct net_device *dev)
1859{
1860        return netdev2pinfo(dev)->viid;
1861}
1862EXPORT_SYMBOL(cxgb4_port_viid);
1863
1864/**
1865 *      cxgb4_port_idx - get the index of a port
1866 *      @dev: the net device for the port
1867 *
1868 *      Return the index of the given port.
1869 */
1870unsigned int cxgb4_port_idx(const struct net_device *dev)
1871{
1872        return netdev2pinfo(dev)->port_id;
1873}
1874EXPORT_SYMBOL(cxgb4_port_idx);
1875
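/**
 *      cxgb4_get_tcp_stats - read the adapter's TP TCP statistics
 *      @pdev: the adapter's PCI device
 *      @v4: holds the returned IPv4 TCP statistics
 *      @v6: holds the returned IPv6 TCP statistics
 *
 *      Read the TP TCP counters under the adapter's statistics lock.
 */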
1876void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4,
1877                         struct tp_tcp_stats *v6)
1878{
1879        struct adapter *adap = pci_get_drvdata(pdev);
1880
1881        spin_lock(&adap->stats_lock);
1882        t4_tp_get_tcp_stats(adap, v4, v6, false);
1883        spin_unlock(&adap->stats_lock);
1884}
1885EXPORT_SYMBOL(cxgb4_get_tcp_stats);
1886
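/**
 *      cxgb4_iscsi_init - program iSCSI parameters into ULP_RX
 *      @dev: the net device
 *      @tag_mask: the iSCSI tag mask to program
 *      @pgsz_order: the four page-size orders to program
 */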
1887void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask,
1888                      const unsigned int *pgsz_order)
1889{
1890        struct adapter *adap = netdev2adap(dev);
1891
1892        t4_write_reg(adap, ULP_RX_ISCSI_TAGMASK_A, tag_mask);
1893        t4_write_reg(adap, ULP_RX_ISCSI_PSZ_A, HPZ0_V(pgsz_order[0]) |
1894                     HPZ1_V(pgsz_order[1]) | HPZ2_V(pgsz_order[2]) |
1895                     HPZ3_V(pgsz_order[3]));
1896}
1897EXPORT_SYMBOL(cxgb4_iscsi_init);
1898
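/**
 *      cxgb4_flush_eq_cache - flush the SGE egress context cache
 *      @dev: the net device
 *
 *      Ask the firmware to flush the on-chip egress queue context cache.
 */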
1899int cxgb4_flush_eq_cache(struct net_device *dev)
1900{
1901        struct adapter *adap = netdev2adap(dev);
1902
1903        return t4_sge_ctxt_flush(adap, adap->mbox, CTXT_EGRESS);
1904}
1905EXPORT_SYMBOL(cxgb4_flush_eq_cache);
1906
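/* Read an egress queue's hardware producer and consumer indices from its
 * SGE context in EDC0.  Each context is 24 bytes; CIDX and PIDX are packed
 * into the 8-byte word at offset 8.
 */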
1907static int read_eq_indices(struct adapter *adap, u16 qid, u16 *pidx, u16 *cidx)
1908{
1909        u32 addr = t4_read_reg(adap, SGE_DBQ_CTXT_BADDR_A) + 24 * qid + 8;
1910        __be64 indices;
1911        int ret;
1912
1913        spin_lock(&adap->win0_lock);
1914        ret = t4_memory_rw(adap, 0, MEM_EDC0, addr,
1915                           sizeof(indices), (__be32 *)&indices,
1916                           T4_MEMORY_READ);
1917        spin_unlock(&adap->win0_lock);
1918        if (!ret) {
1919                *cidx = (be64_to_cpu(indices) >> 25) & 0xffff;
1920                *pidx = (be64_to_cpu(indices) >> 9) & 0xffff;
1921        }
1922        return ret;
1923}
1924
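/**
 *      cxgb4_sync_txq_pidx - resync a TX queue's doorbell producer index
 *      @dev: the net device
 *      @qid: the egress queue id
 *      @pidx: the software producer index
 *      @size: the queue size in descriptors
 *
 *      If the hardware producer index lags the software one, ring the
 *      kernel doorbell with the missing increment.
 */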
1925int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx,
1926                        u16 size)
1927{
1928        struct adapter *adap = netdev2adap(dev);
1929        u16 hw_pidx, hw_cidx;
1930        int ret;
1931
1932        ret = read_eq_indices(adap, qid, &hw_pidx, &hw_cidx);
1933        if (ret)
1934                goto out;
1935
1936        if (pidx != hw_pidx) {
1937                u16 delta;
1938                u32 val;
1939
1940                if (pidx >= hw_pidx)
1941                        delta = pidx - hw_pidx;
1942                else
1943                        delta = size - hw_pidx + pidx;
1944
1945                if (is_t4(adap->params.chip))
1946                        val = PIDX_V(delta);
1947                else
1948                        val = PIDX_T5_V(delta);
1949                wmb();
1950                t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
1951                             QID_V(qid) | val);
1952        }
1953out:
1954        return ret;
1955}
1956EXPORT_SYMBOL(cxgb4_sync_txq_pidx);
1957
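/**
 *      cxgb4_read_tpte - read a TPT entry from adapter memory
 *      @dev: the net device
 *      @stag: the steering tag identifying the TPT entry
 *      @tpte: buffer for the 32-byte TPT entry
 *
 *      Translate the STAG into an offset within the adapter's memory map
 *      (EDC0/EDC1/HMA/MC0/MC1) and read the entry through memory window 0.
 */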
1958int cxgb4_read_tpte(struct net_device *dev, u32 stag, __be32 *tpte)
1959{
1960        u32 edc0_size, edc1_size, mc0_size, mc1_size, size;
1961        u32 edc0_end, edc1_end, mc0_end, mc1_end;
1962        u32 offset, memtype, memaddr;
1963        struct adapter *adap;
1964        u32 hma_size = 0;
1965        int ret;
1966
1967        adap = netdev2adap(dev);
1968
1969        offset = ((stag >> 8) * 32) + adap->vres.stag.start;
1970
1971        /* Figure out where the offset lands in the Memory Type/Address scheme.
1972         * This code assumes that the memory is laid out starting at offset 0
1973         * with no breaks as: EDC0, EDC1, MC0, MC1. All cards have both EDC0
1974         * and EDC1.  Some cards will have neither MC0 nor MC1, most cards have
1975         * MC0, and some have both MC0 and MC1.
1976         */
1977        size = t4_read_reg(adap, MA_EDRAM0_BAR_A);
1978        edc0_size = EDRAM0_SIZE_G(size) << 20;
1979        size = t4_read_reg(adap, MA_EDRAM1_BAR_A);
1980        edc1_size = EDRAM1_SIZE_G(size) << 20;
1981        size = t4_read_reg(adap, MA_EXT_MEMORY0_BAR_A);
1982        mc0_size = EXT_MEM0_SIZE_G(size) << 20;
1983
1984        if (t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A) & HMA_MUX_F) {
1985                size = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
1986                hma_size = EXT_MEM1_SIZE_G(size) << 20;
1987        }
1988        edc0_end = edc0_size;
1989        edc1_end = edc0_end + edc1_size;
1990        mc0_end = edc1_end + mc0_size;
1991
1992        if (offset < edc0_end) {
1993                memtype = MEM_EDC0;
1994                memaddr = offset;
1995        } else if (offset < edc1_end) {
1996                memtype = MEM_EDC1;
1997                memaddr = offset - edc0_end;
1998        } else {
1999                if (hma_size && (offset < (edc1_end + hma_size))) {
2000                        memtype = MEM_HMA;
2001                        memaddr = offset - edc1_end;
2002                } else if (offset < mc0_end) {
2003                        memtype = MEM_MC0;
2004                        memaddr = offset - edc1_end;
2005                } else if (is_t5(adap->params.chip)) {
2006                        size = t4_read_reg(adap, MA_EXT_MEMORY1_BAR_A);
2007                        mc1_size = EXT_MEM1_SIZE_G(size) << 20;
2008                        mc1_end = mc0_end + mc1_size;
2009                        if (offset < mc1_end) {
2010                                memtype = MEM_MC1;
2011                                memaddr = offset - mc0_end;
2012                        } else {
2013                                /* offset beyond the end of any memory */
2014                                goto err;
2015                        }
2016                } else {
2017                        /* T4/T6 only have a single memory channel */
2018                        goto err;
2019                }
2020        }
2021
2022        spin_lock(&adap->win0_lock);
2023        ret = t4_memory_rw(adap, 0, memtype, memaddr, 32, tpte, T4_MEMORY_READ);
2024        spin_unlock(&adap->win0_lock);
2025        return ret;
2026
2027err:
2028        dev_err(adap->pdev_dev, "stag %#x, offset %#x out of range\n",
2029                stag, offset);
2030        return -EINVAL;
2031}
2032EXPORT_SYMBOL(cxgb4_read_tpte);
2033
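/**
 *      cxgb4_read_sge_timestamp - read the current SGE timestamp
 *      @dev: the net device
 *
 *      Return the 64-bit SGE timestamp assembled from its low and high
 *      registers.
 */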
2034u64 cxgb4_read_sge_timestamp(struct net_device *dev)
2035{
2036        u32 hi, lo;
2037        struct adapter *adap;
2038
2039        adap = netdev2adap(dev);
2040        lo = t4_read_reg(adap, SGE_TIMESTAMP_LO_A);
2041        hi = TSVAL_G(t4_read_reg(adap, SGE_TIMESTAMP_HI_A));
2042
2043        return ((u64)hi << 32) | (u64)lo;
2044}
2045EXPORT_SYMBOL(cxgb4_read_sge_timestamp);
2046
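/**
 *      cxgb4_bar2_sge_qregs - map an SGE queue to its BAR2 doorbell region
 *      @dev: the net device
 *      @qid: the queue id
 *      @qtype: the egress or ingress queue type
 *      @user: true if the mapping is for a user-mode context
 *      @pbar2_qoffset: returns the BAR2 offset of the queue's registers
 *      @pbar2_qid: returns the queue id to use within that BAR2 region
 */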
2047int cxgb4_bar2_sge_qregs(struct net_device *dev,
2048                         unsigned int qid,
2049                         enum cxgb4_bar2_qtype qtype,
2050                         int user,
2051                         u64 *pbar2_qoffset,
2052                         unsigned int *pbar2_qid)
2053{
2054        return t4_bar2_sge_qregs(netdev2adap(dev),
2055                                 qid,
2056                                 (qtype == CXGB4_BAR2_QTYPE_EGRESS
2057                                  ? T4_BAR2_QTYPE_EGRESS
2058                                  : T4_BAR2_QTYPE_INGRESS),
2059                                 user,
2060                                 pbar2_qoffset,
2061                                 pbar2_qid);
2062}
2063EXPORT_SYMBOL(cxgb4_bar2_sge_qregs);
2064
2065static struct pci_driver cxgb4_driver;
2066
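/* Forward a neighbour update to the L2T when the neighbour's net device
 * (or the real device underneath a VLAN) belongs to a cxgb4 adapter.
 */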
2067static void check_neigh_update(struct neighbour *neigh)
2068{
2069        const struct device *parent;
2070        const struct net_device *netdev = neigh->dev;
2071
2072        if (is_vlan_dev(netdev))
2073                netdev = vlan_dev_real_dev(netdev);
2074        parent = netdev->dev.parent;
2075        if (parent && parent->driver == &cxgb4_driver.driver)
2076                t4_l2t_update(dev_get_drvdata(parent), neigh);
2077}
2078
2079static int netevent_cb(struct notifier_block *nb, unsigned long event,
2080                       void *data)
2081{
2082        switch (event) {
2083        case NETEVENT_NEIGH_UPDATE:
2084                check_neigh_update(data);
2085                break;
2086        case NETEVENT_REDIRECT:
2087        default:
2088                break;
2089        }
2090        return 0;
2091}
2092
2093static bool netevent_registered;
2094static struct notifier_block cxgb4_netevent_nb = {
2095        .notifier_call = netevent_cb
2096};
2097
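/* Poll the doorbell FIFO occupancy, sleeping usecs between reads, until
 * both the low- and high-priority FIFOs are empty.
 */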
2098static void drain_db_fifo(struct adapter *adap, int usecs)
2099{
2100        u32 v1, v2, lp_count, hp_count;
2101
2102        do {
2103                v1 = t4_read_reg(adap, SGE_DBFIFO_STATUS_A);
2104                v2 = t4_read_reg(adap, SGE_DBFIFO_STATUS2_A);
2105                if (is_t4(adap->params.chip)) {
2106                        lp_count = LP_COUNT_G(v1);
2107                        hp_count = HP_COUNT_G(v1);
2108                } else {
2109                        lp_count = LP_COUNT_T5_G(v1);
2110                        hp_count = HP_COUNT_T5_G(v2);
2111                }
2112
2113                if (lp_count == 0 && hp_count == 0)
2114                        break;
2115                set_current_state(TASK_UNINTERRUPTIBLE);
2116                schedule_timeout(usecs_to_jiffies(usecs));
2117        } while (1);
2118}
2119
2120static void disable_txq_db(struct sge_txq *q)
2121{
2122        unsigned long flags;
2123
2124        spin_lock_irqsave(&q->db_lock, flags);
2125        q->db_disabled = 1;
2126        spin_unlock_irqrestore(&q->db_lock, flags);
2127}
2128
2129static void enable_txq_db(struct adapter *adap, struct sge_txq *q)
2130{
2131        spin_lock_irq(&q->db_lock);
2132        if (q->db_pidx_inc) {
2133                /* Make sure that all writes to the TX descriptors
2134                 * are committed before we tell HW about them.
2135                 */
2136                wmb();
2137                t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
2138                             QID_V(q->cntxt_id) | PIDX_V(q->db_pidx_inc));
2139                q->db_pidx_inc = 0;
2140        }
2141        q->db_disabled = 0;
2142        spin_unlock_irq(&q->db_lock);
2143}
2144
2145static void disable_dbs(struct adapter *adap)
2146{
2147        int i;
2148
2149        for_each_ethrxq(&adap->sge, i)
2150                disable_txq_db(&adap->sge.ethtxq[i].q);
2151        if (is_offload(adap)) {
2152                struct sge_uld_txq_info *txq_info =
2153                        adap->sge.uld_txq_info[CXGB4_TX_OFLD];
2154
2155                if (txq_info) {
2156                        for_each_ofldtxq(&adap->sge, i) {
2157                                struct sge_uld_txq *txq = &txq_info->uldtxq[i];
2158
2159                                disable_txq_db(&txq->q);
2160                        }
2161                }
2162        }
2163        for_each_port(adap, i)
2164                disable_txq_db(&adap->sge.ctrlq[i].q);
2165}
2166
2167static void enable_dbs(struct adapter *adap)
2168{
2169        int i;
2170
2171        for_each_ethrxq(&adap->sge, i)
2172                enable_txq_db(adap, &adap->sge.ethtxq[i].q);
2173        if (is_offload(adap)) {
2174                struct sge_uld_txq_info *txq_info =
2175                        adap->sge.uld_txq_info[CXGB4_TX_OFLD];
2176
2177                if (txq_info) {
2178                        for_each_ofldtxq(&adap->sge, i) {
2179                                struct sge_uld_txq *txq = &txq_info->uldtxq[i];
2180
2181                                enable_txq_db(adap, &txq->q);
2182                        }
2183                }
2184        }
2185        for_each_port(adap, i)
2186                enable_txq_db(adap, &adap->sge.ctrlq[i].q);
2187}
2188
2189static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd)
2190{
2191        enum cxgb4_uld type = CXGB4_ULD_RDMA;
2192
2193        if (adap->uld && adap->uld[type].handle)
2194                adap->uld[type].control(adap->uld[type].handle, cmd);
2195}
2196
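/* Work handler for a doorbell FIFO-full event: wait for the FIFOs to
 * drain, re-enable doorbells, tell the RDMA ULD the FIFO is empty and
 * re-arm the FIFO threshold interrupts.
 */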
2197static void process_db_full(struct work_struct *work)
2198{
2199        struct adapter *adap;
2200
2201        adap = container_of(work, struct adapter, db_full_task);
2202
2203        drain_db_fifo(adap, dbfifo_drain_delay);
2204        enable_dbs(adap);
2205        notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY);
2206        if (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
2207                t4_set_reg_field(adap, SGE_INT_ENABLE3_A,
2208                                 DBFIFO_HP_INT_F | DBFIFO_LP_INT_F,
2209                                 DBFIFO_HP_INT_F | DBFIFO_LP_INT_F);
2210        else
2211                t4_set_reg_field(adap, SGE_INT_ENABLE3_A,
2212                                 DBFIFO_LP_INT_F, DBFIFO_LP_INT_F);
2213}
2214
2215static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q)
2216{
2217        u16 hw_pidx, hw_cidx;
2218        int ret;
2219
2220        spin_lock_irq(&q->db_lock);
2221        ret = read_eq_indices(adap, (u16)q->cntxt_id, &hw_pidx, &hw_cidx);
2222        if (ret)
2223                goto out;
2224        if (q->db_pidx != hw_pidx) {
2225                u16 delta;
2226                u32 val;
2227
2228                if (q->db_pidx >= hw_pidx)
2229                        delta = q->db_pidx - hw_pidx;
2230                else
2231                        delta = q->size - hw_pidx + q->db_pidx;
2232
2233                if (is_t4(adap->params.chip))
2234                        val = PIDX_V(delta);
2235                else
2236                        val = PIDX_T5_V(delta);
2237                wmb();
2238                t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
2239                             QID_V(q->cntxt_id) | val);
2240        }
2241out:
2242        q->db_disabled = 0;
2243        q->db_pidx_inc = 0;
2244        spin_unlock_irq(&q->db_lock);
2245        if (ret)
2246                CH_WARN(adap, "DB drop recovery failed.\n");
2247}
2248
2249static void recover_all_queues(struct adapter *adap)
2250{
2251        int i;
2252
2253        for_each_ethrxq(&adap->sge, i)
2254                sync_txq_pidx(adap, &adap->sge.ethtxq[i].q);
2255        if (is_offload(adap)) {
2256                struct sge_uld_txq_info *txq_info =
2257                        adap->sge.uld_txq_info[CXGB4_TX_OFLD];
2258                if (txq_info) {
2259                        for_each_ofldtxq(&adap->sge, i) {
2260                                struct sge_uld_txq *txq = &txq_info->uldtxq[i];
2261
2262                                sync_txq_pidx(adap, &txq->q);
2263                        }
2264                }
2265        }
2266        for_each_port(adap, i)
2267                sync_txq_pidx(adap, &adap->sge.ctrlq[i].q);
2268}
2269
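/* Work handler for a dropped-doorbell event.  On T4 the affected queues
 * are resynchronized with their hardware state; on T5 the dropped
 * doorbell is replayed through the queue's BAR2 doorbell register.
 */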
2270static void process_db_drop(struct work_struct *work)
2271{
2272        struct adapter *adap;
2273
2274        adap = container_of(work, struct adapter, db_drop_task);
2275
2276        if (is_t4(adap->params.chip)) {
2277                drain_db_fifo(adap, dbfifo_drain_delay);
2278                notify_rdma_uld(adap, CXGB4_CONTROL_DB_DROP);
2279                drain_db_fifo(adap, dbfifo_drain_delay);
2280                recover_all_queues(adap);
2281                drain_db_fifo(adap, dbfifo_drain_delay);
2282                enable_dbs(adap);
2283                notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY);
2284        } else if (is_t5(adap->params.chip)) {
2285                u32 dropped_db = t4_read_reg(adap, 0x010ac);
2286                u16 qid = (dropped_db >> 15) & 0x1ffff;
2287                u16 pidx_inc = dropped_db & 0x1fff;
2288                u64 bar2_qoffset;
2289                unsigned int bar2_qid;
2290                int ret;
2291
2292                ret = t4_bar2_sge_qregs(adap, qid, T4_BAR2_QTYPE_EGRESS,
2293                                        0, &bar2_qoffset, &bar2_qid);
2294                if (ret)
2295                        dev_err(adap->pdev_dev, "doorbell drop recovery: qid=%d, pidx_inc=%d\n",
2296                                qid, pidx_inc);
2297                else
2298                        writel(PIDX_T5_V(pidx_inc) | QID_V(bar2_qid),
2299                               adap->bar2 + bar2_qoffset + SGE_UDB_KDOORBELL);
2300
2301                /* Re-enable BAR2 WC */
2302                t4_set_reg_field(adap, 0x10b0, 1<<15, 1<<15);
2303        }
2304
2305        if (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
2306                t4_set_reg_field(adap, SGE_DOORBELL_CONTROL_A, DROPPED_DB_F, 0);
2307}
2308
2309void t4_db_full(struct adapter *adap)
2310{
2311        if (is_t4(adap->params.chip)) {
2312                disable_dbs(adap);
2313                notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL);
2314                t4_set_reg_field(adap, SGE_INT_ENABLE3_A,
2315                                 DBFIFO_HP_INT_F | DBFIFO_LP_INT_F, 0);
2316                queue_work(adap->workq, &adap->db_full_task);
2317        }
2318}
2319
2320void t4_db_dropped(struct adapter *adap)
2321{
2322        if (is_t4(adap->params.chip)) {
2323                disable_dbs(adap);
2324                notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL);
2325        }
2326        queue_work(adap->workq, &adap->db_drop_task);
2327}
2328
2329void t4_register_netevent_notifier(void)
2330{
2331        if (!netevent_registered) {
2332                register_netevent_notifier(&cxgb4_netevent_nb);
2333                netevent_registered = true;
2334        }
2335}
2336
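/* Notify all upper-layer drivers that the adapter is detaching and, if
 * this was the last adapter, unregister the netevent notifier.
 */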
2337static void detach_ulds(struct adapter *adap)
2338{
2339        unsigned int i;
2340
2341        mutex_lock(&uld_mutex);
2342        list_del(&adap->list_node);
2343
2344        for (i = 0; i < CXGB4_ULD_MAX; i++)
2345                if (adap->uld && adap->uld[i].handle)
2346                        adap->uld[i].state_change(adap->uld[i].handle,
2347                                             CXGB4_STATE_DETACH);
2348
2349        if (netevent_registered && list_empty(&adapter_list)) {
2350                unregister_netevent_notifier(&cxgb4_netevent_nb);
2351                netevent_registered = false;
2352        }
2353        mutex_unlock(&uld_mutex);
2354}
2355
2356static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
2357{
2358        unsigned int i;
2359
2360        mutex_lock(&uld_mutex);
2361        for (i = 0; i < CXGB4_ULD_MAX; i++)
2362                if (adap->uld && adap->uld[i].handle)
2363                        adap->uld[i].state_change(adap->uld[i].handle,
2364                                                  new_state);
2365        mutex_unlock(&uld_mutex);
2366}
2367
2368#if IS_ENABLED(CONFIG_IPV6)
2369static int cxgb4_inet6addr_handler(struct notifier_block *this,
2370                                   unsigned long event, void *data)
2371{
2372        struct inet6_ifaddr *ifa = data;
2373        struct net_device *event_dev = ifa->idev->dev;
2374        const struct device *parent = NULL;
2375#if IS_ENABLED(CONFIG_BONDING)
2376        struct adapter *adap;
2377#endif
2378        if (is_vlan_dev(event_dev))
2379                event_dev = vlan_dev_real_dev(event_dev);
2380#if IS_ENABLED(CONFIG_BONDING)
2381        if (event_dev->flags & IFF_MASTER) {
2382                list_for_each_entry(adap, &adapter_list, list_node) {
2383                        switch (event) {
2384                        case NETDEV_UP:
2385                                cxgb4_clip_get(adap->port[0],
2386                                               (const u32 *)ifa, 1);
2387                                break;
2388                        case NETDEV_DOWN:
2389                                cxgb4_clip_release(adap->port[0],
2390                                                   (const u32 *)ifa, 1);
2391                                break;
2392                        default:
2393                                break;
2394                        }
2395                }
2396                return NOTIFY_OK;
2397        }
2398#endif
2399
2400        if (event_dev)
2401                parent = event_dev->dev.parent;
2402
2403        if (parent && parent->driver == &cxgb4_driver.driver) {
2404                switch (event) {
2405                case NETDEV_UP:
2406                        cxgb4_clip_get(event_dev, (const u32 *)ifa, 1);
2407                        break;
2408                case NETDEV_DOWN:
2409                        cxgb4_clip_release(event_dev, (const u32 *)ifa, 1);
2410                        break;
2411                default:
2412                        break;
2413                }
2414        }
2415        return NOTIFY_OK;
2416}
2417
2418static bool inet6addr_registered;
2419static struct notifier_block cxgb4_inet6addr_notifier = {
2420        .notifier_call = cxgb4_inet6addr_handler
2421};
2422
2423static void update_clip(const struct adapter *adap)
2424{
2425        int i;
2426        struct net_device *dev;
2427        int ret;
2428
2429        rcu_read_lock();
2430
2431        for (i = 0; i < MAX_NPORTS; i++) {
2432                dev = adap->port[i];
2433                ret = 0;
2434
2435                if (dev)
2436                        ret = cxgb4_update_root_dev_clip(dev);
2437
2438                if (ret < 0)
2439                        break;
2440        }
2441        rcu_read_unlock();
2442}
2443#endif /* IS_ENABLED(CONFIG_IPV6) */
2444
2445/**
2446 *      cxgb_up - enable the adapter
2447 *      @adap: adapter being enabled
2448 *
2449 *      Called when the first port is enabled, this function performs the
2450 *      actions necessary to make an adapter operational, such as completing
2451 *      the initialization of HW modules, and enabling interrupts.
2452 *
2453 *      Must be called with the rtnl lock held.
2454 */
2455static int cxgb_up(struct adapter *adap)
2456{
2457        struct sge *s = &adap->sge;
2458        int err;
2459
2460        mutex_lock(&uld_mutex);
2461        err = setup_sge_queues(adap);
2462        if (err)
2463                goto rel_lock;
2464        err = setup_rss(adap);
2465        if (err)
2466                goto freeq;
2467
2468        if (adap->flags & CXGB4_USING_MSIX) {
2469                if (s->nd_msix_idx < 0) {
2470                        err = -ENOMEM;
2471                        goto irq_err;
2472                }
2473
2474                err = request_irq(adap->msix_info[s->nd_msix_idx].vec,
2475                                  t4_nondata_intr, 0,
2476                                  adap->msix_info[s->nd_msix_idx].desc, adap);
2477                if (err)
2478                        goto irq_err;
2479
2480                err = request_msix_queue_irqs(adap);
2481                if (err)
2482                        goto irq_err_free_nd_msix;
2483        } else {
2484                err = request_irq(adap->pdev->irq, t4_intr_handler(adap),
2485                                  (adap->flags & CXGB4_USING_MSI) ? 0
2486                                                                  : IRQF_SHARED,
2487                                  adap->port[0]->name, adap);
2488                if (err)
2489                        goto irq_err;
2490        }
2491
2492        enable_rx(adap);
2493        t4_sge_start(adap);
2494        t4_intr_enable(adap);
2495        adap->flags |= CXGB4_FULL_INIT_DONE;
2496        mutex_unlock(&uld_mutex);
2497
2498        notify_ulds(adap, CXGB4_STATE_UP);
2499#if IS_ENABLED(CONFIG_IPV6)
2500        update_clip(adap);
2501#endif
2502        return err;
2503
2504irq_err_free_nd_msix:
2505        free_irq(adap->msix_info[s->nd_msix_idx].vec, adap);
2506irq_err:
2507        dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err);
2508freeq:
2509        t4_free_sge_resources(adap);
2510rel_lock:
2511        mutex_unlock(&uld_mutex);
2512        return err;
2513}
2514
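/* Undo cxgb_up(): cancel deferred work, stop the SGE, free its queues
 * and clear the FULL_INIT_DONE flag.
 */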
2515static void cxgb_down(struct adapter *adapter)
2516{
2517        cancel_work_sync(&adapter->tid_release_task);
2518        cancel_work_sync(&adapter->db_full_task);
2519        cancel_work_sync(&adapter->db_drop_task);
2520        adapter->tid_release_task_busy = false;
2521        adapter->tid_release_head = NULL;
2522
2523        t4_sge_stop(adapter);
2524        t4_free_sge_resources(adapter);
2525
2526        adapter->flags &= ~CXGB4_FULL_INIT_DONE;
2527}
2528
2529/*
2530 * net_device operations
2531 */
2532int cxgb_open(struct net_device *dev)
2533{
2534        struct port_info *pi = netdev_priv(dev);
2535        struct adapter *adapter = pi->adapter;
2536        int err;
2537
2538        netif_carrier_off(dev);
2539
2540        if (!(adapter->flags & CXGB4_FULL_INIT_DONE)) {
2541                err = cxgb_up(adapter);
2542                if (err < 0)
2543                        return err;
2544        }
2545
2546        /* It's possible that the basic port information could have
2547         * changed since we first read it.
2548         */
2549        err = t4_update_port_info(pi);
2550        if (err < 0)
2551                return err;
2552
2553        err = link_start(dev);
2554        if (!err)
2555                netif_tx_start_all_queues(dev);
2556        return err;
2557}
2558
2559int cxgb_close(struct net_device *dev)
2560{
2561        struct port_info *pi = netdev_priv(dev);
2562        struct adapter *adapter = pi->adapter;
2563        int ret;
2564
2565        netif_tx_stop_all_queues(dev);
2566        netif_carrier_off(dev);
2567        ret = t4_enable_pi_params(adapter, adapter->pf, pi,
2568                                  false, false, false);
2569#ifdef CONFIG_CHELSIO_T4_DCB
2570        cxgb4_dcb_reset(dev);
2571        dcb_tx_queue_prio_enable(dev, false);
2572#endif
2573        return ret;
2574}
2575
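/**
 *      cxgb4_create_server_filter - install a server hardware filter
 *      @dev: the device
 *      @stid: the server filter TID
 *      @sip: local IP address to match (0 matches any local IP)
 *      @sport: the server's TCP port
 *      @vlan: VLAN tag (currently unused)
 *      @queue: queue to direct matching packets to
 *      @port: physical ingress port to match
 *      @mask: ingress port mask
 *
 *      Build and install a locked filter that steers packets for the
 *      given server to @queue.
 */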
2576int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
2577                __be32 sip, __be16 sport, __be16 vlan,
2578                unsigned int queue, unsigned char port, unsigned char mask)
2579{
2580        int ret;
2581        struct filter_entry *f;
2582        struct adapter *adap;
2583        int i;
2584        u8 *val;
2585
2586        adap = netdev2adap(dev);
2587
2588        /* Adjust stid to correct filter index */
2589        stid -= adap->tids.sftid_base;
2590        stid += adap->tids.nftids;
2591
2592        /* Check to make sure the filter requested is writable ...
2593         */
2594        f = &adap->tids.ftid_tab[stid];
2595        ret = writable_filter(f);
2596        if (ret)
2597                return ret;
2598
2599        /* Clear out any old resources being used by the filter before
2600         * we start constructing the new filter.
2601         */
2602        if (f->valid)
2603                clear_filter(adap, f);
2604
2605        /* Clear out filter specifications */
2606        memset(&f->fs, 0, sizeof(struct ch_filter_specification));
2607        f->fs.val.lport = cpu_to_be16(sport);
2608        f->fs.mask.lport  = ~0;
2609        val = (u8 *)&sip;
2610        if ((val[0] | val[1] | val[2] | val[3]) != 0) {
2611                for (i = 0; i < 4; i++) {
2612                        f->fs.val.lip[i] = val[i];
2613                        f->fs.mask.lip[i] = ~0;
2614                }
2615                if (adap->params.tp.vlan_pri_map & PORT_F) {
2616                        f->fs.val.iport = port;
2617                        f->fs.mask.iport = mask;
2618                }
2619        }
2620
2621        if (adap->params.tp.vlan_pri_map & PROTOCOL_F) {
2622                f->fs.val.proto = IPPROTO_TCP;
2623                f->fs.mask.proto = ~0;
2624        }
2625
2626        f->fs.dirsteer = 1;
2627        f->fs.iq = queue;
2628        /* Mark filter as locked */
2629        f->locked = 1;
2630        f->fs.rpttid = 1;
2631
2632        /* Save the actual tid. We need this to get the corresponding
2633         * filter entry structure in filter_rpl.
2634         */
2635        f->tid = stid + adap->tids.ftid_base;
2636        ret = set_filter_wr(adap, stid);
2637        if (ret) {
2638                clear_filter(adap, f);
2639                return ret;
2640        }
2641
2642        return 0;
2643}
2644EXPORT_SYMBOL(cxgb4_create_server_filter);
2645
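/**
 *      cxgb4_remove_server_filter - remove a server hardware filter
 *      @dev: the device
 *      @stid: the server filter TID
 *      @queue: queue the filter was steering to (unused)
 *      @ipv6: true for an IPv6 filter (unused)
 *
 *      Unlock the filter installed by cxgb4_create_server_filter() and
 *      delete it from the hardware.
 */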
2646int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid,
2647                unsigned int queue, bool ipv6)
2648{
2649        struct filter_entry *f;
2650        struct adapter *adap;
2651
2652        adap = netdev2adap(dev);
2653
2654        /* Adjust stid to correct filter index */
2655        stid -= adap->tids.sftid_base;
2656        stid += adap->tids.nftids;
2657
2658        f = &adap->tids.ftid_tab[stid];
2659        /* Unlock the filter */
2660        f->locked = 0;
2661
2662        return delete_filter(adap, stid);
2663}
2664EXPORT_SYMBOL(cxgb4_remove_server_filter);
2665
2666static void cxgb_get_stats(struct net_device *dev,
2667                           struct rtnl_link_stats64 *ns)
2668{
2669        struct port_stats stats;
2670        struct port_info *p = netdev_priv(dev);
2671        struct adapter *adapter = p->adapter;
2672
2673        /* Block retrieving statistics during EEH error
2674         * recovery. Otherwise, the recovery might fail
2675         * and the PCI device will be removed permanently
2676         */
2677        spin_lock(&adapter->stats_lock);
2678        if (!netif_device_present(dev)) {
2679                spin_unlock(&adapter->stats_lock);
2680                return;
2681        }
2682        t4_get_port_stats_offset(adapter, p->tx_chan, &stats,
2683                                 &p->stats_base);
2684        spin_unlock(&adapter->stats_lock);
2685
2686        ns->tx_bytes   = stats.tx_octets;
2687        ns->tx_packets = stats.tx_frames;
2688        ns->rx_bytes   = stats.rx_octets;
2689        ns->rx_packets = stats.rx_frames;
2690        ns->multicast  = stats.rx_mcast_frames;
2691
2692        /* detailed rx_errors */
2693        ns->rx_length_errors = stats.rx_jabber + stats.rx_too_long +
2694                               stats.rx_runt;
2695        ns->rx_over_errors   = 0;
2696        ns->rx_crc_errors    = stats.rx_fcs_err;
2697        ns->rx_frame_errors  = stats.rx_symbol_err;
2698        ns->rx_dropped       = stats.rx_ovflow0 + stats.rx_ovflow1 +
2699                               stats.rx_ovflow2 + stats.rx_ovflow3 +
2700                               stats.rx_trunc0 + stats.rx_trunc1 +
2701                               stats.rx_trunc2 + stats.rx_trunc3;
2702        ns->rx_missed_errors = 0;
2703
2704        /* detailed tx_errors */
2705        ns->tx_aborted_errors   = 0;
2706        ns->tx_carrier_errors   = 0;
2707        ns->tx_fifo_errors      = 0;
2708        ns->tx_heartbeat_errors = 0;
2709        ns->tx_window_errors    = 0;
2710
2711        ns->tx_errors = stats.tx_error_frames;
2712        ns->rx_errors = stats.rx_symbol_err + stats.rx_fcs_err +
2713                ns->rx_length_errors + stats.rx_len_err + ns->rx_fifo_errors;
2714}
2715
2716static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
2717{
2718        unsigned int mbox;
2719        int ret = 0, prtad, devad;
2720        struct port_info *pi = netdev_priv(dev);
2721        struct adapter *adapter = pi->adapter;
2722        struct mii_ioctl_data *data = (struct mii_ioctl_data *)&req->ifr_data;
2723
2724        switch (cmd) {
2725        case SIOCGMIIPHY:
2726                if (pi->mdio_addr < 0)
2727                        return -EOPNOTSUPP;
2728                data->phy_id = pi->mdio_addr;
2729                break;
2730        case SIOCGMIIREG:
2731        case SIOCSMIIREG:
2732                if (mdio_phy_id_is_c45(data->phy_id)) {
2733                        prtad = mdio_phy_id_prtad(data->phy_id);
2734                        devad = mdio_phy_id_devad(data->phy_id);
2735                } else if (data->phy_id < 32) {
2736                        prtad = data->phy_id;
2737                        devad = 0;
2738                        data->reg_num &= 0x1f;
2739                } else
2740                        return -EINVAL;
2741
2742                mbox = pi->adapter->pf;
2743                if (cmd == SIOCGMIIREG)
2744                        ret = t4_mdio_rd(pi->adapter, mbox, prtad, devad,
2745                                         data->reg_num, &data->val_out);
2746                else
2747                        ret = t4_mdio_wr(pi->adapter, mbox, prtad, devad,
2748                                         data->reg_num, data->val_in);
2749                break;
2750        case SIOCGHWTSTAMP:
2751                return copy_to_user(req->ifr_data, &pi->tstamp_config,
2752                                    sizeof(pi->tstamp_config)) ?
2753                        -EFAULT : 0;
2754        case SIOCSHWTSTAMP:
2755                if (copy_from_user(&pi->tstamp_config, req->ifr_data,
2756                                   sizeof(pi->tstamp_config)))
2757                        return -EFAULT;
2758
2759                if (!is_t4(adapter->params.chip)) {
2760                        switch (pi->tstamp_config.tx_type) {
2761                        case HWTSTAMP_TX_OFF:
2762                        case HWTSTAMP_TX_ON:
2763                                break;
2764                        default:
2765                                return -ERANGE;
2766                        }
2767
2768                        switch (pi->tstamp_config.rx_filter) {
2769                        case HWTSTAMP_FILTER_NONE:
2770                                pi->rxtstamp = false;
2771                                break;
2772                        case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
2773                        case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
2774                                cxgb4_ptprx_timestamping(pi, pi->port_id,
2775                                                         PTP_TS_L4);
2776                                break;
2777                        case HWTSTAMP_FILTER_PTP_V2_EVENT:
2778                                cxgb4_ptprx_timestamping(pi, pi->port_id,
2779                                                         PTP_TS_L2_L4);
2780                                break;
2781                        case HWTSTAMP_FILTER_ALL:
2782                        case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
2783                        case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
2784                        case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
2785                        case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
2786                                pi->rxtstamp = true;
2787                                break;
2788                        default:
2789                                pi->tstamp_config.rx_filter =
2790                                        HWTSTAMP_FILTER_NONE;
2791                                return -ERANGE;
2792                        }
2793
2794                        if ((pi->tstamp_config.tx_type == HWTSTAMP_TX_OFF) &&
2795                            (pi->tstamp_config.rx_filter ==
2796                                HWTSTAMP_FILTER_NONE)) {
2797                                if (cxgb4_ptp_txtype(adapter, pi->port_id) >= 0)
2798                                        pi->ptp_enable = false;
2799                        }
2800
2801                        if (pi->tstamp_config.rx_filter !=
2802                                HWTSTAMP_FILTER_NONE) {
2803                                if (cxgb4_ptp_redirect_rx_packet(adapter,
2804                                                                 pi) >= 0)
2805                                        pi->ptp_enable = true;
2806                        }
2807                } else {
2808                        /* For T4 Adapters */
2809                        switch (pi->tstamp_config.rx_filter) {
2810                        case HWTSTAMP_FILTER_NONE:
2811                                pi->rxtstamp = false;
2812                                break;
2813                        case HWTSTAMP_FILTER_ALL:
2814                                pi->rxtstamp = true;
2815                                break;
2816                        default:
2817                                pi->tstamp_config.rx_filter =
2818                                        HWTSTAMP_FILTER_NONE;
2819                                return -ERANGE;
2820                        }
2821                }
2822                return copy_to_user(req->ifr_data, &pi->tstamp_config,
2823                                    sizeof(pi->tstamp_config)) ?
2824                        -EFAULT : 0;
2825        default:
2826                return -EOPNOTSUPP;
2827        }
2828        return ret;
2829}
2830
2831static void cxgb_set_rxmode(struct net_device *dev)
2832{
2833        /* unfortunately we can't return errors to the stack */
2834        set_rxmode(dev, -1, false);
2835}
2836
2837static int cxgb_change_mtu(struct net_device *dev, int new_mtu)
2838{
2839        int ret;
2840        struct port_info *pi = netdev_priv(dev);
2841
2842        ret = t4_set_rxmode(pi->adapter, pi->adapter->pf, pi->viid, new_mtu, -1,
2843                            -1, -1, -1, true);
2844        if (!ret)
2845                dev->mtu = new_mtu;
2846        return ret;
2847}
2848
2849#ifdef CONFIG_PCI_IOV
2850static int cxgb4_mgmt_open(struct net_device *dev)
2851{
2852        /* Turn carrier off since we don't have to transmit anything on this
2853         * interface.
2854         */
2855        netif_carrier_off(dev);
2856        return 0;
2857}
2858
2859/* Fill MAC address that will be assigned by the FW */
2860static void cxgb4_mgmt_fill_vf_station_mac_addr(struct adapter *adap)
2861{
2862        u8 hw_addr[ETH_ALEN], macaddr[ETH_ALEN];
2863        unsigned int i, vf, nvfs;
2864        u16 a, b;
2865        int err;
2866        u8 *na;
2867
2868        adap->params.pci.vpd_cap_addr = pci_find_capability(adap->pdev,
2869                                                            PCI_CAP_ID_VPD);
2870        err = t4_get_raw_vpd_params(adap, &adap->params.vpd);
2871        if (err)
2872                return;
2873
2874        na = adap->params.vpd.na;
2875        for (i = 0; i < ETH_ALEN; i++)
2876                hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 +
2877                              hex2val(na[2 * i + 1]));
2878
2879        a = (hw_addr[0] << 8) | hw_addr[1];
2880        b = (hw_addr[1] << 8) | hw_addr[2];
2881        a ^= b;
2882        a |= 0x0200;    /* locally assigned Ethernet MAC address */
2883        a &= ~0x0100;   /* not a multicast Ethernet MAC address */
2884        macaddr[0] = a >> 8;
2885        macaddr[1] = a & 0xff;
2886
2887        for (i = 2; i < 5; i++)
2888                macaddr[i] = hw_addr[i + 1];
2889
2890        for (vf = 0, nvfs = pci_sriov_get_totalvfs(adap->pdev);
2891                vf < nvfs; vf++) {
2892                macaddr[5] = adap->pf * nvfs + vf;
2893                ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, macaddr);
2894        }
2895}
2896
2897static int cxgb4_mgmt_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
2898{
2899        struct port_info *pi = netdev_priv(dev);
2900        struct adapter *adap = pi->adapter;
2901        int ret;
2902
2903        /* verify MAC addr is valid */
2904        if (!is_valid_ether_addr(mac)) {
2905                dev_err(pi->adapter->pdev_dev,
2906                        "Invalid Ethernet address %pM for VF %d\n",
2907                        mac, vf);
2908                return -EINVAL;
2909        }
2910
2911        dev_info(pi->adapter->pdev_dev,
2912                 "Setting MAC %pM on VF %d\n", mac, vf);
2913        ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac);
2914        if (!ret)
2915                ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac);
2916        return ret;
2917}
2918
2919static int cxgb4_mgmt_get_vf_config(struct net_device *dev,
2920                                    int vf, struct ifla_vf_info *ivi)
2921{
2922        struct port_info *pi = netdev_priv(dev);
2923        struct adapter *adap = pi->adapter;
2924        struct vf_info *vfinfo;
2925
2926        if (vf >= adap->num_vfs)
2927                return -EINVAL;
2928        vfinfo = &adap->vfinfo[vf];
2929
2930        ivi->vf = vf;
2931        ivi->max_tx_rate = vfinfo->tx_rate;
2932        ivi->min_tx_rate = 0;
2933        ether_addr_copy(ivi->mac, vfinfo->vf_mac_addr);
2934        ivi->vlan = vfinfo->vlan;
2935        ivi->linkstate = vfinfo->link_state;
2936        return 0;
2937}
2938
2939static int cxgb4_mgmt_get_phys_port_id(struct net_device *dev,
2940                                       struct netdev_phys_item_id *ppid)
2941{
2942        struct port_info *pi = netdev_priv(dev);
2943        unsigned int phy_port_id;
2944
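            /* Build the id from the adapter index and port number so it
             * stays distinct across ports and adapters.
             */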
2945        phy_port_id = pi->adapter->adap_idx * 10 + pi->port_id;
2946        ppid->id_len = sizeof(phy_port_id);
2947        memcpy(ppid->id, &phy_port_id, ppid->id_len);
2948        return 0;
2949}
2950
2951static int cxgb4_mgmt_set_vf_rate(struct net_device *dev, int vf,
2952                                  int min_tx_rate, int max_tx_rate)
2953{
2954        struct port_info *pi = netdev_priv(dev);
2955        struct adapter *adap = pi->adapter;
2956        unsigned int link_ok, speed, mtu;
2957        u32 fw_pfvf, fw_class;
2958        int class_id = vf;
2959        int ret;
2960        u16 pktsize;
2961
2962        if (vf >= adap->num_vfs)
2963                return -EINVAL;
2964
2965        if (min_tx_rate) {
2966                dev_err(adap->pdev_dev,
2967                        "Min tx rate %d for VF %d is invalid; only 0 is supported.\n",
2968                        min_tx_rate, vf);
2969                return -EINVAL;
2970        }
2971
2972        if (max_tx_rate == 0) {
2973                /* unbind VF from any Traffic Class */
2974                fw_pfvf =
2975                    (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2976                     FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_SCHEDCLASS_ETH));
2977                fw_class = 0xffffffff;
2978                ret = t4_set_params(adap, adap->mbox, adap->pf, vf + 1, 1,
2979                                    &fw_pfvf, &fw_class);
2980                if (ret) {
2981                        dev_err(adap->pdev_dev,
2982                                "Err %d in unbinding PF %d VF %d from TX Rate Limiting\n",
2983                                ret, adap->pf, vf);
2984                        return -EINVAL;
2985                }
2986                dev_info(adap->pdev_dev,
2987                         "PF %d VF %d is unbound from TX Rate Limiting\n",
2988                         adap->pf, vf);
2989                adap->vfinfo[vf].tx_rate = 0;
2990                return 0;
2991        }
2992
2993        ret = t4_get_link_params(pi, &link_ok, &speed, &mtu);
2994        if (ret != FW_SUCCESS) {
2995                dev_err(adap->pdev_dev,
2996                        "Failed to get link information for VF %d\n", vf);
2997                return -EINVAL;
2998        }
2999
3000        if (!link_ok) {
3001                dev_err(adap->pdev_dev, "Link down for VF %d\n", vf);
3002                return -EINVAL;
3003        }
3004
3005        if (max_tx_rate > speed) {
3006                dev_err(adap->pdev_dev,
3007                        "Max tx rate %d for VF %d can't be > link-speed %u\n",
3008                        max_tx_rate, vf, speed);
3009                return -EINVAL;
3010        }
3011
3012        pktsize = mtu;
3013        /* subtract ethhdr size and 4 bytes CRC since f/w appends them */
3014        pktsize = pktsize - sizeof(struct ethhdr) - 4;
3015        /* subtract ipv4 hdr size, tcp hdr size to get typical IPv4 MSS size */
3016        pktsize = pktsize - sizeof(struct iphdr) - sizeof(struct tcphdr);
3017        /* configure Traffic Class for rate-limiting */
3018        ret = t4_sched_params(adap, SCHED_CLASS_TYPE_PACKET,
3019                              SCHED_CLASS_LEVEL_CL_RL,
3020                              SCHED_CLASS_MODE_CLASS,
3021                              SCHED_CLASS_RATEUNIT_BITS,
3022                              SCHED_CLASS_RATEMODE_ABS,
3023                              pi->tx_chan, class_id, 0,
3024                              max_tx_rate * 1000, 0, pktsize);
3025        if (ret) {
3026                dev_err(adap->pdev_dev, "Err %d for Traffic Class config\n",
3027                        ret);
3028                return -EINVAL;
3029        }
3030        dev_info(adap->pdev_dev,
3031                 "Class %d with MSS %u configured with rate %u\n",
3032                 class_id, pktsize, max_tx_rate);
3033
3034        /* bind VF to configured Traffic Class */
3035        fw_pfvf = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
3036                   FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_SCHEDCLASS_ETH));
3037        fw_class = class_id;
3038        ret = t4_set_params(adap, adap->mbox, adap->pf, vf + 1, 1, &fw_pfvf,
3039                            &fw_class);
3040        if (ret) {
3041                dev_err(adap->pdev_dev,
3042                        "Err %d in binding PF %d VF %d to Traffic Class %d\n",
3043                        ret, adap->pf, vf, class_id);
3044                return -EINVAL;
3045        }
3046        dev_info(adap->pdev_dev, "PF %d VF %d is bound to Class %d\n",
3047                 adap->pf, vf, class_id);
3048        adap->vfinfo[vf].tx_rate = max_tx_rate;
3049        return 0;
3050}
3051
3052static int cxgb4_mgmt_set_vf_vlan(struct net_device *dev, int vf,
3053                                  u16 vlan, u8 qos, __be16 vlan_proto)
3054{
3055        struct port_info *pi = netdev_priv(dev);
3056        struct adapter *adap = pi->adapter;
3057        int ret;
3058
3059        if (vf >= adap->num_vfs || vlan > 4095 || qos > 7)
3060                return -EINVAL;
3061
3062        if (vlan_proto != htons(ETH_P_8021Q) || qos != 0)
3063                return -EPROTONOSUPPORT;
3064
3065        ret = t4_set_vlan_acl(adap, adap->mbox, vf + 1, vlan);
3066        if (!ret) {
3067                adap->vfinfo[vf].vlan = vlan;
3068                return 0;
3069        }
3070
3071        dev_err(adap->pdev_dev, "Err %d %s VLAN ACL for PF/VF %d/%d\n",
3072                ret, (vlan ? "setting" : "clearing"), adap->pf, vf);
3073        return ret;
3074}
3075
3076static int cxgb4_mgmt_set_vf_link_state(struct net_device *dev, int vf,
3077                                        int link)
3078{
3079        struct port_info *pi = netdev_priv(dev);
3080        struct adapter *adap = pi->adapter;
3081        u32 param, val;
3082        int ret = 0;
3083
3084        if (vf >= adap->num_vfs)
3085                return -EINVAL;
3086
3087        switch (link) {
3088        case IFLA_VF_LINK_STATE_AUTO:
3089                val = FW_VF_LINK_STATE_AUTO;
3090                break;
3091
3092        case IFLA_VF_LINK_STATE_ENABLE:
3093                val = FW_VF_LINK_STATE_ENABLE;
3094                break;
3095
3096        case IFLA_VF_LINK_STATE_DISABLE:
3097                val = FW_VF_LINK_STATE_DISABLE;
3098                break;
3099
3100        default:
3101                return -EINVAL;
3102        }
3103
3104        param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
3105                 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_LINK_STATE));
3106        ret = t4_set_params(adap, adap->mbox, adap->pf, vf + 1, 1,
3107                            &param, &val);
3108        if (ret) {
3109                dev_err(adap->pdev_dev,
3110                        "Error %d in setting PF %d VF %d link state\n",
3111                        ret, adap->pf, vf);
3112                return -EINVAL;
3113        }
3114
3115        adap->vfinfo[vf].link_state = link;
3116        return ret;
3117}
3118#endif /* CONFIG_PCI_IOV */
3119
3120static int cxgb_set_mac_addr(struct net_device *dev, void *p)
3121{
3122        int ret;
3123        struct sockaddr *addr = p;
3124        struct port_info *pi = netdev_priv(dev);
3125
3126        if (!is_valid_ether_addr(addr->sa_data))
3127                return -EADDRNOTAVAIL;
3128
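            /* Update the MPS TCAM entry for this port and remember the
             * resulting filter and SMT indices.
             */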
3129        ret = cxgb4_update_mac_filt(pi, pi->viid, &pi->xact_addr_filt,
3130                                    addr->sa_data, true, &pi->smt_idx);
3131        if (ret < 0)
3132                return ret;
3133
3134        memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
3135        return 0;
3136}
3137
3138#ifdef CONFIG_NET_POLL_CONTROLLER
3139static void cxgb_netpoll(struct net_device *dev)
3140{
3141        struct port_info *pi = netdev_priv(dev);
3142        struct adapter *adap = pi->adapter;
3143
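            /* With MSI-X, poll each of this port's Rx response queues
             * directly; otherwise fall back to the adapter's INTx/MSI
             * interrupt handler.
             */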
3144        if (adap->flags & CXGB4_USING_MSIX) {
3145                int i;
3146                struct sge_eth_rxq *rx = &adap->sge.ethrxq[pi->first_qset];
3147
3148                for (i = pi->nqsets; i; i--, rx++)
3149                        t4_sge_intr_msix(0, &rx->rspq);
3150        } else
3151                t4_intr_handler(adap)(0, adap);
3152}
3153#endif
3154
3155static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
3156{
3157        struct port_info *pi = netdev_priv(dev);
3158        struct adapter *adap = pi->adapter;
3159        struct ch_sched_queue qe = { 0 };
3160        struct ch_sched_params p = { 0 };
3161        struct sched_class *e;
3162        u32 req_rate;
3163        int err = 0;
3164
3165        if (!can_sched(dev))
3166                return -ENOTSUPP;
3167
3168        if (index < 0 || index > pi->nqsets - 1)
3169                return -EINVAL;
3170
3171        if (!(adap->flags & CXGB4_FULL_INIT_DONE)) {
3172                dev_err(adap->pdev_dev,
3173                        "Failed to rate limit on queue %d. Link Down?\n",
3174                        index);
3175                return -EINVAL;
3176        }
3177
3178        qe.queue = index;
3179        e = cxgb4_sched_queue_lookup(dev, &qe);
3180        if (e && e->info.u.params.level != SCHED_CLASS_LEVEL_CL_RL) {
3181                dev_err(adap->pdev_dev,
3182                        "Queue %u already bound to class %u of type: %u\n",
3183                        index, e->idx, e->info.u.params.level);
3184                return -EBUSY;
3185        }
3186
3187        /* Convert from Mbps to Kbps */
3188        req_rate = rate * 1000;
3189
3190        /* Max rate is 100 Gbps */
3191        if (req_rate > SCHED_MAX_RATE_KBPS) {
3192                dev_err(adap->pdev_dev,
3193                        "Invalid rate %u Mbps, Max rate is %u Mbps\n",
3194                        rate, SCHED_MAX_RATE_KBPS / 1000);
3195                return -ERANGE;
3196        }
3197
3198        /* First unbind the queue from any existing class */
3199        memset(&qe, 0, sizeof(qe));
3200        qe.queue = index;
3201        qe.class = SCHED_CLS_NONE;
3202
3203        err = cxgb4_sched_class_unbind(dev, (void *)(&qe), SCHED_QUEUE);
3204        if (err) {
3205                dev_err(adap->pdev_dev,
3206                        "Unbinding Queue %d on port %d fail. Err: %d\n",
3207                        index, pi->port_id, err);
3208                return err;
3209        }
3210
3211        /* Queue already unbound */
3212        if (!req_rate)
3213                return 0;
3214
3215        /* Fetch any available unused or matching scheduling class */
3216        p.type = SCHED_CLASS_TYPE_PACKET;
3217        p.u.params.level    = SCHED_CLASS_LEVEL_CL_RL;
3218        p.u.params.mode     = SCHED_CLASS_MODE_CLASS;
3219        p.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS;
3220        p.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS;
3221        p.u.params.channel  = pi->tx_chan;
3222        p.u.params.class    = SCHED_CLS_NONE;
3223        p.u.params.minrate  = 0;
3224        p.u.params.maxrate  = req_rate;
3225        p.u.params.weight   = 0;
3226        p.u.params.pktsize  = dev->mtu;
3227
3228        e = cxgb4_sched_class_alloc(dev, &p);
3229        if (!e)
3230                return -ENOMEM;
3231
3232        /* Bind the queue to a scheduling class */
3233        memset(&qe, 0, sizeof(qe));
3234        qe.queue = index;
3235        qe.class = e->idx;
3236
3237        err = cxgb4_sched_class_bind(dev, (void *)(&qe), SCHED_QUEUE);
3238        if (err)
3239                dev_err(adap->pdev_dev,
3240                        "Queue rate limiting failed. Err: %d\n", err);
3241        return err;
3242}
3243
3244static int cxgb_setup_tc_flower(struct net_device *dev,
3245                                struct flow_cls_offload *cls_flower)
3246{
3247        switch (cls_flower->command) {
3248        case FLOW_CLS_REPLACE:
3249                return cxgb4_tc_flower_replace(dev, cls_flower);
3250        case FLOW_CLS_DESTROY:
3251                return cxgb4_tc_flower_destroy(dev, cls_flower);
3252        case FLOW_CLS_STATS:
3253                return cxgb4_tc_flower_stats(dev, cls_flower);
3254        default:
3255                return -EOPNOTSUPP;
3256        }
3257}
3258
3259static int cxgb_setup_tc_cls_u32(struct net_device *dev,
3260                                 struct tc_cls_u32_offload *cls_u32)
3261{
3262        switch (cls_u32->command) {
3263        case TC_CLSU32_NEW_KNODE:
3264        case TC_CLSU32_REPLACE_KNODE:
3265                return cxgb4_config_knode(dev, cls_u32);
3266        case TC_CLSU32_DELETE_KNODE:
3267                return cxgb4_delete_knode(dev, cls_u32);
3268        default:
3269                return -EOPNOTSUPP;
3270        }
3271}
3272
3273static int cxgb_setup_tc_matchall(struct net_device *dev,
3274                                  struct tc_cls_matchall_offload *cls_matchall,
3275                                  bool ingress)
3276{
3277        struct adapter *adap = netdev2adap(dev);
3278
3279        if (!adap->tc_matchall)
3280                return -ENOMEM;
3281
3282        switch (cls_matchall->command) {
3283        case TC_CLSMATCHALL_REPLACE:
3284                return cxgb4_tc_matchall_replace(dev, cls_matchall, ingress);
3285        case TC_CLSMATCHALL_DESTROY:
3286                return cxgb4_tc_matchall_destroy(dev, cls_matchall, ingress);
3287        case TC_CLSMATCHALL_STATS:
3288                if (ingress)
3289                        return cxgb4_tc_matchall_stats(dev, cls_matchall);
3290                break;
3291        default:
3292                break;
3293        }
3294
3295        return -EOPNOTSUPP;
3296}
3297
3298static int cxgb_setup_tc_block_ingress_cb(enum tc_setup_type type,
3299                                          void *type_data, void *cb_priv)
3300{
3301        struct net_device *dev = cb_priv;
3302        struct port_info *pi = netdev2pinfo(dev);
3303        struct adapter *adap = netdev2adap(dev);
3304
3305        if (!(adap->flags & CXGB4_FULL_INIT_DONE)) {
3306                dev_err(adap->pdev_dev,
3307                        "Failed to setup tc on port %d. Link Down?\n",
3308                        pi->port_id);
3309                return -EINVAL;
3310        }
3311
3312        if (!tc_cls_can_offload_and_chain0(dev, type_data))
3313                return -EOPNOTSUPP;
3314
3315        switch (type) {
3316        case TC_SETUP_CLSU32:
3317                return cxgb_setup_tc_cls_u32(dev, type_data);
3318        case TC_SETUP_CLSFLOWER:
3319                return cxgb_setup_tc_flower(dev, type_data);
3320        case TC_SETUP_CLSMATCHALL:
3321                return cxgb_setup_tc_matchall(dev, type_data, true);
3322        default:
3323                return -EOPNOTSUPP;
3324        }
3325}
3326
3327static int cxgb_setup_tc_block_egress_cb(enum tc_setup_type type,
3328                                         void *type_data, void *cb_priv)
3329{
3330        struct net_device *dev = cb_priv;
3331        struct port_info *pi = netdev2pinfo(dev);
3332        struct adapter *adap = netdev2adap(dev);
3333
3334        if (!(adap->flags & CXGB4_FULL_INIT_DONE)) {
3335                dev_err(adap->pdev_dev,
3336                        "Failed to setup tc on port %d. Link Down?\n",
3337                        pi->port_id);
3338                return -EINVAL;
3339        }
3340
3341        if (!tc_cls_can_offload_and_chain0(dev, type_data))
3342                return -EOPNOTSUPP;
3343
3344        switch (type) {
3345        case TC_SETUP_CLSMATCHALL:
3346                return cxgb_setup_tc_matchall(dev, type_data, false);
3347        default:
3348                break;
3349        }
3350
3351        return -EOPNOTSUPP;
3352}
3353
3354static int cxgb_setup_tc_mqprio(struct net_device *dev,
3355                                struct tc_mqprio_qopt_offload *mqprio)
3356{
3357        struct adapter *adap = netdev2adap(dev);
3358
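            /* MQPRIO offload needs ETHOFLD queue resources and the mqprio
             * state allocated at init time.
             */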
3359        if (!is_ethofld(adap) || !adap->tc_mqprio)
3360                return -ENOMEM;
3361
3362        return cxgb4_setup_tc_mqprio(dev, mqprio);
3363}
3364
3365static LIST_HEAD(cxgb_block_cb_list);
3366
3367static int cxgb_setup_tc_block(struct net_device *dev,
3368                               struct flow_block_offload *f)
3369{
3370        struct port_info *pi = netdev_priv(dev);
3371        flow_setup_cb_t *cb;
3372        bool ingress_only;
3373
3374        pi->tc_block_shared = f->block_shared;
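            /* clsact egress blocks get the egress-only callback; all other
             * binder types are handled by the ingress callback.
             */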
3375        if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) {
3376                cb = cxgb_setup_tc_block_egress_cb;
3377                ingress_only = false;
3378        } else {
3379                cb = cxgb_setup_tc_block_ingress_cb;
3380                ingress_only = true;
3381        }
3382
3383        return flow_block_cb_setup_simple(f, &cxgb_block_cb_list,
3384                                          cb, pi, dev, ingress_only);
3385}
3386
3387static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
3388                         void *type_data)
3389{
3390        switch (type) {
3391        case TC_SETUP_QDISC_MQPRIO:
3392                return cxgb_setup_tc_mqprio(dev, type_data);
3393        case TC_SETUP_BLOCK:
3394                return cxgb_setup_tc_block(dev, type_data);
3395        default:
3396                return -EOPNOTSUPP;
3397        }
3398}
3399
3400static void cxgb_del_udp_tunnel(struct net_device *netdev,
3401                                struct udp_tunnel_info *ti)
3402{
3403        struct port_info *pi = netdev_priv(netdev);
3404        struct adapter *adapter = pi->adapter;
3405        unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip);
3406        u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
3407        int ret = 0, i;
3408
3409        if (chip_ver < CHELSIO_T6)
3410                return;
3411
3412        switch (ti->type) {
3413        case UDP_TUNNEL_TYPE_VXLAN:
3414                if (!adapter->vxlan_port_cnt ||
3415                    adapter->vxlan_port != ti->port)
3416                        return; /* Invalid VxLAN destination port */
3417
3418                adapter->vxlan_port_cnt--;
3419                if (adapter->vxlan_port_cnt)
3420                        return;
3421
3422                adapter->vxlan_port = 0;
3423                t4_write_reg(adapter, MPS_RX_VXLAN_TYPE_A, 0);
3424                break;
3425        case UDP_TUNNEL_TYPE_GENEVE:
3426                if (!adapter->geneve_port_cnt ||
3427                    adapter->geneve_port != ti->port)
3428                        return; /* Invalid GENEVE destination port */
3429
3430                adapter->geneve_port_cnt--;
3431                if (adapter->geneve_port_cnt)
3432                        return;
3433
3434                adapter->geneve_port = 0;
3435                t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A, 0);
3436                break;
3437        default:
3438                return;
3439        }
3440
3441        /* Matchall mac entries can be deleted only after all tunnel ports
3442         * are brought down or removed.
3443         */
3444        if (!adapter->rawf_cnt)
3445                return;
3446        for_each_port(adapter, i) {
3447                pi = adap2pinfo(adapter, i);
3448                ret = t4_free_raw_mac_filt(adapter, pi->viid,
3449                                           match_all_mac, match_all_mac,
3450                                           adapter->rawf_start +
3451                                            pi->port_id,
3452                                           1, pi->port_id, false);
3453                if (ret < 0) {
3454                        netdev_info(netdev, "Failed to free mac filter entry, for port %d\n",
3455                                    i);
3456                        return;
3457                }
3458        }
3459}
3460
3461static void cxgb_add_udp_tunnel(struct net_device *netdev,
3462                                struct udp_tunnel_info *ti)
3463{
3464        struct port_info *pi = netdev_priv(netdev);
3465        struct adapter *adapter = pi->adapter;
3466        unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip);
3467        u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
3468        int i, ret;
3469
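            /* VXLAN/GENEVE RX offload needs the T6+ MPS registers and raw
             * MAC filter resources.
             */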
3470        if (chip_ver < CHELSIO_T6 || !adapter->rawf_cnt)
3471                return;
3472
3473        switch (ti->type) {
3474        case UDP_TUNNEL_TYPE_VXLAN:
3475                /* Callback for adding vxlan port can be called with the same
3476                 * port for both IPv4 and IPv6. We should not disable the
3477                 * offloading when the same port for both protocols is added
3478                 * and later one of them is removed.
3479                 */
3480                if (adapter->vxlan_port_cnt &&
3481                    adapter->vxlan_port == ti->port) {
3482                        adapter->vxlan_port_cnt++;
3483                        return;
3484                }
3485
3486                /* We will support only one VxLAN port */
3487                if (adapter->vxlan_port_cnt) {
3488                        netdev_info(netdev, "UDP port %d already offloaded, not adding port %d\n",
3489                                    be16_to_cpu(adapter->vxlan_port),
3490                                    be16_to_cpu(ti->port));
3491                        return;
3492                }
3493
3494                adapter->vxlan_port = ti->port;
3495                adapter->vxlan_port_cnt = 1;
3496
3497                t4_write_reg(adapter, MPS_RX_VXLAN_TYPE_A,
3498                             VXLAN_V(be16_to_cpu(ti->port)) | VXLAN_EN_F);
3499                break;
3500        case UDP_TUNNEL_TYPE_GENEVE:
3501                if (adapter->geneve_port_cnt &&
3502                    adapter->geneve_port == ti->port) {
3503                        adapter->geneve_port_cnt++;
3504                        return;
3505                }
3506
3507                /* We will support only one GENEVE port */
3508                if (adapter->geneve_port_cnt) {
3509                        netdev_info(netdev, "UDP port %d already offloaded, not adding port %d\n",
3510                                    be16_to_cpu(adapter->geneve_port),
3511                                    be16_to_cpu(ti->port));
3512                        return;
3513                }
3514
3515                adapter->geneve_port = ti->port;
3516                adapter->geneve_port_cnt = 1;
3517
3518                t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A,
3519                             GENEVE_V(be16_to_cpu(ti->port)) | GENEVE_EN_F);
3520                break;
3521        default:
3522                return;
3523        }
3524
3525        /* Create a 'match all' mac filter entry for inner mac,
3526         * if the raw mac interface is supported. Once the Linux kernel provides
3527         * driver entry points for adding/deleting the inner mac addresses,
3528         * we will remove this 'match all' entry and fall back to adding
3529         * exact match filters.
3530         */
3531        for_each_port(adapter, i) {
3532                pi = adap2pinfo(adapter, i);
3533
3534                ret = t4_alloc_raw_mac_filt(adapter, pi->viid,
3535                                            match_all_mac,
3536                                            match_all_mac,
3537                                            adapter->rawf_start +
3538                                            pi->port_id,
3539                                            1, pi->port_id, false);
3540                if (ret < 0) {
3541                        netdev_info(netdev, "Failed to allocate a mac filter entry, not adding port %d\n",
3542                                    be16_to_cpu(ti->port));
3543                        cxgb_del_udp_tunnel(netdev, ti);
3544                        return;
3545                }
3546        }
3547}
3548
3549static netdev_features_t cxgb_features_check(struct sk_buff *skb,
3550                                             struct net_device *dev,
3551                                             netdev_features_t features)
3552{
3553        struct port_info *pi = netdev_priv(dev);
3554        struct adapter *adapter = pi->adapter;
3555
3556        if (CHELSIO_CHIP_VERSION(adapter->params.chip) < CHELSIO_T6)
3557                return features;
3558
3559        /* Check if hw supports offload for this packet */
3560        if (!skb->encapsulation || cxgb_encap_offload_supported(skb))
3561                return features;
3562
3563        /* Offload is not supported for this encapsulated packet */
3564        return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
3565}
3566
3567static netdev_features_t cxgb_fix_features(struct net_device *dev,
3568                                           netdev_features_t features)
3569{
3570        /* Disable GRO if RX_CSUM is disabled */
3571        if (!(features & NETIF_F_RXCSUM))
3572                features &= ~NETIF_F_GRO;
3573
3574        return features;
3575}
3576
3577static const struct net_device_ops cxgb4_netdev_ops = {
3578        .ndo_open             = cxgb_open,
3579        .ndo_stop             = cxgb_close,
3580        .ndo_start_xmit       = t4_start_xmit,
3581        .ndo_select_queue     = cxgb_select_queue,
3582        .ndo_get_stats64      = cxgb_get_stats,
3583        .ndo_set_rx_mode      = cxgb_set_rxmode,
3584        .ndo_set_mac_address  = cxgb_set_mac_addr,
3585        .ndo_set_features     = cxgb_set_features,
3586        .ndo_validate_addr    = eth_validate_addr,
3587        .ndo_do_ioctl         = cxgb_ioctl,
3588        .ndo_change_mtu       = cxgb_change_mtu,
3589#ifdef CONFIG_NET_POLL_CONTROLLER
3590        .ndo_poll_controller  = cxgb_netpoll,
3591#endif
3592#ifdef CONFIG_CHELSIO_T4_FCOE
3593        .ndo_fcoe_enable      = cxgb_fcoe_enable,
3594        .ndo_fcoe_disable     = cxgb_fcoe_disable,
3595#endif /* CONFIG_CHELSIO_T4_FCOE */
3596        .ndo_set_tx_maxrate   = cxgb_set_tx_maxrate,
3597        .ndo_setup_tc         = cxgb_setup_tc,
3598        .ndo_udp_tunnel_add   = cxgb_add_udp_tunnel,
3599        .ndo_udp_tunnel_del   = cxgb_del_udp_tunnel,
3600        .ndo_features_check   = cxgb_features_check,
3601        .ndo_fix_features     = cxgb_fix_features,
3602};
3603
3604#ifdef CONFIG_PCI_IOV
3605static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
3606        .ndo_open               = cxgb4_mgmt_open,
3607        .ndo_set_vf_mac         = cxgb4_mgmt_set_vf_mac,
3608        .ndo_get_vf_config      = cxgb4_mgmt_get_vf_config,
3609        .ndo_set_vf_rate        = cxgb4_mgmt_set_vf_rate,
3610        .ndo_get_phys_port_id   = cxgb4_mgmt_get_phys_port_id,
3611        .ndo_set_vf_vlan        = cxgb4_mgmt_set_vf_vlan,
3612        .ndo_set_vf_link_state  = cxgb4_mgmt_set_vf_link_state,
3613};
3614#endif
3615
3616static void cxgb4_mgmt_get_drvinfo(struct net_device *dev,
3617                                   struct ethtool_drvinfo *info)
3618{
3619        struct adapter *adapter = netdev2adap(dev);
3620
3621        strlcpy(info->driver, cxgb4_driver_name, sizeof(info->driver));
3622        strlcpy(info->bus_info, pci_name(adapter->pdev),
3623                sizeof(info->bus_info));
3624}
3625
3626static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = {
3627        .get_drvinfo       = cxgb4_mgmt_get_drvinfo,
3628};
3629
3630static void notify_fatal_err(struct work_struct *work)
3631{
3632        struct adapter *adap;
3633
3634        adap = container_of(work, struct adapter, fatal_err_notify_task);
3635        notify_ulds(adap, CXGB4_STATE_FATAL_ERROR);
3636}
3637
3638void t4_fatal_err(struct adapter *adap)
3639{
3640        int port;
3641
3642        if (pci_channel_offline(adap->pdev))
3643                return;
3644
3645        /* Disable the SGE since ULDs are going to free resources that
3646         * could be exposed to the adapter.  RDMA MWs for example...
3647         */
3648        t4_shutdown_adapter(adap);
3649        for_each_port(adap, port) {
3650                struct net_device *dev = adap->port[port];
3651
3652                /* If we get here in very early initialization the network
3653                 * devices may not have been set up yet.
3654                 */
3655                if (!dev)
3656                        continue;
3657
3658                netif_tx_stop_all_queues(dev);
3659                netif_carrier_off(dev);
3660        }
3661        dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n");
3662        queue_work(adap->workq, &adap->fatal_err_notify_task);
3663}
3664
3665static void setup_memwin(struct adapter *adap)
3666{
3667        u32 nic_win_base = t4_get_util_window(adap);
3668
3669        t4_setup_memwin(adap, nic_win_base, MEMWIN_NIC);
3670}
3671
3672static void setup_memwin_rdma(struct adapter *adap)
3673{
3674        if (adap->vres.ocq.size) {
3675                u32 start;
3676                unsigned int sz_kb;
3677
3678                start = t4_read_pcie_cfg4(adap, PCI_BASE_ADDRESS_2);
3679                start &= PCI_BASE_ADDRESS_MEM_MASK;
3680                start += OCQ_WIN_OFFSET(adap->pdev, &adap->vres);
3681                sz_kb = roundup_pow_of_two(adap->vres.ocq.size) >> 10;
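                    /* Point PCIe memory access window 3 at the on-chip
                     * queue (OCQ) region.
                     */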
3682                t4_write_reg(adap,
3683                             PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, 3),
3684                             start | BIR_V(1) | WINDOW_V(ilog2(sz_kb)));
3685                t4_write_reg(adap,
3686                             PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, 3),
3687                             adap->vres.ocq.start);
3688                t4_read_reg(adap,
3689                            PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, 3));
3690        }
3691}
3692
3693/* HMA Definitions */
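    /* HMA (Host Memory Access) lets the firmware use a region of host memory
     * when the adapter has no usable on-board memory; the driver allocates the
     * pages below and passes their DMA addresses to the firmware.
     */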
3694
3695/* The maximum number of addresses that can be sent in a single FW cmd */
3696#define HMA_MAX_ADDR_IN_CMD     5
3697
3698#define HMA_PAGE_SIZE           PAGE_SIZE
3699
3700#define HMA_MAX_NO_FW_ADDRESS   (16 << 10)  /* FW supports 16K addresses */
3701
3702#define HMA_PAGE_ORDER                                  \
3703        ((HMA_PAGE_SIZE < HMA_MAX_NO_FW_ADDRESS) ?      \
3704        ilog2(HMA_MAX_NO_FW_ADDRESS / HMA_PAGE_SIZE) : 0)
3705
3706/* The minimum and maximum possible HMA sizes that can be specified in the FW
3707 * configuration (in units of MB).
3708 */
3709#define HMA_MIN_TOTAL_SIZE      1
3710#define HMA_MAX_TOTAL_SIZE                              \
3711        (((HMA_PAGE_SIZE << HMA_PAGE_ORDER) *           \
3712          HMA_MAX_NO_FW_ADDRESS) >> 20)
3713
3714static void adap_free_hma_mem(struct adapter *adapter)
3715{
3716        struct scatterlist *iter;
3717        struct page *page;
3718        int i;
3719
3720        if (!adapter->hma.sgt)
3721                return;
3722
3723        if (adapter->hma.flags & HMA_DMA_MAPPED_FLAG) {
3724                dma_unmap_sg(adapter->pdev_dev, adapter->hma.sgt->sgl,
3725                             adapter->hma.sgt->nents, PCI_DMA_BIDIRECTIONAL);
3726                adapter->hma.flags &= ~HMA_DMA_MAPPED_FLAG;
3727        }
3728
3729        for_each_sg(adapter->hma.sgt->sgl, iter,
3730                    adapter->hma.sgt->orig_nents, i) {
3731                page = sg_page(iter);
3732                if (page)
3733                        __free_pages(page, HMA_PAGE_ORDER);
3734        }
3735
3736        kfree(adapter->hma.phy_addr);
3737        sg_free_table(adapter->hma.sgt);
3738        kfree(adapter->hma.sgt);
3739        adapter->hma.sgt = NULL;
3740}
3741
3742static int adap_config_hma(struct adapter *adapter)
3743{
3744        struct scatterlist *sgl, *iter;
3745        struct sg_table *sgt;
3746        struct page *newpage;
3747        unsigned int i, j, k;
3748        u32 param, hma_size;
3749        unsigned int ncmds;
3750        size_t page_size;
3751        u32 page_order;
3752        int node, ret;
3753
3754        /* HMA is supported only for T6+ cards.
3755         * Avoid initializing HMA in kdump kernels.
3756         */
3757        if (is_kdump_kernel() ||
3758            CHELSIO_CHIP_VERSION(adapter->params.chip) < CHELSIO_T6)
3759                return 0;
3760
3761        /* Get the HMA region size required by fw */
3762        param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
3763                 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_HMA_SIZE));
3764        ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0,
3765                              1, &param, &hma_size);
3766        /* An error means the card has its own memory or HMA is not supported by
3767         * the firmware. Return without any errors.
3768         */
3769        if (ret || !hma_size)
3770                return 0;
3771
3772        if (hma_size < HMA_MIN_TOTAL_SIZE ||
3773            hma_size > HMA_MAX_TOTAL_SIZE) {
3774                dev_err(adapter->pdev_dev,
3775                        "HMA size %uMB beyond bounds (%u-%lu)MB\n",
3776                        hma_size, HMA_MIN_TOTAL_SIZE, HMA_MAX_TOTAL_SIZE);
3777                return -EINVAL;
3778        }
3779
3780        page_size = HMA_PAGE_SIZE;
3781        page_order = HMA_PAGE_ORDER;
3782        adapter->hma.sgt = kzalloc(sizeof(*adapter->hma.sgt), GFP_KERNEL);
3783        if (unlikely(!adapter->hma.sgt)) {
3784                dev_err(adapter->pdev_dev, "HMA SG table allocation failed\n");
3785                return -ENOMEM;
3786        }
3787        sgt = adapter->hma.sgt;
3788        /* FW returned value will be in MB */
3790        sgt->orig_nents = (hma_size << 20) / (page_size << page_order);
3791        if (sg_alloc_table(sgt, sgt->orig_nents, GFP_KERNEL)) {
3792                dev_err(adapter->pdev_dev, "HMA SGL allocation failed\n");
3793                kfree(adapter->hma.sgt);
3794                adapter->hma.sgt = NULL;
3795                return -ENOMEM;
3796        }
3797
3798        sgl = adapter->hma.sgt->sgl;
3799        node = dev_to_node(adapter->pdev_dev);
3800        for_each_sg(sgl, iter, sgt->orig_nents, i) {
3801                newpage = alloc_pages_node(node, __GFP_NOWARN | GFP_KERNEL |
3802                                           __GFP_ZERO, page_order);
3803                if (!newpage) {
3804                        dev_err(adapter->pdev_dev,
3805                                "Not enough memory for HMA page allocation\n");
3806                        ret = -ENOMEM;
3807                        goto free_hma;
3808                }
3809                sg_set_page(iter, newpage, page_size << page_order, 0);
3810        }
3811
3812        sgt->nents = dma_map_sg(adapter->pdev_dev, sgl, sgt->orig_nents,
3813                                DMA_BIDIRECTIONAL);
3814        if (!sgt->nents) {
3815                dev_err(adapter->pdev_dev,
3816                        "Not enough memory for HMA DMA mapping");
3817                ret = -ENOMEM;
3818                goto free_hma;
3819        }
3820        adapter->hma.flags |= HMA_DMA_MAPPED_FLAG;
3821
3822        adapter->hma.phy_addr = kcalloc(sgt->nents, sizeof(dma_addr_t),
3823                                        GFP_KERNEL);
3824        if (unlikely(!adapter->hma.phy_addr))
3825                goto free_hma;
3826
3827        for_each_sg(sgl, iter, sgt->nents, i) {
3828                newpage = sg_page(iter);
3829                adapter->hma.phy_addr[i] = sg_dma_address(iter);
3830        }
3831
3832        ncmds = DIV_ROUND_UP(sgt->nents, HMA_MAX_ADDR_IN_CMD);
3833        /* Pass on the addresses to firmware */
3834        for (i = 0, k = 0; i < ncmds; i++, k += HMA_MAX_ADDR_IN_CMD) {
3835                struct fw_hma_cmd hma_cmd;
3836                u8 naddr = HMA_MAX_ADDR_IN_CMD;
3837                u8 soc = 0, eoc = 0;
3838                u8 hma_mode = 1; /* Presently we support only Page table mode */
3839
3840                soc = (i == 0) ? 1 : 0;
3841                eoc = (i == ncmds - 1) ? 1 : 0;
3842
3843                /* For last cmd, set naddr corresponding to remaining
3844                 * addresses
3845                 */
3846                if (i == ncmds - 1) {
3847                        naddr = sgt->nents % HMA_MAX_ADDR_IN_CMD;
3848                        naddr = naddr ? naddr : HMA_MAX_ADDR_IN_CMD;
3849                }
3850                memset(&hma_cmd, 0, sizeof(hma_cmd));
3851                hma_cmd.op_pkd = htonl(FW_CMD_OP_V(FW_HMA_CMD) |
3852                                       FW_CMD_REQUEST_F | FW_CMD_WRITE_F);
3853                hma_cmd.retval_len16 = htonl(FW_LEN16(hma_cmd));
3854
3855                hma_cmd.mode_to_pcie_params =
3856                        htonl(FW_HMA_CMD_MODE_V(hma_mode) |
3857                              FW_HMA_CMD_SOC_V(soc) | FW_HMA_CMD_EOC_V(eoc));
3858
3859                /* HMA cmd size specified in MB */
3860                hma_cmd.naddr_size =
3861                        htonl(FW_HMA_CMD_SIZE_V(hma_size) |
3862                              FW_HMA_CMD_NADDR_V(naddr));
3863
3864                /* Total Page size specified in units of 4K */
3865                hma_cmd.addr_size_pkd =
3866                        htonl(FW_HMA_CMD_ADDR_SIZE_V
3867                                ((page_size << page_order) >> 12));
3868
3869                /* Fill up to HMA_MAX_ADDR_IN_CMD addresses */
3870                for (j = 0; j < naddr; j++) {
3871                        hma_cmd.phy_address[j] =
3872                                cpu_to_be64(adapter->hma.phy_addr[j + k]);
3873                }
3874                ret = t4_wr_mbox(adapter, adapter->mbox, &hma_cmd,
3875                                 sizeof(hma_cmd), &hma_cmd);
3876                if (ret) {
3877                        dev_err(adapter->pdev_dev,
3878                                "HMA FW command failed with err %d\n", ret);
3879                        goto free_hma;
3880                }
3881        }
3882
3883        if (!ret)
3884                dev_info(adapter->pdev_dev,
3885                         "Reserved %uMB host memory for HMA\n", hma_size);
3886        return ret;
3887
3888free_hma:
3889        adap_free_hma_mem(adapter);
3890        return ret;
3891}
3892
3893static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
3894{
3895        u32 v;
3896        int ret;
3897
3898        /* Now that we've successfully configured and initialized the adapter,
3899         * we can ask the Firmware what resources it has provisioned for us.
3900         */
3901        ret = t4_get_pfres(adap);
3902        if (ret) {
3903                dev_err(adap->pdev_dev,
3904                        "Unable to retrieve resource provisioning information\n");
3905                return ret;
3906        }
3907
3908        /* get device capabilities */
3909        memset(c, 0, sizeof(*c));
3910        c->op_to_write = htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
3911                               FW_CMD_REQUEST_F | FW_CMD_READ_F);
3912        c->cfvalid_to_len16 = htonl(FW_LEN16(*c));
3913        ret = t4_wr_mbox(adap, adap->mbox, c, sizeof(*c), c);
3914        if (ret < 0)
3915                return ret;
3916
3917        c->op_to_write = htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
3918                               FW_CMD_REQUEST_F | FW_CMD_WRITE_F);
3919        ret = t4_wr_mbox(adap, adap->mbox, c, sizeof(*c), NULL);
3920        if (ret < 0)
3921                return ret;
3922
3923        ret = t4_config_glbl_rss(adap, adap->pf,
3924                                 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL,
3925                                 FW_RSS_GLB_CONFIG_CMD_TNLMAPEN_F |
3926                                 FW_RSS_GLB_CONFIG_CMD_TNLALLLKP_F);
3927        if (ret < 0)
3928                return ret;
3929
3930        ret = t4_cfg_pfvf(adap, adap->mbox, adap->pf, 0, adap->sge.egr_sz, 64,
3931                          MAX_INGQ, 0, 0, 4, 0xf, 0xf, 16, FW_CMD_CAP_PF,
3932                          FW_CMD_CAP_PF);
3933        if (ret < 0)
3934                return ret;
3935
3936        t4_sge_init(adap);
3937
3938        /* tweak some settings */
3939        t4_write_reg(adap, TP_SHIFT_CNT_A, 0x64f8849);
3940        t4_write_reg(adap, ULP_RX_TDDP_PSZ_A, HPZ0_V(PAGE_SHIFT - 12));
3941        t4_write_reg(adap, TP_PIO_ADDR_A, TP_INGRESS_CONFIG_A);
3942        v = t4_read_reg(adap, TP_PIO_DATA_A);
3943        t4_write_reg(adap, TP_PIO_DATA_A, v & ~CSUM_HAS_PSEUDO_HDR_F);
3944
3945        /* first 4 Tx modulation queues point to consecutive Tx channels */
3946        adap->params.tp.tx_modq_map = 0xE4;
3947        t4_write_reg(adap, TP_TX_MOD_QUEUE_REQ_MAP_A,
3948                     TX_MOD_QUEUE_REQ_MAP_V(adap->params.tp.tx_modq_map));
3949
3950        /* associate each Tx modulation queue with consecutive Tx channels */
3951        v = 0x84218421;
3952        t4_write_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A,
3953                          &v, 1, TP_TX_SCHED_HDR_A);
3954        t4_write_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A,
3955                          &v, 1, TP_TX_SCHED_FIFO_A);
3956        t4_write_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A,
3957                          &v, 1, TP_TX_SCHED_PCMD_A);
3958
3959#define T4_TX_MODQ_10G_WEIGHT_DEFAULT 16 /* in KB units */
3960        if (is_offload(adap)) {
3961                t4_write_reg(adap, TP_TX_MOD_QUEUE_WEIGHT0_A,
3962                             TX_MODQ_WEIGHT0_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
3963                             TX_MODQ_WEIGHT1_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
3964                             TX_MODQ_WEIGHT2_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
3965                             TX_MODQ_WEIGHT3_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT));
3966                t4_write_reg(adap, TP_TX_MOD_CHANNEL_WEIGHT_A,
3967                             TX_MODQ_WEIGHT0_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
3968                             TX_MODQ_WEIGHT1_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
3969                             TX_MODQ_WEIGHT2_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT) |
3970                             TX_MODQ_WEIGHT3_V(T4_TX_MODQ_10G_WEIGHT_DEFAULT));
3971        }
3972
3973        /* get basic stuff going */
3974        return t4_early_init(adap, adap->pf);
3975}
3976
3977/*
3978 * Max # of ATIDs.  The absolute HW max is 16K but we keep it lower.
3979 */
3980#define MAX_ATIDS 8192U
3981
3982/*
3983 * Phase 0 of initialization: contact FW, obtain config, perform basic init.
3984 *
3985 * If the firmware we're dealing with has Configuration File support, then
3986 * we use that to perform all configuration.
3987 */
3988
3989/*
3990 * Tweak configuration based on module parameters, etc.  Most of these have
3991 * defaults assigned to them by Firmware Configuration Files (if we're using
3992 * them) but need to be explicitly set if we're using hard-coded
3993 * initialization.  But even in the case of using Firmware Configuration
3994 * Files, we'd like to expose the ability to change these via module
3995 * parameters so these are essentially common tweaks/settings for
3996 * Configuration Files and hard-coded initialization ...
3997 */
3998static int adap_init0_tweaks(struct adapter *adapter)
3999{
4000        /*
4001         * Fix up various Host-Dependent Parameters like Page Size, Cache
4002         * Line Size, etc.  The firmware default is for a 4KB Page Size and
4003         * 64B Cache Line Size ...
4004         */
4005        t4_fixup_host_params(adapter, PAGE_SIZE, L1_CACHE_BYTES);
4006
4007        /*
4008         * Process module parameters which affect early initialization.
4009         */
4010        if (rx_dma_offset != 2 && rx_dma_offset != 0) {
4011                dev_err(&adapter->pdev->dev,
4012                        "Ignoring illegal rx_dma_offset=%d, using 2\n",
4013                        rx_dma_offset);
4014                rx_dma_offset = 2;
4015        }
4016        t4_set_reg_field(adapter, SGE_CONTROL_A,
4017                         PKTSHIFT_V(PKTSHIFT_M),
4018                         PKTSHIFT_V(rx_dma_offset));
4019
4020        /*
4021         * Don't include the "IP Pseudo Header" in CPL_RX_PKT checksums: Linux
4022         * adds the pseudo header itself.
4023         */
4024        t4_tp_wr_bits_indirect(adapter, TP_INGRESS_CONFIG_A,
4025                               CSUM_HAS_PSEUDO_HDR_F, 0);
4026
4027        return 0;
4028}
4029
4030/* 10Gb/s-BT PHY Support. Chip-external 10Gb/s-BT PHYs are complex chips
4031 * unto themselves and they contain their own firmware to perform their
4032 * tasks ...
4033 */
4034static int phy_aq1202_version(const u8 *phy_fw_data,
4035                              size_t phy_fw_size)
4036{
4037        int offset;
4038
4039        /* At offset 0x8 you're looking for the primary image's
4040         * starting offset which is 3 Bytes wide
4041         *
4042         * At offset 0xa of the primary image, you look for the offset
4043         * of the DRAM segment which is 3 Bytes wide.
4044         *
4045         * The FW version is at offset 0x27e of the DRAM and is 2 Bytes
4046         * wide
4047         */
4048        #define be16(__p) (((__p)[0] << 8) | (__p)[1])
4049        #define le16(__p) ((__p)[0] | ((__p)[1] << 8))
4050        #define le24(__p) (le16(__p) | ((__p)[2] << 16))
4051
4052        offset = le24(phy_fw_data + 0x8) << 12;
4053        offset = le24(phy_fw_data + offset + 0xa);
4054        return be16(phy_fw_data + offset + 0x27e);
4055
4056        #undef be16
4057        #undef le16
4058        #undef le24
4059}
4060
4061static struct info_10gbt_phy_fw {
4062        unsigned int phy_fw_id;         /* PCI Device ID */
4063        char *phy_fw_file;              /* /lib/firmware/ PHY Firmware file */
4064        int (*phy_fw_version)(const u8 *phy_fw_data, size_t phy_fw_size);
4065        int phy_flash;                  /* Has FLASH for PHY Firmware */
4066} phy_info_array[] = {
4067        {
4068                PHY_AQ1202_DEVICEID,
4069                PHY_AQ1202_FIRMWARE,
4070                phy_aq1202_version,
4071                1,
4072        },
4073        {
4074                PHY_BCM84834_DEVICEID,
4075                PHY_BCM84834_FIRMWARE,
4076                NULL,
4077                0,
4078        },
4079        { 0, NULL, NULL },
4080};
4081
4082static struct info_10gbt_phy_fw *find_phy_info(int devid)
4083{
4084        int i;
4085
4086        for (i = 0; i < ARRAY_SIZE(phy_info_array); i++) {
4087                if (phy_info_array[i].phy_fw_id == devid)
4088                        return &phy_info_array[i];
4089        }
4090        return NULL;
4091}
4092
4093/* Handle updating of chip-external 10Gb/s-BT PHY firmware.  This needs to
4094 * happen after the FW_RESET_CMD but before the FW_INITIALIZE_CMD.  On error
4095 * we return a negative error number.  If we transfer new firmware we return 1
4096 * (from t4_load_phy_fw()).  If we don't do anything we return 0.
4097 */
4098static int adap_init0_phy(struct adapter *adap)
4099{
4100        const struct firmware *phyf;
4101        int ret;
4102        struct info_10gbt_phy_fw *phy_info;
4103
4104        /* Use the device ID to determine which PHY file to flash.
4105         */
4106        phy_info = find_phy_info(adap->pdev->device);
4107        if (!phy_info) {
4108                dev_warn(adap->pdev_dev,
4109                         "No PHY Firmware file found for this PHY\n");
4110                return -EOPNOTSUPP;
4111        }
4112
4113        /* If we have a T4 PHY firmware file under /lib/firmware/cxgb4/, then
4114         * use that. The adapter firmware provides us with a memory buffer
4115         * where we can load a PHY firmware file from the host if we want to
4116         * override the PHY firmware File in flash.
4117         */
4118        ret = request_firmware_direct(&phyf, phy_info->phy_fw_file,
4119                                      adap->pdev_dev);
4120        if (ret < 0) {
4121                /* For adapters without FLASH attached to PHY for their
4122                 * firmware, it's obviously a fatal error if we can't get the
4123                 * firmware to the adapter.  For adapters with PHY firmware
4124                 * FLASH storage, it's worth a warning if we can't find the
4125                 * PHY Firmware but we'll neuter the error ...
4126                 */
4127                dev_err(adap->pdev_dev, "unable to find PHY Firmware image "
4128                        "/lib/firmware/%s, error %d\n",
4129                        phy_info->phy_fw_file, -ret);
4130                if (phy_info->phy_flash) {
4131                        int cur_phy_fw_ver = 0;
4132
4133                        t4_phy_fw_ver(adap, &cur_phy_fw_ver);
4134                        dev_warn(adap->pdev_dev, "continuing with on-adapter "
4135                                 "FLASH copy, version %#x\n", cur_phy_fw_ver);
4136                        ret = 0;
4137                }
4138
4139                return ret;
4140        }
4141
4142        /* Load PHY Firmware onto adapter.
4143         */
4144        ret = t4_load_phy_fw(adap, MEMWIN_NIC, &adap->win0_lock,
4145                             phy_info->phy_fw_version,
4146                             (u8 *)phyf->data, phyf->size);
4147        if (ret < 0)
4148                dev_err(adap->pdev_dev, "PHY Firmware transfer error %d\n",
4149                        -ret);
4150        else if (ret > 0) {
4151                int new_phy_fw_ver = 0;
4152
4153                if (phy_info->phy_fw_version)
4154                        new_phy_fw_ver = phy_info->phy_fw_version(phyf->data,
4155                                                                  phyf->size);
4156                dev_info(adap->pdev_dev, "Successfully transferred PHY "
4157                         "Firmware /lib/firmware/%s, version %#x\n",
4158                         phy_info->phy_fw_file, new_phy_fw_ver);
4159        }
4160
4161        release_firmware(phyf);
4162
4163        return ret;
4164}
4165
4166/*
4167 * Attempt to initialize the adapter via a Firmware Configuration File.
4168 */
4169static int adap_init0_config(struct adapter *adapter, int reset)
4170{
4171        char *fw_config_file, fw_config_file_path[256];
4172        u32 finiver, finicsum, cfcsum, param, val;
4173        struct fw_caps_config_cmd caps_cmd;
4174        unsigned long mtype = 0, maddr = 0;
4175        const struct firmware *cf;
4176        char *config_name = NULL;
4177        int config_issued = 0;
4178        int ret;
4179
4180        /*
4181         * Reset device if necessary.
4182         */
4183        if (reset) {
4184                ret = t4_fw_reset(adapter, adapter->mbox,
4185                                  PIORSTMODE_F | PIORST_F);
4186                if (ret < 0)
4187                        goto bye;
4188        }
4189
4190        /* If this is a 10Gb/s-BT adapter make sure the chip-external
4191         * 10Gb/s-BT PHYs have up-to-date firmware.  Note that this step needs
4192         * to be performed after any global adapter RESET above since some
4193         * PHYs only have local RAM copies of the PHY firmware.
4194         */
4195        if (is_10gbt_device(adapter->pdev->device)) {
4196                ret = adap_init0_phy(adapter);
4197                if (ret < 0)
4198                        goto bye;
4199        }
4200        /*
4201         * If we have a T4 configuration file under /lib/firmware/cxgb4/,
4202         * then use that.  Otherwise, use the configuration file stored
4203         * in the adapter flash ...
4204         */
4205        switch (CHELSIO_CHIP_VERSION(adapter->params.chip)) {
4206        case CHELSIO_T4:
4207                fw_config_file = FW4_CFNAME;
4208                break;
4209        case CHELSIO_T5:
4210                fw_config_file = FW5_CFNAME;
4211                break;
4212        case CHELSIO_T6:
4213                fw_config_file = FW6_CFNAME;
4214                break;
4215        default:
4216                dev_err(adapter->pdev_dev, "Device %d is not supported\n",
4217                       adapter->pdev->device);
4218                ret = -EINVAL;
4219                goto bye;
4220        }
4221
4222        ret = request_firmware(&cf, fw_config_file, adapter->pdev_dev);
4223        if (ret < 0) {
4224                config_name = "On FLASH";
4225                mtype = FW_MEMTYPE_CF_FLASH;
4226                maddr = t4_flash_cfg_addr(adapter);
4227        } else {
4228                u32 params[7], val[7];
4229
4230                sprintf(fw_config_file_path,
4231                        "/lib/firmware/%s", fw_config_file);
4232                config_name = fw_config_file_path;
4233
4234                if (cf->size >= FLASH_CFG_MAX_SIZE)
4235                        ret = -ENOMEM;
4236                else {
4237                        params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
4238                             FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_CF));
4239                        ret = t4_query_params(adapter, adapter->mbox,
4240                                              adapter->pf, 0, 1, params, val);
4241                        if (ret == 0) {
4242                                /*
4243                                 * For t4_memory_rw() below addresses and
4244                                 * sizes have to be in terms of multiples of 4
4245                                 * bytes.  So, if the Configuration File isn't
4246                                 * a multiple of 4 bytes in length we'll have
4247                                 * to write that out separately since we can't
4248                                 * guarantee that the bytes following the
4249                                 * residual byte in the buffer returned by
4250                                 * request_firmware() are zeroed out ...
4251                                 */
4252                                size_t resid = cf->size & 0x3;
4253                                size_t size = cf->size & ~0x3;
4254                                __be32 *data = (__be32 *)cf->data;
4255
4256                                mtype = FW_PARAMS_PARAM_Y_G(val[0]);
4257                                maddr = FW_PARAMS_PARAM_Z_G(val[0]) << 16;
4258
4259                                spin_lock(&adapter->win0_lock);
4260                                ret = t4_memory_rw(adapter, 0, mtype, maddr,
4261                                                   size, data, T4_MEMORY_WRITE);
4262                                if (ret == 0 && resid != 0) {
4263                                        union {
4264                                                __be32 word;
4265                                                char buf[4];
4266                                        } last;
4267                                        int i;
4268
4269                                        last.word = data[size >> 2];
4270                                        for (i = resid; i < 4; i++)
4271                                                last.buf[i] = 0;
4272                                        ret = t4_memory_rw(adapter, 0, mtype,
4273                                                           maddr + size,
4274                                                           4, &last.word,
4275                                                           T4_MEMORY_WRITE);
4276                                }
4277                                spin_unlock(&adapter->win0_lock);
4278                        }
4279                }
4280
4281                release_firmware(cf);
4282                if (ret)
4283                        goto bye;
4284        }
4285
4286        val = 0;
4287
4288        /* Ofld + Hash filter is supported. Older fw will fail this request and
4289         * it is fine.
4290         */
4291        param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
4292                 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD));
4293        ret = t4_set_params(adapter, adapter->mbox, adapter->pf, 0,
4294                            1, &param, &val);
4295
4296        /* If the FW doesn't know about Hash filter + ofld support,
4297         * it's not a problem; don't return an error.
4298         */
4299        if (ret < 0) {
4300                dev_warn(adapter->pdev_dev,
4301                         "Hash filter with ofld is not supported by FW\n");
4302        }
4303
4304        /*
4305         * Issue a Capability Configuration command to the firmware to get it
4306         * to parse the Configuration File.  We don't use t4_fw_config_file()
4307         * because we want the ability to modify various features after we've
4308         * processed the configuration file ...
4309         */
4310        memset(&caps_cmd, 0, sizeof(caps_cmd));
4311        caps_cmd.op_to_write =
4312                htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
4313                      FW_CMD_REQUEST_F |
4314                      FW_CMD_READ_F);
4315        caps_cmd.cfvalid_to_len16 =
4316                htonl(FW_CAPS_CONFIG_CMD_CFVALID_F |
4317                      FW_CAPS_CONFIG_CMD_MEMTYPE_CF_V(mtype) |
4318                      FW_CAPS_CONFIG_CMD_MEMADDR64K_CF_V(maddr >> 16) |
4319                      FW_LEN16(caps_cmd));
4320        ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
4321                         &caps_cmd);
4322
4323        /* If the CAPS_CONFIG failed with an ENOENT (for a Firmware
4324         * Configuration File in FLASH), our last gasp effort is to use the
4325         * Firmware Configuration File which is embedded in the firmware.  A
4326         * very few early versions of the firmware didn't have one embedded
4327         * but we can ignore those.
4328         */
4329        if (ret == -ENOENT) {
4330                memset(&caps_cmd, 0, sizeof(caps_cmd));
4331                caps_cmd.op_to_write =
4332                        htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
4333                                        FW_CMD_REQUEST_F |
4334                                        FW_CMD_READ_F);
4335                caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
4336                ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd,
4337                                sizeof(caps_cmd), &caps_cmd);
4338                config_name = "Firmware Default";
4339        }
4340
4341        config_issued = 1;
4342        if (ret < 0)
4343                goto bye;
4344
4345        finiver = ntohl(caps_cmd.finiver);
4346        finicsum = ntohl(caps_cmd.finicsum);
4347        cfcsum = ntohl(caps_cmd.cfcsum);
4348        if (finicsum != cfcsum)
4349        dev_warn(adapter->pdev_dev,
4350                 "Configuration File checksum mismatch: [fini] csum=%#x, computed csum=%#x\n",
4351                 finicsum, cfcsum);
4352
4353        /*
4354         * And now tell the firmware to use the configuration we just loaded.
4355         */
4356        caps_cmd.op_to_write =
4357                htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
4358                      FW_CMD_REQUEST_F |
4359                      FW_CMD_WRITE_F);
4360        caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
4361        ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
4362                         NULL);
4363        if (ret < 0)
4364                goto bye;
4365
4366        /*
4367         * Tweak configuration based on system architecture, module
4368         * parameters, etc.
4369         */
4370        ret = adap_init0_tweaks(adapter);
4371        if (ret < 0)
4372                goto bye;
4373
4374        /* We will proceed even if HMA init fails. */
4375        ret = adap_config_hma(adapter);
4376        if (ret)
4377                dev_err(adapter->pdev_dev,
4378                        "HMA configuration failed with error %d\n", ret);
4379
4380        if (is_t6(adapter->params.chip)) {
4381                adap_config_hpfilter(adapter);
4382                ret = setup_ppod_edram(adapter);
4383                if (!ret)
4384                        dev_info(adapter->pdev_dev,
4385                                 "Successfully enabled ppod edram feature\n");
4386        }
4387
4388        /*
4389         * And finally tell the firmware to initialize itself using the
4390         * parameters from the Configuration File.
4391         */
4392        ret = t4_fw_initialize(adapter, adapter->mbox);
4393        if (ret < 0)
4394                goto bye;
4395
4396        /* Emit Firmware Configuration File information and return
4397         * successfully.
4398         */
4399        dev_info(adapter->pdev_dev,
4400                 "Successfully configured using Firmware Configuration File \"%s\", version %#x, computed checksum %#x\n",
4401                 config_name, finiver, cfcsum);
4402        return 0;
4403
4404        /*
4405         * Something bad happened.  Return the error ...  (If the "error"
4406         * is that there's no Configuration File on the adapter we don't
4407         * want to issue a warning since this is fairly common.)
4408         */
4409bye:
4410        if (config_issued && ret != -ENOENT)
4411                dev_warn(adapter->pdev_dev, "\"%s\" configuration file error %d\n",
4412                         config_name, -ret);
4413        return ret;
4414}
4415
4416static struct fw_info fw_info_array[] = {
4417        {
4418                .chip = CHELSIO_T4,
4419                .fs_name = FW4_CFNAME,
4420                .fw_mod_name = FW4_FNAME,
4421                .fw_hdr = {
4422                        .chip = FW_HDR_CHIP_T4,
4423                        .fw_ver = __cpu_to_be32(FW_VERSION(T4)),
4424                        .intfver_nic = FW_INTFVER(T4, NIC),
4425                        .intfver_vnic = FW_INTFVER(T4, VNIC),
4426                        .intfver_ri = FW_INTFVER(T4, RI),
4427                        .intfver_iscsi = FW_INTFVER(T4, ISCSI),
4428                        .intfver_fcoe = FW_INTFVER(T4, FCOE),
4429                },
4430        }, {
4431                .chip = CHELSIO_T5,
4432                .fs_name = FW5_CFNAME,
4433                .fw_mod_name = FW5_FNAME,
4434                .fw_hdr = {
4435                        .chip = FW_HDR_CHIP_T5,
4436                        .fw_ver = __cpu_to_be32(FW_VERSION(T5)),
4437                        .intfver_nic = FW_INTFVER(T5, NIC),
4438                        .intfver_vnic = FW_INTFVER(T5, VNIC),
4439                        .intfver_ri = FW_INTFVER(T5, RI),
4440                        .intfver_iscsi = FW_INTFVER(T5, ISCSI),
4441                        .intfver_fcoe = FW_INTFVER(T5, FCOE),
4442                },
4443        }, {
4444                .chip = CHELSIO_T6,
4445                .fs_name = FW6_CFNAME,
4446                .fw_mod_name = FW6_FNAME,
4447                .fw_hdr = {
4448                        .chip = FW_HDR_CHIP_T6,
4449                        .fw_ver = __cpu_to_be32(FW_VERSION(T6)),
4450                        .intfver_nic = FW_INTFVER(T6, NIC),
4451                        .intfver_vnic = FW_INTFVER(T6, VNIC),
4452                        .intfver_ofld = FW_INTFVER(T6, OFLD),
4453                        .intfver_ri = FW_INTFVER(T6, RI),
4454                        .intfver_iscsipdu = FW_INTFVER(T6, ISCSIPDU),
4455                        .intfver_iscsi = FW_INTFVER(T6, ISCSI),
4456                        .intfver_fcoepdu = FW_INTFVER(T6, FCOEPDU),
4457                        .intfver_fcoe = FW_INTFVER(T6, FCOE),
4458                },
4459        }
4460
4461};
4462
4463static struct fw_info *find_fw_info(int chip)
4464{
4465        int i;
4466
4467        for (i = 0; i < ARRAY_SIZE(fw_info_array); i++) {
4468                if (fw_info_array[i].chip == chip)
4469                        return &fw_info_array[i];
4470        }
4471        return NULL;
4472}
4473
4474/*
4475 * Phase 0 of initialization: contact FW, obtain config, perform basic init.
4476 */
4477static int adap_init0(struct adapter *adap, int vpd_skip)
4478{
4479        struct fw_caps_config_cmd caps_cmd;
4480        u32 params[7], val[7];
4481        enum dev_state state;
4482        u32 v, port_vec;
4483        int reset = 1;
4484        int ret;
4485
4486        /* Grab Firmware Device Log parameters as early as possible so we have
4487         * access to it for debugging, etc.
4488         */
4489        ret = t4_init_devlog_params(adap);
4490        if (ret < 0)
4491                return ret;
4492
4493        /* Contact FW, advertising Master capability */
4494        ret = t4_fw_hello(adap, adap->mbox, adap->mbox,
4495                          is_kdump_kernel() ? MASTER_MUST : MASTER_MAY, &state);
4496        if (ret < 0) {
4497                dev_err(adap->pdev_dev, "could not connect to FW, error %d\n",
4498                        ret);
4499                return ret;
4500        }
4501        if (ret == adap->mbox)
4502                adap->flags |= CXGB4_MASTER_PF;
4503
4504        /*
4505         * If we're the Master PF Driver and the device is uninitialized,
4506         * then let's consider upgrading the firmware ...  (We always want
4507         * to check the firmware version number in order to A. get it for
4508         * later reporting and B. to warn if the currently loaded firmware
4509         * is excessively mismatched relative to the driver.)
4510         */
4511
4512        t4_get_version_info(adap);
4513        ret = t4_check_fw_version(adap);
4514        /* If firmware is too old (not supported by driver) force an update. */
4515        if (ret)
4516                state = DEV_STATE_UNINIT;
4517        if ((adap->flags & CXGB4_MASTER_PF) && state != DEV_STATE_INIT) {
4518                struct fw_info *fw_info;
4519                struct fw_hdr *card_fw;
4520                const struct firmware *fw;
4521                const u8 *fw_data = NULL;
4522                unsigned int fw_size = 0;
4523
4524                /* This is the firmware whose headers the driver was compiled
4525                 * against
4526                 */
4527                fw_info = find_fw_info(CHELSIO_CHIP_VERSION(adap->params.chip));
4528                if (fw_info == NULL) {
4529                        dev_err(adap->pdev_dev,
4530                                "unable to get firmware info for chip %d.\n",
4531                                CHELSIO_CHIP_VERSION(adap->params.chip));
4532                        return -EINVAL;
4533                }
4534
4535                /* allocate memory to read the header of the firmware on the
4536                 * card
4537                 */
4538                card_fw = kvzalloc(sizeof(*card_fw), GFP_KERNEL);
4539                if (!card_fw) {
4540                        ret = -ENOMEM;
4541                        goto bye;
4542                }
4543
4544                /* Get FW from /lib/firmware/ */
4545                ret = request_firmware(&fw, fw_info->fw_mod_name,
4546                                       adap->pdev_dev);
4547                if (ret < 0) {
4548                        dev_err(adap->pdev_dev,
4549                                "unable to load firmware image %s, error %d\n",
4550                                fw_info->fw_mod_name, ret);
4551                } else {
4552                        fw_data = fw->data;
4553                        fw_size = fw->size;
4554                }
4555
4556                /* upgrade FW logic */
4557                ret = t4_prep_fw(adap, fw_info, fw_data, fw_size, card_fw,
4558                                 state, &reset);
4559
4560                /* Cleaning up */
4561                release_firmware(fw);
4562                kvfree(card_fw);
4563
4564                if (ret < 0)
4565                        goto bye;
4566        }
4567
4568        /* If the firmware is initialized already, emit a simple note to that
4569         * effect. Otherwise, it's time to try initializing the adapter.
4570         */
4571        if (state == DEV_STATE_INIT) {
4572                ret = adap_config_hma(adap);
4573                if (ret)
4574                        dev_err(adap->pdev_dev,
4575                                "HMA configuration failed with error %d\n",
4576                                ret);
4577                dev_info(adap->pdev_dev,
4578                         "Coming up as %s: Adapter already initialized\n",
4579                         adap->flags & CXGB4_MASTER_PF ? "MASTER" : "SLAVE");
4580        } else {
4581                dev_info(adap->pdev_dev,
4582                         "Coming up as MASTER: Initializing adapter\n");
4583
4584                /* Find out whether we're dealing with a version of the
4585                 * firmware which has configuration file support.
4586                 */
4587                params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
4588                             FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_CF));
4589                ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1,
4590                                      params, val);
4591
4592                /* If the firmware doesn't support Configuration Files,
4593                 * return an error.
4594                 */
4595                if (ret < 0) {
4596                        dev_err(adap->pdev_dev, "firmware doesn't support "
4597                                "Firmware Configuration Files\n");
4598                        goto bye;
4599                }
4600
4601                /* The firmware provides us with a memory buffer where we can
4602                 * load a Configuration File from the host if we want to
4603                 * override the Configuration File in flash.
4604                 */
4605                ret = adap_init0_config(adap, reset);
4606                if (ret == -ENOENT) {
4607                        dev_err(adap->pdev_dev, "no Configuration File "
4608                                "present on adapter.\n");
4609                        goto bye;
4610                }
4611                if (ret < 0) {
4612                        dev_err(adap->pdev_dev, "could not initialize "
4613                                "adapter, error %d\n", -ret);
4614                        goto bye;
4615                }
4616        }
4617
4618        /* Now that we've successfully configured and initialized the adapter
4619         * (or found it already initialized), we can ask the Firmware what
4620         * resources it has provisioned for us.
4621         */
4622        ret = t4_get_pfres(adap);
4623        if (ret) {
4624                dev_err(adap->pdev_dev,
4625                        "Unable to retrieve resource provisioning information\n");
4626                goto bye;
4627        }
4628
4629        /* Grab VPD parameters.  This should be done after we establish a
4630         * connection to the firmware since some of the VPD parameters
4631         * (notably the Core Clock frequency) are retrieved via requests to
4632         * the firmware.  On the other hand, we need these fairly early on
4633         * so we do this right after getting ahold of the firmware.
4634         *
4635         * We need to do this after initializing the adapter because someone
4636         * could have FLASHed a new VPD which won't be read by the firmware
4637         * until we do the RESET ...
4638         */
4639        if (!vpd_skip) {
4640                ret = t4_get_vpd_params(adap, &adap->params.vpd);
4641                if (ret < 0)
4642                        goto bye;
4643        }
4644
4645        /* Find out what ports are available to us.  Note that we need to do
4646         * this before calling adap_init0_no_config() since it needs nports
4647         * and portvec ...
4648         */
4649        v =
4650            FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
4651            FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_PORTVEC);
4652        ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, &v, &port_vec);
4653        if (ret < 0)
4654                goto bye;
4655
4656        adap->params.nports = hweight32(port_vec);
4657        adap->params.portvec = port_vec;
4658
4659        /* Give the SGE code a chance to pull in anything that it needs ...
4660         * Note that this must be called after we retrieve our VPD parameters
4661         * in order to know how to convert core ticks to seconds, etc.
4662         */
4663        ret = t4_sge_init(adap);
4664        if (ret < 0)
4665                goto bye;
4666
4667        /* Grab the SGE Doorbell Queue Timer values.  If successful, that
4668         * indicates that the Firmware and Hardware support this.
4669         */
4670        params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
4671                    FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK));
4672        ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
4673                              1, params, val);
4674
4675        if (!ret) {
4676                adap->sge.dbqtimer_tick = val[0];
4677                ret = t4_read_sge_dbqtimers(adap,
4678                                            ARRAY_SIZE(adap->sge.dbqtimer_val),
4679                                            adap->sge.dbqtimer_val);
4680        }
4681
4682        if (!ret)
4683                adap->flags |= CXGB4_SGE_DBQ_TIMER;
4684
4685        if (is_bypass_device(adap->pdev->device))
4686                adap->params.bypass = 1;
4687
4688        /*
4689         * Grab some of our basic fundamental operating parameters.
4690         */
4691        params[0] = FW_PARAM_PFVF(EQ_START);
4692        params[1] = FW_PARAM_PFVF(L2T_START);
4693        params[2] = FW_PARAM_PFVF(L2T_END);
4694        params[3] = FW_PARAM_PFVF(FILTER_START);
4695        params[4] = FW_PARAM_PFVF(FILTER_END);
4696        params[5] = FW_PARAM_PFVF(IQFLINT_START);
4697        ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 6, params, val);
4698        if (ret < 0)
4699                goto bye;
4700        adap->sge.egr_start = val[0];
4701        adap->l2t_start = val[1];
4702        adap->l2t_end = val[2];
4703        adap->tids.ftid_base = val[3];
4704        adap->tids.nftids = val[4] - val[3] + 1;
4705        adap->sge.ingr_start = val[5];
4706
4707        if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) {
4708                params[0] = FW_PARAM_PFVF(HPFILTER_START);
4709                params[1] = FW_PARAM_PFVF(HPFILTER_END);
4710                ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
4711                                      params, val);
4712                if (ret < 0)
4713                        goto bye;
4714
4715                adap->tids.hpftid_base = val[0];
4716                adap->tids.nhpftids = val[1] - val[0] + 1;
4717
4718                /* Read the raw mps entries. In T6, the last 2 tcam entries
4719                 * are reserved for raw mac addresses (rawf = 2, one per port).
4720                 */
4721                params[0] = FW_PARAM_PFVF(RAWF_START);
4722                params[1] = FW_PARAM_PFVF(RAWF_END);
4723                ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
4724                                      params, val);
4725                if (ret == 0) {
4726                        adap->rawf_start = val[0];
4727                        adap->rawf_cnt = val[1] - val[0] + 1;
4728                }
4729
4730                adap->tids.tid_base =
4731                        t4_read_reg(adap, LE_DB_ACTIVE_TABLE_START_INDEX_A);
4732        }
4733
4734        /* qids (ingress/egress) returned from firmware can be anywhere
4735         * in the range from EQ(IQFLINT)_START to EQ(IQFLINT)_END.
4736         * Hence the driver needs to allocate memory for this range to
4737         * store the queue info.  Get the highest IQFLINT/EQ index returned
4738         * by the FW_EQ_*_CMD.alloc command.
4739         */
4740        params[0] = FW_PARAM_PFVF(EQ_END);
4741        params[1] = FW_PARAM_PFVF(IQFLINT_END);
4742        ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2, params, val);
4743        if (ret < 0)
4744                goto bye;
4745        adap->sge.egr_sz = val[0] - adap->sge.egr_start + 1;
4746        adap->sge.ingr_sz = val[1] - adap->sge.ingr_start + 1;
4747
4748        adap->sge.egr_map = kcalloc(adap->sge.egr_sz,
4749                                    sizeof(*adap->sge.egr_map), GFP_KERNEL);
4750        if (!adap->sge.egr_map) {
4751                ret = -ENOMEM;
4752                goto bye;
4753        }
4754
4755        adap->sge.ingr_map = kcalloc(adap->sge.ingr_sz,
4756                                     sizeof(*adap->sge.ingr_map), GFP_KERNEL);
4757        if (!adap->sge.ingr_map) {
4758                ret = -ENOMEM;
4759                goto bye;
4760        }
4761
4762        /* Allocate the memory for the various egress queue bitmaps,
4763         * i.e. starving_fl, txq_maperr and blocked_fl.
4764         */
4765        adap->sge.starving_fl = kcalloc(BITS_TO_LONGS(adap->sge.egr_sz),
4766                                        sizeof(long), GFP_KERNEL);
4767        if (!adap->sge.starving_fl) {
4768                ret = -ENOMEM;
4769                goto bye;
4770        }
4771
4772        adap->sge.txq_maperr = kcalloc(BITS_TO_LONGS(adap->sge.egr_sz),
4773                                       sizeof(long), GFP_KERNEL);
4774        if (!adap->sge.txq_maperr) {
4775                ret = -ENOMEM;
4776                goto bye;
4777        }
4778
4779#ifdef CONFIG_DEBUG_FS
4780        adap->sge.blocked_fl = kcalloc(BITS_TO_LONGS(adap->sge.egr_sz),
4781                                       sizeof(long), GFP_KERNEL);
4782        if (!adap->sge.blocked_fl) {
4783                ret = -ENOMEM;
4784                goto bye;
4785        }
4786#endif
4787
4788        params[0] = FW_PARAM_PFVF(CLIP_START);
4789        params[1] = FW_PARAM_PFVF(CLIP_END);
4790        ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2, params, val);
4791        if (ret < 0)
4792                goto bye;
4793        adap->clipt_start = val[0];
4794        adap->clipt_end = val[1];
4795
4796        /* Get the supported number of traffic classes */
4797        params[0] = FW_PARAM_DEV(NUM_TM_CLASS);
4798        ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params, val);
4799        if (ret < 0) {
4800                /* We couldn't retrieve the number of Traffic Classes
4801                 * supported by the hardware/firmware. So we hard
4802                 * code it here.
4803                 */
4804                adap->params.nsched_cls = is_t4(adap->params.chip) ? 15 : 16;
4805        } else {
4806                adap->params.nsched_cls = val[0];
4807        }
4808
4809        /* query params related to active filter region */
4810        params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START);
4811        params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END);
4812        ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2, params, val);
4813        /* If Active filter size is set we enable establishing
4814         * offload connection through firmware work request
4815         */
4816        if ((val[0] != val[1]) && (ret >= 0)) {
4817                adap->flags |= CXGB4_FW_OFLD_CONN;
4818                adap->tids.aftid_base = val[0];
4819                adap->tids.aftid_end = val[1];
4820        }
4821
4822        /* If we're running on newer firmware, let it know that we're
4823         * prepared to deal with encapsulated CPL messages.  Older
4824         * firmware won't understand this and we'll just get
4825         * unencapsulated messages ...
4826         */
4827        params[0] = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
4828        val[0] = 1;
4829        (void)t4_set_params(adap, adap->mbox, adap->pf, 0, 1, params, val);
4830
4831        /*
4832         * Find out whether we're allowed to use the T5+ ULPTX MEMWRITE DSGL
4833         * capability.  Earlier versions of the firmware didn't have the
4834         * ULPTX_MEMWRITE_DSGL so we'll interpret a query failure as no
4835         * permission to use ULPTX MEMWRITE DSGL.
4836         */
4837        if (is_t4(adap->params.chip)) {
4838                adap->params.ulptx_memwrite_dsgl = false;
4839        } else {
4840                params[0] = FW_PARAM_DEV(ULPTX_MEMWRITE_DSGL);
4841                ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
4842                                      1, params, val);
4843                adap->params.ulptx_memwrite_dsgl = (ret == 0 && val[0] != 0);
4844        }
4845
4846        /* See if FW supports FW_RI_FR_NSMR_TPTE_WR work request */
4847        params[0] = FW_PARAM_DEV(RI_FR_NSMR_TPTE_WR);
4848        ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
4849                              1, params, val);
4850        adap->params.fr_nsmr_tpte_wr_support = (ret == 0 && val[0] != 0);
4851
4852        /* See if FW supports FW_FILTER2 work request */
4853        if (is_t4(adap->params.chip)) {
4854                adap->params.filter2_wr_support = 0;
4855        } else {
4856                params[0] = FW_PARAM_DEV(FILTER2_WR);
4857                ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
4858                                      1, params, val);
4859                adap->params.filter2_wr_support = (ret == 0 && val[0] != 0);
4860        }
4861
4862        /* Check if FW supports returning vin and smt index.
4863         * If this is not supported, the driver will derive
4864         * these values from the viid.
4865         */
4866        params[0] = FW_PARAM_DEV(OPAQUE_VIID_SMT_EXTN);
4867        ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
4868                              1, params, val);
4869        adap->params.viid_smt_extn_support = (ret == 0 && val[0] != 0);
4870
4871        /*
4872         * Get device capabilities so we can determine what resources we need
4873         * to manage.
4874         */
4875        memset(&caps_cmd, 0, sizeof(caps_cmd));
4876        caps_cmd.op_to_write = htonl(FW_CMD_OP_V(FW_CAPS_CONFIG_CMD) |
4877                                     FW_CMD_REQUEST_F | FW_CMD_READ_F);
4878        caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
4879        ret = t4_wr_mbox(adap, adap->mbox, &caps_cmd, sizeof(caps_cmd),
4880                         &caps_cmd);
4881        if (ret < 0)
4882                goto bye;
4883
4884        /* Hash filter has some mandatory register settings which need to be
4885         * validated, and for that it needs to know whether offload is
4886         * enabled, so check and set the offload flag here.
4887         */
4888        if (caps_cmd.ofldcaps)
4889                adap->params.offload = 1;
4890
4891        if (caps_cmd.ofldcaps ||
4892            (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER)) ||
4893            (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_ETHOFLD))) {
4894                /* query offload-related parameters */
4895                params[0] = FW_PARAM_DEV(NTID);
4896                params[1] = FW_PARAM_PFVF(SERVER_START);
4897                params[2] = FW_PARAM_PFVF(SERVER_END);
4898                params[3] = FW_PARAM_PFVF(TDDP_START);
4899                params[4] = FW_PARAM_PFVF(TDDP_END);
4900                params[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
4901                ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 6,
4902                                      params, val);
4903                if (ret < 0)
4904                        goto bye;
4905                adap->tids.ntids = val[0];
4906                adap->tids.natids = min(adap->tids.ntids / 2, MAX_ATIDS);
4907                adap->tids.stid_base = val[1];
4908                adap->tids.nstids = val[2] - val[1] + 1;
4909                /*
4910                 * Setup server filter region. Divide the available filter
4911                 * region into two parts. Regular filters get 1/3rd and server
4912                 * filters get 2/3rd part. This is only enabled if the
4913                 * workaround path is enabled.
4914                 * 1. Regular filters.
4915                 * 2. Server filters: These are special filters which are
4916                 * used to redirect SYN packets to the offload queue.
4917                 */
4918                if (adap->flags & CXGB4_FW_OFLD_CONN && !is_bypass(adap)) {
4919                        adap->tids.sftid_base = adap->tids.ftid_base +
4920                                        DIV_ROUND_UP(adap->tids.nftids, 3);
4921                        adap->tids.nsftids = adap->tids.nftids -
4922                                         DIV_ROUND_UP(adap->tids.nftids, 3);
4923                        adap->tids.nftids = adap->tids.sftid_base -
4924                                                adap->tids.ftid_base;
4925                }
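                /* For example (illustrative numbers): with nftids = 300,
                 * DIV_ROUND_UP(300, 3) = 100, so sftid_base moves 100 entries
                 * past ftid_base, nsftids becomes 200 and nftids is reduced
                 * to 100, i.e. regular filters keep 1/3rd of the region and
                 * server filters get the remaining 2/3rds.
                 */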
4926                adap->vres.ddp.start = val[3];
4927                adap->vres.ddp.size = val[4] - val[3] + 1;
4928                adap->params.ofldq_wr_cred = val[5];
4929
4930                if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER)) {
4931                        init_hash_filter(adap);
4932                } else {
4933                        adap->num_ofld_uld += 1;
4934                }
4935
4936                if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_ETHOFLD)) {
4937                        params[0] = FW_PARAM_PFVF(ETHOFLD_START);
4938                        params[1] = FW_PARAM_PFVF(ETHOFLD_END);
4939                        ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
4940                                              params, val);
4941                        if (!ret) {
4942                                adap->tids.eotid_base = val[0];
4943                                adap->tids.neotids = min_t(u32, MAX_ATIDS,
4944                                                           val[1] - val[0] + 1);
4945                                adap->params.ethofld = 1;
4946                        }
4947                }
4948        }
4949        if (caps_cmd.rdmacaps) {
4950                params[0] = FW_PARAM_PFVF(STAG_START);
4951                params[1] = FW_PARAM_PFVF(STAG_END);
4952                params[2] = FW_PARAM_PFVF(RQ_START);
4953                params[3] = FW_PARAM_PFVF(RQ_END);
4954                params[4] = FW_PARAM_PFVF(PBL_START);
4955                params[5] = FW_PARAM_PFVF(PBL_END);
4956                ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 6,
4957                                      params, val);
4958                if (ret < 0)
4959                        goto bye;
4960                adap->vres.stag.start = val[0];
4961                adap->vres.stag.size = val[1] - val[0] + 1;
4962                adap->vres.rq.start = val[2];
4963                adap->vres.rq.size = val[3] - val[2] + 1;
4964                adap->vres.pbl.start = val[4];
4965                adap->vres.pbl.size = val[5] - val[4] + 1;
4966
4967                params[0] = FW_PARAM_PFVF(SRQ_START);
4968                params[1] = FW_PARAM_PFVF(SRQ_END);
4969                ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
4970                                      params, val);
4971                if (!ret) {
4972                        adap->vres.srq.start = val[0];
4973                        adap->vres.srq.size = val[1] - val[0] + 1;
4974                }
4975                if (adap->vres.srq.size) {
4976                        adap->srq = t4_init_srq(adap->vres.srq.size);
4977                        if (!adap->srq)
4978                                dev_warn(&adap->pdev->dev, "could not allocate SRQ, continuing\n");
4979                }
4980
4981                params[0] = FW_PARAM_PFVF(SQRQ_START);
4982                params[1] = FW_PARAM_PFVF(SQRQ_END);
4983                params[2] = FW_PARAM_PFVF(CQ_START);
4984                params[3] = FW_PARAM_PFVF(CQ_END);
4985                params[4] = FW_PARAM_PFVF(OCQ_START);
4986                params[5] = FW_PARAM_PFVF(OCQ_END);
4987                ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 6, params,
4988                                      val);
4989                if (ret < 0)
4990                        goto bye;
4991                adap->vres.qp.start = val[0];
4992                adap->vres.qp.size = val[1] - val[0] + 1;
4993                adap->vres.cq.start = val[2];
4994                adap->vres.cq.size = val[3] - val[2] + 1;
4995                adap->vres.ocq.start = val[4];
4996                adap->vres.ocq.size = val[5] - val[4] + 1;
4997
4998                params[0] = FW_PARAM_DEV(MAXORDIRD_QP);
4999                params[1] = FW_PARAM_DEV(MAXIRD_ADAPTER);
5000                ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2, params,
5001                                      val);
5002                if (ret < 0) {
5003                        adap->params.max_ordird_qp = 8;
5004                        adap->params.max_ird_adapter = 32 * adap->tids.ntids;
5005                        ret = 0;
5006                } else {
5007                        adap->params.max_ordird_qp = val[0];
5008                        adap->params.max_ird_adapter = val[1];
5009                }
5010                dev_info(adap->pdev_dev,
5011                         "max_ordird_qp %d max_ird_adapter %d\n",
5012                         adap->params.max_ordird_qp,
5013                         adap->params.max_ird_adapter);
5014
5015                /* Enable write_with_immediate if FW supports it */
5016                params[0] = FW_PARAM_DEV(RDMA_WRITE_WITH_IMM);
5017                ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params,
5018                                      val);
5019                adap->params.write_w_imm_support = (ret == 0 && val[0] != 0);
5020
5021                /* Enable write_cmpl if FW supports it */
5022                params[0] = FW_PARAM_DEV(RI_WRITE_CMPL_WR);
5023                ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, params,
5024                                      val);
5025                adap->params.write_cmpl_support = (ret == 0 && val[0] != 0);
5026                adap->num_ofld_uld += 2;
5027        }
5028        if (caps_cmd.iscsicaps) {
5029                params[0] = FW_PARAM_PFVF(ISCSI_START);
5030                params[1] = FW_PARAM_PFVF(ISCSI_END);
5031                ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
5032                                      params, val);
5033                if (ret < 0)
5034                        goto bye;
5035                adap->vres.iscsi.start = val[0];
5036                adap->vres.iscsi.size = val[1] - val[0] + 1;
5037                if (is_t6(adap->params.chip)) {
5038                        params[0] = FW_PARAM_PFVF(PPOD_EDRAM_START);
5039                        params[1] = FW_PARAM_PFVF(PPOD_EDRAM_END);
5040                        ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
5041                                              params, val);
5042                        if (!ret) {
5043                                adap->vres.ppod_edram.start = val[0];
5044                                adap->vres.ppod_edram.size =
5045                                        val[1] - val[0] + 1;
5046
5047                                dev_info(adap->pdev_dev,
5048                                         "ppod edram start 0x%x end 0x%x size 0x%x\n",
5049                                         val[0], val[1],
5050                                         adap->vres.ppod_edram.size);
5051                        }
5052                }
5053                /* LIO target and cxgb4i initiator */
5054                adap->num_ofld_uld += 2;
5055        }
5056        if (caps_cmd.cryptocaps) {
5057                if (ntohs(caps_cmd.cryptocaps) &
5058                    FW_CAPS_CONFIG_CRYPTO_LOOKASIDE) {
5059                        params[0] = FW_PARAM_PFVF(NCRYPTO_LOOKASIDE);
5060                        ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
5061                                              2, params, val);
5062                        if (ret < 0) {
5063                                if (ret != -EINVAL)
5064                                        goto bye;
5065                        } else {
5066                                adap->vres.ncrypto_fc = val[0];
5067                        }
5068                        adap->num_ofld_uld += 1;
5069                }
5070                if (ntohs(caps_cmd.cryptocaps) &
5071                    FW_CAPS_CONFIG_TLS_INLINE) {
5072                        params[0] = FW_PARAM_PFVF(TLS_START);
5073                        params[1] = FW_PARAM_PFVF(TLS_END);
5074                        ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
5075                                              2, params, val);
5076                        if (ret < 0)
5077                                goto bye;
5078                        adap->vres.key.start = val[0];
5079                        adap->vres.key.size = val[1] - val[0] + 1;
5080                        adap->num_uld += 1;
5081                }
5082                adap->params.crypto = ntohs(caps_cmd.cryptocaps);
5083        }
5084
5085        /* The MTU/MSS Table is initialized by now, so load their values.  If
5086         * we're initializing the adapter, then we'll make any modifications
5087         * we want to the MTU/MSS Table and also initialize the congestion
5088         * parameters.
5089         */
5090        t4_read_mtu_tbl(adap, adap->params.mtus, NULL);
5091        if (state != DEV_STATE_INIT) {
5092                int i;
5093
5094                /* The default MTU Table contains values 1492 and 1500.
5095                 * However, for TCP, it's better to have two values which are
5096                 * a multiple of 8 +/- 4 bytes apart near this popular MTU.
5097                 * This allows us to have a TCP Data Payload which is a
5098                 * multiple of 8 regardless of what combination of TCP Options
5099                 * are in use (always a multiple of 4 bytes) which is
5100                 * important for performance reasons.  For instance, if no
5101                 * options are in use, then we have a 20-byte IP header and a
5102                 * 20-byte TCP header.  In this case, a 1500-byte MSS would
5103                 * result in a TCP Data Payload of 1500 - 40 == 1460 bytes
5104                 * which is not a multiple of 8.  So using an MSS of 1488 in
5105                 * this case results in a TCP Data Payload of 1448 bytes which
5106                 * is a multiple of 8.  On the other hand, if 12-byte TCP Time
5107                 * Stamps have been negotiated, then an MTU of 1500 bytes
5108                 * results in a TCP Data Payload of 1448 bytes which, as
5109                 * above, is a multiple of 8 bytes ...
5110                 */
5111                for (i = 0; i < NMTUS; i++)
5112                        if (adap->params.mtus[i] == 1492) {
5113                                adap->params.mtus[i] = 1488;
5114                                break;
5115                        }
5116
5117                t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
5118                             adap->params.b_wnd);
5119        }
5120        t4_init_sge_params(adap);
5121        adap->flags |= CXGB4_FW_OK;
5122        t4_init_tp_params(adap, true);
5123        return 0;
5124
5125        /*
5126         * Something bad happened.  If a command timed out or failed with EIO,
5127         * the FW is not operating within its spec or something catastrophic
5128         * happened to the HW/FW; stop issuing commands.
5129         */
5130bye:
5131        adap_free_hma_mem(adap);
5132        kfree(adap->sge.egr_map);
5133        kfree(adap->sge.ingr_map);
5134        kfree(adap->sge.starving_fl);
5135        kfree(adap->sge.txq_maperr);
5136#ifdef CONFIG_DEBUG_FS
5137        kfree(adap->sge.blocked_fl);
5138#endif
5139        if (ret != -ETIMEDOUT && ret != -EIO)
5140                t4_fw_bye(adap, adap->mbox);
5141        return ret;
5142}
5143
5144/* EEH callbacks */
5145
5146static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev,
5147                                         pci_channel_state_t state)
5148{
5149        int i;
5150        struct adapter *adap = pci_get_drvdata(pdev);
5151
5152        if (!adap)
5153                goto out;
5154
5155        rtnl_lock();
5156        adap->flags &= ~CXGB4_FW_OK;
5157        notify_ulds(adap, CXGB4_STATE_START_RECOVERY);
5158        spin_lock(&adap->stats_lock);
5159        for_each_port(adap, i) {
5160                struct net_device *dev = adap->port[i];
5161                if (dev) {
5162                        netif_device_detach(dev);
5163                        netif_carrier_off(dev);
5164                }
5165        }
5166        spin_unlock(&adap->stats_lock);
5167        disable_interrupts(adap);
5168        if (adap->flags & CXGB4_FULL_INIT_DONE)
5169                cxgb_down(adap);
5170        rtnl_unlock();
5171        if ((adap->flags & CXGB4_DEV_ENABLED)) {
5172                pci_disable_device(pdev);
5173                adap->flags &= ~CXGB4_DEV_ENABLED;
5174        }
5175out:    return state == pci_channel_io_perm_failure ?
5176                PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
5177}
5178
5179static pci_ers_result_t eeh_slot_reset(struct pci_dev *pdev)
5180{
5181        int i, ret;
5182        struct fw_caps_config_cmd c;
5183        struct adapter *adap = pci_get_drvdata(pdev);
5184
5185        if (!adap) {
5186                pci_restore_state(pdev);
5187                pci_save_state(pdev);
5188                return PCI_ERS_RESULT_RECOVERED;
5189        }
5190
5191        if (!(adap->flags & CXGB4_DEV_ENABLED)) {
5192                if (pci_enable_device(pdev)) {
5193                        dev_err(&pdev->dev, "Cannot reenable PCI "
5194                                            "device after reset\n");
5195                        return PCI_ERS_RESULT_DISCONNECT;
5196                }
5197                adap->flags |= CXGB4_DEV_ENABLED;
5198        }
5199
5200        pci_set_master(pdev);
5201        pci_restore_state(pdev);
5202        pci_save_state(pdev);
5203
5204        if (t4_wait_dev_ready(adap->regs) < 0)
5205                return PCI_ERS_RESULT_DISCONNECT;
5206        if (t4_fw_hello(adap, adap->mbox, adap->pf, MASTER_MUST, NULL) < 0)
5207                return PCI_ERS_RESULT_DISCONNECT;
5208        adap->flags |= CXGB4_FW_OK;
5209        if (adap_init1(adap, &c))
5210                return PCI_ERS_RESULT_DISCONNECT;
5211
5212        for_each_port(adap, i) {
5213                struct port_info *pi = adap2pinfo(adap, i);
5214                u8 vivld = 0, vin = 0;
5215
5216                ret = t4_alloc_vi(adap, adap->mbox, pi->tx_chan, adap->pf, 0, 1,
5217                                  NULL, NULL, &vivld, &vin);
5218                if (ret < 0)
5219                        return PCI_ERS_RESULT_DISCONNECT;
5220                pi->viid = ret;
5221                pi->xact_addr_filt = -1;
5222                /* If fw supports returning the VIN as part of FW_VI_CMD,
5223                 * save the returned values.
5224                 */
5225                if (adap->params.viid_smt_extn_support) {
5226                        pi->vivld = vivld;
5227                        pi->vin = vin;
5228                } else {
5229                        /* Retrieve the values from VIID */
5230                        pi->vivld = FW_VIID_VIVLD_G(pi->viid);
5231                        pi->vin = FW_VIID_VIN_G(pi->viid);
5232                }
5233        }
5234
5235        t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
5236                     adap->params.b_wnd);
5237        setup_memwin(adap);
5238        if (cxgb_up(adap))
5239                return PCI_ERS_RESULT_DISCONNECT;
5240        return PCI_ERS_RESULT_RECOVERED;
5241}
5242
5243static void eeh_resume(struct pci_dev *pdev)
5244{
5245        int i;
5246        struct adapter *adap = pci_get_drvdata(pdev);
5247
5248        if (!adap)
5249                return;
5250
5251        rtnl_lock();
5252        for_each_port(adap, i) {
5253                struct net_device *dev = adap->port[i];
5254                if (dev) {
5255                        if (netif_running(dev)) {
5256                                link_start(dev);
5257                                cxgb_set_rxmode(dev);
5258                        }
5259                        netif_device_attach(dev);
5260                }
5261        }
5262        rtnl_unlock();
5263}
5264
5265static void eeh_reset_prepare(struct pci_dev *pdev)
5266{
5267        struct adapter *adapter = pci_get_drvdata(pdev);
5268        int i;
5269
5270        if (adapter->pf != 4)
5271                return;
5272
5273        adapter->flags &= ~CXGB4_FW_OK;
5274
5275        notify_ulds(adapter, CXGB4_STATE_DOWN);
5276
5277        for_each_port(adapter, i)
5278                if (adapter->port[i]->reg_state == NETREG_REGISTERED)
5279                        cxgb_close(adapter->port[i]);
5280
5281        disable_interrupts(adapter);
5282        cxgb4_free_mps_ref_entries(adapter);
5283
5284        adap_free_hma_mem(adapter);
5285
5286        if (adapter->flags & CXGB4_FULL_INIT_DONE)
5287                cxgb_down(adapter);
5288}
5289
5290static void eeh_reset_done(struct pci_dev *pdev)
5291{
5292        struct adapter *adapter = pci_get_drvdata(pdev);
5293        int err, i;
5294
5295        if (adapter->pf != 4)
5296                return;
5297
5298        err = t4_wait_dev_ready(adapter->regs);
5299        if (err < 0) {
5300                dev_err(adapter->pdev_dev,
5301                        "Device not ready, err %d", err);
5302                return;
5303        }
5304
5305        setup_memwin(adapter);
5306
5307        err = adap_init0(adapter, 1);
5308        if (err) {
5309                dev_err(adapter->pdev_dev,
5310                        "Adapter init failed, err %d", err);
5311                return;
5312        }
5313
5314        setup_memwin_rdma(adapter);
5315
5316        if (adapter->flags & CXGB4_FW_OK) {
5317                err = t4_port_init(adapter, adapter->pf, adapter->pf, 0);
5318                if (err) {
5319                        dev_err(adapter->pdev_dev,
5320                                "Port init failed, err %d", err);
5321                        return;
5322                }
5323        }
5324
5325        err = cfg_queues(adapter);
5326        if (err) {
5327                dev_err(adapter->pdev_dev,
5328                        "Config queues failed, err %d", err);
5329                return;
5330        }
5331
5332        cxgb4_init_mps_ref_entries(adapter);
5333
5334        err = setup_fw_sge_queues(adapter);
5335        if (err) {
5336                dev_err(adapter->pdev_dev,
5337                        "FW sge queue allocation failed, err %d", err);
5338                return;
5339        }
5340
5341        for_each_port(adapter, i)
5342                if (adapter->port[i]->reg_state == NETREG_REGISTERED)
5343                        cxgb_open(adapter->port[i]);
5344}
5345
5346static const struct pci_error_handlers cxgb4_eeh = {
5347        .error_detected = eeh_err_detected,
5348        .slot_reset     = eeh_slot_reset,
5349        .resume         = eeh_resume,
5350        .reset_prepare  = eeh_reset_prepare,
5351        .reset_done     = eeh_reset_done,
5352};
5353
5354/* Return true if the Link Configuration supports "High Speeds" (those greater
5355 * than 1Gb/s).
5356 */
5357static inline bool is_x_10g_port(const struct link_config *lc)
5358{
5359        unsigned int speeds, high_speeds;
5360
5361        speeds = FW_PORT_CAP32_SPEED_V(FW_PORT_CAP32_SPEED_G(lc->pcaps));
5362        high_speeds = speeds &
5363                        ~(FW_PORT_CAP32_SPEED_100M | FW_PORT_CAP32_SPEED_1G);
5364
5365        return high_speeds != 0;
5366}
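/* For example (illustrative): a port whose pcaps advertise only 100M and/or
 * 1G speeds makes is_x_10g_port() return false, while any port advertising
 * 10G or faster returns true.
 */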
5367
5368/* Perform default configuration of DMA queues depending on the number and type
5369 * of ports we found and the number of available CPUs.  Most settings can be
5370 * modified by the admin prior to actual use.
5371 */
5372static int cfg_queues(struct adapter *adap)
5373{
5374        u32 avail_qsets, avail_eth_qsets, avail_uld_qsets;
5375        u32 i, n10g = 0, qidx = 0, n1g = 0;
5376        u32 ncpus = num_online_cpus();
5377        u32 niqflint, neq, num_ulds;
5378        struct sge *s = &adap->sge;
5379        u32 q10g = 0, q1g;
5380
5381        /* Reduce memory usage in kdump environment, disable all offload. */
5382        if (is_kdump_kernel() || (is_uld(adap) && t4_uld_mem_alloc(adap))) {
5383                adap->params.offload = 0;
5384                adap->params.crypto = 0;
5385                adap->params.ethofld = 0;
5386        }
5387
5388        /* Calculate the number of Ethernet Queue Sets available based on
5389         * resources provisioned for us.  We always have an Asynchronous
5390         * Firmware Event Ingress Queue.  If we're operating in MSI or Legacy
5391         * IRQ Pin Interrupt mode, then we'll also have a Forwarded Interrupt
5392         * Ingress Queue.  Meanwhile, we need two Egress Queues for each
5393         * Queue Set: one for the Free List and one for the Ethernet TX Queue.
5394         *
5395         * Note that we should also take into account all of the various
5396         * Offload Queues.  But, in any situation where we're operating in
5397         * a Resource Constrained Provisioning environment, doing any Offload
5398         * at all is problematic ...
5399         */
5400        niqflint = adap->params.pfres.niqflint - 1;
5401        if (!(adap->flags & CXGB4_USING_MSIX))
5402                niqflint--;
5403        neq = adap->params.pfres.neq / 2;
5404        avail_qsets = min(niqflint, neq);
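        /* For example (illustrative numbers): with pfres.niqflint = 66 and
         * pfres.neq = 128 in MSI-X mode, this works out to niqflint = 65,
         * neq = 64 and therefore avail_qsets = 64.
         */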
5405
5406        if (avail_qsets < adap->params.nports) {
5407                dev_err(adap->pdev_dev, "avail_eth_qsets=%d < nports=%d\n",
5408                        avail_qsets, adap->params.nports);
5409                return -ENOMEM;
5410        }
5411
5412        /* Count the number of 10Gb/s or better ports */
5413        for_each_port(adap, i)
5414                n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
5415
5416        avail_eth_qsets = min_t(u32, avail_qsets, MAX_ETH_QSETS);
5417
5418        /* We default to 1 queue per non-10G port and up to as many queues as
5419         * there are CPU cores per 10G port.
5420         */
5421        if (n10g)
5422                q10g = (avail_eth_qsets - (adap->params.nports - n10g)) / n10g;
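        /* For example (illustrative numbers): a 4-port adapter with two 10G
         * ports and avail_eth_qsets = 32 gives q10g = (32 - 2) / 2 = 15,
         * which is further capped below (by the CPU count in the non-DCB
         * case, or to 1 in a kdump kernel).
         */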
5423
5424        n1g = adap->params.nports - n10g;
5425#ifdef CONFIG_CHELSIO_T4_DCB
5426        /* For Data Center Bridging support we need to be able to support up
5427         * to 8 Traffic Priorities; each of which will be assigned to its
5428         * own TX Queue in order to prevent Head-Of-Line Blocking.
5429         */
5430        q1g = 8;
5431        if (adap->params.nports * 8 > avail_eth_qsets) {
5432                dev_err(adap->pdev_dev, "DCB avail_eth_qsets=%d < %d!\n",
5433                        avail_eth_qsets, adap->params.nports * 8);
5434                return -ENOMEM;
5435        }
5436
5437        if (adap->params.nports * ncpus < avail_eth_qsets)
5438                q10g = max(8U, ncpus);
5439        else
5440                q10g = max(8U, q10g);
5441
5442        while ((q10g * n10g) > (avail_eth_qsets - n1g * q1g))
5443                q10g--;
5444
5445#else /* !CONFIG_CHELSIO_T4_DCB */
5446        q1g = 1;
5447        q10g = min(q10g, ncpus);
5448#endif /* !CONFIG_CHELSIO_T4_DCB */
5449        if (is_kdump_kernel()) {
5450                q10g = 1;
5451                q1g = 1;
5452        }
5453
5454        for_each_port(adap, i) {
5455                struct port_info *pi = adap2pinfo(adap, i);
5456
5457                pi->first_qset = qidx;
5458                pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : q1g;
5459                qidx += pi->nqsets;
5460        }
5461
5462        s->ethqsets = qidx;
5463        s->max_ethqsets = qidx;   /* MSI-X may lower it later */
5464        avail_qsets -= qidx;
5465
5466        if (is_uld(adap)) {
5467                /* For offload we use 1 queue/channel if all ports are up to 1G,
5468                 * otherwise we divide all available queues amongst the channels
5469                 * capped by the number of available cores.
5470                 */
5471                num_ulds = adap->num_uld + adap->num_ofld_uld;
5472                i = min_t(u32, MAX_OFLD_QSETS, ncpus);
5473                avail_uld_qsets = roundup(i, adap->params.nports);
5474                if (avail_qsets < num_ulds * adap->params.nports) {
5475                        adap->params.offload = 0;
5476                        adap->params.crypto = 0;
5477                        s->ofldqsets = 0;
5478                } else if (avail_qsets < num_ulds * avail_uld_qsets || !n10g) {
5479                        s->ofldqsets = adap->params.nports;
5480                } else {
5481                        s->ofldqsets = avail_uld_qsets;
5482                }
5483
5484                avail_qsets -= num_ulds * s->ofldqsets;
5485        }
5486
5487        /* ETHOFLD Queues used for QoS offload should follow the same
5488         * allocation scheme as normal Ethernet Queues.
5489         */
5490        if (is_ethofld(adap)) {
5491                if (avail_qsets < s->max_ethqsets) {
5492                        adap->params.ethofld = 0;
5493                        s->eoqsets = 0;
5494                } else {
5495                        s->eoqsets = s->max_ethqsets;
5496                }
5497                avail_qsets -= s->eoqsets;
5498        }
5499
5500        for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
5501                struct sge_eth_rxq *r = &s->ethrxq[i];
5502
5503                init_rspq(adap, &r->rspq, 5, 10, 1024, 64);
5504                r->fl.size = 72;
5505        }
5506
5507        for (i = 0; i < ARRAY_SIZE(s->ethtxq); i++)
5508                s->ethtxq[i].q.size = 1024;
5509
5510        for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++)
5511                s->ctrlq[i].q.size = 512;
5512
5513        if (!is_t4(adap->params.chip))
5514                s->ptptxq.q.size = 8;
5515
5516        init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64);
5517        init_rspq(adap, &s->intrq, 0, 1, 512, 64);
5518
5519        return 0;
5520}
5521
5522/*
5523 * Reduce the number of Ethernet queues across all ports to at most n.
5524 * n provides at least one queue per port.
5525 */
5526static void reduce_ethqs(struct adapter *adap, int n)
5527{
5528        int i;
5529        struct port_info *pi;
5530
5531        while (n < adap->sge.ethqsets)
5532                for_each_port(adap, i) {
5533                        pi = adap2pinfo(adap, i);
5534                        if (pi->nqsets > 1) {
5535                                pi->nqsets--;
5536                                adap->sge.ethqsets--;
5537                                if (adap->sge.ethqsets <= n)
5538                                        break;
5539                        }
5540                }
5541
5542        n = 0;
5543        for_each_port(adap, i) {
5544                pi = adap2pinfo(adap, i);
5545                pi->first_qset = n;
5546                n += pi->nqsets;
5547        }
5548}
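/* For example (illustrative numbers): with 4 ports of 8 queue sets each and
 * n = 16, each pass of the outer loop in reduce_ethqs() trims one queue set
 * from every port until ethqsets reaches 16, leaving 4 queue sets per port;
 * first_qset is then recomputed so the per-port ranges stay contiguous.
 */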
5549
5550static int alloc_msix_info(struct adapter *adap, u32 num_vec)
5551{
5552        struct msix_info *msix_info;
5553
5554        msix_info = kcalloc(num_vec, sizeof(*msix_info), GFP_KERNEL);
5555        if (!msix_info)
5556                return -ENOMEM;
5557
5558        adap->msix_bmap.msix_bmap = kcalloc(BITS_TO_LONGS(num_vec),
5559                                            sizeof(long), GFP_KERNEL);
5560        if (!adap->msix_bmap.msix_bmap) {
5561                kfree(msix_info);
5562                return -ENOMEM;
5563        }
5564
5565        spin_lock_init(&adap->msix_bmap.lock);
5566        adap->msix_bmap.mapsize = num_vec;
5567
5568        adap->msix_info = msix_info;
5569        return 0;
5570}
5571
5572static void free_msix_info(struct adapter *adap)
5573{
5574        kfree(adap->msix_bmap.msix_bmap);
5575        kfree(adap->msix_info);
5576}
5577
5578int cxgb4_get_msix_idx_from_bmap(struct adapter *adap)
5579{
5580        struct msix_bmap *bmap = &adap->msix_bmap;
5581        unsigned int msix_idx;
5582        unsigned long flags;
5583
5584        spin_lock_irqsave(&bmap->lock, flags);
5585        msix_idx = find_first_zero_bit(bmap->msix_bmap, bmap->mapsize);
5586        if (msix_idx < bmap->mapsize) {
5587                __set_bit(msix_idx, bmap->msix_bmap);
5588        } else {
5589                spin_unlock_irqrestore(&bmap->lock, flags);
5590                return -ENOSPC;
5591        }
5592
5593        spin_unlock_irqrestore(&bmap->lock, flags);
5594        return msix_idx;
5595}
5596
5597void cxgb4_free_msix_idx_in_bmap(struct adapter *adap,
5598                                 unsigned int msix_idx)
5599{
5600        struct msix_bmap *bmap = &adap->msix_bmap;
5601        unsigned long flags;
5602
5603        spin_lock_irqsave(&bmap->lock, flags);
5604        __clear_bit(msix_idx, bmap->msix_bmap);
5605        spin_unlock_irqrestore(&bmap->lock, flags);
5606}
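/* Typical usage sketch (illustrative): a queue setup path would grab a free
 * vector index with cxgb4_get_msix_idx_from_bmap(adap), request the IRQ for
 * the corresponding adap->msix_info[] entry, and return the index with
 * cxgb4_free_msix_idx_in_bmap(adap, idx) on teardown or error.
 */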
5607
5608/* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
5609#define EXTRA_VECS 2
5610
5611static int enable_msix(struct adapter *adap)
5612{
5613        u32 eth_need, uld_need = 0, ethofld_need = 0;
5614        u32 ethqsets = 0, ofldqsets = 0, eoqsets = 0;
5615        u8 num_uld = 0, nchan = adap->params.nports;
5616        u32 i, want, need, num_vec;
5617        struct sge *s = &adap->sge;
5618        struct msix_entry *entries;
5619        struct port_info *pi;
5620        int allocated, ret;
5621
5622        want = s->max_ethqsets;
5623#ifdef CONFIG_CHELSIO_T4_DCB
5624        /* For Data Center Bridging we need 8 Ethernet TX Priority Queues for
5625         * each port.
5626         */
5627        need = 8 * nchan;
5628#else
5629        need = nchan;
5630#endif
5631        eth_need = need;
5632        if (is_uld(adap)) {
5633                num_uld = adap->num_ofld_uld + adap->num_uld;
5634                want += num_uld * s->ofldqsets;
5635                uld_need = num_uld * nchan;
5636                need += uld_need;
5637        }
5638
5639        if (is_ethofld(adap)) {
5640                want += s->eoqsets;
5641                ethofld_need = eth_need;
5642                need += ethofld_need;
5643        }
5644
5645        want += EXTRA_VECS;
5646        need += EXTRA_VECS;
5647
5648        entries = kmalloc_array(want, sizeof(*entries), GFP_KERNEL);
5649        if (!entries)
5650                return -ENOMEM;
5651
5652        for (i = 0; i < want; i++)
5653                entries[i].entry = i;
5654
5655        allocated = pci_enable_msix_range(adap->pdev, entries, need, want);
5656        if (allocated < 0) {
5657                /* Disable offload and attempt to get vectors for NIC
5658                 * only mode.
5659                 */
5660                want = s->max_ethqsets + EXTRA_VECS;
5661                need = eth_need + EXTRA_VECS;
5662                allocated = pci_enable_msix_range(adap->pdev, entries,
5663                                                  need, want);
5664                if (allocated < 0) {
5665                        dev_info(adap->pdev_dev,
5666                                 "Disabling MSI-X due to insufficient MSI-X vectors\n");
5667                        ret = allocated;
5668                        goto out_free;
5669                }
5670
5671                dev_info(adap->pdev_dev,
5672                         "Disabling offload due to insufficient MSI-X vectors\n");
5673                adap->params.offload = 0;
5674                adap->params.crypto = 0;
5675                adap->params.ethofld = 0;
5676                s->ofldqsets = 0;
5677                s->eoqsets = 0;
5678                uld_need = 0;
5679                ethofld_need = 0;
5680        }
5681
5682        num_vec = allocated;
5683        if (num_vec < want) {
5684                /* Distribute available vectors to the various queue groups.
5685                 * Every group gets its minimum requirement and NIC gets top
5686                 * priority for leftovers.
5687                 */
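                /* Worked example (hypothetical numbers): a 2-port adapter
                 * without DCB or offload has eth_need = 2 and, with
                 * EXTRA_VECS, need = 4.  If only 10 vectors are granted and
                 * each port was configured with more than one queue set,
                 * ethqsets starts at 2, num_vec drops to 6, and each pass of
                 * the loop below adds one queue set per port until the spare
                 * vectors run out, ending with ethqsets = 8.
                 */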
5688                ethqsets = eth_need;
5689                if (is_uld(adap))
5690                        ofldqsets = nchan;
5691                if (is_ethofld(adap))
5692                        eoqsets = ethofld_need;
5693
5694                num_vec -= need;
5695                while (num_vec) {
5696                        if (num_vec < eth_need + ethofld_need ||
5697                            ethqsets > s->max_ethqsets)
5698                                break;
5699
5700                        for_each_port(adap, i) {
5701                                pi = adap2pinfo(adap, i);
5702                                if (pi->nqsets < 2)
5703                                        continue;
5704
5705                                ethqsets++;
5706                                num_vec--;
5707                                if (ethofld_need) {
5708                                        eoqsets++;
5709                                        num_vec--;
5710                                }
5711                        }
5712                }
5713
5714                if (is_uld(adap)) {
5715                        while (num_vec) {
5716                                if (num_vec < uld_need ||
5717                                    ofldqsets > s->ofldqsets)
5718                                        break;
5719
5720                                ofldqsets++;
5721                                num_vec -= uld_need;
5722                        }
5723                }
5724        } else {
5725                ethqsets = s->max_ethqsets;
5726                if (is_uld(adap))
5727                        ofldqsets = s->ofldqsets;
5728                if (is_ethofld(adap))
5729                        eoqsets = s->eoqsets;
5730        }
5731
5732        if (ethqsets < s->max_ethqsets) {
5733                s->max_ethqsets = ethqsets;
5734                reduce_ethqs(adap, ethqsets);
5735        }
5736
5737        if (is_uld(adap)) {
5738                s->ofldqsets = ofldqsets;
5739                s->nqs_per_uld = s->ofldqsets;
5740        }
5741
5742        if (is_ethofld(adap))
5743                s->eoqsets = eoqsets;
5744
        /* Allocate per-vector MSI-X bookkeeping (info array and bitmap) */
5746        ret = alloc_msix_info(adap, allocated);
5747        if (ret)
5748                goto out_disable_msix;
5749
5750        for (i = 0; i < allocated; i++) {
5751                adap->msix_info[i].vec = entries[i].vector;
5752                adap->msix_info[i].idx = i;
5753        }
5754
5755        dev_info(adap->pdev_dev,
5756                 "%d MSI-X vectors allocated, nic %d eoqsets %d per uld %d\n",
5757                 allocated, s->max_ethqsets, s->eoqsets, s->nqs_per_uld);
5758
5759        kfree(entries);
5760        return 0;
5761
5762out_disable_msix:
5763        pci_disable_msix(adap->pdev);
5764
5765out_free:
5766        kfree(entries);
5767        return ret;
5768}
5769
5770#undef EXTRA_VECS
5771
5772static int init_rss(struct adapter *adap)
5773{
5774        unsigned int i;
5775        int err;
5776
5777        err = t4_init_rss_mode(adap, adap->mbox);
5778        if (err)
5779                return err;
5780
5781        for_each_port(adap, i) {
5782                struct port_info *pi = adap2pinfo(adap, i);
5783
5784                pi->rss = kcalloc(pi->rss_size, sizeof(u16), GFP_KERNEL);
5785                if (!pi->rss)
5786                        return -ENOMEM;
5787        }
5788        return 0;
5789}
5790
5791/* Dump basic information about the adapter */
5792static void print_adapter_info(struct adapter *adapter)
5793{
5794        /* Hardware/Firmware/etc. Version/Revision IDs */
5795        t4_dump_version_info(adapter);
5796
5797        /* Software/Hardware configuration */
5798        dev_info(adapter->pdev_dev, "Configuration: %sNIC %s, %s capable\n",
5799                 is_offload(adapter) ? "R" : "",
5800                 ((adapter->flags & CXGB4_USING_MSIX) ? "MSI-X" :
5801                  (adapter->flags & CXGB4_USING_MSI) ? "MSI" : ""),
5802                 is_offload(adapter) ? "Offload" : "non-Offload");
5803}
5804
5805static void print_port_info(const struct net_device *dev)
5806{
5807        char buf[80];
5808        char *bufp = buf;
5809        const struct port_info *pi = netdev_priv(dev);
5810        const struct adapter *adap = pi->adapter;
5811
5812        if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_100M)
5813                bufp += sprintf(bufp, "100M/");
5814        if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_1G)
5815                bufp += sprintf(bufp, "1G/");
5816        if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_10G)
5817                bufp += sprintf(bufp, "10G/");
5818        if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_25G)
5819                bufp += sprintf(bufp, "25G/");
5820        if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_40G)
5821                bufp += sprintf(bufp, "40G/");
5822        if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_50G)
5823                bufp += sprintf(bufp, "50G/");
5824        if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_100G)
5825                bufp += sprintf(bufp, "100G/");
5826        if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_200G)
5827                bufp += sprintf(bufp, "200G/");
5828        if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_400G)
5829                bufp += sprintf(bufp, "400G/");
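        /* Back up over the trailing '/' so that, e.g., "1G/10G/" plus the
         * port type renders as "1G/10GBASE-<type>" rather than
         * "1G/10G/BASE-<type>".
         */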
5830        if (bufp != buf)
5831                --bufp;
5832        sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type));
5833
5834        netdev_info(dev, "%s: Chelsio %s (%s) %s\n",
5835                    dev->name, adap->params.vpd.id, adap->name, buf);
5836}
5837
5838/*
5839 * Free the following resources:
5840 * - memory used for tables
5841 * - MSI/MSI-X
5842 * - net devices
5843 * - resources FW is holding for us
5844 */
5845static void free_some_resources(struct adapter *adapter)
5846{
5847        unsigned int i;
5848
5849        kvfree(adapter->smt);
5850        kvfree(adapter->l2t);
5851        kvfree(adapter->srq);
5852        t4_cleanup_sched(adapter);
5853        kvfree(adapter->tids.tid_tab);
5854        cxgb4_cleanup_tc_matchall(adapter);
5855        cxgb4_cleanup_tc_mqprio(adapter);
5856        cxgb4_cleanup_tc_flower(adapter);
5857        cxgb4_cleanup_tc_u32(adapter);
5858        kfree(adapter->sge.egr_map);
5859        kfree(adapter->sge.ingr_map);
5860        kfree(adapter->sge.starving_fl);
5861        kfree(adapter->sge.txq_maperr);
5862#ifdef CONFIG_DEBUG_FS
5863        kfree(adapter->sge.blocked_fl);
5864#endif
5865        disable_msi(adapter);
5866
5867        for_each_port(adapter, i)
5868                if (adapter->port[i]) {
5869                        struct port_info *pi = adap2pinfo(adapter, i);
5870
5871                        if (pi->viid != 0)
5872                                t4_free_vi(adapter, adapter->mbox, adapter->pf,
5873                                           0, pi->viid);
5874                        kfree(adap2pinfo(adapter, i)->rss);
5875                        free_netdev(adapter->port[i]);
5876                }
5877        if (adapter->flags & CXGB4_FW_OK)
5878                t4_fw_bye(adapter, adapter->pf);
5879}
5880
5881#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | \
5882                   NETIF_F_GSO_UDP_L4)
5883#define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
5884                   NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
5885#define SEGMENT_SIZE 128
5886
5887static int t4_get_chip_type(struct adapter *adap, int ver)
5888{
5889        u32 pl_rev = REV_G(t4_read_reg(adap, PL_REV_A));
5890
5891        switch (ver) {
5892        case CHELSIO_T4:
5893                return CHELSIO_CHIP_CODE(CHELSIO_T4, pl_rev);
5894        case CHELSIO_T5:
5895                return CHELSIO_CHIP_CODE(CHELSIO_T5, pl_rev);
5896        case CHELSIO_T6:
5897                return CHELSIO_CHIP_CODE(CHELSIO_T6, pl_rev);
5898        default:
5899                break;
5900        }
5901        return -EINVAL;
5902}
5903
5904#ifdef CONFIG_PCI_IOV
5905static void cxgb4_mgmt_setup(struct net_device *dev)
5906{
5907        dev->type = ARPHRD_NONE;
5908        dev->mtu = 0;
5909        dev->hard_header_len = 0;
5910        dev->addr_len = 0;
5911        dev->tx_queue_len = 0;
5912        dev->flags |= IFF_NOARP;
5913        dev->priv_flags |= IFF_NO_QUEUE;
5914
5915        /* Initialize the device structure. */
5916        dev->netdev_ops = &cxgb4_mgmt_netdev_ops;
5917        dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
5918}
5919
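/* SR-IOV (re)configuration entry point.  It is reached through the standard
 * PCI sysfs interface via the driver's .sriov_configure hook, e.g.
 * (illustrative path):
 *
 *     echo 4 > /sys/bus/pci/devices/<pf-bdf>/sriov_numvfs
 *
 * A num_vfs of 0 disables SR-IOV; a non-zero value instantiates that many VFs.
 */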
5920static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
5921{
5922        struct adapter *adap = pci_get_drvdata(pdev);
5923        int err = 0;
5924        int current_vfs = pci_num_vf(pdev);
5925        u32 pcie_fw;
5926
5927        pcie_fw = readl(adap->regs + PCIE_FW_A);
5928        /* Check if fw is initialized */
5929        if (!(pcie_fw & PCIE_FW_INIT_F)) {
5930                dev_warn(&pdev->dev, "Device not initialized\n");
5931                return -EOPNOTSUPP;
5932        }
5933
        /* If any of the VFs is already assigned to a guest OS, then
         * SR-IOV for this device cannot be modified.
         */
5937        if (current_vfs && pci_vfs_assigned(pdev)) {
5938                dev_err(&pdev->dev,
5939                        "Cannot modify SR-IOV while VFs are assigned\n");
5940                return current_vfs;
5941        }
5942        /* Note that the upper-level code ensures that we're never called with
5943         * a non-zero "num_vfs" when we already have VFs instantiated.  But
5944         * it never hurts to code defensively.
5945         */
5946        if (num_vfs != 0 && current_vfs != 0)
5947                return -EBUSY;
5948
5949        /* Nothing to do for no change. */
5950        if (num_vfs == current_vfs)
5951                return num_vfs;
5952
5953        /* Disable SRIOV when zero is passed. */
5954        if (!num_vfs) {
5955                pci_disable_sriov(pdev);
5956                /* free VF Management Interface */
5957                unregister_netdev(adap->port[0]);
5958                free_netdev(adap->port[0]);
5959                adap->port[0] = NULL;
5960
5961                /* free VF resources */
5962                adap->num_vfs = 0;
5963                kfree(adap->vfinfo);
5964                adap->vfinfo = NULL;
5965                return 0;
5966        }
5967
5968        if (!current_vfs) {
5969                struct fw_pfvf_cmd port_cmd, port_rpl;
5970                struct net_device *netdev;
5971                unsigned int pmask, port;
5972                struct pci_dev *pbridge;
5973                struct port_info *pi;
5974                char name[IFNAMSIZ];
5975                u32 devcap2;
5976                u16 flags;
5977
                /* If we want to instantiate Virtual Functions, then our
                 * parent bridge's PCIe port needs to support Alternative
                 * Routing-ID Interpretation (ARI) because our VFs will show
                 * up at function offset 8 and above.
                 */
5983                pbridge = pdev->bus->self;
5984                pcie_capability_read_word(pbridge, PCI_EXP_FLAGS, &flags);
5985                pcie_capability_read_dword(pbridge, PCI_EXP_DEVCAP2, &devcap2);
5986
5987                if ((flags & PCI_EXP_FLAGS_VERS) < 2 ||
5988                    !(devcap2 & PCI_EXP_DEVCAP2_ARI)) {
5989                        /* Our parent bridge does not support ARI so issue a
5990                         * warning and skip instantiating the VFs.  They
5991                         * won't be reachable.
5992                         */
5993                        dev_warn(&pdev->dev, "Parent bridge %02x:%02x.%x doesn't support ARI; can't instantiate Virtual Functions\n",
5994                                 pbridge->bus->number, PCI_SLOT(pbridge->devfn),
5995                                 PCI_FUNC(pbridge->devfn));
5996                        return -ENOTSUPP;
5997                }
5998                memset(&port_cmd, 0, sizeof(port_cmd));
5999                port_cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_PFVF_CMD) |
6000                                                 FW_CMD_REQUEST_F |
6001                                                 FW_CMD_READ_F |
6002                                                 FW_PFVF_CMD_PFN_V(adap->pf) |
6003                                                 FW_PFVF_CMD_VFN_V(0));
6004                port_cmd.retval_len16 = cpu_to_be32(FW_LEN16(port_cmd));
6005                err = t4_wr_mbox(adap, adap->mbox, &port_cmd, sizeof(port_cmd),
6006                                 &port_rpl);
6007                if (err)
6008                        return err;
6009                pmask = FW_PFVF_CMD_PMASK_G(be32_to_cpu(port_rpl.type_to_neq));
6010                port = ffs(pmask) - 1;
6011                /* Allocate VF Management Interface. */
6012                snprintf(name, IFNAMSIZ, "mgmtpf%d,%d", adap->adap_idx,
6013                         adap->pf);
6014                netdev = alloc_netdev(sizeof(struct port_info),
6015                                      name, NET_NAME_UNKNOWN, cxgb4_mgmt_setup);
6016                if (!netdev)
6017                        return -ENOMEM;
6018
6019                pi = netdev_priv(netdev);
6020                pi->adapter = adap;
6021                pi->lport = port;
6022                pi->tx_chan = port;
6023                SET_NETDEV_DEV(netdev, &pdev->dev);
6024
6025                adap->port[0] = netdev;
6026                pi->port_id = 0;
6027
6028                err = register_netdev(adap->port[0]);
6029                if (err) {
6030                        pr_info("Unable to register VF mgmt netdev %s\n", name);
6031                        free_netdev(adap->port[0]);
6032                        adap->port[0] = NULL;
6033                        return err;
6034                }
6035                /* Allocate and set up VF Information. */
6036                adap->vfinfo = kcalloc(pci_sriov_get_totalvfs(pdev),
6037                                       sizeof(struct vf_info), GFP_KERNEL);
6038                if (!adap->vfinfo) {
6039                        unregister_netdev(adap->port[0]);
6040                        free_netdev(adap->port[0]);
6041                        adap->port[0] = NULL;
6042                        return -ENOMEM;
6043                }
6044                cxgb4_mgmt_fill_vf_station_mac_addr(adap);
6045        }
6046        /* Instantiate the requested number of VFs. */
6047        err = pci_enable_sriov(pdev, num_vfs);
6048        if (err) {
6049                pr_info("Unable to instantiate %d VFs\n", num_vfs);
6050                if (!current_vfs) {
6051                        unregister_netdev(adap->port[0]);
6052                        free_netdev(adap->port[0]);
6053                        adap->port[0] = NULL;
6054                        kfree(adap->vfinfo);
6055                        adap->vfinfo = NULL;
6056                }
6057                return err;
6058        }
6059
6060        adap->num_vfs = num_vfs;
6061        return num_vfs;
6062}
6063#endif /* CONFIG_PCI_IOV */
6064
6065static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
6066{
6067        struct net_device *netdev;
6068        struct adapter *adapter;
6069        static int adap_idx = 1;
6070        int s_qpp, qpp, num_seg;
6071        struct port_info *pi;
6072        bool highdma = false;
6073        enum chip_type chip;
6074        void __iomem *regs;
6075        int func, chip_ver;
6076        u16 device_id;
6077        int i, err;
6078        u32 whoami;
6079
6080        err = pci_request_regions(pdev, KBUILD_MODNAME);
6081        if (err) {
6082                /* Just info, some other driver may have claimed the device. */
6083                dev_info(&pdev->dev, "cannot obtain PCI resources\n");
6084                return err;
6085        }
6086
6087        err = pci_enable_device(pdev);
6088        if (err) {
6089                dev_err(&pdev->dev, "cannot enable PCI device\n");
6090                goto out_release_regions;
6091        }
6092
6093        regs = pci_ioremap_bar(pdev, 0);
6094        if (!regs) {
6095                dev_err(&pdev->dev, "cannot map device registers\n");
6096                err = -ENOMEM;
6097                goto out_disable_device;
6098        }
6099
6100        adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
6101        if (!adapter) {
6102                err = -ENOMEM;
6103                goto out_unmap_bar0;
6104        }
6105
6106        adapter->regs = regs;
6107        err = t4_wait_dev_ready(regs);
6108        if (err < 0)
6109                goto out_free_adapter;
6110
6111        /* We control everything through one PF */
6112        whoami = t4_read_reg(adapter, PL_WHOAMI_A);
6113        pci_read_config_word(pdev, PCI_DEVICE_ID, &device_id);
6114        chip = t4_get_chip_type(adapter, CHELSIO_PCI_ID_VER(device_id));
6115        if ((int)chip < 0) {
6116                dev_err(&pdev->dev, "Device %d is not supported\n", device_id);
6117                err = chip;
6118                goto out_free_adapter;
6119        }
6120        chip_ver = CHELSIO_CHIP_VERSION(chip);
6121        func = chip_ver <= CHELSIO_T5 ?
6122               SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
6123
6124        adapter->pdev = pdev;
6125        adapter->pdev_dev = &pdev->dev;
6126        adapter->name = pci_name(pdev);
6127        adapter->mbox = func;
6128        adapter->pf = func;
6129        adapter->params.chip = chip;
6130        adapter->adap_idx = adap_idx;
6131        adapter->msg_enable = DFLT_MSG_ENABLE;
6132        adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
6133                                    (sizeof(struct mbox_cmd) *
6134                                     T4_OS_LOG_MBOX_CMDS),
6135                                    GFP_KERNEL);
6136        if (!adapter->mbox_log) {
6137                err = -ENOMEM;
6138                goto out_free_adapter;
6139        }
6140        spin_lock_init(&adapter->mbox_lock);
6141        INIT_LIST_HEAD(&adapter->mlist.list);
6142        adapter->mbox_log->size = T4_OS_LOG_MBOX_CMDS;
6143        pci_set_drvdata(pdev, adapter);
6144
6145        if (func != ent->driver_data) {
6146                pci_disable_device(pdev);
6147                pci_save_state(pdev);        /* to restore SR-IOV later */
6148                return 0;
6149        }
6150
6151        if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
6152                highdma = true;
6153                err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
6154                if (err) {
6155                        dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
6156                                "coherent allocations\n");
6157                        goto out_free_adapter;
6158                }
6159        } else {
6160                err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
6161                if (err) {
6162                        dev_err(&pdev->dev, "no usable DMA configuration\n");
6163                        goto out_free_adapter;
6164                }
6165        }
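        /* Note: on newer kernels the equivalent setup is typically a single
         * dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) call with
         * a 32-bit fallback; the legacy pci_set_*_dma_mask() wrappers used
         * here behave the same way.
         */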
6166
6167        pci_enable_pcie_error_reporting(pdev);
6168        pci_set_master(pdev);
6169        pci_save_state(pdev);
6170        adap_idx++;
6171        adapter->workq = create_singlethread_workqueue("cxgb4");
6172        if (!adapter->workq) {
6173                err = -ENOMEM;
6174                goto out_free_adapter;
6175        }
6176
6177        /* PCI device has been enabled */
6178        adapter->flags |= CXGB4_DEV_ENABLED;
6179        memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
6180
6181        /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
6182         * Ingress Packet Data to Free List Buffers in order to allow for
6183         * chipset performance optimizations between the Root Complex and
6184         * Memory Controllers.  (Messages to the associated Ingress Queue
         * notifying of new Packet Placement in the Free List Buffers are
         * sent without the Relaxed Ordering Attribute, thus guaranteeing
         * that all preceding PCIe Transaction Layer Packets will be
         * processed first.)  But some Root Complexes have various issues
         * with Upstream Transaction Layer Packets that have the Relaxed
         * Ordering Attribute set.  PCIe devices below such Root Complexes
         * will have had the Relaxed Ordering Enable bit cleared in their
         * PCIe configuration space, so we check our own configuration
         * space to see if it advises against using Relaxed Ordering.
6194         */
6195        if (!pcie_relaxed_ordering_enabled(pdev))
6196                adapter->flags |= CXGB4_ROOT_NO_RELAXED_ORDERING;
6197
6198        spin_lock_init(&adapter->stats_lock);
6199        spin_lock_init(&adapter->tid_release_lock);
6200        spin_lock_init(&adapter->win0_lock);
6201
6202        INIT_WORK(&adapter->tid_release_task, process_tid_release_list);
6203        INIT_WORK(&adapter->db_full_task, process_db_full);
6204        INIT_WORK(&adapter->db_drop_task, process_db_drop);
6205        INIT_WORK(&adapter->fatal_err_notify_task, notify_fatal_err);
6206
6207        err = t4_prep_adapter(adapter);
6208        if (err)
6209                goto out_free_adapter;
6210
6211        if (is_kdump_kernel()) {
6212                /* Collect hardware state and append to /proc/vmcore */
6213                err = cxgb4_cudbg_vmcore_add_dump(adapter);
6214                if (err) {
6215                        dev_warn(adapter->pdev_dev,
                                 "Failed to collect vmcore device dump, err: %d. Continuing\n",
6217                                 err);
6218                        err = 0;
6219                }
6220        }
6221
6222        if (!is_t4(adapter->params.chip)) {
6223                s_qpp = (QUEUESPERPAGEPF0_S +
6224                        (QUEUESPERPAGEPF1_S - QUEUESPERPAGEPF0_S) *
6225                        adapter->pf);
6226                qpp = 1 << QUEUESPERPAGEPF0_G(t4_read_reg(adapter,
6227                      SGE_EGRESS_QUEUES_PER_PAGE_PF_A) >> s_qpp);
6228                num_seg = PAGE_SIZE / SEGMENT_SIZE;
6229
                /* Each write-coalescing segment is 128B.  Write coalescing is
                 * enabled only when the SGE_EGRESS_QUEUES_PER_PAGE_PF value
                 * for this PF does not exceed the number of segments that can
                 * be accommodated in a page.
                 */
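                /* For example (illustrative numbers): with PAGE_SIZE = 4096
                 * and SEGMENT_SIZE = 128, num_seg is 32, so a register value
                 * reporting 64 queues per page would fail the check below and
                 * probing would abort with -EINVAL.
                 */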
6235                if (qpp > num_seg) {
6236                        dev_err(&pdev->dev,
6237                                "Incorrect number of egress queues per page\n");
6238                        err = -EINVAL;
6239                        goto out_free_adapter;
6240                }
6241                adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
6242                pci_resource_len(pdev, 2));
6243                if (!adapter->bar2) {
6244                        dev_err(&pdev->dev, "cannot map device bar2 region\n");
6245                        err = -ENOMEM;
6246                        goto out_free_adapter;
6247                }
6248        }
6249
6250        setup_memwin(adapter);
6251        err = adap_init0(adapter, 0);
6252#ifdef CONFIG_DEBUG_FS
6253        bitmap_zero(adapter->sge.blocked_fl, adapter->sge.egr_sz);
6254#endif
6255        setup_memwin_rdma(adapter);
6256        if (err)
6257                goto out_unmap_bar;
6258
6259        /* configure SGE_STAT_CFG_A to read WC stats */
6260        if (!is_t4(adapter->params.chip))
6261                t4_write_reg(adapter, SGE_STAT_CFG_A, STATSOURCE_T5_V(7) |
6262                             (is_t5(adapter->params.chip) ? STATMODE_V(0) :
6263                              T6_STATMODE_V(0)));
6264
6265        /* Initialize hash mac addr list */
6266        INIT_LIST_HEAD(&adapter->mac_hlist);
6267
6268        for_each_port(adapter, i) {
                /* To support MQPRIO offload we need some extra queues for
                 * the ETHOFLD TIDs.  Keep the count equal to MAX_ATIDS for
                 * now; once we connect to the firmware later and query the
                 * EOTID parameters, we'll learn the actual number of EOTIDs
                 * supported.
                 */
6275                netdev = alloc_etherdev_mq(sizeof(struct port_info),
6276                                           MAX_ETH_QSETS + MAX_ATIDS);
6277                if (!netdev) {
6278                        err = -ENOMEM;
6279                        goto out_free_dev;
6280                }
6281
6282                SET_NETDEV_DEV(netdev, &pdev->dev);
6283
6284                adapter->port[i] = netdev;
6285                pi = netdev_priv(netdev);
6286                pi->adapter = adapter;
6287                pi->xact_addr_filt = -1;
6288                pi->port_id = i;
6289                netdev->irq = pdev->irq;
6290
6291                netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
6292                        NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
6293                        NETIF_F_RXCSUM | NETIF_F_RXHASH | NETIF_F_GRO |
6294                        NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
6295                        NETIF_F_HW_TC;
6296
6297                if (chip_ver > CHELSIO_T5) {
6298                        netdev->hw_enc_features |= NETIF_F_IP_CSUM |
6299                                                   NETIF_F_IPV6_CSUM |
6300                                                   NETIF_F_RXCSUM |
6301                                                   NETIF_F_GSO_UDP_TUNNEL |
6302                                                   NETIF_F_GSO_UDP_TUNNEL_CSUM |
6303                                                   NETIF_F_TSO | NETIF_F_TSO6;
6304
6305                        netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
6306                                               NETIF_F_GSO_UDP_TUNNEL_CSUM |
6307                                               NETIF_F_HW_TLS_RECORD;
6308                }
6309
6310                if (highdma)
6311                        netdev->hw_features |= NETIF_F_HIGHDMA;
6312                netdev->features |= netdev->hw_features;
6313                netdev->vlan_features = netdev->features & VLAN_FEAT;
6314
6315                netdev->priv_flags |= IFF_UNICAST_FLT;
6316
6317                /* MTU range: 81 - 9600 */
6318                netdev->min_mtu = 81;              /* accommodate SACK */
6319                netdev->max_mtu = MAX_MTU;
6320
6321                netdev->netdev_ops = &cxgb4_netdev_ops;
6322#ifdef CONFIG_CHELSIO_T4_DCB
6323                netdev->dcbnl_ops = &cxgb4_dcb_ops;
6324                cxgb4_dcb_state_init(netdev);
6325                cxgb4_dcb_version_init(netdev);
6326#endif
6327                cxgb4_set_ethtool_ops(netdev);
6328        }
6329
6330        cxgb4_init_ethtool_dump(adapter);
6331
6332        pci_set_drvdata(pdev, adapter);
6333
6334        if (adapter->flags & CXGB4_FW_OK) {
6335                err = t4_port_init(adapter, func, func, 0);
6336                if (err)
6337                        goto out_free_dev;
6338        } else if (adapter->params.nports == 1) {
6339                /* If we don't have a connection to the firmware -- possibly
6340                 * because of an error -- grab the raw VPD parameters so we
6341                 * can set the proper MAC Address on the debug network
6342                 * interface that we've created.
6343                 */
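                /* For example, a VPD "na" field of "000743aabbcc"
                 * (hypothetical value) yields hw_addr 00:07:43:aa:bb:cc
                 * via the hex2val() conversion below.
                 */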
6344                u8 hw_addr[ETH_ALEN];
6345                u8 *na = adapter->params.vpd.na;
6346
6347                err = t4_get_raw_vpd_params(adapter, &adapter->params.vpd);
6348                if (!err) {
6349                        for (i = 0; i < ETH_ALEN; i++)
6350                                hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 +
6351                                              hex2val(na[2 * i + 1]));
6352                        t4_set_hw_addr(adapter, 0, hw_addr);
6353                }
6354        }
6355
6356        if (!(adapter->flags & CXGB4_FW_OK))
6357                goto fw_attach_fail;
6358
        /* Configure queues and allocate tables now; they can be needed as
6360         * soon as the first register_netdev completes.
6361         */
6362        err = cfg_queues(adapter);
6363        if (err)
6364                goto out_free_dev;
6365
6366        adapter->smt = t4_init_smt();
6367        if (!adapter->smt) {
6368                /* We tolerate a lack of SMT, giving up some functionality */
6369                dev_warn(&pdev->dev, "could not allocate SMT, continuing\n");
6370        }
6371
6372        adapter->l2t = t4_init_l2t(adapter->l2t_start, adapter->l2t_end);
6373        if (!adapter->l2t) {
6374                /* We tolerate a lack of L2T, giving up some functionality */
6375                dev_warn(&pdev->dev, "could not allocate L2T, continuing\n");
6376                adapter->params.offload = 0;
6377        }
6378
6379#if IS_ENABLED(CONFIG_IPV6)
6380        if (chip_ver <= CHELSIO_T5 &&
6381            (!(t4_read_reg(adapter, LE_DB_CONFIG_A) & ASLIPCOMPEN_F))) {
6382                /* CLIP functionality is not present in hardware,
6383                 * hence disable all offload features
6384                 */
6385                dev_warn(&pdev->dev,
6386                         "CLIP not enabled in hardware, continuing\n");
6387                adapter->params.offload = 0;
6388        } else {
6389                adapter->clipt = t4_init_clip_tbl(adapter->clipt_start,
6390                                                  adapter->clipt_end);
6391                if (!adapter->clipt) {
6392                        /* We tolerate a lack of clip_table, giving up
6393                         * some functionality
6394                         */
6395                        dev_warn(&pdev->dev,
6396                                 "could not allocate Clip table, continuing\n");
6397                        adapter->params.offload = 0;
6398                }
6399        }
6400#endif
6401
6402        for_each_port(adapter, i) {
6403                pi = adap2pinfo(adapter, i);
6404                pi->sched_tbl = t4_init_sched(adapter->params.nsched_cls);
6405                if (!pi->sched_tbl)
6406                        dev_warn(&pdev->dev,
6407                                 "could not activate scheduling on port %d\n",
6408                                 i);
6409        }
6410
6411        if (tid_init(&adapter->tids) < 0) {
6412                dev_warn(&pdev->dev, "could not allocate TID table, "
6413                         "continuing\n");
6414                adapter->params.offload = 0;
6415        } else {
6416                adapter->tc_u32 = cxgb4_init_tc_u32(adapter);
6417                if (!adapter->tc_u32)
6418                        dev_warn(&pdev->dev,
6419                                 "could not offload tc u32, continuing\n");
6420
6421                if (cxgb4_init_tc_flower(adapter))
6422                        dev_warn(&pdev->dev,
6423                                 "could not offload tc flower, continuing\n");
6424
6425                if (cxgb4_init_tc_mqprio(adapter))
6426                        dev_warn(&pdev->dev,
6427                                 "could not offload tc mqprio, continuing\n");
6428
6429                if (cxgb4_init_tc_matchall(adapter))
6430                        dev_warn(&pdev->dev,
6431                                 "could not offload tc matchall, continuing\n");
6432        }
6433
6434        if (is_offload(adapter) || is_hashfilter(adapter)) {
6435                if (t4_read_reg(adapter, LE_DB_CONFIG_A) & HASHEN_F) {
6436                        u32 hash_base, hash_reg;
6437
6438                        if (chip_ver <= CHELSIO_T5) {
6439                                hash_reg = LE_DB_TID_HASHBASE_A;
6440                                hash_base = t4_read_reg(adapter, hash_reg);
6441                                adapter->tids.hash_base = hash_base / 4;
6442                        } else {
6443                                hash_reg = T6_LE_DB_HASH_TID_BASE_A;
6444                                hash_base = t4_read_reg(adapter, hash_reg);
6445                                adapter->tids.hash_base = hash_base;
6446                        }
6447                }
6448        }
6449
6450        /* See what interrupts we'll be using */
6451        if (msi > 1 && enable_msix(adapter) == 0)
6452                adapter->flags |= CXGB4_USING_MSIX;
6453        else if (msi > 0 && pci_enable_msi(pdev) == 0) {
6454                adapter->flags |= CXGB4_USING_MSI;
6455                if (msi > 1)
6456                        free_msix_info(adapter);
6457        }
6458
        /* Check for PCI Express bandwidth capabilities */
6460        pcie_print_link_status(pdev);
6461
6462        cxgb4_init_mps_ref_entries(adapter);
6463
6464        err = init_rss(adapter);
6465        if (err)
6466                goto out_free_dev;
6467
6468        err = setup_non_data_intr(adapter);
6469        if (err) {
6470                dev_err(adapter->pdev_dev,
6471                        "Non Data interrupt allocation failed, err: %d\n", err);
6472                goto out_free_dev;
6473        }
6474
6475        err = setup_fw_sge_queues(adapter);
6476        if (err) {
6477                dev_err(adapter->pdev_dev,
6478                        "FW sge queue allocation failed, err %d", err);
6479                goto out_free_dev;
6480        }
6481
6482fw_attach_fail:
6483        /*
6484         * The card is now ready to go.  If any errors occur during device
6485         * registration we do not fail the whole card but rather proceed only
         * with the ports we manage to register successfully.  However, we must
6487         * register at least one net device.
6488         */
6489        for_each_port(adapter, i) {
6490                pi = adap2pinfo(adapter, i);
6491                adapter->port[i]->dev_port = pi->lport;
6492                netif_set_real_num_tx_queues(adapter->port[i], pi->nqsets);
6493                netif_set_real_num_rx_queues(adapter->port[i], pi->nqsets);
6494
6495                netif_carrier_off(adapter->port[i]);
6496
6497                err = register_netdev(adapter->port[i]);
6498                if (err)
6499                        break;
6500                adapter->chan_map[pi->tx_chan] = i;
6501                print_port_info(adapter->port[i]);
6502        }
6503        if (i == 0) {
6504                dev_err(&pdev->dev, "could not register any net devices\n");
6505                goto out_free_dev;
6506        }
6507        if (err) {
6508                dev_warn(&pdev->dev, "only %d net devices registered\n", i);
6509                err = 0;
6510        }
6511
6512        if (cxgb4_debugfs_root) {
6513                adapter->debugfs_root = debugfs_create_dir(pci_name(pdev),
6514                                                           cxgb4_debugfs_root);
6515                setup_debugfs(adapter);
6516        }
6517
6518        /* PCIe EEH recovery on powerpc platforms needs fundamental reset */
6519        pdev->needs_freset = 1;
6520
6521        if (is_uld(adapter)) {
6522                mutex_lock(&uld_mutex);
6523                list_add_tail(&adapter->list_node, &adapter_list);
6524                mutex_unlock(&uld_mutex);
6525        }
6526
6527        if (!is_t4(adapter->params.chip))
6528                cxgb4_ptp_init(adapter);
6529
6530        if (IS_REACHABLE(CONFIG_THERMAL) &&
6531            !is_t4(adapter->params.chip) && (adapter->flags & CXGB4_FW_OK))
6532                cxgb4_thermal_init(adapter);
6533
6534        print_adapter_info(adapter);
6535        return 0;
6536
6537 out_free_dev:
6538        t4_free_sge_resources(adapter);
6539        free_some_resources(adapter);
6540        if (adapter->flags & CXGB4_USING_MSIX)
6541                free_msix_info(adapter);
6542        if (adapter->num_uld || adapter->num_ofld_uld)
6543                t4_uld_mem_free(adapter);
6544 out_unmap_bar:
6545        if (!is_t4(adapter->params.chip))
6546                iounmap(adapter->bar2);
6547 out_free_adapter:
6548        if (adapter->workq)
6549                destroy_workqueue(adapter->workq);
6550
6551        kfree(adapter->mbox_log);
6552        kfree(adapter);
6553 out_unmap_bar0:
6554        iounmap(regs);
6555 out_disable_device:
6556        pci_disable_pcie_error_reporting(pdev);
6557        pci_disable_device(pdev);
6558 out_release_regions:
6559        pci_release_regions(pdev);
6560        return err;
6561}
6562
6563static void remove_one(struct pci_dev *pdev)
6564{
6565        struct adapter *adapter = pci_get_drvdata(pdev);
6566        struct hash_mac_addr *entry, *tmp;
6567
6568        if (!adapter) {
6569                pci_release_regions(pdev);
6570                return;
6571        }
6572
6573        /* If we allocated filters, free up state associated with any
6574         * valid filters ...
6575         */
6576        clear_all_filters(adapter);
6577
6578        adapter->flags |= CXGB4_SHUTTING_DOWN;
6579
6580        if (adapter->pf == 4) {
6581                int i;
6582
6583                /* Tear down per-adapter Work Queue first since it can contain
6584                 * references to our adapter data structure.
6585                 */
6586                destroy_workqueue(adapter->workq);
6587
6588                if (is_uld(adapter)) {
6589                        detach_ulds(adapter);
6590                        t4_uld_clean_up(adapter);
6591                }
6592
6593                adap_free_hma_mem(adapter);
6594
6595                disable_interrupts(adapter);
6596
6597                cxgb4_free_mps_ref_entries(adapter);
6598
6599                for_each_port(adapter, i)
6600                        if (adapter->port[i]->reg_state == NETREG_REGISTERED)
6601                                unregister_netdev(adapter->port[i]);
6602
6603                debugfs_remove_recursive(adapter->debugfs_root);
6604
6605                if (!is_t4(adapter->params.chip))
6606                        cxgb4_ptp_stop(adapter);
6607                if (IS_REACHABLE(CONFIG_THERMAL))
6608                        cxgb4_thermal_remove(adapter);
6609
6610                if (adapter->flags & CXGB4_FULL_INIT_DONE)
6611                        cxgb_down(adapter);
6612
6613                if (adapter->flags & CXGB4_USING_MSIX)
6614                        free_msix_info(adapter);
6615                if (adapter->num_uld || adapter->num_ofld_uld)
6616                        t4_uld_mem_free(adapter);
6617                free_some_resources(adapter);
6618                list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist,
6619                                         list) {
6620                        list_del(&entry->list);
6621                        kfree(entry);
6622                }
6623
6624#if IS_ENABLED(CONFIG_IPV6)
6625                t4_cleanup_clip_tbl(adapter);
6626#endif
6627                if (!is_t4(adapter->params.chip))
6628                        iounmap(adapter->bar2);
6629        }
6630#ifdef CONFIG_PCI_IOV
6631        else {
6632                cxgb4_iov_configure(adapter->pdev, 0);
6633        }
6634#endif
6635        iounmap(adapter->regs);
6636        pci_disable_pcie_error_reporting(pdev);
6637        if ((adapter->flags & CXGB4_DEV_ENABLED)) {
6638                pci_disable_device(pdev);
6639                adapter->flags &= ~CXGB4_DEV_ENABLED;
6640        }
6641        pci_release_regions(pdev);
6642        kfree(adapter->mbox_log);
6643        synchronize_rcu();
6644        kfree(adapter);
6645}
6646
6647/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt
 * delivery.  This is essentially a stripped-down version of the PCI remove()
 * function where we do the minimal amount of work necessary to shut down any
 * further activity.
6651 */
6652static void shutdown_one(struct pci_dev *pdev)
6653{
6654        struct adapter *adapter = pci_get_drvdata(pdev);
6655
        /* As with remove_one() above (see extended comment), we only want to
         * do cleanup on PCI Devices which went all the way through init_one()
6658         * ...
6659         */
6660        if (!adapter) {
6661                pci_release_regions(pdev);
6662                return;
6663        }
6664
6665        adapter->flags |= CXGB4_SHUTTING_DOWN;
6666
6667        if (adapter->pf == 4) {
6668                int i;
6669
6670                for_each_port(adapter, i)
6671                        if (adapter->port[i]->reg_state == NETREG_REGISTERED)
6672                                cxgb_close(adapter->port[i]);
6673
6674                rtnl_lock();
6675                cxgb4_mqprio_stop_offload(adapter);
6676                rtnl_unlock();
6677
6678                if (is_uld(adapter)) {
6679                        detach_ulds(adapter);
6680                        t4_uld_clean_up(adapter);
6681                }
6682
6683                disable_interrupts(adapter);
6684                disable_msi(adapter);
6685
6686                t4_sge_stop(adapter);
6687                if (adapter->flags & CXGB4_FW_OK)
6688                        t4_fw_bye(adapter, adapter->mbox);
6689        }
6690}
6691
6692static struct pci_driver cxgb4_driver = {
6693        .name     = KBUILD_MODNAME,
6694        .id_table = cxgb4_pci_tbl,
6695        .probe    = init_one,
6696        .remove   = remove_one,
6697        .shutdown = shutdown_one,
6698#ifdef CONFIG_PCI_IOV
6699        .sriov_configure = cxgb4_iov_configure,
6700#endif
6701        .err_handler = &cxgb4_eeh,
6702};
6703
6704static int __init cxgb4_init_module(void)
6705{
6706        int ret;
6707
6708        cxgb4_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
6709
6710        ret = pci_register_driver(&cxgb4_driver);
6711        if (ret < 0)
6712                goto err_pci;
6713
6714#if IS_ENABLED(CONFIG_IPV6)
6715        if (!inet6addr_registered) {
6716                ret = register_inet6addr_notifier(&cxgb4_inet6addr_notifier);
6717                if (ret)
6718                        pci_unregister_driver(&cxgb4_driver);
6719                else
6720                        inet6addr_registered = true;
6721        }
6722#endif
6723
6724        if (ret == 0)
6725                return ret;
6726
6727err_pci:
6728        debugfs_remove(cxgb4_debugfs_root);
6729
6730        return ret;
6731}
6732
6733static void __exit cxgb4_cleanup_module(void)
6734{
6735#if IS_ENABLED(CONFIG_IPV6)
6736        if (inet6addr_registered) {
6737                unregister_inet6addr_notifier(&cxgb4_inet6addr_notifier);
6738                inet6addr_registered = false;
6739        }
6740#endif
6741        pci_unregister_driver(&cxgb4_driver);
6742        debugfs_remove(cxgb4_debugfs_root);  /* NULL ok */
6743}
6744
6745module_init(cxgb4_init_module);
6746module_exit(cxgb4_cleanup_module);
6747