linux/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
/*
 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
 * driver for Linux.
 *
 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/debugfs.h>
#include <linux/ethtool.h>
#include <linux/mdio.h>

#include "t4vf_common.h"
#include "t4vf_defs.h"

#include "../cxgb4/t4_regs.h"
#include "../cxgb4/t4_msg.h"

/*
 * Generic information about the driver.
 */
#define DRV_VERSION "2.0.0-ko"
#define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver"

/*
 * Module Parameters.
 * ==================
 */

/*
 * Default ethtool "message level" for adapters.
 */
#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
                         NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
                         NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X then MSI.  This parameter determines which of these schemes the
 * driver may consider as follows:
 *
 *     msi = 2: choose from among MSI-X and MSI
 *     msi = 1: only consider MSI interrupts
 *
 * Note that unlike the Physical Function driver, this Virtual Function driver
 * does _not_ support legacy INTx interrupts (this limitation is mandated by
 * the PCI-E SR-IOV standard).
 */
#define MSI_MSIX        2
#define MSI_MSI         1
#define MSI_DEFAULT     MSI_MSIX

static int msi = MSI_DEFAULT;

module_param(msi, int, 0644);
MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");

/*
 * Fundamental constants.
 * ======================
 */

enum {
        MAX_TXQ_ENTRIES         = 16384,
        MAX_RSPQ_ENTRIES        = 16384,
        MAX_RX_BUFFERS          = 16384,

        MIN_TXQ_ENTRIES         = 32,
        MIN_RSPQ_ENTRIES        = 128,
        MIN_FL_ENTRIES          = 16,

        /*
         * For purposes of manipulating the Free List size we need to
         * recognize that Free Lists are actually Egress Queues (the host
         * produces free buffers which the hardware consumes), Egress Queues
         * indices are all in units of Egress Context Units bytes, and free
         * list entries are 64-bit PCI DMA addresses.  And since the state of
         * the Producer Index == the Consumer Index implies an EMPTY list, we
         * always have at least one Egress Unit's worth of Free List entries
         * unused.  See sge.c for more details ...
         */
        EQ_UNIT = SGE_EQ_IDXSIZE,
        FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
        MIN_FL_RESID = FL_PER_EQ_UNIT,
};

/*
 * Global driver state.
 * ====================
 */

static struct dentry *cxgb4vf_debugfs_root;

/*
 * OS "Callback" functions.
 * ========================
 */

/*
 * The link status has changed on the indicated "port" (Virtual Interface).
 */
void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
{
        struct net_device *dev = adapter->port[pidx];

        /*
         * If the port is disabled or the current recorded "link up"
         * status matches the new status, just return.
         */
        if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
                return;

        /*
         * Tell the OS that the link status has changed and print a short
         * informative message on the console about the event.
         */
        if (link_ok) {
                const char *s;
                const char *fc;
                const struct port_info *pi = netdev_priv(dev);

                netif_carrier_on(dev);

                switch (pi->link_cfg.speed) {
                case 100:
                        s = "100Mbps";
                        break;
                case 1000:
                        s = "1Gbps";
                        break;
                case 10000:
                        s = "10Gbps";
                        break;
                case 25000:
                        s = "25Gbps";
                        break;
                case 40000:
                        s = "40Gbps";
                        break;
                case 100000:
                        s = "100Gbps";
                        break;

                default:
                        s = "unknown";
                        break;
                }

                switch ((int)pi->link_cfg.fc) {
                case PAUSE_RX:
                        fc = "RX";
                        break;

                case PAUSE_TX:
                        fc = "TX";
                        break;

                case PAUSE_RX | PAUSE_TX:
                        fc = "RX/TX";
                        break;

                default:
                        fc = "no";
                        break;
                }

                netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
        } else {
                netif_carrier_off(dev);
                netdev_info(dev, "link down\n");
        }
}

/*
 * The port module type has changed on the indicated "port" (Virtual
 * Interface).
 */
void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
{
        static const char * const mod_str[] = {
                NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
        };
        const struct net_device *dev = adapter->port[pidx];
        const struct port_info *pi = netdev_priv(dev);

        if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
                dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
                         dev->name);
        else if (pi->mod_type < ARRAY_SIZE(mod_str))
                dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
                         dev->name, mod_str[pi->mod_type]);
        else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
                dev_info(adapter->pdev_dev, "%s: unsupported optical port "
                         "module inserted\n", dev->name);
        else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
                dev_info(adapter->pdev_dev, "%s: unknown port module "
                         "inserted, forcing TWINAX\n", dev->name);
        else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
                dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
                         dev->name);
        else
                dev_info(adapter->pdev_dev, "%s: unknown module type %d "
                         "inserted\n", dev->name, pi->mod_type);
}

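/*
 * Recompute the hash filter vector from the adapter's list of hash-filtered
 * MAC addresses and program it into the hardware for this Virtual Interface.
 */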
static int cxgb4vf_set_addr_hash(struct port_info *pi)
{
        struct adapter *adapter = pi->adapter;
        u64 vec = 0;
        bool ucast = false;
        struct hash_mac_addr *entry;

        /* Calculate the hash vector for the updated list and program it */
        list_for_each_entry(entry, &adapter->mac_hlist, list) {
                ucast |= is_unicast_ether_addr(entry->addr);
                vec |= (1ULL << hash_mac_addr(entry->addr));
        }
        return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
}

/**
 *      cxgb4vf_change_mac - Update match filter for a MAC address.
 *      @pi: the port_info
 *      @viid: the VI id
 *      @tcam_idx: TCAM index of existing filter for old value of MAC address,
 *                 or -1
 *      @addr: the new MAC address value
 *      @persistent: whether a new MAC allocation should be persistent
 *
 *      Modifies an MPS filter and sets it to the new MAC address if
 *      @tcam_idx >= 0, or adds the MAC address to a new filter if
 *      @tcam_idx < 0. In the latter case the address is added persistently
 *      if @persistent is %true.
 *      If the TCAM runs out of entries, the address is programmed into the
 *      hash region instead.
 */
static int cxgb4vf_change_mac(struct port_info *pi, unsigned int viid,
                              int *tcam_idx, const u8 *addr, bool persistent)
{
        struct hash_mac_addr *new_entry, *entry;
        struct adapter *adapter = pi->adapter;
        int ret;

        ret = t4vf_change_mac(adapter, viid, *tcam_idx, addr, persistent);
        /* We ran out of TCAM entries, so try programming the hash region. */
        if (ret == -ENOMEM) {
                /* If the interface MAC address is already in the hash
                 * address list, update the existing entry in place.
                 */
                list_for_each_entry(entry, &adapter->mac_hlist, list) {
                        if (entry->iface_mac) {
                                ether_addr_copy(entry->addr, addr);
                                goto set_hash;
                        }
                }
                new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
                if (!new_entry)
                        return -ENOMEM;
                ether_addr_copy(new_entry->addr, addr);
                new_entry->iface_mac = true;
                list_add_tail(&new_entry->list, &adapter->mac_hlist);
set_hash:
                ret = cxgb4vf_set_addr_hash(pi);
        } else if (ret >= 0) {
                *tcam_idx = ret;
                ret = 0;
        }

        return ret;
}

/*
 * Net device operations.
 * ======================
 */

/*
 * Perform the MAC and PHY actions needed to enable a "port" (Virtual
 * Interface).
 */
static int link_start(struct net_device *dev)
{
        int ret;
        struct port_info *pi = netdev_priv(dev);

        /*
         * We do not set address filters and promiscuity here; the stack does
         * that step explicitly.  Enable VLAN acceleration.
         */
        ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
                              true);
        if (ret == 0)
                ret = cxgb4vf_change_mac(pi, pi->viid,
                                         &pi->xact_addr_filt,
                                         dev->dev_addr, true);

        /*
         * We don't need to actually "start the link" itself since the
         * firmware will do that for us when the first Virtual Interface
         * is enabled on a port.
         */
        if (ret == 0)
                ret = t4vf_enable_pi(pi->adapter, pi, true, true);

        return ret;
}

/*
 * Name the MSI-X interrupts.
 */
static void name_msix_vecs(struct adapter *adapter)
{
        int namelen = sizeof(adapter->msix_info[0].desc) - 1;
        int pidx;

        /*
         * Firmware events.
         */
        snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
                 "%s-FWeventq", adapter->name);
        adapter->msix_info[MSIX_FW].desc[namelen] = 0;

        /*
         * Ethernet queues.
         */
        for_each_port(adapter, pidx) {
                struct net_device *dev = adapter->port[pidx];
                const struct port_info *pi = netdev_priv(dev);
                int qs, msi;

                for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
                        snprintf(adapter->msix_info[msi].desc, namelen,
                                 "%s-%d", dev->name, qs);
                        adapter->msix_info[msi].desc[namelen] = 0;
                }
        }
}

/*
 * Request all of our MSI-X resources.
 */
static int request_msix_queue_irqs(struct adapter *adapter)
{
        struct sge *s = &adapter->sge;
        int rxq, msi, err;

        /*
         * Firmware events.
         */
        err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
                          0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
        if (err)
                return err;

        /*
         * Ethernet queues.
         */
        msi = MSIX_IQFLINT;
        for_each_ethrxq(s, rxq) {
                err = request_irq(adapter->msix_info[msi].vec,
                                  t4vf_sge_intr_msix, 0,
                                  adapter->msix_info[msi].desc,
                                  &s->ethrxq[rxq].rspq);
                if (err)
                        goto err_free_irqs;
                msi++;
        }
        return 0;

err_free_irqs:
        while (--rxq >= 0)
                free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
        free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
        return err;
}

/*
 * Free our MSI-X resources.
 */
static void free_msix_queue_irqs(struct adapter *adapter)
{
        struct sge *s = &adapter->sge;
        int rxq, msi;

        free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
        msi = MSIX_IQFLINT;
        for_each_ethrxq(s, rxq)
                free_irq(adapter->msix_info[msi++].vec,
                         &s->ethrxq[rxq].rspq);
}

/*
 * Turn on NAPI and start up interrupts on a response queue.
 */
static void qenable(struct sge_rspq *rspq)
{
        napi_enable(&rspq->napi);

        /*
         * 0-increment the Going To Sleep register to start the timer and
         * enable interrupts.
         */
        t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
                     CIDXINC_V(0) |
                     SEINTARM_V(rspq->intr_params) |
                     INGRESSQID_V(rspq->cntxt_id));
}

/*
 * Enable NAPI scheduling and interrupt generation for all Receive Queues.
 */
static void enable_rx(struct adapter *adapter)
{
        int rxq;
        struct sge *s = &adapter->sge;

        for_each_ethrxq(s, rxq)
                qenable(&s->ethrxq[rxq].rspq);
        qenable(&s->fw_evtq);

        /*
         * The interrupt queue doesn't use NAPI so we do the 0-increment of
         * its Going To Sleep register here to get it started.
         */
        if (adapter->flags & CXGB4VF_USING_MSI)
                t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
                             CIDXINC_V(0) |
                             SEINTARM_V(s->intrq.intr_params) |
                             INGRESSQID_V(s->intrq.cntxt_id));
}

/*
 * Wait until all NAPI handlers are descheduled.
 */
static void quiesce_rx(struct adapter *adapter)
{
        struct sge *s = &adapter->sge;
        int rxq;

        for_each_ethrxq(s, rxq)
                napi_disable(&s->ethrxq[rxq].rspq.napi);
        napi_disable(&s->fw_evtq.napi);
}

/*
 * Response queue handler for the firmware event queue.
 */
static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
                          const struct pkt_gl *gl)
{
        /*
         * Extract response opcode and get pointer to CPL message body.
         */
        struct adapter *adapter = rspq->adapter;
        u8 opcode = ((const struct rss_header *)rsp)->opcode;
        void *cpl = (void *)(rsp + 1);

        switch (opcode) {
        case CPL_FW6_MSG: {
                /*
                 * We've received an asynchronous message from the firmware.
                 */
                const struct cpl_fw6_msg *fw_msg = cpl;

                if (fw_msg->type == FW6_TYPE_CMD_RPL)
                        t4vf_handle_fw_rpl(adapter, fw_msg->data);
                break;
        }

        case CPL_FW4_MSG: {
                /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
                 */
                const struct cpl_sge_egr_update *p = (void *)(rsp + 3);

                opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
                if (opcode != CPL_SGE_EGR_UPDATE) {
                        dev_err(adapter->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n",
                                opcode);
                        break;
                }
                cpl = (void *)p;
        }
                /* Fall through */

        case CPL_SGE_EGR_UPDATE: {
                /*
                 * We've received an Egress Queue Status Update message.  We
                 * get these if the SGE is configured to send them when the
                 * firmware passes certain points in processing our TX
                 * Ethernet Queue, or if we make an explicit request for one.
                 * We use these updates to determine when we may need to
                 * restart a TX Ethernet Queue which was stopped for lack of
                 * free TX Queue Descriptors ...
                 */
                const struct cpl_sge_egr_update *p = cpl;
                unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
                struct sge *s = &adapter->sge;
                struct sge_txq *tq;
                struct sge_eth_txq *txq;
                unsigned int eq_idx;

                /*
                 * Perform sanity checking on the Queue ID to make sure it
                 * really refers to one of our TX Ethernet Egress Queues which
                 * is active and matches the queue's ID.  None of these error
                 * conditions should ever happen so we may want to make them
                 * fatal and/or conditional under DEBUG.
                 */
                eq_idx = EQ_IDX(s, qid);
                if (unlikely(eq_idx >= MAX_EGRQ)) {
                        dev_err(adapter->pdev_dev,
                                "Egress Update QID %d out of range\n", qid);
                        break;
                }
                tq = s->egr_map[eq_idx];
                if (unlikely(tq == NULL)) {
                        dev_err(adapter->pdev_dev,
                                "Egress Update QID %d TXQ=NULL\n", qid);
                        break;
                }
                txq = container_of(tq, struct sge_eth_txq, q);
                if (unlikely(tq->abs_id != qid)) {
                        dev_err(adapter->pdev_dev,
                                "Egress Update QID %d refers to TXQ %d\n",
                                qid, tq->abs_id);
                        break;
                }

                /*
                 * Restart a stopped TX Queue which has less than half of its
                 * TX ring in use ...
                 */
                txq->q.restarts++;
                netif_tx_wake_queue(txq->txq);
                break;
        }

        default:
                dev_err(adapter->pdev_dev,
                        "unexpected CPL %#x on FW event queue\n", opcode);
        }

        return 0;
}

/*
 * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
 * to use and initialize them.  We support multiple "Queue Sets" per port if
 * we have MSI-X, otherwise just one queue set per port.
 */
static int setup_sge_queues(struct adapter *adapter)
{
        struct sge *s = &adapter->sge;
        int err, pidx, msix;

        /*
         * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
         * state.
         */
        bitmap_zero(s->starving_fl, MAX_EGRQ);

        /*
         * If we're using MSI interrupt mode we need to set up a "forwarded
         * interrupt" queue which we'll set up with our MSI vector.  The rest
         * of the ingress queues will be set up to forward their interrupts to
         * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
         * the intrq's queue ID as the interrupt forwarding queue for the
         * subsequent calls ...
         */
        if (adapter->flags & CXGB4VF_USING_MSI) {
                err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
                                         adapter->port[0], 0, NULL, NULL);
                if (err)
                        goto err_free_queues;
        }

        /*
         * Allocate our ingress queue for asynchronous firmware messages.
         */
        err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
                                 MSIX_FW, NULL, fwevtq_handler);
        if (err)
                goto err_free_queues;

        /*
         * Allocate each "port"'s initial Queue Sets.  These can be changed
         * later on ... up to the point where any interface on the adapter is
         * brought up at which point lots of things get nailed down
         * permanently ...
         */
        msix = MSIX_IQFLINT;
        for_each_port(adapter, pidx) {
                struct net_device *dev = adapter->port[pidx];
                struct port_info *pi = netdev_priv(dev);
                struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
                struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
                int qs;

                for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
                        err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
                                                 dev, msix++,
                                                 &rxq->fl, t4vf_ethrx_handler);
                        if (err)
                                goto err_free_queues;

                        err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
                                             netdev_get_tx_queue(dev, qs),
                                             s->fw_evtq.cntxt_id);
                        if (err)
                                goto err_free_queues;

                        rxq->rspq.idx = qs;
                        memset(&rxq->stats, 0, sizeof(rxq->stats));
                }
        }

        /*
         * Create the reverse mappings for the queues.
         */
        s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
        s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
        IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
        for_each_port(adapter, pidx) {
                struct net_device *dev = adapter->port[pidx];
                struct port_info *pi = netdev_priv(dev);
                struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
                struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
                int qs;

                for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
                        IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
                        EQ_MAP(s, txq->q.abs_id) = &txq->q;

                        /*
                         * The FW_IQ_CMD doesn't return the Absolute Queue IDs
                         * for Free Lists but since all of the Egress Queues
                         * (including Free Lists) have Relative Queue IDs
                         * which are computed as Absolute - Base Queue ID, we
                         * can synthesize the Absolute Queue IDs for the Free
                         * Lists.  This is useful for debugging purposes when
                         * we want to dump Queue Contexts via the PF Driver.
                         */
                        rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
                        EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
                }
        }
        return 0;

err_free_queues:
        t4vf_free_sge_resources(adapter);
        return err;
}

/*
 * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
 * queues.  We configure the RSS CPU lookup table to distribute to the number
 * of HW receive queues, and the response queue lookup table to narrow that
 * down to the response queues actually configured for each "port" (Virtual
 * Interface).  We always configure the RSS mapping for all ports since the
 * mapping table has plenty of entries.
 */
static int setup_rss(struct adapter *adapter)
{
        int pidx;

        for_each_port(adapter, pidx) {
                struct port_info *pi = adap2pinfo(adapter, pidx);
                struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
                u16 rss[MAX_PORT_QSETS];
                int qs, err;

                for (qs = 0; qs < pi->nqsets; qs++)
                        rss[qs] = rxq[qs].rspq.abs_id;

                err = t4vf_config_rss_range(adapter, pi->viid,
                                            0, pi->rss_size, rss, pi->nqsets);
                if (err)
                        return err;

                /*
                 * Perform Global RSS Mode-specific initialization.
                 */
                switch (adapter->params.rss.mode) {
                case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
                        /*
                         * If Tunnel All Lookup isn't specified in the global
                         * RSS Configuration, then we need to specify a
                         * default Ingress Queue for any ingress packets which
                         * aren't hashed.  We'll use our first ingress queue
                         * ...
                         */
                        if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
                                union rss_vi_config config;

                                err = t4vf_read_rss_vi_config(adapter,
                                                              pi->viid,
                                                              &config);
                                if (err)
                                        return err;
                                config.basicvirtual.defaultq =
                                        rxq[0].rspq.abs_id;
                                err = t4vf_write_rss_vi_config(adapter,
                                                               pi->viid,
                                                               &config);
                                if (err)
                                        return err;
                        }
                        break;
                }
        }

        return 0;
}

/*
 * Bring the adapter up.  Called whenever we go from no "ports" open to having
 * one open.  This function performs the actions necessary to make an adapter
 * operational, such as completing the initialization of HW modules, and
 * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
 * this is called "cxgb_up" in the PF Driver.)
 */
static int adapter_up(struct adapter *adapter)
{
        int err;

        /*
         * If this is the first time we've been called, perform basic
         * adapter setup.  Once we've done this, many of our adapter
         * parameters can no longer be changed ...
         */
        if ((adapter->flags & CXGB4VF_FULL_INIT_DONE) == 0) {
                err = setup_sge_queues(adapter);
                if (err)
                        return err;
                err = setup_rss(adapter);
                if (err) {
                        t4vf_free_sge_resources(adapter);
                        return err;
                }

                if (adapter->flags & CXGB4VF_USING_MSIX)
                        name_msix_vecs(adapter);

                adapter->flags |= CXGB4VF_FULL_INIT_DONE;
        }

        /*
         * Acquire our interrupt resources.  We only support MSI-X and MSI.
         */
        BUG_ON((adapter->flags &
               (CXGB4VF_USING_MSIX | CXGB4VF_USING_MSI)) == 0);
        if (adapter->flags & CXGB4VF_USING_MSIX)
                err = request_msix_queue_irqs(adapter);
        else
                err = request_irq(adapter->pdev->irq,
                                  t4vf_intr_handler(adapter), 0,
                                  adapter->name, adapter);
        if (err) {
                dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
                        err);
                return err;
        }

        /*
         * Enable NAPI ingress processing and return success.
         */
        enable_rx(adapter);
        t4vf_sge_start(adapter);

        return 0;
}

/*
 * Bring the adapter down.  Called whenever the last "port" (Virtual
 * Interface) is closed.  (Note that this routine is called "cxgb_down" in the
 * PF Driver.)
 */
static void adapter_down(struct adapter *adapter)
{
        /*
         * Free interrupt resources.
         */
        if (adapter->flags & CXGB4VF_USING_MSIX)
                free_msix_queue_irqs(adapter);
        else
                free_irq(adapter->pdev->irq, adapter);

        /*
         * Wait for NAPI handlers to finish.
         */
        quiesce_rx(adapter);
}

/*
 * Start up a net device.
 */
static int cxgb4vf_open(struct net_device *dev)
{
        int err;
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;

        /*
         * If we don't have a connection to the firmware there's nothing we
         * can do.
         */
        if (!(adapter->flags & CXGB4VF_FW_OK))
                return -ENXIO;

        /*
         * If this is the first interface that we're opening on the "adapter",
         * bring the "adapter" up now.
         */
        if (adapter->open_device_map == 0) {
                err = adapter_up(adapter);
                if (err)
                        return err;
        }

        /* It's possible that the basic port information could have
         * changed since we first read it.
         */
        err = t4vf_update_port_info(pi);
        if (err < 0)
                return err;

        /*
         * Note that this interface is up and start everything up ...
         */
        err = link_start(dev);
        if (err)
                goto err_unwind;

        pi->vlan_id = t4vf_get_vf_vlan_acl(adapter);

        netif_tx_start_all_queues(dev);
        set_bit(pi->port_id, &adapter->open_device_map);
        return 0;

err_unwind:
        if (adapter->open_device_map == 0)
                adapter_down(adapter);
        return err;
}

/*
 * Shut down a net device.  This routine is called "cxgb_close" in the PF
 * Driver ...
 */
static int cxgb4vf_stop(struct net_device *dev)
{
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;

        netif_tx_stop_all_queues(dev);
        netif_carrier_off(dev);
        t4vf_enable_pi(adapter, pi, false, false);

        clear_bit(pi->port_id, &adapter->open_device_map);
        if (adapter->open_device_map == 0)
                adapter_down(adapter);
        return 0;
}

/*
 * Translate our basic statistics into the standard "ifconfig" statistics.
 */
static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
{
        struct t4vf_port_stats stats;
        struct port_info *pi = netdev2pinfo(dev);
        struct adapter *adapter = pi->adapter;
        struct net_device_stats *ns = &dev->stats;
        int err;

        spin_lock(&adapter->stats_lock);
        err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
        spin_unlock(&adapter->stats_lock);

        memset(ns, 0, sizeof(*ns));
        if (err)
                return ns;

        ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
                        stats.tx_ucast_bytes + stats.tx_offload_bytes);
        ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
                          stats.tx_ucast_frames + stats.tx_offload_frames);
        ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
                        stats.rx_ucast_bytes);
        ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
                          stats.rx_ucast_frames);
        ns->multicast = stats.rx_mcast_frames;
        ns->tx_errors = stats.tx_drop_frames;
        ns->rx_errors = stats.rx_err_frames;

        return ns;
}

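/*
 * "Sync" callback used by __dev_uc_sync()/__dev_mc_sync(): try to install an
 * exact-match MPS TCAM filter for a secondary unicast/multicast MAC address;
 * if the hardware placed the address in the hash table instead, remember it
 * in our hash address list and reprogram the hash vector.
 */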
static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
{
        struct port_info *pi = netdev_priv(netdev);
        struct adapter *adapter = pi->adapter;
        int ret;
        u64 mhash = 0;
        u64 uhash = 0;
        bool free = false;
        bool ucast = is_unicast_ether_addr(mac_addr);
        const u8 *maclist[1] = {mac_addr};
        struct hash_mac_addr *new_entry;

        ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
                                  NULL, ucast ? &uhash : &mhash, false);
        if (ret < 0)
                goto out;
        /* If the address went into the hash table (non-zero uhash/mhash),
         * add it to our hash address list; at the end we recalculate the
         * hash vector for the whole list and program it.
         */
        if (uhash || mhash) {
                new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
                if (!new_entry)
                        return -ENOMEM;
                ether_addr_copy(new_entry->addr, mac_addr);
                list_add_tail(&new_entry->list, &adapter->mac_hlist);
                ret = cxgb4vf_set_addr_hash(pi);
        }
out:
        return ret < 0 ? ret : 0;
}

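/*
 * "Unsync" callback used by __dev_uc_sync()/__dev_mc_sync(): remove the MPS
 * filter (TCAM or hash) which was installed for a MAC address that is no
 * longer in use.
 */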
static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
{
        struct port_info *pi = netdev_priv(netdev);
        struct adapter *adapter = pi->adapter;
        int ret;
        const u8 *maclist[1] = {mac_addr};
        struct hash_mac_addr *entry, *tmp;

        /* If the MAC address to be removed is in the hash addr
         * list, delete it from the list and update hash vector
         */
        list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) {
                if (ether_addr_equal(entry->addr, mac_addr)) {
                        list_del(&entry->list);
                        kfree(entry);
                        return cxgb4vf_set_addr_hash(pi);
                }
        }

        ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false);
        return ret < 0 ? -EINVAL : 0;
}

/*
 * Set RX properties of a port, such as promiscuity, address filters, and MTU.
 * If @mtu is -1 it is left unchanged.
 */
static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
{
        struct port_info *pi = netdev_priv(dev);

        __dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
        __dev_mc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
        return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
                               (dev->flags & IFF_PROMISC) != 0,
                               (dev->flags & IFF_ALLMULTI) != 0,
                               1, -1, sleep_ok);
}

/*
 * Set the current receive modes on the device.
 */
static void cxgb4vf_set_rxmode(struct net_device *dev)
{
        /* unfortunately we can't return errors to the stack */
        set_rxmode(dev, -1, false);
}

/*
 * Find the entry in the interrupt holdoff timer value array which comes
 * closest to the specified interrupt holdoff value.
 */
static int closest_timer(const struct sge *s, int us)
{
        int i, timer_idx = 0, min_delta = INT_MAX;

        for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
                int delta = us - s->timer_val[i];

                if (delta < 0)
                        delta = -delta;
                if (delta < min_delta) {
                        min_delta = delta;
                        timer_idx = i;
                }
        }
        return timer_idx;
}

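/*
 * Find the entry in the interrupt holdoff packet count array which comes
 * closest to the specified packet count threshold.
 */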
static int closest_thres(const struct sge *s, int thres)
{
        int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;

        for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
                delta = thres - s->counter_val[i];
                if (delta < 0)
                        delta = -delta;
                if (delta < min_delta) {
                        min_delta = delta;
                        pktcnt_idx = i;
                }
        }
        return pktcnt_idx;
}

/*
 * Return a queue's interrupt hold-off time in us.  0 means no timer.
 */
static unsigned int qtimer_val(const struct adapter *adapter,
                               const struct sge_rspq *rspq)
{
        unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);

        return timer_idx < SGE_NTIMERS
                ? adapter->sge.timer_val[timer_idx]
                : 0;
}

/**
 *      set_rxq_intr_params - set a queue's interrupt holdoff parameters
 *      @adapter: the adapter
 *      @rspq: the RX response queue
 *      @us: the hold-off time in us, or 0 to disable timer
 *      @cnt: the hold-off packet count, or 0 to disable counter
 *
 *      Sets an RX response queue's interrupt hold-off time and packet count.
 *      At least one of the two needs to be enabled for the queue to generate
 *      interrupts.
 */
static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
                               unsigned int us, unsigned int cnt)
{
        unsigned int timer_idx;

        /*
         * If both the interrupt holdoff timer and count are specified as
         * zero, default to a holdoff count of 1 ...
         */
        if ((us | cnt) == 0)
                cnt = 1;

        /*
         * If an interrupt holdoff count has been specified, then find the
         * closest configured holdoff count and use that.  If the response
         * queue has already been created, then update its queue context
         * parameters ...
         */
        if (cnt) {
                int err;
                u32 v, pktcnt_idx;

                pktcnt_idx = closest_thres(&adapter->sge, cnt);
                if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
                        v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
                            FW_PARAMS_PARAM_X_V(
                                        FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
                            FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
                        err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
                        if (err)
                                return err;
                }
                rspq->pktcnt_idx = pktcnt_idx;
        }

        /*
         * Compute the closest holdoff timer index from the supplied holdoff
         * timer value.
         */
        timer_idx = (us == 0
                     ? SGE_TIMER_RSTRT_CNTR
                     : closest_timer(&adapter->sge, us));

        /*
         * Update the response queue's interrupt coalescing parameters and
         * return success.
         */
        rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
                             QINTR_CNT_EN_V(cnt > 0));
        return 0;
}

/*
 * Return a version number to identify the type of adapter.  The scheme is:
 * - bits 0..9: chip version
 * - bits 10..15: chip revision
 */
static inline unsigned int mk_adap_vers(const struct adapter *adapter)
{
        /*
         * Chip version 4, revision 0x3f (cxgb4vf).
         */
        return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
}

/*
 * Execute the specified ioctl command.
 */
static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int ret = 0;

        switch (cmd) {
            /*
             * The VF Driver doesn't have access to any of the other
             * common Ethernet device ioctl()'s (like reading/writing
             * PHY registers, etc.)
             */

        default:
                ret = -EOPNOTSUPP;
                break;
        }
        return ret;
}

/*
 * Change the device's MTU.
 */
static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
{
        int ret;
        struct port_info *pi = netdev_priv(dev);

        ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
                              -1, -1, -1, -1, true);
        if (!ret)
                dev->mtu = new_mtu;
        return ret;
}

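/*
 * Adjust the requested netdev feature flags to what the hardware can
 * actually support.
 */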
static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
        netdev_features_t features)
{
        /*
         * Since there is no support for separate rx/tx vlan accel
         * enable/disable make sure tx flag is always in same state as rx.
         */
        if (features & NETIF_F_HW_VLAN_CTAG_RX)
                features |= NETIF_F_HW_VLAN_CTAG_TX;
        else
                features &= ~NETIF_F_HW_VLAN_CTAG_TX;

        return features;
}

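/*
 * Apply a change in the netdev feature flags; only a VLAN RX offload (tag
 * stripping) toggle needs to be pushed down to the hardware.
 */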
static int cxgb4vf_set_features(struct net_device *dev,
        netdev_features_t features)
{
        struct port_info *pi = netdev_priv(dev);
        netdev_features_t changed = dev->features ^ features;

        if (changed & NETIF_F_HW_VLAN_CTAG_RX)
                t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
                                features & NETIF_F_HW_VLAN_CTAG_TX, 0);

        return 0;
}

/*
 * Change the device's MAC address.
 */
static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
{
        int ret;
        struct sockaddr *addr = _addr;
        struct port_info *pi = netdev_priv(dev);

        if (!is_valid_ether_addr(addr->sa_data))
                return -EADDRNOTAVAIL;

        ret = cxgb4vf_change_mac(pi, pi->viid, &pi->xact_addr_filt,
                                 addr->sa_data, true);
        if (ret < 0)
                return ret;

        memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
        return 0;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Poll all of our receive queues.  This is called outside of normal interrupt
 * context.
 */
static void cxgb4vf_poll_controller(struct net_device *dev)
{
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;

        if (adapter->flags & CXGB4VF_USING_MSIX) {
                struct sge_eth_rxq *rxq;
                int nqsets;

                rxq = &adapter->sge.ethrxq[pi->first_qset];
                for (nqsets = pi->nqsets; nqsets; nqsets--) {
                        t4vf_sge_intr_msix(0, &rxq->rspq);
                        rxq++;
                }
        } else {
                t4vf_intr_handler(adapter)(0, adapter);
        }
}
#endif

/*
 * Ethtool operations.
 * ===================
 *
 * Note that we don't support any ethtool operations which change the physical
 * state of the port to which we're linked.
 */

/**
 *      from_fw_port_mod_type - translate Firmware Port/Module type to Ethtool
 *      @port_type: Firmware Port Type
 *      @mod_type: Firmware Module Type
 *
 *      Translate Firmware Port/Module type to Ethtool Port Type.
 */
static int from_fw_port_mod_type(enum fw_port_type port_type,
                                 enum fw_port_module_type mod_type)
{
        if (port_type == FW_PORT_TYPE_BT_SGMII ||
            port_type == FW_PORT_TYPE_BT_XFI ||
            port_type == FW_PORT_TYPE_BT_XAUI) {
                return PORT_TP;
        } else if (port_type == FW_PORT_TYPE_FIBER_XFI ||
                   port_type == FW_PORT_TYPE_FIBER_XAUI) {
                return PORT_FIBRE;
        } else if (port_type == FW_PORT_TYPE_SFP ||
                   port_type == FW_PORT_TYPE_QSFP_10G ||
                   port_type == FW_PORT_TYPE_QSA ||
                   port_type == FW_PORT_TYPE_QSFP ||
                   port_type == FW_PORT_TYPE_CR4_QSFP ||
                   port_type == FW_PORT_TYPE_CR_QSFP ||
                   port_type == FW_PORT_TYPE_CR2_QSFP ||
                   port_type == FW_PORT_TYPE_SFP28) {
                if (mod_type == FW_PORT_MOD_TYPE_LR ||
                    mod_type == FW_PORT_MOD_TYPE_SR ||
                    mod_type == FW_PORT_MOD_TYPE_ER ||
                    mod_type == FW_PORT_MOD_TYPE_LRM)
                        return PORT_FIBRE;
                else if (mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
                         mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
                        return PORT_DA;
                else
                        return PORT_OTHER;
        } else if (port_type == FW_PORT_TYPE_KR4_100G ||
                   port_type == FW_PORT_TYPE_KR_SFP28 ||
                   port_type == FW_PORT_TYPE_KR_XLAUI) {
                return PORT_NONE;
        }

        return PORT_OTHER;
}

/**
 *      fw_caps_to_lmm - translate Firmware to ethtool Link Mode Mask
 *      @port_type: Firmware Port Type
 *      @fw_caps: Firmware Port Capabilities
 *      @link_mode_mask: ethtool Link Mode Mask
 *
 *      Translate a Firmware Port Capabilities specification to an ethtool
 *      Link Mode Mask.
 */
static void fw_caps_to_lmm(enum fw_port_type port_type,
                           unsigned int fw_caps,
                           unsigned long *link_mode_mask)
{
        #define SET_LMM(__lmm_name) \
                __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \
                          link_mode_mask)

        #define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
                do { \
                        if (fw_caps & FW_PORT_CAP32_ ## __fw_name) \
                                SET_LMM(__lmm_name); \
                } while (0)

        switch (port_type) {
        case FW_PORT_TYPE_BT_SGMII:
        case FW_PORT_TYPE_BT_XFI:
        case FW_PORT_TYPE_BT_XAUI:
                SET_LMM(TP);
                FW_CAPS_TO_LMM(SPEED_100M, 100baseT_Full);
                FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
                break;

        case FW_PORT_TYPE_KX4:
        case FW_PORT_TYPE_KX:
                SET_LMM(Backplane);
                FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
                break;

        case FW_PORT_TYPE_KR:
                SET_LMM(Backplane);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
                break;

        case FW_PORT_TYPE_BP_AP:
                SET_LMM(Backplane);
                FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
                break;

        case FW_PORT_TYPE_BP4_AP:
                SET_LMM(Backplane);
                FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
                break;

        case FW_PORT_TYPE_FIBER_XFI:
        case FW_PORT_TYPE_FIBER_XAUI:
        case FW_PORT_TYPE_SFP:
        case FW_PORT_TYPE_QSFP_10G:
        case FW_PORT_TYPE_QSA:
                SET_LMM(FIBRE);
                FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
                break;

        case FW_PORT_TYPE_BP40_BA:
        case FW_PORT_TYPE_QSFP:
                SET_LMM(FIBRE);
                FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
                FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
                break;

        case FW_PORT_TYPE_CR_QSFP:
        case FW_PORT_TYPE_SFP28:
                SET_LMM(FIBRE);
                FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
                FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
                break;

        case FW_PORT_TYPE_KR_SFP28:
                SET_LMM(Backplane);
                FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
                FW_CAPS_TO_LMM(SPEED_25G, 25000baseKR_Full);
                break;

        case FW_PORT_TYPE_KR_XLAUI:
                SET_LMM(Backplane);
                FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
                FW_CAPS_TO_LMM(SPEED_40G, 40000baseKR4_Full);
                break;

        case FW_PORT_TYPE_CR2_QSFP:
                SET_LMM(FIBRE);
                FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full);
                break;

        case FW_PORT_TYPE_KR4_100G:
        case FW_PORT_TYPE_CR4_QSFP:
                SET_LMM(FIBRE);
                FW_CAPS_TO_LMM(SPEED_1G,  1000baseT_Full);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
                FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
                FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
                FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
                FW_CAPS_TO_LMM(SPEED_100G, 100000baseCR4_Full);
                break;

        default:
                break;
        }

        if (fw_caps & FW_PORT_CAP32_FEC_V(FW_PORT_CAP32_FEC_M)) {
                FW_CAPS_TO_LMM(FEC_RS, FEC_RS);
                FW_CAPS_TO_LMM(FEC_BASER_RS, FEC_BASER);
        } else {
                SET_LMM(FEC_NONE);
        }

        FW_CAPS_TO_LMM(ANEG, Autoneg);
        FW_CAPS_TO_LMM(802_3_PAUSE, Pause);
        FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause);

        #undef FW_CAPS_TO_LMM
        #undef SET_LMM
}

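/*
 * Report the device's link settings (port type, speed/duplex, and the
 * supported/advertised/peer link mode masks) to ethtool.
 */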
static int cxgb4vf_get_link_ksettings(struct net_device *dev,
                                  struct ethtool_link_ksettings *link_ksettings)
{
        struct port_info *pi = netdev_priv(dev);
        struct ethtool_link_settings *base = &link_ksettings->base;

        /* For now, the Firmware doesn't send up Port State changes
         * when the Virtual Interface attached to the Port is down.  So
         * if it's down, let's grab any changes.
         */
        if (!netif_running(dev))
                (void)t4vf_update_port_info(pi);

        ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
        ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
        ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);

        base->port = from_fw_port_mod_type(pi->port_type, pi->mod_type);

        if (pi->mdio_addr >= 0) {
                base->phy_address = pi->mdio_addr;
                base->mdio_support = (pi->port_type == FW_PORT_TYPE_BT_SGMII
                                      ? ETH_MDIO_SUPPORTS_C22
                                      : ETH_MDIO_SUPPORTS_C45);
        } else {
                base->phy_address = 255;
                base->mdio_support = 0;
        }

        fw_caps_to_lmm(pi->port_type, pi->link_cfg.pcaps,
                       link_ksettings->link_modes.supported);
        fw_caps_to_lmm(pi->port_type, pi->link_cfg.acaps,
                       link_ksettings->link_modes.advertising);
        fw_caps_to_lmm(pi->port_type, pi->link_cfg.lpacaps,
                       link_ksettings->link_modes.lp_advertising);

        if (netif_carrier_ok(dev)) {
                base->speed = pi->link_cfg.speed;
                base->duplex = DUPLEX_FULL;
        } else {
                base->speed = SPEED_UNKNOWN;
                base->duplex = DUPLEX_UNKNOWN;
        }

        base->autoneg = pi->link_cfg.autoneg;
        if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG)
                ethtool_link_ksettings_add_link_mode(link_ksettings,
                                                     supported, Autoneg);
        if (pi->link_cfg.autoneg)
                ethtool_link_ksettings_add_link_mode(link_ksettings,
                                                     advertising, Autoneg);

        return 0;
}

/* Translate the Firmware FEC value into the ethtool value. */
static inline unsigned int fwcap_to_eth_fec(unsigned int fw_fec)
{
        unsigned int eth_fec = 0;

        if (fw_fec & FW_PORT_CAP32_FEC_RS)
                eth_fec |= ETHTOOL_FEC_RS;
        if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS)
                eth_fec |= ETHTOOL_FEC_BASER;

        /* if nothing is set, then FEC is off */
        if (!eth_fec)
                eth_fec = ETHTOOL_FEC_OFF;

        return eth_fec;
}

/* Translate Common Code FEC value into ethtool value. */
static inline unsigned int cc_to_eth_fec(unsigned int cc_fec)
{
        unsigned int eth_fec = 0;

        if (cc_fec & FEC_AUTO)
                eth_fec |= ETHTOOL_FEC_AUTO;
        if (cc_fec & FEC_RS)
                eth_fec |= ETHTOOL_FEC_RS;
        if (cc_fec & FEC_BASER_RS)
                eth_fec |= ETHTOOL_FEC_BASER;

        /* if nothing is set, then FEC is off */
        if (!eth_fec)
                eth_fec = ETHTOOL_FEC_OFF;

        return eth_fec;
}

1529static int cxgb4vf_get_fecparam(struct net_device *dev,
1530                                struct ethtool_fecparam *fec)
1531{
1532        const struct port_info *pi = netdev_priv(dev);
1533        const struct link_config *lc = &pi->link_cfg;
1534
1535        /* Translate the Firmware FEC Support into the ethtool value.  We
1536         * always support IEEE 802.3 "automatic" selection of Link FEC type if
1537         * any FEC is supported.
1538         */
1539        fec->fec = fwcap_to_eth_fec(lc->pcaps);
1540        if (fec->fec != ETHTOOL_FEC_OFF)
1541                fec->fec |= ETHTOOL_FEC_AUTO;
1542
1543        /* Translate the current internal FEC parameters into the
1544         * ethtool values.
1545         */
1546        fec->active_fec = cc_to_eth_fec(lc->fec);
1547        return 0;
1548}
1549
1550/*
1551 * Return our driver information.
1552 */
1553static void cxgb4vf_get_drvinfo(struct net_device *dev,
1554                                struct ethtool_drvinfo *drvinfo)
1555{
1556        struct adapter *adapter = netdev2adap(dev);
1557
1558        strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1559        strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
1560        strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1561                sizeof(drvinfo->bus_info));
1562        snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1563                 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1564                 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
1565                 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
1566                 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
1567                 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
1568                 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
1569                 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
1570                 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
1571                 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
1572}
1573
1574/*
1575 * Return current adapter message level.
1576 */
1577static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1578{
1579        return netdev2adap(dev)->msg_enable;
1580}
1581
1582/*
1583 * Set current adapter message level.
1584 */
1585static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1586{
1587        netdev2adap(dev)->msg_enable = msglevel;
1588}
1589
1590/*
1591 * Return the device's current Queue Set ring size parameters along with the
1592 * allowed maximum values.  Since ethtool doesn't understand the concept of
1593 * multi-queue devices, we just return the current values associated with the
1594 * first Queue Set.
1595 */
1596static void cxgb4vf_get_ringparam(struct net_device *dev,
1597                                  struct ethtool_ringparam *rp)
1598{
1599        const struct port_info *pi = netdev_priv(dev);
1600        const struct sge *s = &pi->adapter->sge;
1601
1602        rp->rx_max_pending = MAX_RX_BUFFERS;
1603        rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1604        rp->rx_jumbo_max_pending = 0;
1605        rp->tx_max_pending = MAX_TXQ_ENTRIES;
1606
1607        rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1608        rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1609        rp->rx_jumbo_pending = 0;
1610        rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1611}
1612
1613/*
1614 * Set the Queue Set ring size parameters for the device.  Again, since
1615 * ethtool doesn't allow for the concept of multiple queues per device, we'll
1616 * apply these new values across all of the Queue Sets associated with the
1617 * device -- after vetting them of course!
1618 */
1619static int cxgb4vf_set_ringparam(struct net_device *dev,
1620                                 struct ethtool_ringparam *rp)
1621{
1622        const struct port_info *pi = netdev_priv(dev);
1623        struct adapter *adapter = pi->adapter;
1624        struct sge *s = &adapter->sge;
1625        int qs;
1626
1627        if (rp->rx_pending > MAX_RX_BUFFERS ||
1628            rp->rx_jumbo_pending ||
1629            rp->tx_pending > MAX_TXQ_ENTRIES ||
1630            rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1631            rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1632            rp->rx_pending < MIN_FL_ENTRIES ||
1633            rp->tx_pending < MIN_TXQ_ENTRIES)
1634                return -EINVAL;
1635
1636        if (adapter->flags & CXGB4VF_FULL_INIT_DONE)
1637                return -EBUSY;
1638
1639        for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1640                s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1641                s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1642                s->ethtxq[qs].q.size = rp->tx_pending;
1643        }
1644        return 0;
1645}
1646
1647/*
1648 * Return the interrupt holdoff timer and count for the first Queue Set on the
1649 * device.  Our extension ioctl() (the cxgbtool interface) allows the
1650 * interrupt holdoff timer to be read on all of the device's Queue Sets.
1651 */
1652static int cxgb4vf_get_coalesce(struct net_device *dev,
1653                                struct ethtool_coalesce *coalesce)
1654{
1655        const struct port_info *pi = netdev_priv(dev);
1656        const struct adapter *adapter = pi->adapter;
1657        const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1658
1659        coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1660        coalesce->rx_max_coalesced_frames =
1661                ((rspq->intr_params & QINTR_CNT_EN_F)
1662                 ? adapter->sge.counter_val[rspq->pktcnt_idx]
1663                 : 0);
1664        return 0;
1665}
1666
1667/*
1668 * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1669 * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1670 * the interrupt holdoff timer on any of the device's Queue Sets.
1671 */
1672static int cxgb4vf_set_coalesce(struct net_device *dev,
1673                                struct ethtool_coalesce *coalesce)
1674{
1675        const struct port_info *pi = netdev_priv(dev);
1676        struct adapter *adapter = pi->adapter;
1677
1678        return set_rxq_intr_params(adapter,
1679                                   &adapter->sge.ethrxq[pi->first_qset].rspq,
1680                                   coalesce->rx_coalesce_usecs,
1681                                   coalesce->rx_max_coalesced_frames);
1682}
1683
1684/*
1685 * Report current port link pause parameter settings.
1686 */
1687static void cxgb4vf_get_pauseparam(struct net_device *dev,
1688                                   struct ethtool_pauseparam *pauseparam)
1689{
1690        struct port_info *pi = netdev_priv(dev);
1691
1692        pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1693        pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1694        pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1695}
1696
1697/*
1698 * Identify the port by blinking the port's LED.
1699 */
1700static int cxgb4vf_phys_id(struct net_device *dev,
1701                           enum ethtool_phys_id_state state)
1702{
1703        unsigned int val;
1704        struct port_info *pi = netdev_priv(dev);
1705
1706        if (state == ETHTOOL_ID_ACTIVE)
1707                val = 0xffff;
1708        else if (state == ETHTOOL_ID_INACTIVE)
1709                val = 0;
1710        else
1711                return -EINVAL;
1712
1713        return t4vf_identify_port(pi->adapter, pi->viid, val);
1714}
1715
1716/*
1717 * Port stats maintained per queue of the port.
1718 */
1719struct queue_port_stats {
1720        u64 tso;
1721        u64 tx_csum;
1722        u64 rx_csum;
1723        u64 vlan_ex;
1724        u64 vlan_ins;
1725        u64 lro_pkts;
1726        u64 lro_merged;
1727};
1728
1729/*
1730 * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
1731 * these need to match the order of statistics returned by
1732 * t4vf_get_port_stats().
1733 */
1734static const char stats_strings[][ETH_GSTRING_LEN] = {
1735        /*
1736         * These must match the layout of the t4vf_port_stats structure.
1737         */
1738        "TxBroadcastBytes  ",
1739        "TxBroadcastFrames ",
1740        "TxMulticastBytes  ",
1741        "TxMulticastFrames ",
1742        "TxUnicastBytes    ",
1743        "TxUnicastFrames   ",
1744        "TxDroppedFrames   ",
1745        "TxOffloadBytes    ",
1746        "TxOffloadFrames   ",
1747        "RxBroadcastBytes  ",
1748        "RxBroadcastFrames ",
1749        "RxMulticastBytes  ",
1750        "RxMulticastFrames ",
1751        "RxUnicastBytes    ",
1752        "RxUnicastFrames   ",
1753        "RxErrorFrames     ",
1754
1755        /*
1756         * These are accumulated per-queue statistics and must match the
1757         * order of the fields in the queue_port_stats structure.
1758         */
1759        "TSO               ",
1760        "TxCsumOffload     ",
1761        "RxCsumGood        ",
1762        "VLANextractions   ",
1763        "VLANinsertions    ",
1764        "GROPackets        ",
1765        "GROMerged         ",
1766};
1767
1768/*
1769 * Return the number of statistics in the specified statistics set.
1770 */
1771static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1772{
1773        switch (sset) {
1774        case ETH_SS_STATS:
1775                return ARRAY_SIZE(stats_strings);
1776        default:
1777                return -EOPNOTSUPP;
1778        }
1779        /*NOTREACHED*/
1780}
1781
1782/*
1783 * Return the strings for the specified statistics set.
1784 */
1785static void cxgb4vf_get_strings(struct net_device *dev,
1786                                u32 sset,
1787                                u8 *data)
1788{
1789        switch (sset) {
1790        case ETH_SS_STATS:
1791                memcpy(data, stats_strings, sizeof(stats_strings));
1792                break;
1793        }
1794}
1795
1796/*
1797 * Small utility routine to accumulate queue statistics across the queues of
1798 * a "port".
1799 */
1800static void collect_sge_port_stats(const struct adapter *adapter,
1801                                   const struct port_info *pi,
1802                                   struct queue_port_stats *stats)
1803{
1804        const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1805        const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1806        int qs;
1807
1808        memset(stats, 0, sizeof(*stats));
1809        for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1810                stats->tso += txq->tso;
1811                stats->tx_csum += txq->tx_cso;
1812                stats->rx_csum += rxq->stats.rx_cso;
1813                stats->vlan_ex += rxq->stats.vlan_ex;
1814                stats->vlan_ins += txq->vlan_ins;
1815                stats->lro_pkts += rxq->stats.lro_pkts;
1816                stats->lro_merged += rxq->stats.lro_merged;
1817        }
1818}
1819
1820/*
1821 * Return the ETH_SS_STATS statistics set.
1822 */
1823static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1824                                      struct ethtool_stats *stats,
1825                                      u64 *data)
1826{
1827        struct port_info *pi = netdev2pinfo(dev);
1828        struct adapter *adapter = pi->adapter;
1829        int err = t4vf_get_port_stats(adapter, pi->pidx,
1830                                      (struct t4vf_port_stats *)data);
1831        if (err)
1832                memset(data, 0, sizeof(struct t4vf_port_stats));
1833
1834        data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1835        collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1836}
1837
1838/*
1839 * Return the size of our register map.
1840 */
1841static int cxgb4vf_get_regs_len(struct net_device *dev)
1842{
1843        return T4VF_REGMAP_SIZE;
1844}
1845
1846/*
1847 * Dump a block of registers, start to end inclusive, into a buffer.
1848 */
1849static void reg_block_dump(struct adapter *adapter, void *regbuf,
1850                           unsigned int start, unsigned int end)
1851{
1852        u32 *bp = regbuf + start - T4VF_REGMAP_START;
1853
1854        for ( ; start <= end; start += sizeof(u32)) {
1855                /*
1856                 * Avoid reading the Mailbox Control register since that
1857                 * can trigger a Mailbox Ownership Arbitration cycle and
1858                 * interfere with communication with the firmware.
1859                 */
1860                if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1861                        *bp++ = 0xffff;
1862                else
1863                        *bp++ = t4_read_reg(adapter, start);
1864        }
1865}
1866
1867/*
1868 * Copy our entire register map into the provided buffer.
1869 */
1870static void cxgb4vf_get_regs(struct net_device *dev,
1871                             struct ethtool_regs *regs,
1872                             void *regbuf)
1873{
1874        struct adapter *adapter = netdev2adap(dev);
1875
1876        regs->version = mk_adap_vers(adapter);
1877
1878        /*
1879         * Fill in register buffer with our register map.
1880         */
1881        memset(regbuf, 0, T4VF_REGMAP_SIZE);
1882
1883        reg_block_dump(adapter, regbuf,
1884                       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1885                       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1886        reg_block_dump(adapter, regbuf,
1887                       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1888                       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1889
1890        /* T5 adds new registers in the PL Register map.
1891         */
1892        reg_block_dump(adapter, regbuf,
1893                       T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1894                       T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
1895                       ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
1896        reg_block_dump(adapter, regbuf,
1897                       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1898                       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1899
1900        reg_block_dump(adapter, regbuf,
1901                       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1902                       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1903}
1904
1905/*
1906 * Report current Wake On LAN settings.
1907 */
1908static void cxgb4vf_get_wol(struct net_device *dev,
1909                            struct ethtool_wolinfo *wol)
1910{
1911        wol->supported = 0;
1912        wol->wolopts = 0;
1913        memset(&wol->sopass, 0, sizeof(wol->sopass));
1914}
1915
1916/*
1917 * TCP Segmentation Offload flags which we support.
1918 */
1919#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1920#define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
1921                   NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
1922
1923static const struct ethtool_ops cxgb4vf_ethtool_ops = {
1924        .get_link_ksettings     = cxgb4vf_get_link_ksettings,
1925        .get_fecparam           = cxgb4vf_get_fecparam,
1926        .get_drvinfo            = cxgb4vf_get_drvinfo,
1927        .get_msglevel           = cxgb4vf_get_msglevel,
1928        .set_msglevel           = cxgb4vf_set_msglevel,
1929        .get_ringparam          = cxgb4vf_get_ringparam,
1930        .set_ringparam          = cxgb4vf_set_ringparam,
1931        .get_coalesce           = cxgb4vf_get_coalesce,
1932        .set_coalesce           = cxgb4vf_set_coalesce,
1933        .get_pauseparam         = cxgb4vf_get_pauseparam,
1934        .get_link               = ethtool_op_get_link,
1935        .get_strings            = cxgb4vf_get_strings,
1936        .set_phys_id            = cxgb4vf_phys_id,
1937        .get_sset_count         = cxgb4vf_get_sset_count,
1938        .get_ethtool_stats      = cxgb4vf_get_ethtool_stats,
1939        .get_regs_len           = cxgb4vf_get_regs_len,
1940        .get_regs               = cxgb4vf_get_regs,
1941        .get_wol                = cxgb4vf_get_wol,
1942};
1943
1944/*
1945 * /sys/kernel/debug/cxgb4vf support code and data.
1946 * ================================================
1947 */
1948
1949/*
1950 * Show Firmware Mailbox Command/Reply Log
1951 *
1952 * Note that we don't do any locking when dumping the Firmware Mailbox Log so
1953 * it's possible that we can catch things during a log update and therefore
1954 * see partially corrupted log entries.  But i9t's probably Good Enough(tm).
1955 * If we ever decide that we want to make sure that we're dumping a coherent
1956 * log, we'd need to perform locking in the mailbox logging and in
1957 * mboxlog_open() where we'd need to grab the entire mailbox log in one go
1958 * like we do for the Firmware Device Log.  But as stated above, meh ...
1959 */
1960static int mboxlog_show(struct seq_file *seq, void *v)
1961{
1962        struct adapter *adapter = seq->private;
1963        struct mbox_cmd_log *log = adapter->mbox_log;
1964        struct mbox_cmd *entry;
1965        int entry_idx, i;
1966
1967        if (v == SEQ_START_TOKEN) {
1968                seq_printf(seq,
1969                           "%10s  %15s  %5s  %5s  %s\n",
1970                           "Seq#", "Tstamp", "Atime", "Etime",
1971                           "Command/Reply");
1972                return 0;
1973        }
1974
1975        entry_idx = log->cursor + ((uintptr_t)v - 2);
1976        if (entry_idx >= log->size)
1977                entry_idx -= log->size;
1978        entry = mbox_cmd_log_entry(log, entry_idx);
1979
1980        /* skip over unused entries */
1981        if (entry->timestamp == 0)
1982                return 0;
1983
1984        seq_printf(seq, "%10u  %15llu  %5d  %5d",
1985                   entry->seqno, entry->timestamp,
1986                   entry->access, entry->execute);
1987        for (i = 0; i < MBOX_LEN / 8; i++) {
1988                u64 flit = entry->cmd[i];
1989                u32 hi = (u32)(flit >> 32);
1990                u32 lo = (u32)flit;
1991
1992                seq_printf(seq, "  %08x %08x", hi, lo);
1993        }
1994        seq_puts(seq, "\n");
1995        return 0;
1996}
1997
1998static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos)
1999{
2000        struct adapter *adapter = seq->private;
2001        struct mbox_cmd_log *log = adapter->mbox_log;
2002
2003        return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL);
2004}
2005
2006static void *mboxlog_start(struct seq_file *seq, loff_t *pos)
2007{
2008        return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN;
2009}
2010
2011static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos)
2012{
2013        ++*pos;
2014        return mboxlog_get_idx(seq, *pos);
2015}
2016
2017static void mboxlog_stop(struct seq_file *seq, void *v)
2018{
2019}
2020
2021static const struct seq_operations mboxlog_seq_ops = {
2022        .start = mboxlog_start,
2023        .next  = mboxlog_next,
2024        .stop  = mboxlog_stop,
2025        .show  = mboxlog_show
2026};
2027
2028static int mboxlog_open(struct inode *inode, struct file *file)
2029{
2030        int res = seq_open(file, &mboxlog_seq_ops);
2031
2032        if (!res) {
2033                struct seq_file *seq = file->private_data;
2034
2035                seq->private = inode->i_private;
2036        }
2037        return res;
2038}
2039
2040static const struct file_operations mboxlog_fops = {
2041        .owner   = THIS_MODULE,
2042        .open    = mboxlog_open,
2043        .read    = seq_read,
2044        .llseek  = seq_lseek,
2045        .release = seq_release,
2046};
2047
2048/*
2049 * Show SGE Queue Set information.  We display QPL Queues Sets per line.
2050 */
2051#define QPL     4
2052
2053static int sge_qinfo_show(struct seq_file *seq, void *v)
2054{
2055        struct adapter *adapter = seq->private;
2056        int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
2057        int qs, r = (uintptr_t)v - 1;
2058
2059        if (r)
2060                seq_putc(seq, '\n');
2061
2062        #define S3(fmt_spec, s, v) \
2063                do {\
2064                        seq_printf(seq, "%-12s", s); \
2065                        for (qs = 0; qs < n; ++qs) \
2066                                seq_printf(seq, " %16" fmt_spec, v); \
2067                        seq_putc(seq, '\n'); \
2068                } while (0)
2069        #define S(s, v)         S3("s", s, v)
2070        #define T(s, v)         S3("u", s, txq[qs].v)
2071        #define R(s, v)         S3("u", s, rxq[qs].v)
2072
2073        if (r < eth_entries) {
2074                const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
2075                const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
2076                int n = min(QPL, adapter->sge.ethqsets - QPL * r);
2077
2078                S("QType:", "Ethernet");
2079                S("Interface:",
2080                  (rxq[qs].rspq.netdev
2081                   ? rxq[qs].rspq.netdev->name
2082                   : "N/A"));
2083                S3("d", "Port:",
2084                   (rxq[qs].rspq.netdev
2085                    ? ((struct port_info *)
2086                       netdev_priv(rxq[qs].rspq.netdev))->port_id
2087                    : -1));
2088                T("TxQ ID:", q.abs_id);
2089                T("TxQ size:", q.size);
2090                T("TxQ inuse:", q.in_use);
2091                T("TxQ PIdx:", q.pidx);
2092                T("TxQ CIdx:", q.cidx);
2093                R("RspQ ID:", rspq.abs_id);
2094                R("RspQ size:", rspq.size);
2095                R("RspQE size:", rspq.iqe_len);
2096                S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
2097                S3("u", "Intr pktcnt:",
2098                   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
2099                R("RspQ CIdx:", rspq.cidx);
2100                R("RspQ Gen:", rspq.gen);
2101                R("FL ID:", fl.abs_id);
2102                R("FL size:", fl.size - MIN_FL_RESID);
2103                R("FL avail:", fl.avail);
2104                R("FL PIdx:", fl.pidx);
2105                R("FL CIdx:", fl.cidx);
2106                return 0;
2107        }
2108
2109        r -= eth_entries;
2110        if (r == 0) {
2111                const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
2112
2113                seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
2114                seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
2115                seq_printf(seq, "%-12s %16u\n", "Intr delay:",
2116                           qtimer_val(adapter, evtq));
2117                seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
2118                           adapter->sge.counter_val[evtq->pktcnt_idx]);
2119                seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
2120                seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
2121        } else if (r == 1) {
2122                const struct sge_rspq *intrq = &adapter->sge.intrq;
2123
2124                seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
2125                seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
2126                seq_printf(seq, "%-12s %16u\n", "Intr delay:",
2127                           qtimer_val(adapter, intrq));
2128                seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
2129                           adapter->sge.counter_val[intrq->pktcnt_idx]);
2130                seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
2131                seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
2132        }
2133
2134        #undef R
2135        #undef T
2136        #undef S
2137        #undef S3
2138
2139        return 0;
2140}
2141
2142/*
2143 * Return the number of "entries" in our "file".  We group the multi-Queue
2144 * sections with QPL Queue Sets per "entry".  The sections of the output are:
2145 *
2146 *     Ethernet RX/TX Queue Sets
2147 *     Firmware Event Queue
2148 *     Forwarded Interrupt Queue (if in MSI mode)
2149 */
2150static int sge_queue_entries(const struct adapter *adapter)
2151{
2152        return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2153                ((adapter->flags & CXGB4VF_USING_MSI) != 0);
2154}
2155
2156static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
2157{
2158        int entries = sge_queue_entries(seq->private);
2159
2160        return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2161}
2162
2163static void sge_queue_stop(struct seq_file *seq, void *v)
2164{
2165}
2166
2167static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
2168{
2169        int entries = sge_queue_entries(seq->private);
2170
2171        ++*pos;
2172        return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2173}
2174
2175static const struct seq_operations sge_qinfo_seq_ops = {
2176        .start = sge_queue_start,
2177        .next  = sge_queue_next,
2178        .stop  = sge_queue_stop,
2179        .show  = sge_qinfo_show
2180};
2181
2182static int sge_qinfo_open(struct inode *inode, struct file *file)
2183{
2184        int res = seq_open(file, &sge_qinfo_seq_ops);
2185
2186        if (!res) {
2187                struct seq_file *seq = file->private_data;
2188                seq->private = inode->i_private;
2189        }
2190        return res;
2191}
2192
2193static const struct file_operations sge_qinfo_debugfs_fops = {
2194        .owner   = THIS_MODULE,
2195        .open    = sge_qinfo_open,
2196        .read    = seq_read,
2197        .llseek  = seq_lseek,
2198        .release = seq_release,
2199};
2200
2201/*
2202 * Show SGE Queue Set statistics.  We display QPL Queues Sets per line.
2203 */
2204#define QPL     4
2205
2206static int sge_qstats_show(struct seq_file *seq, void *v)
2207{
2208        struct adapter *adapter = seq->private;
2209        int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
2210        int qs, r = (uintptr_t)v - 1;
2211
2212        if (r)
2213                seq_putc(seq, '\n');
2214
2215        #define S3(fmt, s, v) \
2216                do { \
2217                        seq_printf(seq, "%-16s", s); \
2218                        for (qs = 0; qs < n; ++qs) \
2219                                seq_printf(seq, " %8" fmt, v); \
2220                        seq_putc(seq, '\n'); \
2221                } while (0)
2222        #define S(s, v)         S3("s", s, v)
2223
2224        #define T3(fmt, s, v)   S3(fmt, s, txq[qs].v)
2225        #define T(s, v)         T3("lu", s, v)
2226
2227        #define R3(fmt, s, v)   S3(fmt, s, rxq[qs].v)
2228        #define R(s, v)         R3("lu", s, v)
2229
2230        if (r < eth_entries) {
2231                const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
2232                const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
2233                int n = min(QPL, adapter->sge.ethqsets - QPL * r);
2234
2235                S("QType:", "Ethernet");
2236                S("Interface:",
2237                  (rxq[qs].rspq.netdev
2238                   ? rxq[qs].rspq.netdev->name
2239                   : "N/A"));
2240                R3("u", "RspQNullInts:", rspq.unhandled_irqs);
2241                R("RxPackets:", stats.pkts);
2242                R("RxCSO:", stats.rx_cso);
2243                R("VLANxtract:", stats.vlan_ex);
2244                R("LROmerged:", stats.lro_merged);
2245                R("LROpackets:", stats.lro_pkts);
2246                R("RxDrops:", stats.rx_drops);
2247                T("TSO:", tso);
2248                T("TxCSO:", tx_cso);
2249                T("VLANins:", vlan_ins);
2250                T("TxQFull:", q.stops);
2251                T("TxQRestarts:", q.restarts);
2252                T("TxMapErr:", mapping_err);
2253                R("FLAllocErr:", fl.alloc_failed);
2254                R("FLLrgAlcErr:", fl.large_alloc_failed);
2255                R("FLStarving:", fl.starving);
2256                return 0;
2257        }
2258
2259        r -= eth_entries;
2260        if (r == 0) {
2261                const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
2262
2263                seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
2264                seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2265                           evtq->unhandled_irqs);
2266                seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
2267                seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
2268        } else if (r == 1) {
2269                const struct sge_rspq *intrq = &adapter->sge.intrq;
2270
2271                seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
2272                seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2273                           intrq->unhandled_irqs);
2274                seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
2275                seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
2276        }
2277
2278        #undef R
2279        #undef T
2280        #undef S
2281        #undef R3
2282        #undef T3
2283        #undef S3
2284
2285        return 0;
2286}
2287
2288/*
2289 * Return the number of "entries" in our "file".  We group the multi-Queue
2290 * sections with QPL Queue Sets per "entry".  The sections of the output are:
2291 *
2292 *     Ethernet RX/TX Queue Sets
2293 *     Firmware Event Queue
2294 *     Forwarded Interrupt Queue (if in MSI mode)
2295 */
2296static int sge_qstats_entries(const struct adapter *adapter)
2297{
2298        return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2299                ((adapter->flags & CXGB4VF_USING_MSI) != 0);
2300}
2301
2302static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
2303{
2304        int entries = sge_qstats_entries(seq->private);
2305
2306        return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2307}
2308
2309static void sge_qstats_stop(struct seq_file *seq, void *v)
2310{
2311}
2312
2313static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
2314{
2315        int entries = sge_qstats_entries(seq->private);
2316
2317        (*pos)++;
2318        return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2319}
2320
2321static const struct seq_operations sge_qstats_seq_ops = {
2322        .start = sge_qstats_start,
2323        .next  = sge_qstats_next,
2324        .stop  = sge_qstats_stop,
2325        .show  = sge_qstats_show
2326};
2327
2328static int sge_qstats_open(struct inode *inode, struct file *file)
2329{
2330        int res = seq_open(file, &sge_qstats_seq_ops);
2331
2332        if (res == 0) {
2333                struct seq_file *seq = file->private_data;
2334                seq->private = inode->i_private;
2335        }
2336        return res;
2337}
2338
2339static const struct file_operations sge_qstats_proc_fops = {
2340        .owner   = THIS_MODULE,
2341        .open    = sge_qstats_open,
2342        .read    = seq_read,
2343        .llseek  = seq_lseek,
2344        .release = seq_release,
2345};
2346
2347/*
2348 * Show PCI-E SR-IOV Virtual Function Resource Limits.
2349 */
2350static int resources_show(struct seq_file *seq, void *v)
2351{
2352        struct adapter *adapter = seq->private;
2353        struct vf_resources *vfres = &adapter->params.vfres;
2354
2355        #define S(desc, fmt, var) \
2356                seq_printf(seq, "%-60s " fmt "\n", \
2357                           desc " (" #var "):", vfres->var)
2358
2359        S("Virtual Interfaces", "%d", nvi);
2360        S("Egress Queues", "%d", neq);
2361        S("Ethernet Control", "%d", nethctrl);
2362        S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
2363        S("Ingress Queues", "%d", niq);
2364        S("Traffic Class", "%d", tc);
2365        S("Port Access Rights Mask", "%#x", pmask);
2366        S("MAC Address Filters", "%d", nexactf);
2367        S("Firmware Command Read Capabilities", "%#x", r_caps);
2368        S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
2369
2370        #undef S
2371
2372        return 0;
2373}
2374DEFINE_SHOW_ATTRIBUTE(resources);
2375
2376/*
2377 * Show Virtual Interfaces.
2378 */
2379static int interfaces_show(struct seq_file *seq, void *v)
2380{
2381        if (v == SEQ_START_TOKEN) {
2382                seq_puts(seq, "Interface  Port   VIID\n");
2383        } else {
2384                struct adapter *adapter = seq->private;
2385                int pidx = (uintptr_t)v - 2;
2386                struct net_device *dev = adapter->port[pidx];
2387                struct port_info *pi = netdev_priv(dev);
2388
2389                seq_printf(seq, "%9s  %4d  %#5x\n",
2390                           dev->name, pi->port_id, pi->viid);
2391        }
2392        return 0;
2393}
2394
2395static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
2396{
2397        return pos <= adapter->params.nports
2398                ? (void *)(uintptr_t)(pos + 1)
2399                : NULL;
2400}
2401
2402static void *interfaces_start(struct seq_file *seq, loff_t *pos)
2403{
2404        return *pos
2405                ? interfaces_get_idx(seq->private, *pos)
2406                : SEQ_START_TOKEN;
2407}
2408
2409static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
2410{
2411        (*pos)++;
2412        return interfaces_get_idx(seq->private, *pos);
2413}
2414
2415static void interfaces_stop(struct seq_file *seq, void *v)
2416{
2417}
2418
2419static const struct seq_operations interfaces_seq_ops = {
2420        .start = interfaces_start,
2421        .next  = interfaces_next,
2422        .stop  = interfaces_stop,
2423        .show  = interfaces_show
2424};
2425
2426static int interfaces_open(struct inode *inode, struct file *file)
2427{
2428        int res = seq_open(file, &interfaces_seq_ops);
2429
2430        if (res == 0) {
2431                struct seq_file *seq = file->private_data;
2432                seq->private = inode->i_private;
2433        }
2434        return res;
2435}
2436
2437static const struct file_operations interfaces_proc_fops = {
2438        .owner   = THIS_MODULE,
2439        .open    = interfaces_open,
2440        .read    = seq_read,
2441        .llseek  = seq_lseek,
2442        .release = seq_release,
2443};
2444
2445/*
2446 * /sys/kernel/debugfs/cxgb4vf/ files list.
2447 */
2448struct cxgb4vf_debugfs_entry {
2449        const char *name;               /* name of debugfs node */
2450        umode_t mode;                   /* file system mode */
2451        const struct file_operations *fops;
2452};
2453
2454static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2455        { "mboxlog",    0444, &mboxlog_fops },
2456        { "sge_qinfo",  0444, &sge_qinfo_debugfs_fops },
2457        { "sge_qstats", 0444, &sge_qstats_proc_fops },
2458        { "resources",  0444, &resources_fops },
2459        { "interfaces", 0444, &interfaces_proc_fops },
2460};
2461
2462/*
2463 * Module and device initialization and cleanup code.
2464 * ==================================================
2465 */
2466
2467/*
2468 * Set up out /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2469 * directory (debugfs_root) has already been set up.
2470 */
2471static int setup_debugfs(struct adapter *adapter)
2472{
2473        int i;
2474
2475        BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2476
2477        /*
2478         * Debugfs support is best effort.
2479         */
2480        for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2481                (void)debugfs_create_file(debugfs_files[i].name,
2482                                  debugfs_files[i].mode,
2483                                  adapter->debugfs_root,
2484                                  (void *)adapter,
2485                                  debugfs_files[i].fops);
2486
2487        return 0;
2488}
2489
2490/*
2491 * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
2492 * it to our caller to tear down the directory (debugfs_root).
2493 */
2494static void cleanup_debugfs(struct adapter *adapter)
2495{
2496        BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2497
2498        /*
2499         * Unlike our sister routine cleanup_proc(), we don't need to remove
2500         * individual entries because a call will be made to
2501         * debugfs_remove_recursive().  We just need to clean up any ancillary
2502         * persistent state.
2503         */
2504        /* nothing to do */
2505}
2506
2507/* Figure out how many Ports and Queue Sets we can support.  This depends on
2508 * knowing our Virtual Function Resources and may be called a second time if
2509 * we fall back from MSI-X to MSI Interrupt Mode.
2510 */
2511static void size_nports_qsets(struct adapter *adapter)
2512{
2513        struct vf_resources *vfres = &adapter->params.vfres;
2514        unsigned int ethqsets, pmask_nports;
2515
2516        /* The number of "ports" which we support is equal to the number of
2517         * Virtual Interfaces with which we've been provisioned.
2518         */
2519        adapter->params.nports = vfres->nvi;
2520        if (adapter->params.nports > MAX_NPORTS) {
2521                dev_warn(adapter->pdev_dev, "only using %d of %d maximum"
2522                         " allowed virtual interfaces\n", MAX_NPORTS,
2523                         adapter->params.nports);
2524                adapter->params.nports = MAX_NPORTS;
2525        }
2526
2527        /* We may have been provisioned with more VIs than the number of
2528         * ports we're allowed to access (our Port Access Rights Mask).
2529         * This is obviously a configuration conflict but we don't want to
2530         * crash the kernel or anything silly just because of that.
2531         */
2532        pmask_nports = hweight32(adapter->params.vfres.pmask);
2533        if (pmask_nports < adapter->params.nports) {
2534                dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
2535                         " virtual interfaces; limited by Port Access Rights"
2536                         " mask %#x\n", pmask_nports, adapter->params.nports,
2537                         adapter->params.vfres.pmask);
2538                adapter->params.nports = pmask_nports;
2539        }
2540
2541        /* We need to reserve an Ingress Queue for the Asynchronous Firmware
2542         * Event Queue.  And if we're using MSI Interrupts, we'll also need to
2543         * reserve an Ingress Queue for a Forwarded Interrupts.
2544         *
2545         * The rest of the FL/Intr-capable ingress queues will be matched up
2546         * one-for-one with Ethernet/Control egress queues in order to form
2547         * "Queue Sets" which will be aportioned between the "ports".  For
2548         * each Queue Set, we'll need the ability to allocate two Egress
2549         * Contexts -- one for the Ingress Queue Free List and one for the TX
2550         * Ethernet Queue.
2551         *
2552         * Note that even if we're currently configured to use MSI-X
2553         * Interrupts (module variable msi == MSI_MSIX) we may get downgraded
2554         * to MSI Interrupts if we can't get enough MSI-X Interrupts.  If that
2555         * happens we'll need to adjust things later.
2556         */
2557        ethqsets = vfres->niqflint - 1 - (msi == MSI_MSI);
2558        if (vfres->nethctrl != ethqsets)
2559                ethqsets = min(vfres->nethctrl, ethqsets);
2560        if (vfres->neq < ethqsets*2)
2561                ethqsets = vfres->neq/2;
2562        if (ethqsets > MAX_ETH_QSETS)
2563                ethqsets = MAX_ETH_QSETS;
2564        adapter->sge.max_ethqsets = ethqsets;
2565
2566        if (adapter->sge.max_ethqsets < adapter->params.nports) {
2567                dev_warn(adapter->pdev_dev, "only using %d of %d available"
2568                         " virtual interfaces (too few Queue Sets)\n",
2569                         adapter->sge.max_ethqsets, adapter->params.nports);
2570                adapter->params.nports = adapter->sge.max_ethqsets;
2571        }
2572}
2573
2574/*
2575 * Perform early "adapter" initialization.  This is where we discover what
2576 * adapter parameters we're going to be using and initialize basic adapter
2577 * hardware support.
2578 */
2579static int adap_init0(struct adapter *adapter)
2580{
2581        struct sge_params *sge_params = &adapter->params.sge;
2582        struct sge *s = &adapter->sge;
2583        int err;
2584        u32 param, val = 0;
2585
2586        /*
2587         * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2588         * 2.6.31 and later we can't call pci_reset_function() in order to
2589         * issue an FLR because of a self- deadlock on the device semaphore.
2590         * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2591         * cases where they're needed -- for instance, some versions of KVM
2592         * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2593         * use the firmware based reset in order to reset any per function
2594         * state.
2595         */
2596        err = t4vf_fw_reset(adapter);
2597        if (err < 0) {
2598                dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2599                return err;
2600        }
2601
2602        /*
2603         * Grab basic operational parameters.  These will predominantly have
2604         * been set up by the Physical Function Driver or will be hard coded
2605         * into the adapter.  We just have to live with them ...  Note that
2606         * we _must_ get our VPD parameters before our SGE parameters because
2607         * we need to know the adapter's core clock from the VPD in order to
2608         * properly decode the SGE Timer Values.
2609         */
2610        err = t4vf_get_dev_params(adapter);
2611        if (err) {
2612                dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2613                        " device parameters: err=%d\n", err);
2614                return err;
2615        }
2616        err = t4vf_get_vpd_params(adapter);
2617        if (err) {
2618                dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2619                        " VPD parameters: err=%d\n", err);
2620                return err;
2621        }
2622        err = t4vf_get_sge_params(adapter);
2623        if (err) {
2624                dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2625                        " SGE parameters: err=%d\n", err);
2626                return err;
2627        }
2628        err = t4vf_get_rss_glb_config(adapter);
2629        if (err) {
2630                dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2631                        " RSS parameters: err=%d\n", err);
2632                return err;
2633        }
2634        if (adapter->params.rss.mode !=
2635            FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2636                dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2637                        " mode %d\n", adapter->params.rss.mode);
2638                return -EINVAL;
2639        }
2640        err = t4vf_sge_init(adapter);
2641        if (err) {
2642                dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2643                        " err=%d\n", err);
2644                return err;
2645        }
2646
2647        /* If we're running on newer firmware, let it know that we're
2648         * prepared to deal with encapsulated CPL messages.  Older
2649         * firmware won't understand this and we'll just get
2650         * unencapsulated messages ...
2651         */
2652        param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2653                FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
2654        val = 1;
2655        (void) t4vf_set_params(adapter, 1, &param, &val);
2656
2657        /*
2658         * Retrieve our RX interrupt holdoff timer values and counter
2659         * threshold values from the SGE parameters.
2660         */
2661        s->timer_val[0] = core_ticks_to_us(adapter,
2662                TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
2663        s->timer_val[1] = core_ticks_to_us(adapter,
2664                TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
2665        s->timer_val[2] = core_ticks_to_us(adapter,
2666                TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
2667        s->timer_val[3] = core_ticks_to_us(adapter,
2668                TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
2669        s->timer_val[4] = core_ticks_to_us(adapter,
2670                TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
2671        s->timer_val[5] = core_ticks_to_us(adapter,
2672                TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));
2673
2674        s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
2675        s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
2676        s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
2677        s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);
2678
2679        /*
2680         * Grab our Virtual Interface resource allocation, extract the
2681         * features that we're interested in and do a bit of sanity testing on
2682         * what we discover.
2683         */
2684        err = t4vf_get_vfres(adapter);
2685        if (err) {
2686                dev_err(adapter->pdev_dev, "unable to get virtual interface"
2687                        " resources: err=%d\n", err);
2688                return err;
2689        }
2690
2691        /* Check for various parameter sanity issues */
2692        if (adapter->params.vfres.pmask == 0) {
2693                dev_err(adapter->pdev_dev, "no port access configured\n"
2694                        "usable!\n");
2695                return -EINVAL;
2696        }
2697        if (adapter->params.vfres.nvi == 0) {
2698                dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2699                        "usable!\n");
2700                return -EINVAL;
2701        }
2702
2703        /* Initialize nports and max_ethqsets now that we have our Virtual
2704         * Function Resources.
2705         */
2706        size_nports_qsets(adapter);
2707
2708        adapter->flags |= CXGB4VF_FW_OK;
2709        return 0;
2710}
2711
2712static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2713                             u8 pkt_cnt_idx, unsigned int size,
2714                             unsigned int iqe_size)
2715{
2716        rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
2717                             (pkt_cnt_idx < SGE_NCOUNTERS ?
2718                              QINTR_CNT_EN_F : 0));
2719        rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2720                            ? pkt_cnt_idx
2721                            : 0);
2722        rspq->iqe_len = iqe_size;
2723        rspq->size = size;
2724}
2725
2726/*
2727 * Perform default configuration of DMA queues depending on the number and
2728 * type of ports we found and the number of available CPUs.  Most settings can
2729 * be modified by the admin via ethtool and cxgbtool prior to the adapter
2730 * being brought up for the first time.
2731 */
2732static void cfg_queues(struct adapter *adapter)
2733{
2734        struct sge *s = &adapter->sge;
2735        int q10g, n10g, qidx, pidx, qs;
2736        size_t iqe_size;
2737
2738        /*
2739         * We should not be called till we know how many Queue Sets we can
2740         * support.  In particular, this means that we need to know what kind
2741         * of interrupts we'll be using ...
2742         */
2743        BUG_ON((adapter->flags &
2744               (CXGB4VF_USING_MSIX | CXGB4VF_USING_MSI)) == 0);
2745
2746        /*
2747         * Count the number of 10GbE Virtual Interfaces that we have.
2748         */
2749        n10g = 0;
2750        for_each_port(adapter, pidx)
2751                n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2752
2753        /*
2754         * We default to 1 queue per non-10G port and up to # of cores queues
2755         * per 10G port.
2756         */
2757        if (n10g == 0)
2758                q10g = 0;
2759        else {
2760                int n1g = (adapter->params.nports - n10g);
2761                q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2762                if (q10g > num_online_cpus())
2763                        q10g = num_online_cpus();
2764        }
2765
2766        /*
2767         * Allocate the "Queue Sets" to the various Virtual Interfaces.
2768         * The layout will be established in setup_sge_queues() when the
2769         * adapter is brough up for the first time.
2770         */
2771        qidx = 0;
2772        for_each_port(adapter, pidx) {
2773                struct port_info *pi = adap2pinfo(adapter, pidx);
2774
2775                pi->first_qset = qidx;
2776                pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
2777                qidx += pi->nqsets;
2778        }
2779        s->ethqsets = qidx;
2780
2781        /*
2782         * The Ingress Queue Entry Size for our various Response Queues needs
2783         * to be big enough to accommodate the largest message we can receive
2784         * from the chip/firmware; which is 64 bytes ...
2785         */
2786        iqe_size = 64;
2787
2788        /*
2789         * Set up default Queue Set parameters ...  Start off with the
2790         * shortest interrupt holdoff timer.
2791         */
2792        for (qs = 0; qs < s->max_ethqsets; qs++) {
2793                struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2794                struct sge_eth_txq *txq = &s->ethtxq[qs];
2795
2796                init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2797                rxq->fl.size = 72;
2798                txq->q.size = 1024;
2799        }
2800
2801        /*
2802         * The firmware event queue is used for link state changes and
2803         * notifications of TX DMA completions.
2804         */
2805        init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2806
2807        /*
2808         * The forwarded interrupt queue is used when we're in MSI interrupt
2809         * mode.  In this mode all interrupts associated with RX queues will
2810         * be forwarded to a single queue which we'll associate with our MSI
2811         * interrupt vector.  The messages dropped in the forwarded interrupt
2812         * queue will indicate which ingress queue needs servicing ...  This
2813         * queue needs to be large enough to accommodate all of the ingress
2814         * queues which are forwarding their interrupt (+1 to prevent the PIDX
2815         * from equalling the CIDX if every ingress queue has an outstanding
2816         * interrupt).  The queue doesn't need to be any larger because no
2817         * ingress queue will ever have more than one outstanding interrupt at
2818         * any time ...
2819         */
2820        init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2821                  iqe_size);
2822}
2823
2824/*
2825 * Reduce the number of Ethernet queues across all ports to at most n.
2826 * n provides at least one queue per port.
2827 */
2828static void reduce_ethqs(struct adapter *adapter, int n)
2829{
2830        int i;
2831        struct port_info *pi;
2832
2833        /*
2834         * While we have too many active Ether Queue Sets, interate across the
2835         * "ports" and reduce their individual Queue Set allocations.
2836         */
2837        BUG_ON(n < adapter->params.nports);
2838        while (n < adapter->sge.ethqsets)
2839                for_each_port(adapter, i) {
2840                        pi = adap2pinfo(adapter, i);
2841                        if (pi->nqsets > 1) {
2842                                pi->nqsets--;
2843                                adapter->sge.ethqsets--;
2844                                if (adapter->sge.ethqsets <= n)
2845                                        break;
2846                        }
2847                }
2848
2849        /*
2850         * Reassign the starting Queue Sets for each of the "ports" ...
2851         */
2852        n = 0;
2853        for_each_port(adapter, i) {
2854                pi = adap2pinfo(adapter, i);
2855                pi->first_qset = n;
2856                n += pi->nqsets;
2857        }
2858}
2859
2860/*
2861 * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2862 * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2863 * need.  Minimally we need one for every Virtual Interface plus those needed
2864 * for our "extras".  Note that this process may lower the maximum number of
2865 * allowed Queue Sets ...
2866 */
2867static int enable_msix(struct adapter *adapter)
2868{
2869        int i, want, need, nqsets;
2870        struct msix_entry entries[MSIX_ENTRIES];
2871        struct sge *s = &adapter->sge;
2872
2873        for (i = 0; i < MSIX_ENTRIES; ++i)
2874                entries[i].entry = i;
2875
2876        /*
2877         * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2878         * plus those needed for our "extras" (for example, the firmware
2879         * message queue).  We _need_ at least one "Queue Set" per Virtual
2880         * Interface plus those needed for our "extras".  So now we get to see
2881         * if the song is right ...
2882         */
2883        want = s->max_ethqsets + MSIX_EXTRAS;
2884        need = adapter->params.nports + MSIX_EXTRAS;
2885
2886        want = pci_enable_msix_range(adapter->pdev, entries, need, want);
2887        if (want < 0)
2888                return want;
2889
2890        nqsets = want - MSIX_EXTRAS;
2891        if (nqsets < s->max_ethqsets) {
2892                dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2893                         " for %d Queue Sets\n", nqsets);
2894                s->max_ethqsets = nqsets;
2895                if (nqsets < s->ethqsets)
2896                        reduce_ethqs(adapter, nqsets);
2897        }
2898        for (i = 0; i < want; ++i)
2899                adapter->msix_info[i].vec = entries[i].vector;
2900
2901        return 0;
2902}
2903
2904static const struct net_device_ops cxgb4vf_netdev_ops   = {
2905        .ndo_open               = cxgb4vf_open,
2906        .ndo_stop               = cxgb4vf_stop,
2907        .ndo_start_xmit         = t4vf_eth_xmit,
2908        .ndo_get_stats          = cxgb4vf_get_stats,
2909        .ndo_set_rx_mode        = cxgb4vf_set_rxmode,
2910        .ndo_set_mac_address    = cxgb4vf_set_mac_addr,
2911        .ndo_validate_addr      = eth_validate_addr,
2912        .ndo_do_ioctl           = cxgb4vf_do_ioctl,
2913        .ndo_change_mtu         = cxgb4vf_change_mtu,
2914        .ndo_fix_features       = cxgb4vf_fix_features,
2915        .ndo_set_features       = cxgb4vf_set_features,
2916#ifdef CONFIG_NET_POLL_CONTROLLER
2917        .ndo_poll_controller    = cxgb4vf_poll_controller,
2918#endif
2919};
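
/*
 * These operations are invoked indirectly by the networking core: for
 * example, bringing an interface up ("ip link set <dev> up") reaches
 * cxgb4vf_open() via .ndo_open, and every transmitted packet enters the
 * driver through t4vf_eth_xmit() via .ndo_start_xmit.
 */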
2920
2921/*
2922 * "Probe" a device: initialize a device and construct all kernel and driver
2923 * state needed to manage the device.  This routine is called "init_one" in
2924 * the PF Driver ...
2925 */
2926static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2927                             const struct pci_device_id *ent)
2928{
2929        int pci_using_dac;
2930        int err, pidx;
2931        unsigned int pmask;
2932        struct adapter *adapter;
2933        struct port_info *pi;
2934        struct net_device *netdev;
2935        unsigned int pf;
2936
2937        /*
2938         * Print our driver banner the first time we're called to initialize a
2939         * device.
2940         */
2941        pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);
2942
2943        /*
2944         * Initialize generic PCI device state.
2945         */
2946        err = pci_enable_device(pdev);
2947        if (err) {
2948                dev_err(&pdev->dev, "cannot enable PCI device\n");
2949                return err;
2950        }
2951
2952        /*
2953         * Reserve PCI resources for the device.  If we can't get them some
2954         * other driver may have already claimed the device ...
2955         */
2956        err = pci_request_regions(pdev, KBUILD_MODNAME);
2957        if (err) {
2958                dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2959                goto err_disable_device;
2960        }
2961
2962        /*
2963         * Set up our DMA mask: try for 64-bit address masking first and
2964         * fall back to 32-bit if we can't get 64 bits ...
2965         */
2966        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2967        if (err == 0) {
2968                err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2969                if (err) {
2970                        dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2971                                " coherent allocations\n");
2972                        goto err_release_regions;
2973                }
2974                pci_using_dac = 1;
2975        } else {
2976                err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2977                if (err != 0) {
2978                        dev_err(&pdev->dev, "no usable DMA configuration\n");
2979                        goto err_release_regions;
2980                }
2981                pci_using_dac = 0;
2982        }
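
        /*
         * Editorial aside (not part of the original driver): on kernels
         * that provide it, the streaming/coherent mask pair above can be
         * set with a single call per width.  A compiled-out sketch that
         * mirrors the logic above:
         */
#if 0
        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
        if (err == 0) {
                pci_using_dac = 1;
        } else {
                err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
                if (err) {
                        dev_err(&pdev->dev, "no usable DMA configuration\n");
                        goto err_release_regions;
                }
                pci_using_dac = 0;
        }
#endif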
2983
2984        /*
2985         * Enable bus mastering for the device ...
2986         */
2987        pci_set_master(pdev);
2988
2989        /*
2990         * Allocate our adapter data structure and attach it to the device.
2991         */
2992        adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2993        if (!adapter) {
2994                err = -ENOMEM;
2995                goto err_release_regions;
2996        }
2997        pci_set_drvdata(pdev, adapter);
2998        adapter->pdev = pdev;
2999        adapter->pdev_dev = &pdev->dev;
3000
3001        adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
3002                                    (sizeof(struct mbox_cmd) *
3003                                     T4VF_OS_LOG_MBOX_CMDS),
3004                                    GFP_KERNEL);
3005        if (!adapter->mbox_log) {
3006                err = -ENOMEM;
3007                goto err_free_adapter;
3008        }
3009        adapter->mbox_log->size = T4VF_OS_LOG_MBOX_CMDS;
3010
3011        /*
3012         * Initialize SMP data synchronization resources.
3013         */
3014        spin_lock_init(&adapter->stats_lock);
3015        spin_lock_init(&adapter->mbox_lock);
3016        INIT_LIST_HEAD(&adapter->mlist.list);
3017
3018        /*
3019         * Map our I/O registers in BAR0.
3020         */
3021        adapter->regs = pci_ioremap_bar(pdev, 0);
3022        if (!adapter->regs) {
3023                dev_err(&pdev->dev, "cannot map device registers\n");
3024                err = -ENOMEM;
3025                goto err_free_adapter;
3026        }
3027
3028        /* Wait for the device to become ready before proceeding ...
3029         */
3030        err = t4vf_prep_adapter(adapter);
3031        if (err) {
3032                dev_err(adapter->pdev_dev, "device didn't become ready:"
3033                        " err=%d\n", err);
3034                goto err_unmap_bar0;
3035        }
3036
3037        /* For T5 and later we want to use the new BAR-based User Doorbells,
3038         * so we need to map BAR2 here ...
3039         */
3040        if (!is_t4(adapter->params.chip)) {
3041                adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
3042                                           pci_resource_len(pdev, 2));
3043                if (!adapter->bar2) {
3044                        dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
3045                        err = -ENOMEM;
3046                        goto err_unmap_bar0;
3047                }
3048        }
3049        /*
3050         * Initialize adapter level features.
3051         */
3052        adapter->name = pci_name(pdev);
3053        adapter->msg_enable = DFLT_MSG_ENABLE;
3054
3055        /* If possible, we use the PCIe Relaxed Ordering Attribute to deliver
3056         * Ingress Packet Data to Free List Buffers in order to allow for
3057         * chipset performance optimizations between the Root Complex and
3058         * Memory Controllers.  (Messages to the associated Ingress Queue
3059         * notifying new Packet Placement in the Free List Buffers will be
3060         * sent without the Relaxed Ordering Attribute, thus guaranteeing
3061         * that all preceding PCIe Transaction Layer Packets will be
3062         * processed first.)  But some Root Complexes have various issues
3063         * with Upstream Transaction Layer Packets that have the Relaxed
3064         * Ordering Attribute set.  PCIe devices below such Root Complexes
3065         * will have had the Relaxed Ordering bit cleared in their PCI
3066         * Configuration Space, so we check ours to see if it's flagged
3067         * with advice against using Relaxed Ordering.
3068         */
3069        if (!pcie_relaxed_ordering_enabled(pdev))
3070                adapter->flags |= CXGB4VF_ROOT_NO_RELAXED_ORDERING;
3071
3072        err = adap_init0(adapter);
3073        if (err)
3074                dev_err(&pdev->dev,
3075                        "Adapter initialization failed, error %d. Continuing in debug mode\n",
3076                        err);
3077
3078        /* Initialize the hash MAC address list */
3079        INIT_LIST_HEAD(&adapter->mac_hlist);
3080
3081        /*
3082         * Allocate our "adapter ports" and stitch everything together.
3083         */
3084        pmask = adapter->params.vfres.pmask;
3085        pf = t4vf_get_pf_from_vf(adapter);
3086        for_each_port(adapter, pidx) {
3087                int port_id, viid;
3088                u8 mac[ETH_ALEN];
3089                unsigned int naddr = 1;
3090
3091                /*
3092                 * We simplistically allocate our virtual interfaces
3093                 * sequentially across the port numbers to which we have
3094                 * access rights.  This should be configurable in some manner
3095                 * ...
3096                 */
3097                if (pmask == 0)
3098                        break;
3099                port_id = ffs(pmask) - 1;
3100                pmask &= ~(1 << port_id);
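
                /*
                 * Editorial example: with access rights to ports 1 and 3,
                 * pmask starts as 0b1010.  ffs(pmask) - 1 yields port_id 1
                 * on the first pass and 3 on the second; pmask is then 0,
                 * so the check above ends the loop on any further pass.
                 */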
3101
3102                /*
3103                 * Allocate our network device and stitch things together.
3104                 */
3105                netdev = alloc_etherdev_mq(sizeof(struct port_info),
3106                                           MAX_PORT_QSETS);
3107                if (netdev == NULL) {
3108                        err = -ENOMEM;
3109                        goto err_free_dev;
3110                }
3111                adapter->port[pidx] = netdev;
3112                SET_NETDEV_DEV(netdev, &pdev->dev);
3113                pi = netdev_priv(netdev);
3114                pi->adapter = adapter;
3115                pi->pidx = pidx;
3116                pi->port_id = port_id;
3117
3118                /*
3119                 * Initialize the starting state of our "port" and register
3120                 * it.
3121                 */
3122                pi->xact_addr_filt = -1;
3123                netdev->irq = pdev->irq;
3124
3125                netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_GRO |
3126                        NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
3127                        NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
3128                netdev->features = netdev->hw_features;
3129                if (pci_using_dac)
3130                        netdev->features |= NETIF_F_HIGHDMA;
3131                netdev->vlan_features = netdev->features & VLAN_FEAT;
3132
3133                netdev->priv_flags |= IFF_UNICAST_FLT;
3134                netdev->min_mtu = 81;
3135                netdev->max_mtu = ETH_MAX_MTU;
3136
3137                netdev->netdev_ops = &cxgb4vf_netdev_ops;
3138                netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
3139                netdev->dev_port = pi->port_id;
3140
3141                /*
3142                 * If we haven't been able to contact the firmware, there's
3143                 * nothing else we can do for this "port" ...
3144                 */
3145                if (!(adapter->flags & CXGB4VF_FW_OK))
3146                        continue;
3147
3148                viid = t4vf_alloc_vi(adapter, port_id);
3149                if (viid < 0) {
3150                        dev_err(&pdev->dev,
3151                                "cannot allocate VI for port %d: err=%d\n",
3152                                port_id, viid);
3153                        err = viid;
3154                        goto err_free_dev;
3155                }
3156                pi->viid = viid;
3157
3158                /*
3159                 * Initialize the hardware/software state for the port.
3160                 */
3161                err = t4vf_port_init(adapter, pidx);
3162                if (err) {
3163                        dev_err(&pdev->dev, "cannot initialize port %d\n",
3164                                pidx);
3165                        goto err_free_dev;
3166                }
3167
3168                err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac);
3169                if (err) {
3170                        dev_err(&pdev->dev,
3171                                "unable to determine MAC ACL address, "
3172                                "continuing anyway.. (status %d)\n", err);
3173                } else if (naddr && adapter->params.vfres.nvi == 1) {
3174                        struct sockaddr addr;
3175
3176                        ether_addr_copy(addr.sa_data, mac);
3177                        err = cxgb4vf_set_mac_addr(netdev, &addr);
3178                        if (err) {
3179                                dev_err(&pdev->dev,
3180                                        "unable to set MAC address %pM\n",
3181                                        mac);
3182                                goto err_free_dev;
3183                        }
3184                        dev_info(&pdev->dev,
3185                                 "Using assigned MAC ACL: %pM\n", mac);
3186                }
3187        }
3188
3189        /* See what interrupts we'll be using.  If we've been configured to
3190         * use MSI-X interrupts, try to enable them but fall back to using
3191         * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
3192         * get MSI interrupts we bail with the error.
3193         */
3194        if (msi == MSI_MSIX && enable_msix(adapter) == 0)
3195                adapter->flags |= CXGB4VF_USING_MSIX;
3196        else {
3197                if (msi == MSI_MSIX) {
3198                        dev_info(adapter->pdev_dev,
3199                                 "Unable to use MSI-X Interrupts; falling "
3200                                 "back to MSI Interrupts\n");
3201
3202                        /* We're going to need a Forwarded Interrupt Queue,
3203                         * which may cut into how many Queue Sets we can
3204                         * support.
3205                         */
3206                        msi = MSI_MSI;
3207                        size_nports_qsets(adapter);
3208                }
3209                err = pci_enable_msi(pdev);
3210                if (err) {
3211                        dev_err(&pdev->dev, "Unable to allocate MSI Interrupts;"
3212                                " err=%d\n", err);
3213                        goto err_free_dev;
3214                }
3215                adapter->flags |= CXGB4VF_USING_MSI;
3216        }
3217
3218        /* Now that we know how many "ports" we have and what interrupt
3219         * mechanism we're going to use, we can configure our queue resources.
3220         */
3221        cfg_queues(adapter);
3222
3223        /*
3224         * The "card" is now ready to go.  If any errors occur during device
3225         * registration we do not fail the whole "card" but rather proceed
3226         * only with the ports we manage to register successfully.  However we
3227         * must register at least one net device.
3228         */
3229        for_each_port(adapter, pidx) {
3230                struct port_info *pi = netdev_priv(adapter->port[pidx]);
3231                netdev = adapter->port[pidx];
3232                if (netdev == NULL)
3233                        continue;
3234
3235                netif_set_real_num_tx_queues(netdev, pi->nqsets);
3236                netif_set_real_num_rx_queues(netdev, pi->nqsets);
3237
3238                err = register_netdev(netdev);
3239                if (err) {
3240                        dev_warn(&pdev->dev, "cannot register net device %s,"
3241                                 " skipping\n", netdev->name);
3242                        continue;
3243                }
3244
3245                netif_carrier_off(netdev);
3246                set_bit(pidx, &adapter->registered_device_map);
3247        }
3248        if (adapter->registered_device_map == 0) {
3249                dev_err(&pdev->dev, "could not register any net devices\n");
3250                goto err_disable_interrupts;
3251        }
3252
3253        /*
3254         * Set up our debugfs entries.
3255         */
3256        if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
3257                adapter->debugfs_root =
3258                        debugfs_create_dir(pci_name(pdev),
3259                                           cxgb4vf_debugfs_root);
3260                if (IS_ERR_OR_NULL(adapter->debugfs_root))
3261                        dev_warn(&pdev->dev, "could not create debugfs"
3262                                 " directory");
3263                else
3264                        setup_debugfs(adapter);
3265        }
3266
3267        /*
3268         * Print a short notice on the existence and configuration of the new
3269         * VF network device ...
3270         */
3271        for_each_port(adapter, pidx) {
3272                dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
3273                         adapter->port[pidx]->name,
3274                         (adapter->flags & CXGB4VF_USING_MSIX) ? "MSI-X" :
3275                         (adapter->flags & CXGB4VF_USING_MSI)  ? "MSI" : "");
3276        }
3277
3278        /*
3279         * Return success!
3280         */
3281        return 0;
3282
3283        /*
3284         * Error recovery and exit code.  Unwind state that's been created
3285         * so far and return the error.
3286         */
3287err_disable_interrupts:
3288        if (adapter->flags & CXGB4VF_USING_MSIX) {
3289                pci_disable_msix(adapter->pdev);
3290                adapter->flags &= ~CXGB4VF_USING_MSIX;
3291        } else if (adapter->flags & CXGB4VF_USING_MSI) {
3292                pci_disable_msi(adapter->pdev);
3293                adapter->flags &= ~CXGB4VF_USING_MSI;
3294        }
3295
3296err_free_dev:
3297        for_each_port(adapter, pidx) {
3298                netdev = adapter->port[pidx];
3299                if (netdev == NULL)
3300                        continue;
3301                pi = netdev_priv(netdev);
3302                if (pi->viid)
3303                        t4vf_free_vi(adapter, pi->viid);
3304                if (test_bit(pidx, &adapter->registered_device_map))
3305                        unregister_netdev(netdev);
3306                free_netdev(netdev);
3307        }
3308
3309        if (!is_t4(adapter->params.chip))
3310                iounmap(adapter->bar2);
3311
3312err_unmap_bar0:
3313        iounmap(adapter->regs);
3314
3315err_free_adapter:
3316        kfree(adapter->mbox_log);
3317        kfree(adapter);
3318
3319err_release_regions:
3320        pci_release_regions(pdev);
3321        pci_clear_master(pdev);
3322
3323err_disable_device:
3324        pci_disable_device(pdev);
3325
3326        return err;
3327}
3328
3329/*
3330 * "Remove" a device: tear down all kernel and driver state created in the
3331 * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
3332 * that this is called "remove_one" in the PF Driver.)
3333 */
3334static void cxgb4vf_pci_remove(struct pci_dev *pdev)
3335{
3336        struct adapter *adapter = pci_get_drvdata(pdev);
3337        struct hash_mac_addr *entry, *tmp;
3338
3339        /*
3340         * Tear down driver state associated with device.
3341         */
3342        if (adapter) {
3343                int pidx;
3344
3345                /*
3346                 * Stop all of our activity.  Unregister network port,
3347                 * disable interrupts, etc.
3348                 */
3349                for_each_port(adapter, pidx)
3350                        if (test_bit(pidx, &adapter->registered_device_map))
3351                                unregister_netdev(adapter->port[pidx]);
3352                t4vf_sge_stop(adapter);
3353                if (adapter->flags & CXGB4VF_USING_MSIX) {
3354                        pci_disable_msix(adapter->pdev);
3355                        adapter->flags &= ~CXGB4VF_USING_MSIX;
3356                } else if (adapter->flags & CXGB4VF_USING_MSI) {
3357                        pci_disable_msi(adapter->pdev);
3358                        adapter->flags &= ~CXGB4VF_USING_MSI;
3359                }
3360
3361                /*
3362                 * Tear down our debugfs entries.
3363                 */
3364                if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
3365                        cleanup_debugfs(adapter);
3366                        debugfs_remove_recursive(adapter->debugfs_root);
3367                }
3368
3369                /*
3370                 * Free all of the various resources which we've acquired ...
3371                 */
3372                t4vf_free_sge_resources(adapter);
3373                for_each_port(adapter, pidx) {
3374                        struct net_device *netdev = adapter->port[pidx];
3375                        struct port_info *pi;
3376
3377                        if (netdev == NULL)
3378                                continue;
3379
3380                        pi = netdev_priv(netdev);
3381                        if (pi->viid)
3382                                t4vf_free_vi(adapter, pi->viid);
3383                        free_netdev(netdev);
3384                }
3385                iounmap(adapter->regs);
3386                if (!is_t4(adapter->params.chip))
3387                        iounmap(adapter->bar2);
3388                kfree(adapter->mbox_log);
3389                list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist,
3390                                         list) {
3391                        list_del(&entry->list);
3392                        kfree(entry);
3393                }
3394                kfree(adapter);
3395        }
3396
3397        /*
3398         * Disable the device and release its PCI resources.
3399         */
3400        pci_disable_device(pdev);
3401        pci_clear_master(pdev);
3402        pci_release_regions(pdev);
3403}
3404
3405/*
3406 * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
3407 * delivery.
3408 */
3409static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
3410{
3411        struct adapter *adapter;
3412        int pidx;
3413
3414        adapter = pci_get_drvdata(pdev);
3415        if (!adapter)
3416                return;
3417
3418        /* Disable all Virtual Interfaces.  This will shut down the
3419         * delivery of all ingress packets into the chip for these
3420         * Virtual Interfaces.
3421         */
3422        for_each_port(adapter, pidx)
3423                if (test_bit(pidx, &adapter->registered_device_map))
3424                        unregister_netdev(adapter->port[pidx]);
3425
3426        /* Stop all SGE activity and disable our interrupts so that
3427         * no further DMA or interrupt delivery can occur.
3428         */
3429        t4vf_sge_stop(adapter);
3430        if (adapter->flags & CXGB4VF_USING_MSIX) {
3431                pci_disable_msix(adapter->pdev);
3432                adapter->flags &= ~CXGB4VF_USING_MSIX;
3433        } else if (adapter->flags & CXGB4VF_USING_MSI) {
3434                pci_disable_msi(adapter->pdev);
3435                adapter->flags &= ~CXGB4VF_USING_MSI;
3436        }
3437
3438        /*
3439         * Free up all of our Queues.  This prevents further DMA and
3440         * interrupts, allowing various internal pathways to drain.
3441         */
3442        t4vf_free_sge_resources(adapter);
3443        pci_set_drvdata(pdev, NULL);
3444}
3445
3446/* Macros needed to support the PCI Device ID Table ...
3447 */
3448#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
3449        static const struct pci_device_id cxgb4vf_pci_tbl[] = {
3450#define CH_PCI_DEVICE_ID_FUNCTION       0x8
3451
3452#define CH_PCI_ID_TABLE_ENTRY(devid) \
3453                { PCI_VDEVICE(CHELSIO, (devid)), 0 }
3454
3455#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }
3456
3457#include "../cxgb4/t4_pci_id_tbl.h"
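
/*
 * Editorial note: t4_pci_id_tbl.h builds each entry by OR'ing a base
 * device ID with the PCI function defined above (0x8 for the VF), so the
 * include above expands to a table roughly of the form (illustrative
 * sketch; the actual IDs come from the shared header):
 *
 *        static const struct pci_device_id cxgb4vf_pci_tbl[] = {
 *                { PCI_VDEVICE(CHELSIO, 0x4801), 0 },  (0x4001 | 0x8 << 8)
 *                ...
 *                { 0, }
 *        };
 */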
3458
3459MODULE_DESCRIPTION(DRV_DESC);
3460MODULE_AUTHOR("Chelsio Communications");
3461MODULE_LICENSE("Dual BSD/GPL");
3462MODULE_VERSION(DRV_VERSION);
3463MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
3464
3465static struct pci_driver cxgb4vf_driver = {
3466        .name           = KBUILD_MODNAME,
3467        .id_table       = cxgb4vf_pci_tbl,
3468        .probe          = cxgb4vf_pci_probe,
3469        .remove         = cxgb4vf_pci_remove,
3470        .shutdown       = cxgb4vf_pci_shutdown,
3471};
3472
3473/*
3474 * Initialize global driver state.
3475 */
3476static int __init cxgb4vf_module_init(void)
3477{
3478        int ret;
3479
3480        /*
3481         * Vet our module parameters.
3482         */
3483        if (msi != MSI_MSIX && msi != MSI_MSI) {
3484                pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
3485                        msi, MSI_MSIX, MSI_MSI);
3486                return -EINVAL;
3487        }
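
        /*
         * For example, loading the module with "modprobe cxgb4vf msi=1"
         * restricts the driver to MSI interrupts, while the default
         * msi=2 allows it to try MSI-X first.
         */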
3488
3489        /* Debugfs support is optional; just warn if this fails */
3490        cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3491        if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3492                pr_warn("could not create debugfs entry, continuing\n");
3493
3494        ret = pci_register_driver(&cxgb4vf_driver);
3495        if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3496                debugfs_remove(cxgb4vf_debugfs_root);
3497        return ret;
3498}
3499
3500/*
3501 * Tear down global driver state.
3502 */
3503static void __exit cxgb4vf_module_exit(void)
3504{
3505        pci_unregister_driver(&cxgb4vf_driver);
3506        debugfs_remove(cxgb4vf_debugfs_root);
3507}
3508
3509module_init(cxgb4vf_module_init);
3510module_exit(cxgb4vf_module_exit);
3511