linux/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
/*
 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
 * driver for Linux.
 *
 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/debugfs.h>
#include <linux/ethtool.h>
#include <linux/mdio.h>

#include "t4vf_common.h"
#include "t4vf_defs.h"

#include "../cxgb4/t4_regs.h"
#include "../cxgb4/t4_msg.h"

/*
 * Generic information about the driver.
 */
#define DRV_VERSION "2.0.0-ko"
#define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver"

/*
 * Module Parameters.
 * ==================
 */

/*
 * Default ethtool "message level" for adapters.
 */
#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
                         NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
                         NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X then MSI.  This parameter determines which of these schemes the
 * driver may consider as follows:
 *
 *     msi = 2: choose from among MSI-X and MSI
 *     msi = 1: only consider MSI interrupts
 *
 * Note that unlike the Physical Function driver, this Virtual Function driver
 * does _not_ support legacy INTx interrupts (this limitation is mandated by
 * the PCI-E SR-IOV standard).
 */
#define MSI_MSIX        2
#define MSI_MSI         1
#define MSI_DEFAULT     MSI_MSIX

static int msi = MSI_DEFAULT;

module_param(msi, int, 0644);
MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
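
/*
 * Example usage (illustrative, not from the original source): to restrict
 * the driver to plain MSI interrupts, load the module with
 *
 *     modprobe cxgb4vf msi=1
 *
 * or persistently via a modprobe.d entry such as "options cxgb4vf msi=1".
 * The parameter is consulted when the adapter is probed, so writing it
 * through sysfs afterwards does not re-allocate already-requested vectors.
 */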

/*
 * Fundamental constants.
 * ======================
 */

enum {
        MAX_TXQ_ENTRIES         = 16384,
        MAX_RSPQ_ENTRIES        = 16384,
        MAX_RX_BUFFERS          = 16384,

        MIN_TXQ_ENTRIES         = 32,
        MIN_RSPQ_ENTRIES        = 128,
        MIN_FL_ENTRIES          = 16,

        /*
         * For purposes of manipulating the Free List size we need to
         * recognize that Free Lists are actually Egress Queues (the host
         * produces free buffers which the hardware consumes), Egress Queue
         * indices are all in units of Egress Context Units (bytes), and free
         * list entries are 64-bit PCI DMA addresses.  And since the state of
         * the Producer Index == the Consumer Index implies an EMPTY list, we
         * always have at least one Egress Unit's worth of Free List entries
         * unused.  See sge.c for more details ...
         */
        EQ_UNIT = SGE_EQ_IDXSIZE,
        FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
        MIN_FL_RESID = FL_PER_EQ_UNIT,
};
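
/*
 * Worked example of the arithmetic above (the concrete value of
 * SGE_EQ_IDXSIZE comes from t4vf_defs.h; 64 bytes is used here purely for
 * illustration): with EQ_UNIT == 64 and 8-byte (__be64) Free List entries,
 * FL_PER_EQ_UNIT == 64 / 8 == 8, so MIN_FL_RESID reserves 8 entries -- one
 * full Egress Unit -- ensuring a completely full Free List can never be
 * mistaken for an empty one (Producer Index == Consumer Index).
 */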

/*
 * Global driver state.
 * ====================
 */

static struct dentry *cxgb4vf_debugfs_root;

/*
 * OS "Callback" functions.
 * ========================
 */

/*
 * The link status has changed on the indicated "port" (Virtual Interface).
 */
void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
{
        struct net_device *dev = adapter->port[pidx];

        /*
         * If the port is disabled or the current recorded "link up"
         * status matches the new status, just return.
         */
        if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
                return;

        /*
         * Tell the OS that the link status has changed and print a short
         * informative message on the console about the event.
         */
        if (link_ok) {
                const char *s;
                const char *fc;
                const struct port_info *pi = netdev_priv(dev);

                netif_carrier_on(dev);

                switch (pi->link_cfg.speed) {
                case 100:
                        s = "100Mbps";
                        break;
                case 1000:
                        s = "1Gbps";
                        break;
                case 10000:
                        s = "10Gbps";
                        break;
                case 25000:
                        s = "25Gbps";
                        break;
                case 40000:
                        s = "40Gbps";
                        break;
                case 100000:
                        s = "100Gbps";
                        break;

                default:
                        s = "unknown";
                        break;
                }

                switch (pi->link_cfg.fc) {
                case PAUSE_RX:
                        fc = "RX";
                        break;

                case PAUSE_TX:
                        fc = "TX";
                        break;

                case PAUSE_RX|PAUSE_TX:
                        fc = "RX/TX";
                        break;

                default:
                        fc = "no";
                        break;
                }

                netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
        } else {
                netif_carrier_off(dev);
                netdev_info(dev, "link down\n");
        }
}

/*
 * The port module type has changed on the indicated "port" (Virtual
 * Interface).
 */
void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
{
        static const char * const mod_str[] = {
                NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
        };
        const struct net_device *dev = adapter->port[pidx];
        const struct port_info *pi = netdev_priv(dev);

        if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
                dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
                         dev->name);
        else if (pi->mod_type < ARRAY_SIZE(mod_str))
                dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
                         dev->name, mod_str[pi->mod_type]);
        else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
                dev_info(adapter->pdev_dev, "%s: unsupported optical port "
                         "module inserted\n", dev->name);
        else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
                dev_info(adapter->pdev_dev, "%s: unknown port module inserted, "
                         "forcing TWINAX\n", dev->name);
        else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
                dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
                         dev->name);
        else
                dev_info(adapter->pdev_dev, "%s: unknown module type %d "
                         "inserted\n", dev->name, pi->mod_type);
}

/*
 * Net device operations.
 * ======================
 */

/*
 * Perform the MAC and PHY actions needed to enable a "port" (Virtual
 * Interface).
 */
static int link_start(struct net_device *dev)
{
        int ret;
        struct port_info *pi = netdev_priv(dev);

        /*
         * We do not set address filters and promiscuity here, the stack does
         * that step explicitly. Enable vlan accel.
         */
        ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
                              true);
        if (ret == 0) {
                ret = t4vf_change_mac(pi->adapter, pi->viid,
                                      pi->xact_addr_filt, dev->dev_addr, true);
                if (ret >= 0) {
                        pi->xact_addr_filt = ret;
                        ret = 0;
                }
        }

        /*
         * We don't need to actually "start the link" itself since the
         * firmware will do that for us when the first Virtual Interface
         * is enabled on a port.
         */
        if (ret == 0)
                ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
        return ret;
}

/*
 * Name the MSI-X interrupts.
 */
static void name_msix_vecs(struct adapter *adapter)
{
        int namelen = sizeof(adapter->msix_info[0].desc) - 1;
        int pidx;

        /*
         * Firmware events.
         */
        snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
                 "%s-FWeventq", adapter->name);
        adapter->msix_info[MSIX_FW].desc[namelen] = 0;

        /*
         * Ethernet queues.
         */
        for_each_port(adapter, pidx) {
                struct net_device *dev = adapter->port[pidx];
                const struct port_info *pi = netdev_priv(dev);
                int qs, msi;

                for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
                        snprintf(adapter->msix_info[msi].desc, namelen,
                                 "%s-%d", dev->name, qs);
                        adapter->msix_info[msi].desc[namelen] = 0;
                }
        }
}
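
/*
 * For example (all names illustrative only): on an adapter named
 * "cxgb4vf0" whose first port is "eth2" with two queue sets, the
 * descriptions built above would read "cxgb4vf0-FWeventq", "eth2-0" and
 * "eth2-1"; these strings are later passed to request_irq() and are how
 * the vectors appear in /proc/interrupts.
 */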

/*
 * Request all of our MSI-X resources.
 */
static int request_msix_queue_irqs(struct adapter *adapter)
{
        struct sge *s = &adapter->sge;
        int rxq, msi, err;

        /*
         * Firmware events.
         */
        err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
                          0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
        if (err)
                return err;

        /*
         * Ethernet queues.
         */
        msi = MSIX_IQFLINT;
        for_each_ethrxq(s, rxq) {
                err = request_irq(adapter->msix_info[msi].vec,
                                  t4vf_sge_intr_msix, 0,
                                  adapter->msix_info[msi].desc,
                                  &s->ethrxq[rxq].rspq);
                if (err)
                        goto err_free_irqs;
                msi++;
        }
        return 0;

err_free_irqs:
        while (--rxq >= 0)
                free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
        free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
        return err;
}

/*
 * Free our MSI-X resources.
 */
static void free_msix_queue_irqs(struct adapter *adapter)
{
        struct sge *s = &adapter->sge;
        int rxq, msi;

        free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
        msi = MSIX_IQFLINT;
        for_each_ethrxq(s, rxq)
                free_irq(adapter->msix_info[msi++].vec,
                         &s->ethrxq[rxq].rspq);
}

/*
 * Turn on NAPI and start up interrupts on a response queue.
 */
static void qenable(struct sge_rspq *rspq)
{
        napi_enable(&rspq->napi);

        /*
         * 0-increment the Going To Sleep register to start the timer and
         * enable interrupts.
         */
        t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
                     CIDXINC_V(0) |
                     SEINTARM_V(rspq->intr_params) |
                     INGRESSQID_V(rspq->cntxt_id));
}

/*
 * Enable NAPI scheduling and interrupt generation for all Receive Queues.
 */
static void enable_rx(struct adapter *adapter)
{
        int rxq;
        struct sge *s = &adapter->sge;

        for_each_ethrxq(s, rxq)
                qenable(&s->ethrxq[rxq].rspq);
        qenable(&s->fw_evtq);

        /*
         * The interrupt queue doesn't use NAPI so we do the 0-increment of
         * its Going To Sleep register here to get it started.
         */
        if (adapter->flags & USING_MSI)
                t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
                             CIDXINC_V(0) |
                             SEINTARM_V(s->intrq.intr_params) |
                             INGRESSQID_V(s->intrq.cntxt_id));
}

/*
 * Wait until all NAPI handlers are descheduled.
 */
static void quiesce_rx(struct adapter *adapter)
{
        struct sge *s = &adapter->sge;
        int rxq;

        for_each_ethrxq(s, rxq)
                napi_disable(&s->ethrxq[rxq].rspq.napi);
        napi_disable(&s->fw_evtq.napi);
}

/*
 * Response queue handler for the firmware event queue.
 */
static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
                          const struct pkt_gl *gl)
{
        /*
         * Extract response opcode and get pointer to CPL message body.
         */
        struct adapter *adapter = rspq->adapter;
        u8 opcode = ((const struct rss_header *)rsp)->opcode;
        void *cpl = (void *)(rsp + 1);

        switch (opcode) {
        case CPL_FW6_MSG: {
                /*
                 * We've received an asynchronous message from the firmware.
                 */
                const struct cpl_fw6_msg *fw_msg = cpl;
                if (fw_msg->type == FW6_TYPE_CMD_RPL)
                        t4vf_handle_fw_rpl(adapter, fw_msg->data);
                break;
        }

        case CPL_FW4_MSG: {
                /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
                 */
                const struct cpl_sge_egr_update *p = (void *)(rsp + 3);
                opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
                if (opcode != CPL_SGE_EGR_UPDATE) {
                        dev_err(adapter->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n",
                                opcode);
                        break;
                }
                cpl = (void *)p;
                /*FALLTHROUGH*/
        }

        case CPL_SGE_EGR_UPDATE: {
                /*
                 * We've received an Egress Queue Status Update message.  We
                 * get these if the SGE is configured to send them when the
                 * firmware passes certain points in processing our TX
                 * Ethernet Queue, or if we make an explicit request for one.
                 * We use these updates to determine when we may need to
                 * restart a TX Ethernet Queue which was stopped for lack of
                 * free TX Queue Descriptors ...
                 */
                const struct cpl_sge_egr_update *p = cpl;
                unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
                struct sge *s = &adapter->sge;
                struct sge_txq *tq;
                struct sge_eth_txq *txq;
                unsigned int eq_idx;

                /*
                 * Perform sanity checking on the Queue ID to make sure it
                 * really refers to one of our TX Ethernet Egress Queues which
                 * is active and matches the queue's ID.  None of these error
                 * conditions should ever happen so we may want to either make
                 * them fatal and/or conditionalized under DEBUG.
                 */
                eq_idx = EQ_IDX(s, qid);
                if (unlikely(eq_idx >= MAX_EGRQ)) {
                        dev_err(adapter->pdev_dev,
                                "Egress Update QID %d out of range\n", qid);
                        break;
                }
                tq = s->egr_map[eq_idx];
                if (unlikely(tq == NULL)) {
                        dev_err(adapter->pdev_dev,
                                "Egress Update QID %d TXQ=NULL\n", qid);
                        break;
                }
                txq = container_of(tq, struct sge_eth_txq, q);
                if (unlikely(tq->abs_id != qid)) {
                        dev_err(adapter->pdev_dev,
                                "Egress Update QID %d refers to TXQ %d\n",
                                qid, tq->abs_id);
                        break;
                }

                /*
                 * Restart a stopped TX Queue which has less than half of its
                 * TX ring in use ...
                 */
                txq->q.restarts++;
                netif_tx_wake_queue(txq->txq);
                break;
        }

        default:
                dev_err(adapter->pdev_dev,
                        "unexpected CPL %#x on FW event queue\n", opcode);
        }

        return 0;
}

/*
 * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
 * to use and initialize them.  We support multiple "Queue Sets" per port if
 * we have MSI-X, otherwise just one queue set per port.
 */
static int setup_sge_queues(struct adapter *adapter)
{
        struct sge *s = &adapter->sge;
        int err, pidx, msix;

        /*
         * Clear "Queue Set" Free List Starving state.
         */
        bitmap_zero(s->starving_fl, MAX_EGRQ);

        /*
         * If we're using MSI interrupt mode we need to set up a "forwarded
         * interrupt" queue which we'll set up with our MSI vector.  The rest
         * of the ingress queues will be set up to forward their interrupts to
         * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
         * the intrq's queue ID as the interrupt forwarding queue for the
         * subsequent calls ...
         */
        if (adapter->flags & USING_MSI) {
                err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
                                         adapter->port[0], 0, NULL, NULL);
                if (err)
                        goto err_free_queues;
        }

        /*
         * Allocate our ingress queue for asynchronous firmware messages.
         */
        err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
                                 MSIX_FW, NULL, fwevtq_handler);
        if (err)
                goto err_free_queues;

        /*
         * Allocate each "port"'s initial Queue Sets.  These can be changed
         * later on ... up to the point where any interface on the adapter is
         * brought up at which point lots of things get nailed down
         * permanently ...
         */
        msix = MSIX_IQFLINT;
        for_each_port(adapter, pidx) {
                struct net_device *dev = adapter->port[pidx];
                struct port_info *pi = netdev_priv(dev);
                struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
                struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
                int qs;

                for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
                        err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
                                                 dev, msix++,
                                                 &rxq->fl, t4vf_ethrx_handler);
                        if (err)
                                goto err_free_queues;

                        err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
                                             netdev_get_tx_queue(dev, qs),
                                             s->fw_evtq.cntxt_id);
                        if (err)
                                goto err_free_queues;

                        rxq->rspq.idx = qs;
                        memset(&rxq->stats, 0, sizeof(rxq->stats));
                }
        }

        /*
         * Create the reverse mappings for the queues.
         */
        s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
        s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
        IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
        for_each_port(adapter, pidx) {
                struct net_device *dev = adapter->port[pidx];
                struct port_info *pi = netdev_priv(dev);
                struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
                struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
                int qs;

                for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
                        IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
                        EQ_MAP(s, txq->q.abs_id) = &txq->q;

                        /*
                         * The FW_IQ_CMD doesn't return the Absolute Queue IDs
                         * for Free Lists but since all of the Egress Queues
                         * (including Free Lists) have Relative Queue IDs
                         * which are computed as Absolute - Base Queue ID, we
                         * can synthesize the Absolute Queue IDs for the Free
                         * Lists.  This is useful for debugging purposes when
                         * we want to dump Queue Contexts via the PF Driver.
                         */
                        rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
                        EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
                }
        }
        return 0;

err_free_queues:
        t4vf_free_sge_resources(adapter);
        return err;
}
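
/*
 * A numeric sketch of the reverse-mapping arithmetic above (the values are
 * hypothetical, chosen only to illustrate the relationship): if the first
 * TX queue came back from the firmware with abs_id == 72 and
 * cntxt_id == 8, then egr_base == 72 - 8 == 64, and a Free List whose
 * cntxt_id is 10 gets the synthesized abs_id 10 + 64 == 74.
 */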

/*
 * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
 * queues.  We configure the RSS CPU lookup table to distribute to the number
 * of HW receive queues, and the response queue lookup table to narrow that
 * down to the response queues actually configured for each "port" (Virtual
 * Interface).  We always configure the RSS mapping for all ports since the
 * mapping table has plenty of entries.
 */
static int setup_rss(struct adapter *adapter)
{
        int pidx;

        for_each_port(adapter, pidx) {
                struct port_info *pi = adap2pinfo(adapter, pidx);
                struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
                u16 rss[MAX_PORT_QSETS];
                int qs, err;

                for (qs = 0; qs < pi->nqsets; qs++)
                        rss[qs] = rxq[qs].rspq.abs_id;

                err = t4vf_config_rss_range(adapter, pi->viid,
                                            0, pi->rss_size, rss, pi->nqsets);
                if (err)
                        return err;

                /*
                 * Perform Global RSS Mode-specific initialization.
                 */
                switch (adapter->params.rss.mode) {
                case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
                        /*
                         * If Tunnel All Lookup isn't specified in the global
                         * RSS Configuration, then we need to specify a
                         * default Ingress Queue for any ingress packets which
                         * aren't hashed.  We'll use our first ingress queue
                         * ...
                         */
                        if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
                                union rss_vi_config config;
                                err = t4vf_read_rss_vi_config(adapter,
                                                              pi->viid,
                                                              &config);
                                if (err)
                                        return err;
                                config.basicvirtual.defaultq =
                                        rxq[0].rspq.abs_id;
                                err = t4vf_write_rss_vi_config(adapter,
                                                               pi->viid,
                                                               &config);
                                if (err)
                                        return err;
                        }
                        break;
                }
        }

        return 0;
}

/*
 * Bring the adapter up.  Called whenever we go from no "ports" open to having
 * one open.  This function performs the actions necessary to make an adapter
 * operational, such as completing the initialization of HW modules, and
 * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
 * this is called "cxgb_up" in the PF Driver.)
 */
static int adapter_up(struct adapter *adapter)
{
        int err;

        /*
         * If this is the first time we've been called, perform basic
         * adapter setup.  Once we've done this, many of our adapter
         * parameters can no longer be changed ...
         */
        if ((adapter->flags & FULL_INIT_DONE) == 0) {
                err = setup_sge_queues(adapter);
                if (err)
                        return err;
                err = setup_rss(adapter);
                if (err) {
                        t4vf_free_sge_resources(adapter);
                        return err;
                }

                if (adapter->flags & USING_MSIX)
                        name_msix_vecs(adapter);
                adapter->flags |= FULL_INIT_DONE;
        }

        /*
         * Acquire our interrupt resources.  We only support MSI-X and MSI.
         */
        BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
        if (adapter->flags & USING_MSIX)
                err = request_msix_queue_irqs(adapter);
        else
                err = request_irq(adapter->pdev->irq,
                                  t4vf_intr_handler(adapter), 0,
                                  adapter->name, adapter);
        if (err) {
                dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
                        err);
                return err;
        }

        /*
         * Enable NAPI ingress processing and return success.
         */
        enable_rx(adapter);
        t4vf_sge_start(adapter);

        /* Initialize hash mac addr list */
        INIT_LIST_HEAD(&adapter->mac_hlist);
        return 0;
}

/*
 * Bring the adapter down.  Called whenever the last "port" (Virtual
 * Interface) is closed.  (Note that this routine is called "cxgb_down" in the
 * PF Driver.)
 */
static void adapter_down(struct adapter *adapter)
{
        /*
         * Free interrupt resources.
         */
        if (adapter->flags & USING_MSIX)
                free_msix_queue_irqs(adapter);
        else
                free_irq(adapter->pdev->irq, adapter);

        /*
         * Wait for NAPI handlers to finish.
         */
        quiesce_rx(adapter);
}

/*
 * Start up a net device.
 */
static int cxgb4vf_open(struct net_device *dev)
{
        int err;
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;

        /*
         * If this is the first interface that we're opening on the "adapter",
         * bring the "adapter" up now.
         */
        if (adapter->open_device_map == 0) {
                err = adapter_up(adapter);
                if (err)
                        return err;
        }

        /*
         * Note that this interface is up and start everything up ...
         */
        err = link_start(dev);
        if (err)
                goto err_unwind;

        netif_tx_start_all_queues(dev);
        set_bit(pi->port_id, &adapter->open_device_map);
        return 0;

err_unwind:
        if (adapter->open_device_map == 0)
                adapter_down(adapter);
        return err;
}

/*
 * Shut down a net device.  This routine is called "cxgb_close" in the PF
 * Driver ...
 */
static int cxgb4vf_stop(struct net_device *dev)
{
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;

        netif_tx_stop_all_queues(dev);
        netif_carrier_off(dev);
        t4vf_enable_vi(adapter, pi->viid, false, false);
        pi->link_cfg.link_ok = 0;

        clear_bit(pi->port_id, &adapter->open_device_map);
        if (adapter->open_device_map == 0)
                adapter_down(adapter);
        return 0;
}

/*
 * Translate our basic statistics into the standard "ifconfig" statistics.
 */
static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
{
        struct t4vf_port_stats stats;
        struct port_info *pi = netdev2pinfo(dev);
        struct adapter *adapter = pi->adapter;
        struct net_device_stats *ns = &dev->stats;
        int err;

        spin_lock(&adapter->stats_lock);
        err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
        spin_unlock(&adapter->stats_lock);

        memset(ns, 0, sizeof(*ns));
        if (err)
                return ns;

        ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
                        stats.tx_ucast_bytes + stats.tx_offload_bytes);
        ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
                          stats.tx_ucast_frames + stats.tx_offload_frames);
        ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
                        stats.rx_ucast_bytes);
        ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
                          stats.rx_ucast_frames);
        ns->multicast = stats.rx_mcast_frames;
        ns->tx_errors = stats.tx_drop_frames;
        ns->rx_errors = stats.rx_err_frames;

        return ns;
}

static inline int cxgb4vf_set_addr_hash(struct port_info *pi)
{
        struct adapter *adapter = pi->adapter;
        u64 vec = 0;
        bool ucast = false;
        struct hash_mac_addr *entry;

        /* Calculate the hash vector for the updated list and program it */
        list_for_each_entry(entry, &adapter->mac_hlist, list) {
                ucast |= is_unicast_ether_addr(entry->addr);
                vec |= (1ULL << hash_mac_addr(entry->addr));
        }
        return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
}

static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
{
        struct port_info *pi = netdev_priv(netdev);
        struct adapter *adapter = pi->adapter;
        int ret;
        u64 mhash = 0;
        u64 uhash = 0;
        bool free = false;
        bool ucast = is_unicast_ether_addr(mac_addr);
        const u8 *maclist[1] = {mac_addr};
        struct hash_mac_addr *new_entry;

        ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
                                  NULL, ucast ? &uhash : &mhash, false);
        if (ret < 0)
                goto out;
        /* If the hash is non-zero, add the address to the hash addr
         * list so that at the end we can calculate the hash for the
         * whole list and program it.
         */
        if (uhash || mhash) {
                new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
                if (!new_entry)
                        return -ENOMEM;
                ether_addr_copy(new_entry->addr, mac_addr);
                list_add_tail(&new_entry->list, &adapter->mac_hlist);
                ret = cxgb4vf_set_addr_hash(pi);
        }
out:
        return ret < 0 ? ret : 0;
}

static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
{
        struct port_info *pi = netdev_priv(netdev);
        struct adapter *adapter = pi->adapter;
        int ret;
        const u8 *maclist[1] = {mac_addr};
        struct hash_mac_addr *entry, *tmp;

        /* If the MAC address to be removed is in the hash addr
         * list, delete it from the list and update hash vector
         */
        list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) {
                if (ether_addr_equal(entry->addr, mac_addr)) {
                        list_del(&entry->list);
                        kfree(entry);
                        return cxgb4vf_set_addr_hash(pi);
                }
        }

        ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false);
        return ret < 0 ? -EINVAL : 0;
}

/*
 * Set RX properties of a port, such as promiscuity, address filters, and MTU.
 * If @mtu is -1 it is left unchanged.
 */
static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
{
        struct port_info *pi = netdev_priv(dev);

        __dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
        __dev_mc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
        return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
                               (dev->flags & IFF_PROMISC) != 0,
                               (dev->flags & IFF_ALLMULTI) != 0,
                               1, -1, sleep_ok);
}

/*
 * Set the current receive modes on the device.
 */
static void cxgb4vf_set_rxmode(struct net_device *dev)
{
        /* unfortunately we can't return errors to the stack */
        set_rxmode(dev, -1, false);
}

/*
 * Find the entry in the interrupt holdoff timer value array which comes
 * closest to the specified interrupt holdoff value.
 */
static int closest_timer(const struct sge *s, int us)
{
        int i, timer_idx = 0, min_delta = INT_MAX;

        for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
                int delta = us - s->timer_val[i];
                if (delta < 0)
                        delta = -delta;
                if (delta < min_delta) {
                        min_delta = delta;
                        timer_idx = i;
                }
        }
        return timer_idx;
}

static int closest_thres(const struct sge *s, int thres)
{
        int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;

        for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
                delta = thres - s->counter_val[i];
                if (delta < 0)
                        delta = -delta;
                if (delta < min_delta) {
                        min_delta = delta;
                        pktcnt_idx = i;
                }
        }
        return pktcnt_idx;
}
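
/*
 * Illustration (the array contents here are hypothetical): if s->timer_val
 * held {5, 10, 20, 50, 100, 200} microseconds, closest_timer(s, 8) would
 * compare the absolute deltas {3, 2, 12, 42, 92, 192} and return index 1,
 * i.e. the 10us holdoff timer.  closest_thres() applies the same
 * nearest-match search to the packet-count thresholds.
 */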

/*
 * Return a queue's interrupt hold-off time in us.  0 means no timer.
 */
static unsigned int qtimer_val(const struct adapter *adapter,
                               const struct sge_rspq *rspq)
{
        unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);

        return timer_idx < SGE_NTIMERS
                ? adapter->sge.timer_val[timer_idx]
                : 0;
}

/**
 *      set_rxq_intr_params - set a queue's interrupt holdoff parameters
 *      @adapter: the adapter
 *      @rspq: the RX response queue
 *      @us: the hold-off time in us, or 0 to disable timer
 *      @cnt: the hold-off packet count, or 0 to disable counter
 *
 *      Sets an RX response queue's interrupt hold-off time and packet count.
 *      At least one of the two needs to be enabled for the queue to generate
 *      interrupts.
 */
static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
                               unsigned int us, unsigned int cnt)
{
        unsigned int timer_idx;

        /*
         * If both the interrupt holdoff timer and count are specified as
         * zero, default to a holdoff count of 1 ...
         */
        if ((us | cnt) == 0)
                cnt = 1;

        /*
         * If an interrupt holdoff count has been specified, then find the
         * closest configured holdoff count and use that.  If the response
         * queue has already been created, then update its queue context
         * parameters ...
         */
        if (cnt) {
                int err;
                u32 v, pktcnt_idx;

                pktcnt_idx = closest_thres(&adapter->sge, cnt);
                if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
                        v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
                            FW_PARAMS_PARAM_X_V(
                                        FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
                            FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
                        err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
                        if (err)
                                return err;
                }
                rspq->pktcnt_idx = pktcnt_idx;
        }

        /*
         * Compute the closest holdoff timer index from the supplied holdoff
         * timer value.
         */
        timer_idx = (us == 0
                     ? SGE_TIMER_RSTRT_CNTR
                     : closest_timer(&adapter->sge, us));

        /*
         * Update the response queue's interrupt coalescing parameters and
         * return success.
         */
        rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
                             QINTR_CNT_EN_V(cnt > 0));
        return 0;
}
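
/*
 * Sketch of a typical call (the values are illustrative): asking for a
 * 50us holdoff and an 8-packet threshold via
 *
 *     set_rxq_intr_params(adapter, rspq, 50, 8);
 *
 * snaps both requests to the nearest entries in the adapter's precomputed
 * timer/counter tables and, if the queue already exists in hardware,
 * pushes the new packet-count threshold to the firmware.
 */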

/*
 * Return a version number to identify the type of adapter.  The scheme is:
 * - bits 0..9: chip version
 * - bits 10..15: chip revision
 */
static inline unsigned int mk_adap_vers(const struct adapter *adapter)
{
        /*
         * Chip version 4, revision 0x3f (cxgb4vf).
         */
        return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
}
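
/*
 * For instance, on a T4 part (assuming CHELSIO_CHIP_VERSION() yields 4 for
 * it) this evaluates to 4 | (0x3f << 10) == 0xfc04: the low ten bits carry
 * the chip version and the fixed 0x3f "revision" marks the VF driver.
 */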

/*
 * Execute the specified ioctl command.
 */
static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int ret = 0;

        switch (cmd) {
            /*
             * The VF Driver doesn't have access to any of the other
             * common Ethernet device ioctl()'s (like reading/writing
             * PHY registers, etc.)
             */

        default:
                ret = -EOPNOTSUPP;
                break;
        }
        return ret;
}

/*
 * Change the device's MTU.
 */
static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
{
        int ret;
        struct port_info *pi = netdev_priv(dev);

        /* accommodate SACK */
        if (new_mtu < 81)
                return -EINVAL;

        ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
                              -1, -1, -1, -1, true);
        if (!ret)
                dev->mtu = new_mtu;
        return ret;
}

static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
        netdev_features_t features)
{
        /*
         * Since there is no support for separate rx/tx vlan accel
         * enable/disable make sure tx flag is always in same state as rx.
         */
        if (features & NETIF_F_HW_VLAN_CTAG_RX)
                features |= NETIF_F_HW_VLAN_CTAG_TX;
        else
                features &= ~NETIF_F_HW_VLAN_CTAG_TX;

        return features;
}

static int cxgb4vf_set_features(struct net_device *dev,
        netdev_features_t features)
{
        struct port_info *pi = netdev_priv(dev);
        netdev_features_t changed = dev->features ^ features;

        if (changed & NETIF_F_HW_VLAN_CTAG_RX)
                t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
                                features & NETIF_F_HW_VLAN_CTAG_TX, 0);

        return 0;
}

/*
 * Change the device's MAC address.
 */
static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
{
        int ret;
        struct sockaddr *addr = _addr;
        struct port_info *pi = netdev_priv(dev);

        if (!is_valid_ether_addr(addr->sa_data))
                return -EADDRNOTAVAIL;

        ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
                              addr->sa_data, true);
        if (ret < 0)
                return ret;

        memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
        pi->xact_addr_filt = ret;
        return 0;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Poll all of our receive queues.  This is called outside of normal interrupt
 * context.
 */
static void cxgb4vf_poll_controller(struct net_device *dev)
{
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;

        if (adapter->flags & USING_MSIX) {
                struct sge_eth_rxq *rxq;
                int nqsets;

                rxq = &adapter->sge.ethrxq[pi->first_qset];
                for (nqsets = pi->nqsets; nqsets; nqsets--) {
                        t4vf_sge_intr_msix(0, &rxq->rspq);
                        rxq++;
                }
        } else
                t4vf_intr_handler(adapter)(0, adapter);
}
#endif

/*
 * Ethtool operations.
 * ===================
 *
 * Note that we don't support any ethtool operations which change the physical
 * state of the port to which we're linked.
 */

/**
 *      from_fw_port_mod_type - translate Firmware Port/Module type to Ethtool
 *      @port_type: Firmware Port Type
 *      @mod_type: Firmware Module Type
 *
 *      Translate Firmware Port/Module type to Ethtool Port Type.
 */
static int from_fw_port_mod_type(enum fw_port_type port_type,
                                 enum fw_port_module_type mod_type)
{
        if (port_type == FW_PORT_TYPE_BT_SGMII ||
            port_type == FW_PORT_TYPE_BT_XFI ||
            port_type == FW_PORT_TYPE_BT_XAUI) {
                return PORT_TP;
        } else if (port_type == FW_PORT_TYPE_FIBER_XFI ||
                   port_type == FW_PORT_TYPE_FIBER_XAUI) {
                return PORT_FIBRE;
        } else if (port_type == FW_PORT_TYPE_SFP ||
                   port_type == FW_PORT_TYPE_QSFP_10G ||
                   port_type == FW_PORT_TYPE_QSA ||
                   port_type == FW_PORT_TYPE_QSFP) {
                if (mod_type == FW_PORT_MOD_TYPE_LR ||
                    mod_type == FW_PORT_MOD_TYPE_SR ||
                    mod_type == FW_PORT_MOD_TYPE_ER ||
                    mod_type == FW_PORT_MOD_TYPE_LRM)
                        return PORT_FIBRE;
                else if (mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
                         mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
                        return PORT_DA;
                else
                        return PORT_OTHER;
        }

        return PORT_OTHER;
}

/**
 *      fw_caps_to_lmm - translate Firmware to ethtool Link Mode Mask
 *      @port_type: Firmware Port Type
 *      @fw_caps: Firmware Port Capabilities
 *      @link_mode_mask: ethtool Link Mode Mask
 *
 *      Translate a Firmware Port Capabilities specification to an ethtool
 *      Link Mode Mask.
 */
static void fw_caps_to_lmm(enum fw_port_type port_type,
                           unsigned int fw_caps,
                           unsigned long *link_mode_mask)
{
        #define SET_LMM(__lmm_name) __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name\
                         ## _BIT, link_mode_mask)

        #define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
                do { \
                        if (fw_caps & FW_PORT_CAP_ ## __fw_name) \
                                SET_LMM(__lmm_name); \
                } while (0)

        switch (port_type) {
        case FW_PORT_TYPE_BT_SGMII:
        case FW_PORT_TYPE_BT_XFI:
        case FW_PORT_TYPE_BT_XAUI:
                SET_LMM(TP);
                FW_CAPS_TO_LMM(SPEED_100M, 100baseT_Full);
                FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
                break;

        case FW_PORT_TYPE_KX4:
        case FW_PORT_TYPE_KX:
                SET_LMM(Backplane);
                FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
                break;

        case FW_PORT_TYPE_KR:
                SET_LMM(Backplane);
                SET_LMM(10000baseKR_Full);
                break;

        case FW_PORT_TYPE_BP_AP:
                SET_LMM(Backplane);
                SET_LMM(10000baseR_FEC);
                SET_LMM(10000baseKR_Full);
                SET_LMM(1000baseKX_Full);
                break;

        case FW_PORT_TYPE_BP4_AP:
                SET_LMM(Backplane);
                SET_LMM(10000baseR_FEC);
                SET_LMM(10000baseKR_Full);
                SET_LMM(1000baseKX_Full);
                SET_LMM(10000baseKX4_Full);
                break;

        case FW_PORT_TYPE_FIBER_XFI:
        case FW_PORT_TYPE_FIBER_XAUI:
        case FW_PORT_TYPE_SFP:
        case FW_PORT_TYPE_QSFP_10G:
        case FW_PORT_TYPE_QSA:
                SET_LMM(FIBRE);
                FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
                FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
                break;

        case FW_PORT_TYPE_BP40_BA:
        case FW_PORT_TYPE_QSFP:
                SET_LMM(FIBRE);
                SET_LMM(40000baseSR4_Full);
                break;

        case FW_PORT_TYPE_CR_QSFP:
        case FW_PORT_TYPE_SFP28:
                SET_LMM(FIBRE);
                SET_LMM(25000baseCR_Full);
                break;

        case FW_PORT_TYPE_KR4_100G:
        case FW_PORT_TYPE_CR4_QSFP:
                SET_LMM(FIBRE);
                SET_LMM(100000baseCR4_Full);
                break;

        default:
                break;
        }

        FW_CAPS_TO_LMM(ANEG, Autoneg);
        FW_CAPS_TO_LMM(802_3_PAUSE, Pause);
        FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause);

        #undef FW_CAPS_TO_LMM
        #undef SET_LMM
}
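
/*
 * Expansion example for the helper macros above: after the preprocessor
 * performs the token pasting, FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full)
 * effectively becomes
 *
 *     if (fw_caps & FW_PORT_CAP_SPEED_1G)
 *             __set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
 *                       link_mode_mask);
 *
 * so each firmware capability bit maps onto one ethtool link-mode bit.
 */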
1334
1335static int cxgb4vf_get_link_ksettings(struct net_device *dev,
1336                                      struct ethtool_link_ksettings
1337                                                        *link_ksettings)
1338{
1339        const struct port_info *pi = netdev_priv(dev);
1340        struct ethtool_link_settings *base = &link_ksettings->base;
1341
1342        ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
1343        ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
1344        ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);
1345
1346        base->port = from_fw_port_mod_type(pi->port_type, pi->mod_type);
1347
1348        if (pi->mdio_addr >= 0) {
1349                base->phy_address = pi->mdio_addr;
1350                base->mdio_support = (pi->port_type == FW_PORT_TYPE_BT_SGMII
1351                                      ? ETH_MDIO_SUPPORTS_C22
1352                                      : ETH_MDIO_SUPPORTS_C45);
1353        } else {
1354                base->phy_address = 255;
1355                base->mdio_support = 0;
1356        }
1357
1358        fw_caps_to_lmm(pi->port_type, pi->link_cfg.supported,
1359                       link_ksettings->link_modes.supported);
1360        fw_caps_to_lmm(pi->port_type, pi->link_cfg.advertising,
1361                       link_ksettings->link_modes.advertising);
1362        fw_caps_to_lmm(pi->port_type, pi->link_cfg.lp_advertising,
1363                       link_ksettings->link_modes.lp_advertising);
1364
1365        if (netif_carrier_ok(dev)) {
1366                base->speed = pi->link_cfg.speed;
1367                base->duplex = DUPLEX_FULL;
1368        } else {
1369                base->speed = SPEED_UNKNOWN;
1370                base->duplex = DUPLEX_UNKNOWN;
1371        }
1372
1373        base->autoneg = pi->link_cfg.autoneg;
1374        if (pi->link_cfg.supported & FW_PORT_CAP_ANEG)
1375                ethtool_link_ksettings_add_link_mode(link_ksettings,
1376                                                     supported, Autoneg);
1377        if (pi->link_cfg.autoneg)
1378                ethtool_link_ksettings_add_link_mode(link_ksettings,
1379                                                     advertising, Autoneg);
1380
1381        return 0;
1382}
1383
1384/*
1385 * Return our driver information.
1386 */
1387static void cxgb4vf_get_drvinfo(struct net_device *dev,
1388                                struct ethtool_drvinfo *drvinfo)
1389{
1390        struct adapter *adapter = netdev2adap(dev);
1391
1392        strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1393        strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
1394        strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1395                sizeof(drvinfo->bus_info));
1396        snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1397                 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1398                 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
1399                 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
1400                 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
1401                 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
1402                 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
1403                 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
1404                 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
1405                 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
1406}
1407
1408/*
1409 * Return current adapter message level.
1410 */
1411static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1412{
1413        return netdev2adap(dev)->msg_enable;
1414}
1415
1416/*
1417 * Set current adapter message level.
1418 */
1419static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1420{
1421        netdev2adap(dev)->msg_enable = msglevel;
1422}
1423
1424/*
1425 * Return the device's current Queue Set ring size parameters along with the
1426 * allowed maximum values.  Since ethtool doesn't understand the concept of
1427 * multi-queue devices, we just return the current values associated with the
1428 * first Queue Set.
1429 */
1430static void cxgb4vf_get_ringparam(struct net_device *dev,
1431                                  struct ethtool_ringparam *rp)
1432{
1433        const struct port_info *pi = netdev_priv(dev);
1434        const struct sge *s = &pi->adapter->sge;
1435
1436        rp->rx_max_pending = MAX_RX_BUFFERS;
1437        rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1438        rp->rx_jumbo_max_pending = 0;
1439        rp->tx_max_pending = MAX_TXQ_ENTRIES;
1440
1441        rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1442        rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1443        rp->rx_jumbo_pending = 0;
1444        rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1445}
1446
1447/*
1448 * Set the Queue Set ring size parameters for the device.  Again, since
1449 * ethtool doesn't allow for the concept of multiple queues per device, we'll
1450 * apply these new values across all of the Queue Sets associated with the
1451 * device -- after vetting them of course!
1452 */
1453static int cxgb4vf_set_ringparam(struct net_device *dev,
1454                                 struct ethtool_ringparam *rp)
1455{
1456        const struct port_info *pi = netdev_priv(dev);
1457        struct adapter *adapter = pi->adapter;
1458        struct sge *s = &adapter->sge;
1459        int qs;
1460
1461        if (rp->rx_pending > MAX_RX_BUFFERS ||
1462            rp->rx_jumbo_pending ||
1463            rp->tx_pending > MAX_TXQ_ENTRIES ||
1464            rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1465            rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1466            rp->rx_pending < MIN_FL_ENTRIES ||
1467            rp->tx_pending < MIN_TXQ_ENTRIES)
1468                return -EINVAL;
1469
1470        if (adapter->flags & FULL_INIT_DONE)
1471                return -EBUSY;
1472
1473        for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1474                s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1475                s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1476                s->ethtxq[qs].q.size = rp->tx_pending;
1477        }
1478        return 0;
1479}
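
/*
 * A worked sketch of the Free List size accounting above (values
 * hypothetical).  The size reported to ethtool excludes the MIN_FL_RESID
 * entries the hardware keeps in reserve, and set_ringparam() adds them
 * back, so a get/set round trip leaves the queue unchanged:
 *
 *      fl.size = 1024, MIN_FL_RESID = 8
 *      get:  rp->rx_pending = 1024 - 8 = 1016
 *      set:  fl.size        = 1016 + 8 = 1024
 */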
1480
1481/*
1482 * Return the interrupt holdoff timer and count for the first Queue Set on the
1483 * device.  Our extension ioctl() (the cxgbtool interface) allows the
1484 * interrupt holdoff timer to be read on all of the device's Queue Sets.
1485 */
1486static int cxgb4vf_get_coalesce(struct net_device *dev,
1487                                struct ethtool_coalesce *coalesce)
1488{
1489        const struct port_info *pi = netdev_priv(dev);
1490        const struct adapter *adapter = pi->adapter;
1491        const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1492
1493        coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1494        coalesce->rx_max_coalesced_frames =
1495                ((rspq->intr_params & QINTR_CNT_EN_F)
1496                 ? adapter->sge.counter_val[rspq->pktcnt_idx]
1497                 : 0);
1498        return 0;
1499}
1500
1501/*
1502 * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1503 * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1504 * the interrupt holdoff timer on any of the device's Queue Sets.
1505 */
1506static int cxgb4vf_set_coalesce(struct net_device *dev,
1507                                struct ethtool_coalesce *coalesce)
1508{
1509        const struct port_info *pi = netdev_priv(dev);
1510        struct adapter *adapter = pi->adapter;
1511
1512        return set_rxq_intr_params(adapter,
1513                                   &adapter->sge.ethrxq[pi->first_qset].rspq,
1514                                   coalesce->rx_coalesce_usecs,
1515                                   coalesce->rx_max_coalesced_frames);
1516}
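
/*
 * An illustrative user-space invocation (values hypothetical).  These
 * two handlers back "ethtool -c/-C", e.g.:
 *
 *      ethtool -C eth0 rx-usecs 50 rx-frames 8
 *
 * arrives here as rx_coalesce_usecs = 50 and rx_max_coalesced_frames = 8,
 * which set_rxq_intr_params() is expected to translate into the closest
 * supported SGE holdoff timer and packet-count threshold.
 */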
1517
1518/*
1519 * Report current port link pause parameter settings.
1520 */
1521static void cxgb4vf_get_pauseparam(struct net_device *dev,
1522                                   struct ethtool_pauseparam *pauseparam)
1523{
1524        struct port_info *pi = netdev_priv(dev);
1525
1526        pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1527        pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1528        pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1529}
1530
1531/*
1532 * Identify the port by blinking the port's LED.
1533 */
1534static int cxgb4vf_phys_id(struct net_device *dev,
1535                           enum ethtool_phys_id_state state)
1536{
1537        unsigned int val;
1538        struct port_info *pi = netdev_priv(dev);
1539
1540        if (state == ETHTOOL_ID_ACTIVE)
1541                val = 0xffff;
1542        else if (state == ETHTOOL_ID_INACTIVE)
1543                val = 0;
1544        else
1545                return -EINVAL;
1546
1547        return t4vf_identify_port(pi->adapter, pi->viid, val);
1548}
1549
1550/*
1551 * Port stats maintained per queue of the port.
1552 */
1553struct queue_port_stats {
1554        u64 tso;
1555        u64 tx_csum;
1556        u64 rx_csum;
1557        u64 vlan_ex;
1558        u64 vlan_ins;
1559        u64 lro_pkts;
1560        u64 lro_merged;
1561};
1562
1563/*
1564 * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
1565 * these need to match the order of statistics returned by
1566 * t4vf_get_port_stats().
1567 */
1568static const char stats_strings[][ETH_GSTRING_LEN] = {
1569        /*
1570         * These must match the layout of the t4vf_port_stats structure.
1571         */
1572        "TxBroadcastBytes  ",
1573        "TxBroadcastFrames ",
1574        "TxMulticastBytes  ",
1575        "TxMulticastFrames ",
1576        "TxUnicastBytes    ",
1577        "TxUnicastFrames   ",
1578        "TxDroppedFrames   ",
1579        "TxOffloadBytes    ",
1580        "TxOffloadFrames   ",
1581        "RxBroadcastBytes  ",
1582        "RxBroadcastFrames ",
1583        "RxMulticastBytes  ",
1584        "RxMulticastFrames ",
1585        "RxUnicastBytes    ",
1586        "RxUnicastFrames   ",
1587        "RxErrorFrames     ",
1588
1589        /*
1590         * These are accumulated per-queue statistics and must match the
1591         * order of the fields in the queue_port_stats structure.
1592         */
1593        "TSO               ",
1594        "TxCsumOffload     ",
1595        "RxCsumGood        ",
1596        "VLANextractions   ",
1597        "VLANinsertions    ",
1598        "GROPackets        ",
1599        "GROMerged         ",
1600};
1601
1602/*
1603 * Return the number of statistics in the specified statistics set.
1604 */
1605static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1606{
1607        switch (sset) {
1608        case ETH_SS_STATS:
1609                return ARRAY_SIZE(stats_strings);
1610        default:
1611                return -EOPNOTSUPP;
1612        }
1613        /*NOTREACHED*/
1614}
1615
1616/*
1617 * Return the strings for the specified statistics set.
1618 */
1619static void cxgb4vf_get_strings(struct net_device *dev,
1620                                u32 sset,
1621                                u8 *data)
1622{
1623        switch (sset) {
1624        case ETH_SS_STATS:
1625                memcpy(data, stats_strings, sizeof(stats_strings));
1626                break;
1627        }
1628}
1629
1630/*
1631 * Small utility routine to accumulate queue statistics across the queues of
1632 * a "port".
1633 */
1634static void collect_sge_port_stats(const struct adapter *adapter,
1635                                   const struct port_info *pi,
1636                                   struct queue_port_stats *stats)
1637{
1638        const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1639        const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1640        int qs;
1641
1642        memset(stats, 0, sizeof(*stats));
1643        for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1644                stats->tso += txq->tso;
1645                stats->tx_csum += txq->tx_cso;
1646                stats->rx_csum += rxq->stats.rx_cso;
1647                stats->vlan_ex += rxq->stats.vlan_ex;
1648                stats->vlan_ins += txq->vlan_ins;
1649                stats->lro_pkts += rxq->stats.lro_pkts;
1650                stats->lro_merged += rxq->stats.lro_merged;
1651        }
1652}
1653
1654/*
1655 * Return the ETH_SS_STATS statistics set.
1656 */
1657static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1658                                      struct ethtool_stats *stats,
1659                                      u64 *data)
1660{
1661        struct port_info *pi = netdev2pinfo(dev);
1662        struct adapter *adapter = pi->adapter;
1663        int err = t4vf_get_port_stats(adapter, pi->pidx,
1664                                      (struct t4vf_port_stats *)data);
1665        if (err)
1666                memset(data, 0, sizeof(struct t4vf_port_stats));
1667
1668        data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1669        collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1670}
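
/*
 * For reference, the resulting layout of the u64 data[] buffer: the
 * firmware-maintained port statistics come first, immediately followed
 * by the software per-queue accumulators, matching the two halves of
 * stats_strings[] above:
 *
 *      data[0 .. P-1]   t4vf_port_stats    (P = sizeof()/sizeof(u64))
 *      data[P .. P+6]   queue_port_stats   (7 u64 counters)
 */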
1671
1672/*
1673 * Return the size of our register map.
1674 */
1675static int cxgb4vf_get_regs_len(struct net_device *dev)
1676{
1677        return T4VF_REGMAP_SIZE;
1678}
1679
1680/*
1681 * Dump a block of registers, start to end inclusive, into a buffer.
1682 */
1683static void reg_block_dump(struct adapter *adapter, void *regbuf,
1684                           unsigned int start, unsigned int end)
1685{
1686        u32 *bp = regbuf + start - T4VF_REGMAP_START;
1687
1688        for ( ; start <= end; start += sizeof(u32)) {
1689                /*
1690                 * Avoid reading the Mailbox Control register since that
1691                 * can trigger a Mailbox Ownership Arbitration cycle and
1692                 * interfere with communication with the firmware.
1693                 */
1694                if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1695                        *bp++ = 0xffff;
1696                else
1697                        *bp++ = t4_read_reg(adapter, start);
1698        }
1699}
1700
1701/*
1702 * Copy our entire register map into the provided buffer.
1703 */
1704static void cxgb4vf_get_regs(struct net_device *dev,
1705                             struct ethtool_regs *regs,
1706                             void *regbuf)
1707{
1708        struct adapter *adapter = netdev2adap(dev);
1709
1710        regs->version = mk_adap_vers(adapter);
1711
1712        /*
1713         * Fill in register buffer with our register map.
1714         */
1715        memset(regbuf, 0, T4VF_REGMAP_SIZE);
1716
1717        reg_block_dump(adapter, regbuf,
1718                       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1719                       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1720        reg_block_dump(adapter, regbuf,
1721                       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1722                       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1723
1724        /* T5 adds new registers in the PL Register map.
1725         */
1726        reg_block_dump(adapter, regbuf,
1727                       T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1728                       T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
1729                       ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
1730        reg_block_dump(adapter, regbuf,
1731                       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1732                       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1733
1734        reg_block_dump(adapter, regbuf,
1735                       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1736                       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1737}
1738
1739/*
1740 * Report current Wake On LAN settings.
1741 */
1742static void cxgb4vf_get_wol(struct net_device *dev,
1743                            struct ethtool_wolinfo *wol)
1744{
1745        wol->supported = 0;
1746        wol->wolopts = 0;
1747        memset(&wol->sopass, 0, sizeof(wol->sopass));
1748}
1749
1750/*
1751 * TCP Segmentation Offload flags which we support.
1752 */
1753#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1754
1755static const struct ethtool_ops cxgb4vf_ethtool_ops = {
1756        .get_link_ksettings     = cxgb4vf_get_link_ksettings,
1757        .get_drvinfo            = cxgb4vf_get_drvinfo,
1758        .get_msglevel           = cxgb4vf_get_msglevel,
1759        .set_msglevel           = cxgb4vf_set_msglevel,
1760        .get_ringparam          = cxgb4vf_get_ringparam,
1761        .set_ringparam          = cxgb4vf_set_ringparam,
1762        .get_coalesce           = cxgb4vf_get_coalesce,
1763        .set_coalesce           = cxgb4vf_set_coalesce,
1764        .get_pauseparam         = cxgb4vf_get_pauseparam,
1765        .get_link               = ethtool_op_get_link,
1766        .get_strings            = cxgb4vf_get_strings,
1767        .set_phys_id            = cxgb4vf_phys_id,
1768        .get_sset_count         = cxgb4vf_get_sset_count,
1769        .get_ethtool_stats      = cxgb4vf_get_ethtool_stats,
1770        .get_regs_len           = cxgb4vf_get_regs_len,
1771        .get_regs               = cxgb4vf_get_regs,
1772        .get_wol                = cxgb4vf_get_wol,
1773};
1774
1775/*
1776 * /sys/kernel/debug/cxgb4vf support code and data.
1777 * ================================================
1778 */
1779
1780/*
1781 * Show Firmware Mailbox Command/Reply Log
1782 *
1783 * Note that we don't do any locking when dumping the Firmware Mailbox Log so
1784 * it's possible that we can catch things during a log update and therefore
1785 * see partially corrupted log entries.  But it's probably Good Enough(tm).
1786 * If we ever decide that we want to make sure that we're dumping a coherent
1787 * log, we'd need to perform locking in the mailbox logging and in
1788 * mboxlog_open() where we'd need to grab the entire mailbox log in one go
1789 * like we do for the Firmware Device Log.  But as stated above, meh ...
1790 */
1791static int mboxlog_show(struct seq_file *seq, void *v)
1792{
1793        struct adapter *adapter = seq->private;
1794        struct mbox_cmd_log *log = adapter->mbox_log;
1795        struct mbox_cmd *entry;
1796        int entry_idx, i;
1797
1798        if (v == SEQ_START_TOKEN) {
1799                seq_printf(seq,
1800                           "%10s  %15s  %5s  %5s  %s\n",
1801                           "Seq#", "Tstamp", "Atime", "Etime",
1802                           "Command/Reply");
1803                return 0;
1804        }
1805
1806        entry_idx = log->cursor + ((uintptr_t)v - 2);
1807        if (entry_idx >= log->size)
1808                entry_idx -= log->size;
1809        entry = mbox_cmd_log_entry(log, entry_idx);
1810
1811        /* skip over unused entries */
1812        if (entry->timestamp == 0)
1813                return 0;
1814
1815        seq_printf(seq, "%10u  %15llu  %5d  %5d",
1816                   entry->seqno, entry->timestamp,
1817                   entry->access, entry->execute);
1818        for (i = 0; i < MBOX_LEN / 8; i++) {
1819                u64 flit = entry->cmd[i];
1820                u32 hi = (u32)(flit >> 32);
1821                u32 lo = (u32)flit;
1822
1823                seq_printf(seq, "  %08x %08x", hi, lo);
1824        }
1825        seq_puts(seq, "\n");
1826        return 0;
1827}
1828
1829static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos)
1830{
1831        struct adapter *adapter = seq->private;
1832        struct mbox_cmd_log *log = adapter->mbox_log;
1833
1834        return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL);
1835}
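
/*
 * A worked example of the position encoding used above.  Position 0 is
 * the header (SEQ_START_TOKEN); any other position pos is handed around
 * as the opaque cookie pos + 1, which mboxlog_show() converts back with
 * (uintptr_t)v - 2 into a 0-based offset from the log cursor:
 *
 *      pos = 1  ->  v = 2  ->  entry_idx = cursor + 0   (oldest entry)
 *      pos = 2  ->  v = 3  ->  entry_idx = cursor + 1
 *
 * with entry_idx wrapping modulo log->size in mboxlog_show().
 */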
1836
1837static void *mboxlog_start(struct seq_file *seq, loff_t *pos)
1838{
1839        return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN;
1840}
1841
1842static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos)
1843{
1844        ++*pos;
1845        return mboxlog_get_idx(seq, *pos);
1846}
1847
1848static void mboxlog_stop(struct seq_file *seq, void *v)
1849{
1850}
1851
1852static const struct seq_operations mboxlog_seq_ops = {
1853        .start = mboxlog_start,
1854        .next  = mboxlog_next,
1855        .stop  = mboxlog_stop,
1856        .show  = mboxlog_show
1857};
1858
1859static int mboxlog_open(struct inode *inode, struct file *file)
1860{
1861        int res = seq_open(file, &mboxlog_seq_ops);
1862
1863        if (!res) {
1864                struct seq_file *seq = file->private_data;
1865
1866                seq->private = inode->i_private;
1867        }
1868        return res;
1869}
1870
1871static const struct file_operations mboxlog_fops = {
1872        .owner   = THIS_MODULE,
1873        .open    = mboxlog_open,
1874        .read    = seq_read,
1875        .llseek  = seq_lseek,
1876        .release = seq_release,
1877};
1878
1879/*
1880 * Show SGE Queue Set information.  We display QPL Queue Sets per line.
1881 */
1882#define QPL     4
1883
1884static int sge_qinfo_show(struct seq_file *seq, void *v)
1885{
1886        struct adapter *adapter = seq->private;
1887        int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1888        int qs, r = (uintptr_t)v - 1;
1889
1890        if (r)
1891                seq_putc(seq, '\n');
1892
1893        #define S3(fmt_spec, s, v) \
1894                do {\
1895                        seq_printf(seq, "%-12s", s); \
1896                        for (qs = 0; qs < n; ++qs) \
1897                                seq_printf(seq, " %16" fmt_spec, v); \
1898                        seq_putc(seq, '\n'); \
1899                } while (0)
1900        #define S(s, v)         S3("s", s, v)
1901        #define T(s, v)         S3("u", s, txq[qs].v)
1902        #define R(s, v)         S3("u", s, rxq[qs].v)
1903
1904        if (r < eth_entries) {
1905                const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1906                const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1907                int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1908
1909                S("QType:", "Ethernet");
1910                S("Interface:",
1911                  (rxq[qs].rspq.netdev
1912                   ? rxq[qs].rspq.netdev->name
1913                   : "N/A"));
1914                S3("d", "Port:",
1915                   (rxq[qs].rspq.netdev
1916                    ? ((struct port_info *)
1917                       netdev_priv(rxq[qs].rspq.netdev))->port_id
1918                    : -1));
1919                T("TxQ ID:", q.abs_id);
1920                T("TxQ size:", q.size);
1921                T("TxQ inuse:", q.in_use);
1922                T("TxQ PIdx:", q.pidx);
1923                T("TxQ CIdx:", q.cidx);
1924                R("RspQ ID:", rspq.abs_id);
1925                R("RspQ size:", rspq.size);
1926                R("RspQE size:", rspq.iqe_len);
1927                S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
1928                S3("u", "Intr pktcnt:",
1929                   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
1930                R("RspQ CIdx:", rspq.cidx);
1931                R("RspQ Gen:", rspq.gen);
1932                R("FL ID:", fl.abs_id);
1933                R("FL size:", fl.size - MIN_FL_RESID);
1934                R("FL avail:", fl.avail);
1935                R("FL PIdx:", fl.pidx);
1936                R("FL CIdx:", fl.cidx);
1937                return 0;
1938        }
1939
1940        r -= eth_entries;
1941        if (r == 0) {
1942                const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1943
1944                seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
1945                seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
1946                seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1947                           qtimer_val(adapter, evtq));
1948                seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1949                           adapter->sge.counter_val[evtq->pktcnt_idx]);
1950                seq_printf(seq, "%-12s %16u\n", "RspQ CIdx:", evtq->cidx);
1951                seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
1952        } else if (r == 1) {
1953                const struct sge_rspq *intrq = &adapter->sge.intrq;
1954
1955                seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
1956                seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
1957                seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1958                           qtimer_val(adapter, intrq));
1959                seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1960                           adapter->sge.counter_val[intrq->pktcnt_idx]);
1961                seq_printf(seq, "%-12s %16u\n", "RspQ CIdx:", intrq->cidx);
1962                seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
1963        }
1964
1965        #undef R
1966        #undef T
1967        #undef S
1968        #undef S3
1969
1970        return 0;
1971}
1972
1973/*
1974 * Return the number of "entries" in our "file".  We group the multi-Queue
1975 * sections with QPL Queue Sets per "entry".  The sections of the output are:
1976 *
1977 *     Ethernet RX/TX Queue Sets
1978 *     Firmware Event Queue
1979 *     Forwarded Interrupt Queue (if in MSI mode)
1980 */
1981static int sge_queue_entries(const struct adapter *adapter)
1982{
1983        return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1984                ((adapter->flags & USING_MSI) != 0);
1985}
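
/*
 * A worked example (numbers hypothetical): with ethqsets = 10, QPL = 4
 * and MSI mode in use,
 *
 *      DIV_ROUND_UP(10, 4)  =  3   Ethernet entries
 *      firmware event queue  + 1
 *      forwarded intr queue  + 1   (USING_MSI)   =>  5 entries
 */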
1986
1987static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1988{
1989        int entries = sge_queue_entries(seq->private);
1990
1991        return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1992}
1993
1994static void sge_queue_stop(struct seq_file *seq, void *v)
1995{
1996}
1997
1998static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
1999{
2000        int entries = sge_queue_entries(seq->private);
2001
2002        ++*pos;
2003        return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2004}
2005
2006static const struct seq_operations sge_qinfo_seq_ops = {
2007        .start = sge_queue_start,
2008        .next  = sge_queue_next,
2009        .stop  = sge_queue_stop,
2010        .show  = sge_qinfo_show
2011};
2012
2013static int sge_qinfo_open(struct inode *inode, struct file *file)
2014{
2015        int res = seq_open(file, &sge_qinfo_seq_ops);
2016
2017        if (!res) {
2018                struct seq_file *seq = file->private_data;
2019                seq->private = inode->i_private;
2020        }
2021        return res;
2022}
2023
2024static const struct file_operations sge_qinfo_debugfs_fops = {
2025        .owner   = THIS_MODULE,
2026        .open    = sge_qinfo_open,
2027        .read    = seq_read,
2028        .llseek  = seq_lseek,
2029        .release = seq_release,
2030};
2031
2032/*
2033 * Show SGE Queue Set statistics.  We display QPL Queue Sets per line.
2034 */
2035#define QPL     4
2036
2037static int sge_qstats_show(struct seq_file *seq, void *v)
2038{
2039        struct adapter *adapter = seq->private;
2040        int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
2041        int qs, r = (uintptr_t)v - 1;
2042
2043        if (r)
2044                seq_putc(seq, '\n');
2045
2046        #define S3(fmt, s, v) \
2047                do { \
2048                        seq_printf(seq, "%-16s", s); \
2049                        for (qs = 0; qs < n; ++qs) \
2050                                seq_printf(seq, " %8" fmt, v); \
2051                        seq_putc(seq, '\n'); \
2052                } while (0)
2053        #define S(s, v)         S3("s", s, v)
2054
2055        #define T3(fmt, s, v)   S3(fmt, s, txq[qs].v)
2056        #define T(s, v)         T3("lu", s, v)
2057
2058        #define R3(fmt, s, v)   S3(fmt, s, rxq[qs].v)
2059        #define R(s, v)         R3("lu", s, v)
2060
2061        if (r < eth_entries) {
2062                const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
2063                const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
2064                int n = min(QPL, adapter->sge.ethqsets - QPL * r);
2065
2066                S("QType:", "Ethernet");
2067                S("Interface:",
2068                  (rxq[qs].rspq.netdev
2069                   ? rxq[qs].rspq.netdev->name
2070                   : "N/A"));
2071                R3("u", "RspQNullInts:", rspq.unhandled_irqs);
2072                R("RxPackets:", stats.pkts);
2073                R("RxCSO:", stats.rx_cso);
2074                R("VLANxtract:", stats.vlan_ex);
2075                R("LROmerged:", stats.lro_merged);
2076                R("LROpackets:", stats.lro_pkts);
2077                R("RxDrops:", stats.rx_drops);
2078                T("TSO:", tso);
2079                T("TxCSO:", tx_cso);
2080                T("VLANins:", vlan_ins);
2081                T("TxQFull:", q.stops);
2082                T("TxQRestarts:", q.restarts);
2083                T("TxMapErr:", mapping_err);
2084                R("FLAllocErr:", fl.alloc_failed);
2085                R("FLLrgAlcErr:", fl.large_alloc_failed);
2086                R("FLStarving:", fl.starving);
2087                return 0;
2088        }
2089
2090        r -= eth_entries;
2091        if (r == 0) {
2092                const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
2093
2094                seq_printf(seq, "%-16s %8s\n", "QType:", "FW event queue");
2095                seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2096                           evtq->unhandled_irqs);
2097                seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
2098                seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
2099        } else if (r == 1) {
2100                const struct sge_rspq *intrq = &adapter->sge.intrq;
2101
2102                seq_printf(seq, "%-16s %8s\n", "QType:", "Interrupt Queue");
2103                seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2104                           intrq->unhandled_irqs);
2105                seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
2106                seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
2107        }
2108
2109        #undef R
2110        #undef T
2111        #undef S
2112        #undef R3
2113        #undef T3
2114        #undef S3
2115
2116        return 0;
2117}
2118
2119/*
2120 * Return the number of "entries" in our "file".  We group the multi-Queue
2121 * sections with QPL Queue Sets per "entry".  The sections of the output are:
2122 *
2123 *     Ethernet RX/TX Queue Sets
2124 *     Firmware Event Queue
2125 *     Forwarded Interrupt Queue (if in MSI mode)
2126 */
2127static int sge_qstats_entries(const struct adapter *adapter)
2128{
2129        return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2130                ((adapter->flags & USING_MSI) != 0);
2131}
2132
2133static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
2134{
2135        int entries = sge_qstats_entries(seq->private);
2136
2137        return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2138}
2139
2140static void sge_qstats_stop(struct seq_file *seq, void *v)
2141{
2142}
2143
2144static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
2145{
2146        int entries = sge_qstats_entries(seq->private);
2147
2148        (*pos)++;
2149        return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2150}
2151
2152static const struct seq_operations sge_qstats_seq_ops = {
2153        .start = sge_qstats_start,
2154        .next  = sge_qstats_next,
2155        .stop  = sge_qstats_stop,
2156        .show  = sge_qstats_show
2157};
2158
2159static int sge_qstats_open(struct inode *inode, struct file *file)
2160{
2161        int res = seq_open(file, &sge_qstats_seq_ops);
2162
2163        if (res == 0) {
2164                struct seq_file *seq = file->private_data;
2165                seq->private = inode->i_private;
2166        }
2167        return res;
2168}
2169
2170static const struct file_operations sge_qstats_proc_fops = {
2171        .owner   = THIS_MODULE,
2172        .open    = sge_qstats_open,
2173        .read    = seq_read,
2174        .llseek  = seq_lseek,
2175        .release = seq_release,
2176};
2177
2178/*
2179 * Show PCI-E SR-IOV Virtual Function Resource Limits.
2180 */
2181static int resources_show(struct seq_file *seq, void *v)
2182{
2183        struct adapter *adapter = seq->private;
2184        struct vf_resources *vfres = &adapter->params.vfres;
2185
2186        #define S(desc, fmt, var) \
2187                seq_printf(seq, "%-60s " fmt "\n", \
2188                           desc " (" #var "):", vfres->var)
2189
2190        S("Virtual Interfaces", "%d", nvi);
2191        S("Egress Queues", "%d", neq);
2192        S("Ethernet Control", "%d", nethctrl);
2193        S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
2194        S("Ingress Queues", "%d", niq);
2195        S("Traffic Class", "%d", tc);
2196        S("Port Access Rights Mask", "%#x", pmask);
2197        S("MAC Address Filters", "%d", nexactf);
2198        S("Firmware Command Read Capabilities", "%#x", r_caps);
2199        S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
2200
2201        #undef S
2202
2203        return 0;
2204}
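
/*
 * For reference, what one S() line above expands to -- the "#var"
 * stringizes the structure member name into the label:
 *
 *      S("Virtual Interfaces", "%d", nvi);
 * becomes
 *      seq_printf(seq, "%-60s %d\n",
 *                 "Virtual Interfaces (nvi):", vfres->nvi);
 */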
2205
2206static int resources_open(struct inode *inode, struct file *file)
2207{
2208        return single_open(file, resources_show, inode->i_private);
2209}
2210
2211static const struct file_operations resources_proc_fops = {
2212        .owner   = THIS_MODULE,
2213        .open    = resources_open,
2214        .read    = seq_read,
2215        .llseek  = seq_lseek,
2216        .release = single_release,
2217};
2218
2219/*
2220 * Show Virtual Interfaces.
2221 */
2222static int interfaces_show(struct seq_file *seq, void *v)
2223{
2224        if (v == SEQ_START_TOKEN) {
2225                seq_puts(seq, "Interface  Port   VIID\n");
2226        } else {
2227                struct adapter *adapter = seq->private;
2228                int pidx = (uintptr_t)v - 2;
2229                struct net_device *dev = adapter->port[pidx];
2230                struct port_info *pi = netdev_priv(dev);
2231
2232                seq_printf(seq, "%9s  %4d  %#5x\n",
2233                           dev->name, pi->port_id, pi->viid);
2234        }
2235        return 0;
2236}
2237
2238static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
2239{
2240        return pos <= adapter->params.nports
2241                ? (void *)(uintptr_t)(pos + 1)
2242                : NULL;
2243}
2244
2245static void *interfaces_start(struct seq_file *seq, loff_t *pos)
2246{
2247        return *pos
2248                ? interfaces_get_idx(seq->private, *pos)
2249                : SEQ_START_TOKEN;
2250}
2251
2252static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
2253{
2254        (*pos)++;
2255        return interfaces_get_idx(seq->private, *pos);
2256}
2257
2258static void interfaces_stop(struct seq_file *seq, void *v)
2259{
2260}
2261
2262static const struct seq_operations interfaces_seq_ops = {
2263        .start = interfaces_start,
2264        .next  = interfaces_next,
2265        .stop  = interfaces_stop,
2266        .show  = interfaces_show
2267};
2268
2269static int interfaces_open(struct inode *inode, struct file *file)
2270{
2271        int res = seq_open(file, &interfaces_seq_ops);
2272
2273        if (res == 0) {
2274                struct seq_file *seq = file->private_data;
2275                seq->private = inode->i_private;
2276        }
2277        return res;
2278}
2279
2280static const struct file_operations interfaces_proc_fops = {
2281        .owner   = THIS_MODULE,
2282        .open    = interfaces_open,
2283        .read    = seq_read,
2284        .llseek  = seq_lseek,
2285        .release = seq_release,
2286};
2287
2288/*
2289 * /sys/kernel/debug/cxgb4vf/ files list.
2290 */
2291struct cxgb4vf_debugfs_entry {
2292        const char *name;               /* name of debugfs node */
2293        umode_t mode;                   /* file system mode */
2294        const struct file_operations *fops;
2295};
2296
2297static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2298        { "mboxlog",    S_IRUGO, &mboxlog_fops },
2299        { "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
2300        { "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
2301        { "resources",  S_IRUGO, &resources_proc_fops },
2302        { "interfaces", S_IRUGO, &interfaces_proc_fops },
2303};
2304
2305/*
2306 * Module and device initialization and cleanup code.
2307 * ==================================================
2308 */
2309
2310/*
2311 * Set up our /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2312 * directory (debugfs_root) has already been set up.
2313 */
2314static int setup_debugfs(struct adapter *adapter)
2315{
2316        int i;
2317
2318        BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2319
2320        /*
2321         * Debugfs support is best effort.
2322         */
2323        for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2324                (void)debugfs_create_file(debugfs_files[i].name,
2325                                  debugfs_files[i].mode,
2326                                  adapter->debugfs_root,
2327                                  (void *)adapter,
2328                                  debugfs_files[i].fops);
2329
2330        return 0;
2331}
2332
2333/*
2334 * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
2335 * it to our caller to tear down the directory (debugfs_root).
2336 */
2337static void cleanup_debugfs(struct adapter *adapter)
2338{
2339        BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2340
2341        /*
2342         * Unlike our sister routine cleanup_proc(), we don't need to remove
2343         * individual entries because a call will be made to
2344         * debugfs_remove_recursive().  We just need to clean up any ancillary
2345         * persistent state.
2346         */
2347        /* nothing to do */
2348}
2349
2350/* Figure out how many Ports and Queue Sets we can support.  This depends on
2351 * knowing our Virtual Function Resources and may be called a second time if
2352 * we fall back from MSI-X to MSI Interrupt Mode.
2353 */
2354static void size_nports_qsets(struct adapter *adapter)
2355{
2356        struct vf_resources *vfres = &adapter->params.vfres;
2357        unsigned int ethqsets, pmask_nports;
2358
2359        /* The number of "ports" which we support is equal to the number of
2360         * Virtual Interfaces with which we've been provisioned.
2361         */
2362        adapter->params.nports = vfres->nvi;
2363        if (adapter->params.nports > MAX_NPORTS) {
2364                dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
2365                         " virtual interfaces (driver maximum)\n", MAX_NPORTS,
2366                         adapter->params.nports);
2367                adapter->params.nports = MAX_NPORTS;
2368        }
2369
2370        /* We may have been provisioned with more VIs than the number of
2371         * ports we're allowed to access (our Port Access Rights Mask).
2372         * This is obviously a configuration conflict but we don't want to
2373         * crash the kernel or anything silly just because of that.
2374         */
2375        pmask_nports = hweight32(adapter->params.vfres.pmask);
2376        if (pmask_nports < adapter->params.nports) {
2377                dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
2378                         " virtual interfaces; limited by Port Access Rights"
2379                         " mask %#x\n", pmask_nports, adapter->params.nports,
2380                         adapter->params.vfres.pmask);
2381                adapter->params.nports = pmask_nports;
2382        }
2383
2384        /* We need to reserve an Ingress Queue for the Asynchronous Firmware
2385         * Event Queue.  And if we're using MSI Interrupts, we'll also need to
2386         * reserve an Ingress Queue for Forwarded Interrupts.
2387         *
2388         * The rest of the FL/Intr-capable ingress queues will be matched up
2389         * one-for-one with Ethernet/Control egress queues in order to form
2390         * "Queue Sets" which will be apportioned between the "ports".  For
2391         * each Queue Set, we'll need the ability to allocate two Egress
2392         * Contexts -- one for the Ingress Queue Free List and one for the TX
2393         * Ethernet Queue.
2394         *
2395         * Note that even if we're currently configured to use MSI-X
2396         * Interrupts (module variable msi == MSI_MSIX) we may get downgraded
2397         * to MSI Interrupts if we can't get enough MSI-X Interrupts.  If that
2398         * happens we'll need to adjust things later.
2399         */
2400        ethqsets = vfres->niqflint - 1 - (msi == MSI_MSI);
2401        if (vfres->nethctrl != ethqsets)
2402                ethqsets = min(vfres->nethctrl, ethqsets);
2403        if (vfres->neq < ethqsets*2)
2404                ethqsets = vfres->neq/2;
2405        if (ethqsets > MAX_ETH_QSETS)
2406                ethqsets = MAX_ETH_QSETS;
2407        adapter->sge.max_ethqsets = ethqsets;
2408
2409        if (adapter->sge.max_ethqsets < adapter->params.nports) {
2410                dev_warn(adapter->pdev_dev, "only using %d of %d available"
2411                         " virtual interfaces (too few Queue Sets)\n",
2412                         adapter->sge.max_ethqsets, adapter->params.nports);
2413                adapter->params.nports = adapter->sge.max_ethqsets;
2414        }
2415}
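
/*
 * A worked example of the sizing above (resource numbers hypothetical).
 * With niqflint = 34, nethctrl = 32, neq = 48 and MSI-X in use:
 *
 *      ethqsets = 34 - 1 - 0   = 33   (FW event queue reserved)
 *      ethqsets = min(32, 33)  = 32   (limited by nethctrl)
 *      ethqsets = 48 / 2       = 24   (2 egress contexts per Queue Set)
 *      ethqsets = min(24, MAX_ETH_QSETS)
 */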
2416
2417/*
2418 * Perform early "adapter" initialization.  This is where we discover what
2419 * adapter parameters we're going to be using and initialize basic adapter
2420 * hardware support.
2421 */
2422static int adap_init0(struct adapter *adapter)
2423{
2424        struct sge_params *sge_params = &adapter->params.sge;
2425        struct sge *s = &adapter->sge;
2426        int err;
2427        u32 param, val = 0;
2428
2429        /*
2430         * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2431         * 2.6.31 and later we can't call pci_reset_function() in order to
2432         * issue an FLR because of a self-deadlock on the device semaphore.
2433         * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2434         * cases where they're needed -- for instance, some versions of KVM
2435         * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2436         * use the firmware based reset in order to reset any per function
2437         * state.
2438         */
2439        err = t4vf_fw_reset(adapter);
2440        if (err < 0) {
2441                dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2442                return err;
2443        }
2444
2445        /*
2446         * Grab basic operational parameters.  These will predominantly have
2447         * been set up by the Physical Function Driver or will be hard coded
2448         * into the adapter.  We just have to live with them ...  Note that
2449         * we _must_ get our VPD parameters before our SGE parameters because
2450         * we need to know the adapter's core clock from the VPD in order to
2451         * properly decode the SGE Timer Values.
2452         */
2453        err = t4vf_get_dev_params(adapter);
2454        if (err) {
2455                dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2456                        " device parameters: err=%d\n", err);
2457                return err;
2458        }
2459        err = t4vf_get_vpd_params(adapter);
2460        if (err) {
2461                dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2462                        " VPD parameters: err=%d\n", err);
2463                return err;
2464        }
2465        err = t4vf_get_sge_params(adapter);
2466        if (err) {
2467                dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2468                        " SGE parameters: err=%d\n", err);
2469                return err;
2470        }
2471        err = t4vf_get_rss_glb_config(adapter);
2472        if (err) {
2473                dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2474                        " RSS parameters: err=%d\n", err);
2475                return err;
2476        }
2477        if (adapter->params.rss.mode !=
2478            FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2479                dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2480                        " mode %d\n", adapter->params.rss.mode);
2481                return -EINVAL;
2482        }
2483        err = t4vf_sge_init(adapter);
2484        if (err) {
2485                dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2486                        " err=%d\n", err);
2487                return err;
2488        }
2489
2490        /* If we're running on newer firmware, let it know that we're
2491         * prepared to deal with encapsulated CPL messages.  Older
2492         * firmware won't understand this and we'll just get
2493         * unencapsulated messages ...
2494         */
2495        param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2496                FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
2497        val = 1;
2498        (void) t4vf_set_params(adapter, 1, &param, &val);
2499
2500        /*
2501         * Retrieve our RX interrupt holdoff timer values and counter
2502         * threshold values from the SGE parameters.
2503         */
2504        s->timer_val[0] = core_ticks_to_us(adapter,
2505                TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
2506        s->timer_val[1] = core_ticks_to_us(adapter,
2507                TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
2508        s->timer_val[2] = core_ticks_to_us(adapter,
2509                TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
2510        s->timer_val[3] = core_ticks_to_us(adapter,
2511                TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
2512        s->timer_val[4] = core_ticks_to_us(adapter,
2513                TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
2514        s->timer_val[5] = core_ticks_to_us(adapter,
2515                TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));
2516
2517        s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
2518        s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
2519        s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
2520        s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);
2521
2522        /*
2523         * Grab our Virtual Interface resource allocation, extract the
2524         * features that we're interested in and do a bit of sanity testing on
2525         * what we discover.
2526         */
2527        err = t4vf_get_vfres(adapter);
2528        if (err) {
2529                dev_err(adapter->pdev_dev, "unable to get virtual interface"
2530                        " resources: err=%d\n", err);
2531                return err;
2532        }
2533
2534        /* Check for various parameter sanity issues */
2535        if (adapter->params.vfres.pmask == 0) {
2536                dev_err(adapter->pdev_dev, "no port access configured/"
2537                        "usable!\n");
2538                return -EINVAL;
2539        }
2540        if (adapter->params.vfres.nvi == 0) {
2541                dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2542                        "usable!\n");
2543                return -EINVAL;
2544        }
2545
2546        /* Initialize nports and max_ethqsets now that we have our Virtual
2547         * Function Resources.
2548         */
2549        size_nports_qsets(adapter);
2550
2551        return 0;
2552}
2553
2554static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2555                             u8 pkt_cnt_idx, unsigned int size,
2556                             unsigned int iqe_size)
2557{
2558        rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
2559                             (pkt_cnt_idx < SGE_NCOUNTERS ?
2560                              QINTR_CNT_EN_F : 0));
2561        rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2562                            ? pkt_cnt_idx
2563                            : 0);
2564        rspq->iqe_len = iqe_size;
2565        rspq->size = size;
2566}
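
/*
 * An illustrative call (parameter values hypothetical): a 1024-entry
 * queue of 64-byte entries using holdoff timer index 5 with packet
 * counting disabled,
 *
 *      init_rspq(&rxq->rspq, 5, SGE_NCOUNTERS, 1024, 64);
 *
 * Passing pkt_cnt_idx >= SGE_NCOUNTERS leaves QINTR_CNT_EN_F clear and
 * forces pktcnt_idx to 0, so only the timer moderates interrupts.
 */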
2567
2568/*
2569 * Perform default configuration of DMA queues depending on the number and
2570 * type of ports we found and the number of available CPUs.  Most settings can
2571 * be modified by the admin via ethtool and cxgbtool prior to the adapter
2572 * being brought up for the first time.
2573 */
2574static void cfg_queues(struct adapter *adapter)
2575{
2576        struct sge *s = &adapter->sge;
2577        int q10g, n10g, qidx, pidx, qs;
2578        size_t iqe_size;
2579
2580        /*
2581         * We should not be called till we know how many Queue Sets we can
2582         * support.  In particular, this means that we need to know what kind
2583         * of interrupts we'll be using ...
2584         */
2585        BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2586
2587        /*
2588         * Count the number of 10GbE Virtual Interfaces that we have.
2589         */
2590        n10g = 0;
2591        for_each_port(adapter, pidx)
2592                n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2593
2594        /*
2595         * We default to 1 queue per non-10G port and up to # of cores queues
2596         * per 10G port.
2597         */
2598        if (n10g == 0)
2599                q10g = 0;
2600        else {
2601                int n1g = (adapter->params.nports - n10g);
2602                q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2603                if (q10g > num_online_cpus())
2604                        q10g = num_online_cpus();
2605        }
2606
2607        /*
2608         * Allocate the "Queue Sets" to the various Virtual Interfaces.
2609         * The layout will be established in setup_sge_queues() when the
2610         * adapter is brought up for the first time.
2611         */
2612        qidx = 0;
2613        for_each_port(adapter, pidx) {
2614                struct port_info *pi = adap2pinfo(adapter, pidx);
2615
2616                pi->first_qset = qidx;
2617                pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
2618                qidx += pi->nqsets;
2619        }
2620        s->ethqsets = qidx;
2621
2622        /*
2623         * The Ingress Queue Entry Size for our various Response Queues needs
2624         * to be big enough to accommodate the largest message we can receive
2625         * from the chip/firmware; which is 64 bytes ...
2626         */
2627        iqe_size = 64;
2628
2629        /*
2630         * Set up default Queue Set parameters ...  Start off with the
2631         * shortest interrupt holdoff timer.
2632         */
2633        for (qs = 0; qs < s->max_ethqsets; qs++) {
2634                struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2635                struct sge_eth_txq *txq = &s->ethtxq[qs];
2636
2637                init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2638                rxq->fl.size = 72;
2639                txq->q.size = 1024;
2640        }
2641
2642        /*
2643         * The firmware event queue is used for link state changes and
2644         * notifications of TX DMA completions.
2645         */
2646        init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2647
2648        /*
2649         * The forwarded interrupt queue is used when we're in MSI interrupt
2650         * mode.  In this mode all interrupts associated with RX queues will
2651         * be forwarded to a single queue which we'll associate with our MSI
2652         * interrupt vector.  The messages dropped in the forwarded interrupt
2653         * queue will indicate which ingress queue needs servicing ...  This
2654         * queue needs to be large enough to accommodate all of the ingress
2655         * queues which are forwarding their interrupt (+1 to prevent the PIDX
2656         * from equalling the CIDX if every ingress queue has an outstanding
2657         * interrupt).  The queue doesn't need to be any larger because no
2658         * ingress queue will ever have more than one outstanding interrupt at
2659         * any time ...
2660         */
2661        init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2662                  iqe_size);
2663}
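
/*
 * A worked allocation example (topology hypothetical): two ports, one
 * of them 10G, max_ethqsets = 8 and 4 online CPUs:
 *
 *      n10g = 1, n1g = 1
 *      q10g = (8 - 1) / 1 = 7, capped to num_online_cpus() = 4
 *      port 0 (10G): first_qset = 0, nqsets = 4
 *      port 1 (1G):  first_qset = 4, nqsets = 1   =>  ethqsets = 5
 */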
2664
2665/*
2666 * Reduce the number of Ethernet queues across all ports to at most n.
2667 * n provides at least one queue per port.
2668 */
2669static void reduce_ethqs(struct adapter *adapter, int n)
2670{
2671        int i;
2672        struct port_info *pi;
2673
2674        /*
2675         * While we have too many active Ethernet Queue Sets, iterate across the
2676         * "ports" and reduce their individual Queue Set allocations.
2677         */
2678        BUG_ON(n < adapter->params.nports);
2679        while (n < adapter->sge.ethqsets)
2680                for_each_port(adapter, i) {
2681                        pi = adap2pinfo(adapter, i);
2682                        if (pi->nqsets > 1) {
2683                                pi->nqsets--;
2684                                adapter->sge.ethqsets--;
2685                                if (adapter->sge.ethqsets <= n)
2686                                        break;
2687                        }
2688                }
2689
2690        /*
2691         * Reassign the starting Queue Sets for each of the "ports" ...
2692         */
2693        n = 0;
2694        for_each_port(adapter, i) {
2695                pi = adap2pinfo(adapter, i);
2696                pi->first_qset = n;
2697                n += pi->nqsets;
2698        }
2699}
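
/*
 * A sample reduction trace (numbers hypothetical): shrinking from 5
 * Queue Sets to n = 3 with two ports holding (4, 1):
 *
 *      pass 1: port 0: 4 -> 3   (ethqsets 5 -> 4); port 1 skipped (minimal)
 *      pass 2: port 0: 3 -> 2   (ethqsets 4 -> 3, inner loop breaks)
 *      reassign: port 0 first_qset = 0, port 1 first_qset = 2
 */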
2700
2701/*
2702 * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2703 * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2704 * need.  Minimally we need one for every Virtual Interface plus those needed
2705 * for our "extras".  Note that this process may lower the maximum number of
2706 * allowed Queue Sets ...
2707 */
2708static int enable_msix(struct adapter *adapter)
2709{
2710        int i, want, need, nqsets;
2711        struct msix_entry entries[MSIX_ENTRIES];
2712        struct sge *s = &adapter->sge;
2713
2714        for (i = 0; i < MSIX_ENTRIES; ++i)
2715                entries[i].entry = i;
2716
2717        /*
2718         * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2719         * plus those needed for our "extras" (for example, the firmware
2720         * message queue).  We _need_ at least one "Queue Set" per Virtual
2721         * Interface plus those needed for our "extras".  So now we get to see
2722         * if the song is right ...
2723         */
2724        want = s->max_ethqsets + MSIX_EXTRAS;
2725        need = adapter->params.nports + MSIX_EXTRAS;
2726
2727        want = pci_enable_msix_range(adapter->pdev, entries, need, want);
2728        if (want < 0)
2729                return want;
2730
2731        nqsets = want - MSIX_EXTRAS;
2732        if (nqsets < s->max_ethqsets) {
2733                dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2734                         " for %d Queue Sets\n", nqsets);
2735                s->max_ethqsets = nqsets;
2736                if (nqsets < s->ethqsets)
2737                        reduce_ethqs(adapter, nqsets);
2738        }
2739        for (i = 0; i < want; ++i)
2740                adapter->msix_info[i].vec = entries[i].vector;
2741
2742        return 0;
2743}
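
/*
 * A worked example (counts hypothetical): with max_ethqsets = 16, two
 * ports and MSIX_EXTRAS = 1,
 *
 *      want = 16 + 1 = 17,  need = 2 + 1 = 3
 *
 * pci_enable_msix_range() then returns a vector count in [3, 17] or a
 * negative errno; if it returns, say, 9, only 8 Queue Sets remain and
 * reduce_ethqs() trims the per-port allocations to fit.
 */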
2744
2745static const struct net_device_ops cxgb4vf_netdev_ops   = {
2746        .ndo_open               = cxgb4vf_open,
2747        .ndo_stop               = cxgb4vf_stop,
2748        .ndo_start_xmit         = t4vf_eth_xmit,
2749        .ndo_get_stats          = cxgb4vf_get_stats,
2750        .ndo_set_rx_mode        = cxgb4vf_set_rxmode,
2751        .ndo_set_mac_address    = cxgb4vf_set_mac_addr,
2752        .ndo_validate_addr      = eth_validate_addr,
2753        .ndo_do_ioctl           = cxgb4vf_do_ioctl,
2754        .ndo_change_mtu_rh74    = cxgb4vf_change_mtu,
2755        .ndo_fix_features       = cxgb4vf_fix_features,
2756        .ndo_set_features       = cxgb4vf_set_features,
2757#ifdef CONFIG_NET_POLL_CONTROLLER
2758        .ndo_poll_controller    = cxgb4vf_poll_controller,
2759#endif
2760};
2761
2762/*
2763 * "Probe" a device: initialize a device and construct all kernel and driver
2764 * state needed to manage the device.  This routine is called "init_one" in
2765 * the PF Driver ...
2766 */
2767static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2768                             const struct pci_device_id *ent)
2769{
2770        int pci_using_dac;
2771        int err, pidx;
2772        unsigned int pmask;
2773        struct adapter *adapter;
2774        struct port_info *pi;
2775        struct net_device *netdev;
2776        unsigned int pf;
2777
2778        /*
2779         * Print our driver banner the first time we're called to initialize a
2780         * device.
2781         */
2782        pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);
2783
2784        /*
2785         * Initialize generic PCI device state.
2786         */
2787        err = pci_enable_device(pdev);
2788        if (err) {
2789                dev_err(&pdev->dev, "cannot enable PCI device\n");
2790                return err;
2791        }
2792
2793        /*
2794         * Reserve PCI resources for the device.  If we can't get them some
2795         * other driver may have already claimed the device ...
2796         */
2797        err = pci_request_regions(pdev, KBUILD_MODNAME);
2798        if (err) {
2799                dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2800                goto err_disable_device;
2801        }
2802
2803        /*
2804         * Set up our DMA mask: try for 64-bit address masking first and
2805         * fall back to 32-bit if we can't get 64 bits ...
2806         */
2807        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2808        if (err == 0) {
2809                err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2810                if (err) {
2811                        dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2812                                " coherent allocations\n");
2813                        goto err_release_regions;
2814                }
2815                pci_using_dac = 1;
2816        } else {
2817                err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2818                if (err != 0) {
2819                        dev_err(&pdev->dev, "no usable DMA configuration\n");
2820                        goto err_release_regions;
2821                }
2822                pci_using_dac = 0;
2823        }
2824
2825        /*
2826         * Enable bus mastering for the device ...
2827         */
2828        pci_set_master(pdev);
2829
2830        /*
2831         * Allocate our adapter data structure and attach it to the device.
2832         */
2833        adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2834        if (!adapter) {
2835                err = -ENOMEM;
2836                goto err_release_regions;
2837        }
2838        pci_set_drvdata(pdev, adapter);
2839        adapter->pdev = pdev;
2840        adapter->pdev_dev = &pdev->dev;
2841
2842        adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
2843                                    (sizeof(struct mbox_cmd) *
2844                                     T4VF_OS_LOG_MBOX_CMDS),
2845                                    GFP_KERNEL);
2846        if (!adapter->mbox_log) {
2847                err = -ENOMEM;
2848                goto err_free_adapter;
2849        }
2850        adapter->mbox_log->size = T4VF_OS_LOG_MBOX_CMDS;
2851
2852        /*
2853         * Initialize SMP data synchronization resources.
2854         */
2855        spin_lock_init(&adapter->stats_lock);
2856        spin_lock_init(&adapter->mbox_lock);
2857        INIT_LIST_HEAD(&adapter->mlist.list);
2858
2859        /*
2860         * Map our I/O registers in BAR0.
2861         */
2862        adapter->regs = pci_ioremap_bar(pdev, 0);
2863        if (!adapter->regs) {
2864                dev_err(&pdev->dev, "cannot map device registers\n");
2865                err = -ENOMEM;
2866                goto err_free_adapter;
2867        }
2868
2869        /* Wait for the device to become ready before proceeding ...
2870         */
2871        err = t4vf_prep_adapter(adapter);
2872        if (err) {
2873                dev_err(adapter->pdev_dev, "device didn't become ready:"
2874                        " err=%d\n", err);
2875                goto err_unmap_bar0;
2876        }
2877
2878        /* For T5 and later we want to use the new BAR-based User Doorbells,
2879         * so we need to map BAR2 here ...
2880         */
2881        if (!is_t4(adapter->params.chip)) {
2882                adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
2883                                           pci_resource_len(pdev, 2));
2884                if (!adapter->bar2) {
2885                        dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
2886                        err = -ENOMEM;
2887                        goto err_unmap_bar0;
2888                }
2889        }
2890        /*
2891         * Initialize adapter level features.
2892         */
2893        adapter->name = pci_name(pdev);
2894        adapter->msg_enable = DFLT_MSG_ENABLE;
2895
2896        /* If possible, we use PCIe Relaxed Ordering Attribute to deliver
2897         * Ingress Packet Data to Free List Buffers in order to allow for
2898         * chipset performance optimizations between the Root Complex and
2899         * Memory Controllers.  (Messages to the associated Ingress Queue
2900         * notifying new Packet Placement in the Free List Buffers will be
2901         * sent without the Relaxed Ordering Attribute, thus guaranteeing that
2902         * all preceding PCIe Transaction Layer Packets will be processed
2903         * first.)  But some Root Complexes have various issues with Upstream
2904         * Transaction Layer Packets with the Relaxed Ordering Attribute set.
2905         * PCIe devices under such Root Complexes will have the Relaxed
2906         * Ordering bit cleared in their configuration space, so we check our
2907         * PCIe configuration space to see if it's flagged with advice against
2908         * using Relaxed Ordering.
2909         */
2910        if (!pcie_relaxed_ordering_enabled(pdev))
2911                adapter->flags |= ROOT_NO_RELAXED_ORDERING;

        err = adap_init0(adapter);
        if (err)
                goto err_unmap_bar;

        /*
         * Allocate our "adapter ports" and stitch everything together.
         */
        pmask = adapter->params.vfres.pmask;
        pf = t4vf_get_pf_from_vf(adapter);
        for_each_port(adapter, pidx) {
                int port_id, viid;
                u8 mac[ETH_ALEN];
                unsigned int naddr = 1;

                /*
                 * We simplistically allocate our virtual interfaces
                 * sequentially across the port numbers to which we have
                 * access rights.  This should be configurable in some
                 * manner ...
                 */
                if (pmask == 0)
                        break;
                port_id = ffs(pmask) - 1;
                pmask &= ~(1 << port_id);
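                /* (For example, a pmask of 0x5 yields port_id 0 on the
                 * first iteration and port_id 2 on the next.)
                 */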
                viid = t4vf_alloc_vi(adapter, port_id);
                if (viid < 0) {
                        dev_err(&pdev->dev,
                                "cannot allocate VI for port %d: err=%d\n",
                                port_id, viid);
                        err = viid;
                        goto err_free_dev;
                }

                /*
                 * Allocate our network device and stitch things together.
                 */
                netdev = alloc_etherdev_mq(sizeof(struct port_info),
                                           MAX_PORT_QSETS);
                if (netdev == NULL) {
                        t4vf_free_vi(adapter, viid);
                        err = -ENOMEM;
                        goto err_free_dev;
                }
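                /* alloc_etherdev_mq() sized the device for MAX_PORT_QSETS
                 * transmit queues; the real per-port queue counts are set
                 * later via netif_set_real_num_{tx,rx}_queues() once we
                 * know how many Queue Sets each port actually gets.
                 */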
                adapter->port[pidx] = netdev;
                SET_NETDEV_DEV(netdev, &pdev->dev);
                pi = netdev_priv(netdev);
                pi->adapter = adapter;
                pi->pidx = pidx;
                pi->port_id = port_id;
                pi->viid = viid;

                /*
                 * Initialize the starting state of our "port" and register
                 * it.
                 */
                pi->xact_addr_filt = -1;
                netif_carrier_off(netdev);
                netdev->irq = pdev->irq;

                netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
                        NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
                        NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM;
                netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
                        NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
                        NETIF_F_HIGHDMA;
                netdev->features = netdev->hw_features |
                                   NETIF_F_HW_VLAN_CTAG_TX;
                if (pci_using_dac)
                        netdev->features |= NETIF_F_HIGHDMA;

                netdev->priv_flags |= IFF_UNICAST_FLT;

                netdev->netdev_ops = &cxgb4vf_netdev_ops;
                netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
                netdev->dev_port = pi->port_id;

                /*
                 * Initialize the hardware/software state for the port.
                 */
                err = t4vf_port_init(adapter, pidx);
                if (err) {
                        dev_err(&pdev->dev, "cannot initialize port %d\n",
                                pidx);
                        goto err_free_dev;
                }

                err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac);
                if (err) {
                        dev_err(&pdev->dev,
                                "unable to determine MAC ACL address; continuing anyway (status %d)\n",
                                err);
                } else if (naddr && adapter->params.vfres.nvi == 1) {
                        struct sockaddr addr;

                        ether_addr_copy(addr.sa_data, mac);
                        err = cxgb4vf_set_mac_addr(netdev, &addr);
                        if (err) {
                                dev_err(&pdev->dev,
                                        "unable to set MAC address %pM\n",
                                        mac);
                                goto err_free_dev;
                        }
                        dev_info(&pdev->dev,
                                 "Using assigned MAC ACL: %pM\n", mac);
                }
        }

        /* See what interrupts we'll be using.  If we've been configured to
         * use MSI-X interrupts, try to enable them but fall back to using
         * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
         * get MSI interrupts we bail with the error.
         */
        if (msi == MSI_MSIX && enable_msix(adapter) == 0) {
                adapter->flags |= USING_MSIX;
        } else {
                if (msi == MSI_MSIX) {
                        dev_info(adapter->pdev_dev,
                                 "Unable to use MSI-X Interrupts; falling back to MSI Interrupts\n");

                        /* We're going to need a Forwarded Interrupt Queue so
                         * that may cut into how many Queue Sets we can
                         * support.
                         */
                        msi = MSI_MSI;
                        size_nports_qsets(adapter);
                }
                err = pci_enable_msi(pdev);
                if (err) {
                        dev_err(&pdev->dev,
                                "Unable to allocate MSI Interrupts; err=%d\n",
                                err);
                        goto err_free_dev;
                }
                adapter->flags |= USING_MSI;
        }

        /* Now that we know how many "ports" we have and what interrupt
         * mechanism we're going to use, we can configure our queue resources.
         */
        cfg_queues(adapter);

        /*
         * The "card" is now ready to go.  If any errors occur during device
         * registration we do not fail the whole "card" but rather proceed
         * only with the ports we manage to register successfully.  However,
         * we must register at least one net device.
         */
        for_each_port(adapter, pidx) {
                struct port_info *pi;

                netdev = adapter->port[pidx];
                if (netdev == NULL)
                        continue;
                pi = netdev_priv(netdev);

                netif_set_real_num_tx_queues(netdev, pi->nqsets);
                netif_set_real_num_rx_queues(netdev, pi->nqsets);

                err = register_netdev(netdev);
                if (err) {
                        dev_warn(&pdev->dev,
                                 "cannot register net device %s, skipping\n",
                                 netdev->name);
                        continue;
                }

                set_bit(pidx, &adapter->registered_device_map);
        }
        if (adapter->registered_device_map == 0) {
                dev_err(&pdev->dev, "could not register any net devices\n");
                err = -EINVAL;
                goto err_disable_interrupts;
        }

        /*
         * Set up our debugfs entries.
         */
        if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
                adapter->debugfs_root =
                        debugfs_create_dir(pci_name(pdev),
                                           cxgb4vf_debugfs_root);
                if (IS_ERR_OR_NULL(adapter->debugfs_root))
                        dev_warn(&pdev->dev,
                                 "could not create debugfs directory\n");
                else
                        setup_debugfs(adapter);
        }
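        /* (Assuming debugfs is mounted in the usual place, the entries
         * created by setup_debugfs() then live under
         * /sys/kernel/debug/cxgb4vf/<pci-name>/.)
         */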

        /*
         * Print a short notice on the existence and configuration of the new
         * VF network device ...
         */
        for_each_port(adapter, pidx) {
                dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
                         adapter->port[pidx]->name,
                         (adapter->flags & USING_MSIX) ? "MSI-X" :
                         (adapter->flags & USING_MSI)  ? "MSI" : "");
        }

        /*
         * Return success!
         */
        return 0;

        /*
         * Error recovery and exit code.  Unwind state that's been created
         * so far and return the error.
         */
err_disable_interrupts:
        if (adapter->flags & USING_MSIX) {
                pci_disable_msix(adapter->pdev);
                adapter->flags &= ~USING_MSIX;
        } else if (adapter->flags & USING_MSI) {
                pci_disable_msi(adapter->pdev);
                adapter->flags &= ~USING_MSI;
        }

err_free_dev:
        for_each_port(adapter, pidx) {
                netdev = adapter->port[pidx];
                if (netdev == NULL)
                        continue;
                pi = netdev_priv(netdev);
                t4vf_free_vi(adapter, pi->viid);
                if (test_bit(pidx, &adapter->registered_device_map))
                        unregister_netdev(netdev);
                free_netdev(netdev);
        }

err_unmap_bar:
        if (!is_t4(adapter->params.chip))
                iounmap(adapter->bar2);

err_unmap_bar0:
        iounmap(adapter->regs);

err_free_adapter:
        kfree(adapter->mbox_log);
        kfree(adapter);

err_release_regions:
        pci_release_regions(pdev);
        pci_clear_master(pdev);

err_disable_device:
        pci_disable_device(pdev);

        return err;
}

/*
 * "Remove" a device: tear down all kernel and driver state created in the
 * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
 * that this is called "remove_one" in the PF Driver.)
 */
static void cxgb4vf_pci_remove(struct pci_dev *pdev)
{
        struct adapter *adapter = pci_get_drvdata(pdev);

        /*
         * Tear down driver state associated with device.
         */
        if (adapter) {
                int pidx;

                /*
                 * Stop all of our activity.  Unregister network ports,
                 * disable interrupts, etc.
                 */
                for_each_port(adapter, pidx)
                        if (test_bit(pidx, &adapter->registered_device_map))
                                unregister_netdev(adapter->port[pidx]);
                t4vf_sge_stop(adapter);
                if (adapter->flags & USING_MSIX) {
                        pci_disable_msix(adapter->pdev);
                        adapter->flags &= ~USING_MSIX;
                } else if (adapter->flags & USING_MSI) {
                        pci_disable_msi(adapter->pdev);
                        adapter->flags &= ~USING_MSI;
                }

                /*
                 * Tear down our debugfs entries.
                 */
                if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
                        cleanup_debugfs(adapter);
                        debugfs_remove_recursive(adapter->debugfs_root);
                }

                /*
                 * Free all of the various resources which we've acquired ...
                 */
                t4vf_free_sge_resources(adapter);
                for_each_port(adapter, pidx) {
                        struct net_device *netdev = adapter->port[pidx];
                        struct port_info *pi;

                        if (netdev == NULL)
                                continue;

                        pi = netdev_priv(netdev);
                        t4vf_free_vi(adapter, pi->viid);
                        free_netdev(netdev);
                }
                iounmap(adapter->regs);
                if (!is_t4(adapter->params.chip))
                        iounmap(adapter->bar2);
                kfree(adapter->mbox_log);
                kfree(adapter);
        }

        /*
         * Disable the device and release its PCI resources.
         */
        pci_disable_device(pdev);
        pci_clear_master(pdev);
        pci_release_regions(pdev);
}

/*
 * "Shutdown" the device: quiesce it, stopping Ingress Packet and Interrupt
 * delivery.
 */
static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
{
        struct adapter *adapter;
        int pidx;

        adapter = pci_get_drvdata(pdev);
        if (!adapter)
                return;

        /* Disable all Virtual Interfaces.  This will shut down the
         * delivery of all ingress packets into the chip for these
         * Virtual Interfaces.
         */
        for_each_port(adapter, pidx)
                if (test_bit(pidx, &adapter->registered_device_map))
                        unregister_netdev(adapter->port[pidx]);

        /* Stop all SGE activity and disable our interrupts so that no
         * further DMA or interrupt delivery can occur.
         */
        t4vf_sge_stop(adapter);
        if (adapter->flags & USING_MSIX) {
                pci_disable_msix(adapter->pdev);
                adapter->flags &= ~USING_MSIX;
        } else if (adapter->flags & USING_MSI) {
                pci_disable_msi(adapter->pdev);
                adapter->flags &= ~USING_MSI;
        }

        /*
         * Free up all Queues, which will prevent further DMA and
         * Interrupts, allowing various internal pathways to drain.
         */
        t4vf_free_sge_resources(adapter);
        pci_set_drvdata(pdev, NULL);
}

/* Macros needed to support the PCI Device ID Table ...
 */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
        static const struct pci_device_id cxgb4vf_pci_tbl[] = {
#define CH_PCI_DEVICE_ID_FUNCTION       0x8

#define CH_PCI_ID_TABLE_ENTRY(devid) \
                { PCI_VDEVICE(CHELSIO, (devid)), 0 }

#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }

#include "../cxgb4/t4_pci_id_tbl.h"
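
/* With the macros above, the shared ID table header expands to roughly:
 *
 *      static const struct pci_device_id cxgb4vf_pci_tbl[] = {
 *              { PCI_VDEVICE(CHELSIO, devid), 0 },
 *              ...
 *              { 0, }
 *      };
 *
 * with one CH_PCI_ID_TABLE_ENTRY() per device ID whose function number
 * matches CH_PCI_DEVICE_ID_FUNCTION (0x8, which selects the VF IDs).
 */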

MODULE_DESCRIPTION(DRV_DESC);
MODULE_AUTHOR("Chelsio Communications");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);

static struct pci_driver cxgb4vf_driver = {
        .name           = KBUILD_MODNAME,
        .id_table       = cxgb4vf_pci_tbl,
        .probe          = cxgb4vf_pci_probe,
        .remove         = cxgb4vf_pci_remove,
        .shutdown       = cxgb4vf_pci_shutdown,
};

/*
 * Initialize global driver state.
 */
static int __init cxgb4vf_module_init(void)
{
        int ret;

        /*
         * Vet our module parameters.
         */
        if (msi != MSI_MSIX && msi != MSI_MSI) {
                pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
                        msi, MSI_MSIX, MSI_MSI);
                return -EINVAL;
        }
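        /* (For example, loading the module with "modprobe cxgb4vf msi=1"
         * restricts the driver to plain MSI interrupts; any value other
         * than 1 or 2 is rejected here.)
         */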

        /* Debugfs support is optional, just warn if this fails */
        cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
        if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
                pr_warn("could not create debugfs entry, continuing\n");

        ret = pci_register_driver(&cxgb4vf_driver);
        if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
                debugfs_remove(cxgb4vf_debugfs_root);
        return ret;
}

/*
 * Tear down global driver state.
 */
static void __exit cxgb4vf_module_exit(void)
{
        pci_unregister_driver(&cxgb4vf_driver);
        debugfs_remove(cxgb4vf_debugfs_root);
}

module_init(cxgb4vf_module_init);
module_exit(cxgb4vf_module_exit);
