linux/drivers/net/vmxnet3/vmxnet3_drv.c
<<
>>
Prefs
   1/*
   2 * Linux driver for VMware's vmxnet3 ethernet NIC.
   3 *
   4 * Copyright (C) 2008-2016, VMware, Inc. All Rights Reserved.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License as published by the
   8 * Free Software Foundation; version 2 of the License and no later version.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  13 * NON INFRINGEMENT. See the GNU General Public License for more
  14 * details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 *
  20 * The full GNU General Public License is included in this distribution in
  21 * the file called "COPYING".
  22 *
  23 * Maintained by: pv-drivers@vmware.com
  24 *
  25 */
  26
  27#include <linux/module.h>
  28#include <net/ip6_checksum.h>
  29
  30#include "vmxnet3_int.h"
  31
  32char vmxnet3_driver_name[] = "vmxnet3";
  33#define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"
  34
  35/*
  36 * PCI Device ID Table
  37 * Last entry must be all 0s
  38 */
  39static const struct pci_device_id vmxnet3_pciid_table[] = {
  40        {PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
  41        {0}
  42};
  43
  44MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
  45
  46static int enable_mq = 1;
  47
  48static void
  49vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);
  50
  51/*
  52 *    Enable/Disable the given intr
  53 */
  54static void
  55vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
  56{
  57        VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
  58}
  59
  60
  61static void
  62vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
  63{
  64        VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
  65}
  66
  67
  68/*
  69 *    Enable/Disable all intrs used by the device
  70 */
  71static void
  72vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
  73{
  74        int i;
  75
  76        for (i = 0; i < adapter->intr.num_intrs; i++)
  77                vmxnet3_enable_intr(adapter, i);
  78        adapter->shared->devRead.intrConf.intrCtrl &=
  79                                        cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
  80}
  81
  82
  83static void
  84vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
  85{
  86        int i;
  87
  88        adapter->shared->devRead.intrConf.intrCtrl |=
  89                                        cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
  90        for (i = 0; i < adapter->intr.num_intrs; i++)
  91                vmxnet3_disable_intr(adapter, i);
  92}
  93
  94
  95static void
  96vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
  97{
  98        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
  99}
 100
 101
 102static bool
 103vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 104{
 105        return tq->stopped;
 106}
 107
 108
 109static void
 110vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 111{
 112        tq->stopped = false;
 113        netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
 114}
 115
 116
 117static void
 118vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 119{
 120        tq->stopped = false;
 121        netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
 122}
 123
 124
 125static void
 126vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 127{
 128        tq->stopped = true;
 129        tq->num_stop++;
 130        netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
 131}
 132
 133
 134/*
 135 * Check the link state. This may start or stop the tx queue.
 136 */
 137static void
 138vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
 139{
 140        u32 ret;
 141        int i;
 142        unsigned long flags;
 143
 144        spin_lock_irqsave(&adapter->cmd_lock, flags);
 145        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
 146        ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
 147        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
 148
 149        adapter->link_speed = ret >> 16;
 150        if (ret & 1) { /* Link is up. */
 151                netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n",
 152                            adapter->link_speed);
 153                netif_carrier_on(adapter->netdev);
 154
 155                if (affectTxQueue) {
 156                        for (i = 0; i < adapter->num_tx_queues; i++)
 157                                vmxnet3_tq_start(&adapter->tx_queue[i],
 158                                                 adapter);
 159                }
 160        } else {
 161                netdev_info(adapter->netdev, "NIC Link is Down\n");
 162                netif_carrier_off(adapter->netdev);
 163
 164                if (affectTxQueue) {
 165                        for (i = 0; i < adapter->num_tx_queues; i++)
 166                                vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
 167                }
 168        }
 169}
 170
 171static void
 172vmxnet3_process_events(struct vmxnet3_adapter *adapter)
 173{
 174        int i;
 175        unsigned long flags;
 176        u32 events = le32_to_cpu(adapter->shared->ecr);
 177        if (!events)
 178                return;
 179
 180        vmxnet3_ack_events(adapter, events);
 181
 182        /* Check if link state has changed */
 183        if (events & VMXNET3_ECR_LINK)
 184                vmxnet3_check_link(adapter, true);
 185
 186        /* Check if there is an error on xmit/recv queues */
 187        if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
 188                spin_lock_irqsave(&adapter->cmd_lock, flags);
 189                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
 190                                       VMXNET3_CMD_GET_QUEUE_STATUS);
 191                spin_unlock_irqrestore(&adapter->cmd_lock, flags);
 192
 193                for (i = 0; i < adapter->num_tx_queues; i++)
 194                        if (adapter->tqd_start[i].status.stopped)
 195                                dev_err(&adapter->netdev->dev,
 196                                        "%s: tq[%d] error 0x%x\n",
 197                                        adapter->netdev->name, i, le32_to_cpu(
 198                                        adapter->tqd_start[i].status.error));
 199                for (i = 0; i < adapter->num_rx_queues; i++)
 200                        if (adapter->rqd_start[i].status.stopped)
 201                                dev_err(&adapter->netdev->dev,
 202                                        "%s: rq[%d] error 0x%x\n",
 203                                        adapter->netdev->name, i,
 204                                        adapter->rqd_start[i].status.error);
 205
 206                schedule_work(&adapter->work);
 207        }
 208}
 209
 210#ifdef __BIG_ENDIAN_BITFIELD
 211/*
 212 * The device expects the bitfields in shared structures to be written in
 213 * little endian. When CPU is big endian, the following routines are used to
 214 * correctly read and write into ABI.
 215 * The general technique used here is : double word bitfields are defined in
 216 * opposite order for big endian architecture. Then before reading them in
 217 * driver the complete double word is translated using le32_to_cpu. Similarly
 218 * After the driver writes into bitfields, cpu_to_le32 is used to translate the
 219 * double words into required format.
 220 * In order to avoid touching bits in shared structure more than once, temporary
 221 * descriptors are used. These are passed as srcDesc to following functions.
 222 */
 223static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
 224                                struct Vmxnet3_RxDesc *dstDesc)
 225{
 226        u32 *src = (u32 *)srcDesc + 2;
 227        u32 *dst = (u32 *)dstDesc + 2;
 228        dstDesc->addr = le64_to_cpu(srcDesc->addr);
 229        *dst = le32_to_cpu(*src);
 230        dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
 231}
 232
 233static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
 234                               struct Vmxnet3_TxDesc *dstDesc)
 235{
 236        int i;
 237        u32 *src = (u32 *)(srcDesc + 1);
 238        u32 *dst = (u32 *)(dstDesc + 1);
 239
 240        /* Working backwards so that the gen bit is set at the end. */
 241        for (i = 2; i > 0; i--) {
 242                src--;
 243                dst--;
 244                *dst = cpu_to_le32(*src);
 245        }
 246}
 247
 248
 249static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
 250                                struct Vmxnet3_RxCompDesc *dstDesc)
 251{
 252        int i = 0;
 253        u32 *src = (u32 *)srcDesc;
 254        u32 *dst = (u32 *)dstDesc;
 255        for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
 256                *dst = le32_to_cpu(*src);
 257                src++;
 258                dst++;
 259        }
 260}
 261
 262
 263/* Used to read bitfield values from double words. */
 264static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
 265{
 266        u32 temp = le32_to_cpu(*bitfield);
 267        u32 mask = ((1 << size) - 1) << pos;
 268        temp &= mask;
 269        temp >>= pos;
 270        return temp;
 271}
 272
 273
 274
 275#endif  /* __BIG_ENDIAN_BITFIELD */
 276
 277#ifdef __BIG_ENDIAN_BITFIELD
 278
 279#   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
 280                        txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
 281                        VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
 282#   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
 283                        txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
 284                        VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
 285#   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
 286                        VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
 287                        VMXNET3_TCD_GEN_SIZE)
 288#   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
 289                        VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
 290#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
 291                        (dstrcd) = (tmp); \
 292                        vmxnet3_RxCompToCPU((rcd), (tmp)); \
 293                } while (0)
 294#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
 295                        (dstrxd) = (tmp); \
 296                        vmxnet3_RxDescToCPU((rxd), (tmp)); \
 297                } while (0)
 298
 299#else
 300
 301#   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
 302#   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
 303#   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
 304#   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
 305#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
 306#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)
 307
 308#endif /* __BIG_ENDIAN_BITFIELD  */
 309
 310
 311static void
 312vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
 313                     struct pci_dev *pdev)
 314{
 315        if (tbi->map_type == VMXNET3_MAP_SINGLE)
 316                dma_unmap_single(&pdev->dev, tbi->dma_addr, tbi->len,
 317                                 PCI_DMA_TODEVICE);
 318        else if (tbi->map_type == VMXNET3_MAP_PAGE)
 319                dma_unmap_page(&pdev->dev, tbi->dma_addr, tbi->len,
 320                               PCI_DMA_TODEVICE);
 321        else
 322                BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
 323
 324        tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
 325}
 326
 327
 328static int
 329vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
 330                  struct pci_dev *pdev, struct vmxnet3_adapter *adapter)
 331{
 332        struct sk_buff *skb;
 333        int entries = 0;
 334
 335        /* no out of order completion */
 336        BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
 337        BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
 338
 339        skb = tq->buf_info[eop_idx].skb;
 340        BUG_ON(skb == NULL);
 341        tq->buf_info[eop_idx].skb = NULL;
 342
 343        VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
 344
 345        while (tq->tx_ring.next2comp != eop_idx) {
 346                vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
 347                                     pdev);
 348
 349                /* update next2comp w/o tx_lock. Since we are marking more,
 350                 * instead of less, tx ring entries avail, the worst case is
 351                 * that the tx routine incorrectly re-queues a pkt due to
 352                 * insufficient tx ring entries.
 353                 */
 354                vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
 355                entries++;
 356        }
 357
 358        dev_kfree_skb_any(skb);
 359        return entries;
 360}
 361
 362
 363static int
 364vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
 365                        struct vmxnet3_adapter *adapter)
 366{
 367        int completed = 0;
 368        union Vmxnet3_GenericDesc *gdesc;
 369
 370        gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
 371        while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
 372                /* Prevent any &gdesc->tcd field from being (speculatively)
 373                 * read before (&gdesc->tcd)->gen is read.
 374                 */
 375                dma_rmb();
 376
 377                completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
 378                                               &gdesc->tcd), tq, adapter->pdev,
 379                                               adapter);
 380
 381                vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
 382                gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
 383        }
 384
 385        if (completed) {
 386                spin_lock(&tq->tx_lock);
 387                if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
 388                             vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
 389                             VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
 390                             netif_carrier_ok(adapter->netdev))) {
 391                        vmxnet3_tq_wake(tq, adapter);
 392                }
 393                spin_unlock(&tq->tx_lock);
 394        }
 395        return completed;
 396}
 397
 398
 399static void
 400vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
 401                   struct vmxnet3_adapter *adapter)
 402{
 403        int i;
 404
 405        while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
 406                struct vmxnet3_tx_buf_info *tbi;
 407
 408                tbi = tq->buf_info + tq->tx_ring.next2comp;
 409
 410                vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
 411                if (tbi->skb) {
 412                        dev_kfree_skb_any(tbi->skb);
 413                        tbi->skb = NULL;
 414                }
 415                vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
 416        }
 417
 418        /* sanity check, verify all buffers are indeed unmapped and freed */
 419        for (i = 0; i < tq->tx_ring.size; i++) {
 420                BUG_ON(tq->buf_info[i].skb != NULL ||
 421                       tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
 422        }
 423
 424        tq->tx_ring.gen = VMXNET3_INIT_GEN;
 425        tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
 426
 427        tq->comp_ring.gen = VMXNET3_INIT_GEN;
 428        tq->comp_ring.next2proc = 0;
 429}
 430
 431
 432static void
 433vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
 434                   struct vmxnet3_adapter *adapter)
 435{
 436        if (tq->tx_ring.base) {
 437                dma_free_coherent(&adapter->pdev->dev, tq->tx_ring.size *
 438                                  sizeof(struct Vmxnet3_TxDesc),
 439                                  tq->tx_ring.base, tq->tx_ring.basePA);
 440                tq->tx_ring.base = NULL;
 441        }
 442        if (tq->data_ring.base) {
 443                dma_free_coherent(&adapter->pdev->dev,
 444                                  tq->data_ring.size * tq->txdata_desc_size,
 445                                  tq->data_ring.base, tq->data_ring.basePA);
 446                tq->data_ring.base = NULL;
 447        }
 448        if (tq->comp_ring.base) {
 449                dma_free_coherent(&adapter->pdev->dev, tq->comp_ring.size *
 450                                  sizeof(struct Vmxnet3_TxCompDesc),
 451                                  tq->comp_ring.base, tq->comp_ring.basePA);
 452                tq->comp_ring.base = NULL;
 453        }
 454        if (tq->buf_info) {
 455                dma_free_coherent(&adapter->pdev->dev,
 456                                  tq->tx_ring.size * sizeof(tq->buf_info[0]),
 457                                  tq->buf_info, tq->buf_info_pa);
 458                tq->buf_info = NULL;
 459        }
 460}
 461
 462
 463/* Destroy all tx queues */
 464void
 465vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
 466{
 467        int i;
 468
 469        for (i = 0; i < adapter->num_tx_queues; i++)
 470                vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
 471}
 472
 473
 474static void
 475vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
 476                struct vmxnet3_adapter *adapter)
 477{
 478        int i;
 479
 480        /* reset the tx ring contents to 0 and reset the tx ring states */
 481        memset(tq->tx_ring.base, 0, tq->tx_ring.size *
 482               sizeof(struct Vmxnet3_TxDesc));
 483        tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
 484        tq->tx_ring.gen = VMXNET3_INIT_GEN;
 485
 486        memset(tq->data_ring.base, 0,
 487               tq->data_ring.size * tq->txdata_desc_size);
 488
 489        /* reset the tx comp ring contents to 0 and reset comp ring states */
 490        memset(tq->comp_ring.base, 0, tq->comp_ring.size *
 491               sizeof(struct Vmxnet3_TxCompDesc));
 492        tq->comp_ring.next2proc = 0;
 493        tq->comp_ring.gen = VMXNET3_INIT_GEN;
 494
 495        /* reset the bookkeeping data */
 496        memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
 497        for (i = 0; i < tq->tx_ring.size; i++)
 498                tq->buf_info[i].map_type = VMXNET3_MAP_NONE;
 499
 500        /* stats are not reset */
 501}
 502
 503
 504static int
 505vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
 506                  struct vmxnet3_adapter *adapter)
 507{
 508        size_t sz;
 509
 510        BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
 511               tq->comp_ring.base || tq->buf_info);
 512
 513        tq->tx_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
 514                        tq->tx_ring.size * sizeof(struct Vmxnet3_TxDesc),
 515                        &tq->tx_ring.basePA, GFP_KERNEL);
 516        if (!tq->tx_ring.base) {
 517                netdev_err(adapter->netdev, "failed to allocate tx ring\n");
 518                goto err;
 519        }
 520
 521        tq->data_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
 522                        tq->data_ring.size * tq->txdata_desc_size,
 523                        &tq->data_ring.basePA, GFP_KERNEL);
 524        if (!tq->data_ring.base) {
 525                netdev_err(adapter->netdev, "failed to allocate tx data ring\n");
 526                goto err;
 527        }
 528
 529        tq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
 530                        tq->comp_ring.size * sizeof(struct Vmxnet3_TxCompDesc),
 531                        &tq->comp_ring.basePA, GFP_KERNEL);
 532        if (!tq->comp_ring.base) {
 533                netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
 534                goto err;
 535        }
 536
 537        sz = tq->tx_ring.size * sizeof(tq->buf_info[0]);
 538        tq->buf_info = dma_alloc_coherent(&adapter->pdev->dev, sz,
 539                                          &tq->buf_info_pa, GFP_KERNEL);
 540        if (!tq->buf_info)
 541                goto err;
 542
 543        return 0;
 544
 545err:
 546        vmxnet3_tq_destroy(tq, adapter);
 547        return -ENOMEM;
 548}
 549
 550static void
 551vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
 552{
 553        int i;
 554
 555        for (i = 0; i < adapter->num_tx_queues; i++)
 556                vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
 557}
 558
 559/*
 560 *    starting from ring->next2fill, allocate rx buffers for the given ring
 561 *    of the rx queue and update the rx desc. stop after @num_to_alloc buffers
 562 *    are allocated or allocation fails
 563 */
 564
 565static int
 566vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
 567                        int num_to_alloc, struct vmxnet3_adapter *adapter)
 568{
 569        int num_allocated = 0;
 570        struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
 571        struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
 572        u32 val;
 573
 574        while (num_allocated <= num_to_alloc) {
 575                struct vmxnet3_rx_buf_info *rbi;
 576                union Vmxnet3_GenericDesc *gd;
 577
 578                rbi = rbi_base + ring->next2fill;
 579                gd = ring->base + ring->next2fill;
 580
 581                if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
 582                        if (rbi->skb == NULL) {
 583                                rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
 584                                                                       rbi->len,
 585                                                                       GFP_KERNEL);
 586                                if (unlikely(rbi->skb == NULL)) {
 587                                        rq->stats.rx_buf_alloc_failure++;
 588                                        break;
 589                                }
 590
 591                                rbi->dma_addr = dma_map_single(
 592                                                &adapter->pdev->dev,
 593                                                rbi->skb->data, rbi->len,
 594                                                PCI_DMA_FROMDEVICE);
 595                                if (dma_mapping_error(&adapter->pdev->dev,
 596                                                      rbi->dma_addr)) {
 597                                        dev_kfree_skb_any(rbi->skb);
 598                                        rq->stats.rx_buf_alloc_failure++;
 599                                        break;
 600                                }
 601                        } else {
 602                                /* rx buffer skipped by the device */
 603                        }
 604                        val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
 605                } else {
 606                        BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
 607                               rbi->len  != PAGE_SIZE);
 608
 609                        if (rbi->page == NULL) {
 610                                rbi->page = alloc_page(GFP_ATOMIC);
 611                                if (unlikely(rbi->page == NULL)) {
 612                                        rq->stats.rx_buf_alloc_failure++;
 613                                        break;
 614                                }
 615                                rbi->dma_addr = dma_map_page(
 616                                                &adapter->pdev->dev,
 617                                                rbi->page, 0, PAGE_SIZE,
 618                                                PCI_DMA_FROMDEVICE);
 619                                if (dma_mapping_error(&adapter->pdev->dev,
 620                                                      rbi->dma_addr)) {
 621                                        put_page(rbi->page);
 622                                        rq->stats.rx_buf_alloc_failure++;
 623                                        break;
 624                                }
 625                        } else {
 626                                /* rx buffers skipped by the device */
 627                        }
 628                        val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
 629                }
 630
 631                gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
 632                gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
 633                                           | val | rbi->len);
 634
 635                /* Fill the last buffer but dont mark it ready, or else the
 636                 * device will think that the queue is full */
 637                if (num_allocated == num_to_alloc)
 638                        break;
 639
 640                gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
 641                num_allocated++;
 642                vmxnet3_cmd_ring_adv_next2fill(ring);
 643        }
 644
 645        netdev_dbg(adapter->netdev,
 646                "alloc_rx_buf: %d allocated, next2fill %u, next2comp %u\n",
 647                num_allocated, ring->next2fill, ring->next2comp);
 648
 649        /* so that the device can distinguish a full ring and an empty ring */
 650        BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);
 651
 652        return num_allocated;
 653}
 654
 655
 656static void
 657vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
 658                    struct vmxnet3_rx_buf_info *rbi)
 659{
 660        struct skb_frag_struct *frag = skb_shinfo(skb)->frags +
 661                skb_shinfo(skb)->nr_frags;
 662
 663        BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
 664
 665        __skb_frag_set_page(frag, rbi->page);
 666        frag->page_offset = 0;
 667        skb_frag_size_set(frag, rcd->len);
 668        skb->data_len += rcd->len;
 669        skb->truesize += PAGE_SIZE;
 670        skb_shinfo(skb)->nr_frags++;
 671}
 672
 673
 674static int
 675vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
 676                struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
 677                struct vmxnet3_adapter *adapter)
 678{
 679        u32 dw2, len;
 680        unsigned long buf_offset;
 681        int i;
 682        union Vmxnet3_GenericDesc *gdesc;
 683        struct vmxnet3_tx_buf_info *tbi = NULL;
 684
 685        BUG_ON(ctx->copy_size > skb_headlen(skb));
 686
 687        /* use the previous gen bit for the SOP desc */
 688        dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
 689
 690        ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
 691        gdesc = ctx->sop_txd; /* both loops below can be skipped */
 692
 693        /* no need to map the buffer if headers are copied */
 694        if (ctx->copy_size) {
 695                ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
 696                                        tq->tx_ring.next2fill *
 697                                        tq->txdata_desc_size);
 698                ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
 699                ctx->sop_txd->dword[3] = 0;
 700
 701                tbi = tq->buf_info + tq->tx_ring.next2fill;
 702                tbi->map_type = VMXNET3_MAP_NONE;
 703
 704                netdev_dbg(adapter->netdev,
 705                        "txd[%u]: 0x%Lx 0x%x 0x%x\n",
 706                        tq->tx_ring.next2fill,
 707                        le64_to_cpu(ctx->sop_txd->txd.addr),
 708                        ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
 709                vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
 710
 711                /* use the right gen for non-SOP desc */
 712                dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
 713        }
 714
 715        /* linear part can use multiple tx desc if it's big */
 716        len = skb_headlen(skb) - ctx->copy_size;
 717        buf_offset = ctx->copy_size;
 718        while (len) {
 719                u32 buf_size;
 720
 721                if (len < VMXNET3_MAX_TX_BUF_SIZE) {
 722                        buf_size = len;
 723                        dw2 |= len;
 724                } else {
 725                        buf_size = VMXNET3_MAX_TX_BUF_SIZE;
 726                        /* spec says that for TxDesc.len, 0 == 2^14 */
 727                }
 728
 729                tbi = tq->buf_info + tq->tx_ring.next2fill;
 730                tbi->map_type = VMXNET3_MAP_SINGLE;
 731                tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
 732                                skb->data + buf_offset, buf_size,
 733                                PCI_DMA_TODEVICE);
 734                if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
 735                        return -EFAULT;
 736
 737                tbi->len = buf_size;
 738
 739                gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
 740                BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
 741
 742                gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
 743                gdesc->dword[2] = cpu_to_le32(dw2);
 744                gdesc->dword[3] = 0;
 745
 746                netdev_dbg(adapter->netdev,
 747                        "txd[%u]: 0x%Lx 0x%x 0x%x\n",
 748                        tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
 749                        le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
 750                vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
 751                dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
 752
 753                len -= buf_size;
 754                buf_offset += buf_size;
 755        }
 756
 757        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 758                const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
 759                u32 buf_size;
 760
 761                buf_offset = 0;
 762                len = skb_frag_size(frag);
 763                while (len) {
 764                        tbi = tq->buf_info + tq->tx_ring.next2fill;
 765                        if (len < VMXNET3_MAX_TX_BUF_SIZE) {
 766                                buf_size = len;
 767                                dw2 |= len;
 768                        } else {
 769                                buf_size = VMXNET3_MAX_TX_BUF_SIZE;
 770                                /* spec says that for TxDesc.len, 0 == 2^14 */
 771                        }
 772                        tbi->map_type = VMXNET3_MAP_PAGE;
 773                        tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
 774                                                         buf_offset, buf_size,
 775                                                         DMA_TO_DEVICE);
 776                        if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
 777                                return -EFAULT;
 778
 779                        tbi->len = buf_size;
 780
 781                        gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
 782                        BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
 783
 784                        gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
 785                        gdesc->dword[2] = cpu_to_le32(dw2);
 786                        gdesc->dword[3] = 0;
 787
 788                        netdev_dbg(adapter->netdev,
 789                                "txd[%u]: 0x%llx %u %u\n",
 790                                tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
 791                                le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
 792                        vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
 793                        dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
 794
 795                        len -= buf_size;
 796                        buf_offset += buf_size;
 797                }
 798        }
 799
 800        ctx->eop_txd = gdesc;
 801
 802        /* set the last buf_info for the pkt */
 803        tbi->skb = skb;
 804        tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
 805
 806        return 0;
 807}
 808
 809
 810/* Init all tx queues */
 811static void
 812vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
 813{
 814        int i;
 815
 816        for (i = 0; i < adapter->num_tx_queues; i++)
 817                vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
 818}
 819
 820
 821/*
 822 *    parse relevant protocol headers:
 823 *      For a tso pkt, relevant headers are L2/3/4 including options
 824 *      For a pkt requesting csum offloading, they are L2/3 and may include L4
 825 *      if it's a TCP/UDP pkt
 826 *
 827 * Returns:
 828 *    -1:  error happens during parsing
 829 *     0:  protocol headers parsed, but too big to be copied
 830 *     1:  protocol headers parsed and copied
 831 *
 832 * Other effects:
 833 *    1. related *ctx fields are updated.
 834 *    2. ctx->copy_size is # of bytes copied
 835 *    3. the portion to be copied is guaranteed to be in the linear part
 836 *
 837 */
 838static int
 839vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 840                  struct vmxnet3_tx_ctx *ctx,
 841                  struct vmxnet3_adapter *adapter)
 842{
 843        u8 protocol = 0;
 844
 845        if (ctx->mss) { /* TSO */
 846                ctx->eth_ip_hdr_size = skb_transport_offset(skb);
 847                ctx->l4_hdr_size = tcp_hdrlen(skb);
 848                ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
 849        } else {
 850                if (skb->ip_summed == CHECKSUM_PARTIAL) {
 851                        ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
 852
 853                        if (ctx->ipv4) {
 854                                const struct iphdr *iph = ip_hdr(skb);
 855
 856                                protocol = iph->protocol;
 857                        } else if (ctx->ipv6) {
 858                                const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 859
 860                                protocol = ipv6h->nexthdr;
 861                        }
 862
 863                        switch (protocol) {
 864                        case IPPROTO_TCP:
 865                                ctx->l4_hdr_size = tcp_hdrlen(skb);
 866                                break;
 867                        case IPPROTO_UDP:
 868                                ctx->l4_hdr_size = sizeof(struct udphdr);
 869                                break;
 870                        default:
 871                                ctx->l4_hdr_size = 0;
 872                                break;
 873                        }
 874
 875                        ctx->copy_size = min(ctx->eth_ip_hdr_size +
 876                                         ctx->l4_hdr_size, skb->len);
 877                } else {
 878                        ctx->eth_ip_hdr_size = 0;
 879                        ctx->l4_hdr_size = 0;
 880                        /* copy as much as allowed */
 881                        ctx->copy_size = min_t(unsigned int,
 882                                               tq->txdata_desc_size,
 883                                               skb_headlen(skb));
 884                }
 885
 886                if (skb->len <= VMXNET3_HDR_COPY_SIZE)
 887                        ctx->copy_size = skb->len;
 888
 889                /* make sure headers are accessible directly */
 890                if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
 891                        goto err;
 892        }
 893
 894        if (unlikely(ctx->copy_size > tq->txdata_desc_size)) {
 895                tq->stats.oversized_hdr++;
 896                ctx->copy_size = 0;
 897                return 0;
 898        }
 899
 900        return 1;
 901err:
 902        return -1;
 903}
 904
 905/*
 906 *    copy relevant protocol headers to the transmit ring:
 907 *      For a tso pkt, relevant headers are L2/3/4 including options
 908 *      For a pkt requesting csum offloading, they are L2/3 and may include L4
 909 *      if it's a TCP/UDP pkt
 910 *
 911 *
 912 *    Note that this requires that vmxnet3_parse_hdr be called first to set the
 913 *      appropriate bits in ctx first
 914 */
 915static void
 916vmxnet3_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 917                 struct vmxnet3_tx_ctx *ctx,
 918                 struct vmxnet3_adapter *adapter)
 919{
 920        struct Vmxnet3_TxDataDesc *tdd;
 921
 922        tdd = (struct Vmxnet3_TxDataDesc *)((u8 *)tq->data_ring.base +
 923                                            tq->tx_ring.next2fill *
 924                                            tq->txdata_desc_size);
 925
 926        memcpy(tdd->data, skb->data, ctx->copy_size);
 927        netdev_dbg(adapter->netdev,
 928                "copy %u bytes to dataRing[%u]\n",
 929                ctx->copy_size, tq->tx_ring.next2fill);
 930}
 931
 932
 933static void
 934vmxnet3_prepare_tso(struct sk_buff *skb,
 935                    struct vmxnet3_tx_ctx *ctx)
 936{
 937        struct tcphdr *tcph = tcp_hdr(skb);
 938
 939        if (ctx->ipv4) {
 940                struct iphdr *iph = ip_hdr(skb);
 941
 942                iph->check = 0;
 943                tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
 944                                                 IPPROTO_TCP, 0);
 945        } else if (ctx->ipv6) {
 946                struct ipv6hdr *iph = ipv6_hdr(skb);
 947
 948                tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
 949                                               IPPROTO_TCP, 0);
 950        }
 951}
 952
 953static int txd_estimate(const struct sk_buff *skb)
 954{
 955        int count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
 956        int i;
 957
 958        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 959                const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
 960
 961                count += VMXNET3_TXD_NEEDED(skb_frag_size(frag));
 962        }
 963        return count;
 964}
 965
 966/*
 967 * Transmits a pkt thru a given tq
 968 * Returns:
 969 *    NETDEV_TX_OK:      descriptors are setup successfully
 970 *    NETDEV_TX_OK:      error occurred, the pkt is dropped
 971 *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
 972 *
 973 * Side-effects:
 974 *    1. tx ring may be changed
 975 *    2. tq stats may be updated accordingly
 976 *    3. shared->txNumDeferred may be updated
 977 */
 978
 979static int
 980vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 981                struct vmxnet3_adapter *adapter, struct net_device *netdev)
 982{
 983        int ret;
 984        u32 count;
 985        int num_pkts;
 986        int tx_num_deferred;
 987        unsigned long flags;
 988        struct vmxnet3_tx_ctx ctx;
 989        union Vmxnet3_GenericDesc *gdesc;
 990#ifdef __BIG_ENDIAN_BITFIELD
 991        /* Use temporary descriptor to avoid touching bits multiple times */
 992        union Vmxnet3_GenericDesc tempTxDesc;
 993#endif
 994
 995        count = txd_estimate(skb);
 996
 997        ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));
 998        ctx.ipv6 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IPV6));
 999
1000        ctx.mss = skb_shinfo(skb)->gso_size;
1001        if (ctx.mss) {
1002                if (skb_header_cloned(skb)) {
1003                        if (unlikely(pskb_expand_head(skb, 0, 0,
1004                                                      GFP_ATOMIC) != 0)) {
1005                                tq->stats.drop_tso++;
1006                                goto drop_pkt;
1007                        }
1008                        tq->stats.copy_skb_header++;
1009                }
1010                vmxnet3_prepare_tso(skb, &ctx);
1011        } else {
1012                if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
1013
1014                        /* non-tso pkts must not use more than
1015                         * VMXNET3_MAX_TXD_PER_PKT entries
1016                         */
1017                        if (skb_linearize(skb) != 0) {
1018                                tq->stats.drop_too_many_frags++;
1019                                goto drop_pkt;
1020                        }
1021                        tq->stats.linearized++;
1022
1023                        /* recalculate the # of descriptors to use */
1024                        count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
1025                }
1026        }
1027
1028        ret = vmxnet3_parse_hdr(skb, tq, &ctx, adapter);
1029        if (ret >= 0) {
1030                BUG_ON(ret <= 0 && ctx.copy_size != 0);
1031                /* hdrs parsed, check against other limits */
1032                if (ctx.mss) {
1033                        if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
1034                                     VMXNET3_MAX_TX_BUF_SIZE)) {
1035                                tq->stats.drop_oversized_hdr++;
1036                                goto drop_pkt;
1037                        }
1038                } else {
1039                        if (skb->ip_summed == CHECKSUM_PARTIAL) {
1040                                if (unlikely(ctx.eth_ip_hdr_size +
1041                                             skb->csum_offset >
1042                                             VMXNET3_MAX_CSUM_OFFSET)) {
1043                                        tq->stats.drop_oversized_hdr++;
1044                                        goto drop_pkt;
1045                                }
1046                        }
1047                }
1048        } else {
1049                tq->stats.drop_hdr_inspect_err++;
1050                goto drop_pkt;
1051        }
1052
1053        spin_lock_irqsave(&tq->tx_lock, flags);
1054
1055        if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
1056                tq->stats.tx_ring_full++;
1057                netdev_dbg(adapter->netdev,
1058                        "tx queue stopped on %s, next2comp %u"
1059                        " next2fill %u\n", adapter->netdev->name,
1060                        tq->tx_ring.next2comp, tq->tx_ring.next2fill);
1061
1062                vmxnet3_tq_stop(tq, adapter);
1063                spin_unlock_irqrestore(&tq->tx_lock, flags);
1064                return NETDEV_TX_BUSY;
1065        }
1066
1067
1068        vmxnet3_copy_hdr(skb, tq, &ctx, adapter);
1069
1070        /* fill tx descs related to addr & len */
1071        if (vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter))
1072                goto unlock_drop_pkt;
1073
1074        /* setup the EOP desc */
1075        ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
1076
1077        /* setup the SOP desc */
1078#ifdef __BIG_ENDIAN_BITFIELD
1079        gdesc = &tempTxDesc;
1080        gdesc->dword[2] = ctx.sop_txd->dword[2];
1081        gdesc->dword[3] = ctx.sop_txd->dword[3];
1082#else
1083        gdesc = ctx.sop_txd;
1084#endif
1085        tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred);
1086        if (ctx.mss) {
1087                gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
1088                gdesc->txd.om = VMXNET3_OM_TSO;
1089                gdesc->txd.msscof = ctx.mss;
1090                num_pkts = (skb->len - gdesc->txd.hlen + ctx.mss - 1) / ctx.mss;
1091        } else {
1092                if (skb->ip_summed == CHECKSUM_PARTIAL) {
1093                        gdesc->txd.hlen = ctx.eth_ip_hdr_size;
1094                        gdesc->txd.om = VMXNET3_OM_CSUM;
1095                        gdesc->txd.msscof = ctx.eth_ip_hdr_size +
1096                                            skb->csum_offset;
1097                } else {
1098                        gdesc->txd.om = 0;
1099                        gdesc->txd.msscof = 0;
1100                }
1101                num_pkts = 1;
1102        }
1103        le32_add_cpu(&tq->shared->txNumDeferred, num_pkts);
1104        tx_num_deferred += num_pkts;
1105
1106        if (skb_vlan_tag_present(skb)) {
1107                gdesc->txd.ti = 1;
1108                gdesc->txd.tci = skb_vlan_tag_get(skb);
1109        }
1110
1111        /* Ensure that the write to (&gdesc->txd)->gen will be observed after
1112         * all other writes to &gdesc->txd.
1113         */
1114        dma_wmb();
1115
1116        /* finally flips the GEN bit of the SOP desc. */
1117        gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
1118                                                  VMXNET3_TXD_GEN);
1119#ifdef __BIG_ENDIAN_BITFIELD
1120        /* Finished updating in bitfields of Tx Desc, so write them in original
1121         * place.
1122         */
1123        vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
1124                           (struct Vmxnet3_TxDesc *)ctx.sop_txd);
1125        gdesc = ctx.sop_txd;
1126#endif
1127        netdev_dbg(adapter->netdev,
1128                "txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
1129                (u32)(ctx.sop_txd -
1130                tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
1131                le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));
1132
1133        spin_unlock_irqrestore(&tq->tx_lock, flags);
1134
1135        if (tx_num_deferred >= le32_to_cpu(tq->shared->txThreshold)) {
1136                tq->shared->txNumDeferred = 0;
1137                VMXNET3_WRITE_BAR0_REG(adapter,
1138                                       VMXNET3_REG_TXPROD + tq->qid * 8,
1139                                       tq->tx_ring.next2fill);
1140        }
1141
1142        return NETDEV_TX_OK;
1143
1144unlock_drop_pkt:
1145        spin_unlock_irqrestore(&tq->tx_lock, flags);
1146drop_pkt:
1147        tq->stats.drop_total++;
1148        dev_kfree_skb_any(skb);
1149        return NETDEV_TX_OK;
1150}
1151
1152
1153static netdev_tx_t
1154vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1155{
1156        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1157
1158        BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
1159        return vmxnet3_tq_xmit(skb,
1160                               &adapter->tx_queue[skb->queue_mapping],
1161                               adapter, netdev);
1162}
1163
1164
1165static void
1166vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
1167                struct sk_buff *skb,
1168                union Vmxnet3_GenericDesc *gdesc)
1169{
1170        if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
1171                if (gdesc->rcd.v4 &&
1172                    (le32_to_cpu(gdesc->dword[3]) &
1173                     VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) {
1174                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1175                        BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
1176                        BUG_ON(gdesc->rcd.frg);
1177                } else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) &
1178                                             (1 << VMXNET3_RCD_TUC_SHIFT))) {
1179                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1180                        BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
1181                        BUG_ON(gdesc->rcd.frg);
1182                } else {
1183                        if (gdesc->rcd.csum) {
1184                                skb->csum = htons(gdesc->rcd.csum);
1185                                skb->ip_summed = CHECKSUM_PARTIAL;
1186                        } else {
1187                                skb_checksum_none_assert(skb);
1188                        }
1189                }
1190        } else {
1191                skb_checksum_none_assert(skb);
1192        }
1193}
1194
1195
1196static void
1197vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
1198                 struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
1199{
1200        rq->stats.drop_err++;
1201        if (!rcd->fcs)
1202                rq->stats.drop_fcs++;
1203
1204        rq->stats.drop_total++;
1205
1206        /*
1207         * We do not unmap and chain the rx buffer to the skb.
1208         * We basically pretend this buffer is not used and will be recycled
1209         * by vmxnet3_rq_alloc_rx_buf()
1210         */
1211
1212        /*
1213         * ctx->skb may be NULL if this is the first and the only one
1214         * desc for the pkt
1215         */
1216        if (ctx->skb)
1217                dev_kfree_skb_irq(ctx->skb);
1218
1219        ctx->skb = NULL;
1220}
1221
1222
1223static u32
1224vmxnet3_get_hdr_len(struct vmxnet3_adapter *adapter, struct sk_buff *skb,
1225                    union Vmxnet3_GenericDesc *gdesc)
1226{
1227        u32 hlen, maplen;
1228        union {
1229                void *ptr;
1230                struct ethhdr *eth;
1231                struct vlan_ethhdr *veth;
1232                struct iphdr *ipv4;
1233                struct ipv6hdr *ipv6;
1234                struct tcphdr *tcp;
1235        } hdr;
1236        BUG_ON(gdesc->rcd.tcp == 0);
1237
1238        maplen = skb_headlen(skb);
1239        if (unlikely(sizeof(struct iphdr) + sizeof(struct tcphdr) > maplen))
1240                return 0;
1241
1242        if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
1243            skb->protocol == cpu_to_be16(ETH_P_8021AD))
1244                hlen = sizeof(struct vlan_ethhdr);
1245        else
1246                hlen = sizeof(struct ethhdr);
1247
1248        hdr.eth = eth_hdr(skb);
1249        if (gdesc->rcd.v4) {
1250                BUG_ON(hdr.eth->h_proto != htons(ETH_P_IP) &&
1251                       hdr.veth->h_vlan_encapsulated_proto != htons(ETH_P_IP));
1252                hdr.ptr += hlen;
1253                BUG_ON(hdr.ipv4->protocol != IPPROTO_TCP);
1254                hlen = hdr.ipv4->ihl << 2;
1255                hdr.ptr += hdr.ipv4->ihl << 2;
1256        } else if (gdesc->rcd.v6) {
1257                BUG_ON(hdr.eth->h_proto != htons(ETH_P_IPV6) &&
1258                       hdr.veth->h_vlan_encapsulated_proto != htons(ETH_P_IPV6));
1259                hdr.ptr += hlen;
1260                /* Use an estimated value, since we also need to handle
1261                 * TSO case.
1262                 */
1263                if (hdr.ipv6->nexthdr != IPPROTO_TCP)
1264                        return sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1265                hlen = sizeof(struct ipv6hdr);
1266                hdr.ptr += sizeof(struct ipv6hdr);
1267        } else {
1268                /* Non-IP pkt, dont estimate header length */
1269                return 0;
1270        }
1271
1272        if (hlen + sizeof(struct tcphdr) > maplen)
1273                return 0;
1274
1275        return (hlen + (hdr.tcp->doff << 2));
1276}
1277
1278static int
1279vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1280                       struct vmxnet3_adapter *adapter, int quota)
1281{
1282        static const u32 rxprod_reg[2] = {
1283                VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
1284        };
1285        u32 num_pkts = 0;
1286        bool skip_page_frags = false;
1287        struct Vmxnet3_RxCompDesc *rcd;
1288        struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
1289        u16 segCnt = 0, mss = 0;
1290#ifdef __BIG_ENDIAN_BITFIELD
1291        struct Vmxnet3_RxDesc rxCmdDesc;
1292        struct Vmxnet3_RxCompDesc rxComp;
1293#endif
1294        vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
1295                          &rxComp);
1296        while (rcd->gen == rq->comp_ring.gen) {
1297                struct vmxnet3_rx_buf_info *rbi;
1298                struct sk_buff *skb, *new_skb = NULL;
1299                struct page *new_page = NULL;
1300                dma_addr_t new_dma_addr;
1301                int num_to_alloc;
1302                struct Vmxnet3_RxDesc *rxd;
1303                u32 idx, ring_idx;
1304                struct vmxnet3_cmd_ring *ring = NULL;
1305                if (num_pkts >= quota) {
1306                        /* we may stop even before we see the EOP desc of
1307                         * the current pkt
1308                         */
1309                        break;
1310                }
1311
1312                /* Prevent any rcd field from being (speculatively) read before
1313                 * rcd->gen is read.
1314                 */
1315                dma_rmb();
1316
1317                BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 &&
1318                       rcd->rqID != rq->dataRingQid);
1319                idx = rcd->rxdIdx;
1320                ring_idx = VMXNET3_GET_RING_IDX(adapter, rcd->rqID);
1321                ring = rq->rx_ring + ring_idx;
1322                vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
1323                                  &rxCmdDesc);
1324                rbi = rq->buf_info[ring_idx] + idx;
1325
1326                BUG_ON(rxd->addr != rbi->dma_addr ||
1327                       rxd->len != rbi->len);
1328
1329                if (unlikely(rcd->eop && rcd->err)) {
1330                        vmxnet3_rx_error(rq, rcd, ctx, adapter);
1331                        goto rcd_done;
1332                }
1333
1334                if (rcd->sop) { /* first buf of the pkt */
1335                        bool rxDataRingUsed;
1336                        u16 len;
1337
1338                        BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
1339                               (rcd->rqID != rq->qid &&
1340                                rcd->rqID != rq->dataRingQid));
1341
1342                        BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
1343                        BUG_ON(ctx->skb != NULL || rbi->skb == NULL);
1344
1345                        if (unlikely(rcd->len == 0)) {
1346                                /* Pretend the rx buffer is skipped. */
1347                                BUG_ON(!(rcd->sop && rcd->eop));
1348                                netdev_dbg(adapter->netdev,
1349                                        "rxRing[%u][%u] 0 length\n",
1350                                        ring_idx, idx);
1351                                goto rcd_done;
1352                        }
1353
1354                        skip_page_frags = false;
1355                        ctx->skb = rbi->skb;
1356
1357                        rxDataRingUsed =
1358                                VMXNET3_RX_DATA_RING(adapter, rcd->rqID);
1359                        len = rxDataRingUsed ? rcd->len : rbi->len;
1360                        new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
1361                                                            len);
1362                        if (new_skb == NULL) {
1363                                /* Skb allocation failed, do not handover this
1364                                 * skb to stack. Reuse it. Drop the existing pkt
1365                                 */
1366                                rq->stats.rx_buf_alloc_failure++;
1367                                ctx->skb = NULL;
1368                                rq->stats.drop_total++;
1369                                skip_page_frags = true;
1370                                goto rcd_done;
1371                        }
1372
1373                        if (rxDataRingUsed) {
1374                                size_t sz;
1375
1376                                BUG_ON(rcd->len > rq->data_ring.desc_size);
1377
1378                                ctx->skb = new_skb;
1379                                sz = rcd->rxdIdx * rq->data_ring.desc_size;
1380                                memcpy(new_skb->data,
1381                                       &rq->data_ring.base[sz], rcd->len);
1382                        } else {
1383                                ctx->skb = rbi->skb;
1384
1385                                new_dma_addr =
1386                                        dma_map_single(&adapter->pdev->dev,
1387                                                       new_skb->data, rbi->len,
1388                                                       PCI_DMA_FROMDEVICE);
1389                                if (dma_mapping_error(&adapter->pdev->dev,
1390                                                      new_dma_addr)) {
1391                                        dev_kfree_skb(new_skb);
1392                                        /* Skb allocation failed, do not
1393                                         * handover this skb to stack. Reuse
1394                                         * it. Drop the existing pkt.
1395                                         */
1396                                        rq->stats.rx_buf_alloc_failure++;
1397                                        ctx->skb = NULL;
1398                                        rq->stats.drop_total++;
1399                                        skip_page_frags = true;
1400                                        goto rcd_done;
1401                                }
1402
1403                                dma_unmap_single(&adapter->pdev->dev,
1404                                                 rbi->dma_addr,
1405                                                 rbi->len,
1406                                                 PCI_DMA_FROMDEVICE);
1407
1408                                /* Immediate refill */
1409                                rbi->skb = new_skb;
1410                                rbi->dma_addr = new_dma_addr;
1411                                rxd->addr = cpu_to_le64(rbi->dma_addr);
1412                                rxd->len = rbi->len;
1413                        }
1414
1415#ifdef VMXNET3_RSS
1416                        if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
1417                            (adapter->netdev->features & NETIF_F_RXHASH))
1418                                skb_set_hash(ctx->skb,
1419                                             le32_to_cpu(rcd->rssHash),
1420                                             PKT_HASH_TYPE_L3);
1421#endif
1422                        skb_put(ctx->skb, rcd->len);
1423
1424                        if (VMXNET3_VERSION_GE_2(adapter) &&
1425                            rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
1426                                struct Vmxnet3_RxCompDescExt *rcdlro;
1427                                rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd;
1428
1429                                segCnt = rcdlro->segCnt;
1430                                WARN_ON_ONCE(segCnt == 0);
1431                                mss = rcdlro->mss;
1432                                if (unlikely(segCnt <= 1))
1433                                        segCnt = 0;
1434                        } else {
1435                                segCnt = 0;
1436                        }
1437                } else {
1438                        BUG_ON(ctx->skb == NULL && !skip_page_frags);
1439
1440                        /* non SOP buffer must be type 1 in most cases */
1441                        BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
1442                        BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1443
1444                        /* If an sop buffer was dropped, skip all
1445                         * following non-sop fragments. They will be reused.
1446                         */
1447                        if (skip_page_frags)
1448                                goto rcd_done;
1449
1450                        if (rcd->len) {
1451                                new_page = alloc_page(GFP_ATOMIC);
1452                                /* Replacement page frag could not be allocated.
1453                                 * Reuse this page. Drop the pkt and free the
1454                                 * skb which contained this page as a frag. Skip
1455                                 * processing all the following non-sop frags.
1456                                 */
1457                                if (unlikely(!new_page)) {
1458                                        rq->stats.rx_buf_alloc_failure++;
1459                                        dev_kfree_skb(ctx->skb);
1460                                        ctx->skb = NULL;
1461                                        skip_page_frags = true;
1462                                        goto rcd_done;
1463                                }
1464                                new_dma_addr = dma_map_page(&adapter->pdev->dev,
1465                                                            new_page,
1466                                                            0, PAGE_SIZE,
1467                                                            PCI_DMA_FROMDEVICE);
1468                                if (dma_mapping_error(&adapter->pdev->dev,
1469                                                      new_dma_addr)) {
1470                                        put_page(new_page);
1471                                        rq->stats.rx_buf_alloc_failure++;
1472                                        dev_kfree_skb(ctx->skb);
1473                                        ctx->skb = NULL;
1474                                        skip_page_frags = true;
1475                                        goto rcd_done;
1476                                }
1477
1478                                dma_unmap_page(&adapter->pdev->dev,
1479                                               rbi->dma_addr, rbi->len,
1480                                               PCI_DMA_FROMDEVICE);
1481
1482                                vmxnet3_append_frag(ctx->skb, rcd, rbi);
1483
1484                                /* Immediate refill */
1485                                rbi->page = new_page;
1486                                rbi->dma_addr = new_dma_addr;
1487                                rxd->addr = cpu_to_le64(rbi->dma_addr);
1488                                rxd->len = rbi->len;
1489                        }
1490                }
1491
1492
1493                skb = ctx->skb;
1494                if (rcd->eop) {
1495                        u32 mtu = adapter->netdev->mtu;
1496                        skb->len += skb->data_len;
1497
1498                        vmxnet3_rx_csum(adapter, skb,
1499                                        (union Vmxnet3_GenericDesc *)rcd);
1500                        skb->protocol = eth_type_trans(skb, adapter->netdev);
1501                        if (!rcd->tcp ||
1502                            !(adapter->netdev->features & NETIF_F_LRO))
1503                                goto not_lro;
1504
1505                        if (segCnt != 0 && mss != 0) {
1506                                skb_shinfo(skb)->gso_type = rcd->v4 ?
1507                                        SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
1508                                skb_shinfo(skb)->gso_size = mss;
1509                                skb_shinfo(skb)->gso_segs = segCnt;
1510                        } else if (segCnt != 0 || skb->len > mtu) {
1511                                u32 hlen;
1512
1513                                hlen = vmxnet3_get_hdr_len(adapter, skb,
1514                                        (union Vmxnet3_GenericDesc *)rcd);
1515                                if (hlen == 0)
1516                                        goto not_lro;
1517
1518                                skb_shinfo(skb)->gso_type =
1519                                        rcd->v4 ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
1520                                if (segCnt != 0) {
1521                                        skb_shinfo(skb)->gso_segs = segCnt;
1522                                        skb_shinfo(skb)->gso_size =
1523                                                DIV_ROUND_UP(skb->len -
1524                                                        hlen, segCnt);
1525                                } else {
1526                                        skb_shinfo(skb)->gso_size = mtu - hlen;
1527                                }
1528                        }
1529not_lro:
1530                        if (unlikely(rcd->ts))
1531                                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci);
1532
1533                        if (adapter->netdev->features & NETIF_F_LRO)
1534                                netif_receive_skb(skb);
1535                        else
1536                                napi_gro_receive(&rq->napi, skb);
1537
1538                        ctx->skb = NULL;
1539                        num_pkts++;
1540                }
1541
1542rcd_done:
1543                /* device may have skipped some rx descs */
1544                ring->next2comp = idx;
1545                num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
1546                ring = rq->rx_ring + ring_idx;
1547
1548                /* Ensure that the writes to rxd->gen bits will be observed
1549                 * after all other writes to rxd objects.
1550                 */
1551                dma_wmb();
1552
1553                while (num_to_alloc) {
1554                        vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
1555                                          &rxCmdDesc);
1556                        BUG_ON(!rxd->addr);
1557
1558                        /* Recv desc is ready to be used by the device */
1559                        rxd->gen = ring->gen;
1560                        vmxnet3_cmd_ring_adv_next2fill(ring);
1561                        num_to_alloc--;
1562                }
1563
1564                /* if needed, update the register */
1565                if (unlikely(rq->shared->updateRxProd)) {
1566                        VMXNET3_WRITE_BAR0_REG(adapter,
1567                                               rxprod_reg[ring_idx] + rq->qid * 8,
1568                                               ring->next2fill);
1569                }
1570
1571                vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
1572                vmxnet3_getRxComp(rcd,
1573                                  &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
1574        }
1575
1576        return num_pkts;
1577}
1578
1579
1580static void
1581vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
1582                   struct vmxnet3_adapter *adapter)
1583{
1584        u32 i, ring_idx;
1585        struct Vmxnet3_RxDesc *rxd;
1586
1587        for (ring_idx = 0; ring_idx < 2; ring_idx++) {
1588                for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
1589#ifdef __BIG_ENDIAN_BITFIELD
1590                        struct Vmxnet3_RxDesc rxDesc;
1591#endif
1592                        vmxnet3_getRxDesc(rxd,
1593                                &rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
1594
1595                        if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
1596                                        rq->buf_info[ring_idx][i].skb) {
1597                                dma_unmap_single(&adapter->pdev->dev, rxd->addr,
1598                                                 rxd->len, PCI_DMA_FROMDEVICE);
1599                                dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
1600                                rq->buf_info[ring_idx][i].skb = NULL;
1601                        } else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
1602                                        rq->buf_info[ring_idx][i].page) {
1603                                dma_unmap_page(&adapter->pdev->dev, rxd->addr,
1604                                               rxd->len, PCI_DMA_FROMDEVICE);
1605                                put_page(rq->buf_info[ring_idx][i].page);
1606                                rq->buf_info[ring_idx][i].page = NULL;
1607                        }
1608                }
1609
1610                rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
1611                rq->rx_ring[ring_idx].next2fill =
1612                                        rq->rx_ring[ring_idx].next2comp = 0;
1613        }
1614
1615        rq->comp_ring.gen = VMXNET3_INIT_GEN;
1616        rq->comp_ring.next2proc = 0;
1617}
1618
1619
1620static void
1621vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
1622{
1623        int i;
1624
1625        for (i = 0; i < adapter->num_rx_queues; i++)
1626                vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
1627}
1628
1629
1630static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1631                               struct vmxnet3_adapter *adapter)
1632{
1633        int i;
1634        int j;
1635
1636        /* all rx buffers must have already been freed */
1637        for (i = 0; i < 2; i++) {
1638                if (rq->buf_info[i]) {
1639                        for (j = 0; j < rq->rx_ring[i].size; j++)
1640                                BUG_ON(rq->buf_info[i][j].page != NULL);
1641                }
1642        }
1643
1644
1645        for (i = 0; i < 2; i++) {
1646                if (rq->rx_ring[i].base) {
1647                        dma_free_coherent(&adapter->pdev->dev,
1648                                          rq->rx_ring[i].size
1649                                          * sizeof(struct Vmxnet3_RxDesc),
1650                                          rq->rx_ring[i].base,
1651                                          rq->rx_ring[i].basePA);
1652                        rq->rx_ring[i].base = NULL;
1653                }
1654        }
1655
1656        if (rq->data_ring.base) {
1657                dma_free_coherent(&adapter->pdev->dev,
1658                                  rq->rx_ring[0].size * rq->data_ring.desc_size,
1659                                  rq->data_ring.base, rq->data_ring.basePA);
1660                rq->data_ring.base = NULL;
1661        }
1662
1663        if (rq->comp_ring.base) {
1664                dma_free_coherent(&adapter->pdev->dev, rq->comp_ring.size
1665                                  * sizeof(struct Vmxnet3_RxCompDesc),
1666                                  rq->comp_ring.base, rq->comp_ring.basePA);
1667                rq->comp_ring.base = NULL;
1668        }
1669
1670        if (rq->buf_info[0]) {
1671                size_t sz = sizeof(struct vmxnet3_rx_buf_info) *
1672                        (rq->rx_ring[0].size + rq->rx_ring[1].size);
1673                dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
1674                                  rq->buf_info_pa);
1675                rq->buf_info[0] = rq->buf_info[1] = NULL;
1676        }
1677}
1678
1679static void
1680vmxnet3_rq_destroy_all_rxdataring(struct vmxnet3_adapter *adapter)
1681{
1682        int i;
1683
1684        for (i = 0; i < adapter->num_rx_queues; i++) {
1685                struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
1686
1687                if (rq->data_ring.base) {
1688                        dma_free_coherent(&adapter->pdev->dev,
1689                                          (rq->rx_ring[0].size *
1690                                          rq->data_ring.desc_size),
1691                                          rq->data_ring.base,
1692                                          rq->data_ring.basePA);
1693                        rq->data_ring.base = NULL;
1694                        rq->data_ring.desc_size = 0;
1695                }
1696        }
1697}
1698
1699static int
1700vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1701                struct vmxnet3_adapter  *adapter)
1702{
1703        int i;
1704
1705        /* initialize buf_info */
1706        for (i = 0; i < rq->rx_ring[0].size; i++) {
1707
1708                /* 1st buf for a pkt is skbuff */
1709                if (i % adapter->rx_buf_per_pkt == 0) {
1710                        rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1711                        rq->buf_info[0][i].len = adapter->skb_buf_size;
1712                } else { /* subsequent bufs for a pkt is frag */
1713                        rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1714                        rq->buf_info[0][i].len = PAGE_SIZE;
1715                }
1716        }
1717        for (i = 0; i < rq->rx_ring[1].size; i++) {
1718                rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1719                rq->buf_info[1][i].len = PAGE_SIZE;
1720        }
1721
1722        /* reset internal state and allocate buffers for both rings */
1723        for (i = 0; i < 2; i++) {
1724                rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1725
1726                memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1727                       sizeof(struct Vmxnet3_RxDesc));
1728                rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1729        }
1730        if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1731                                    adapter) == 0) {
1732                /* at least has 1 rx buffer for the 1st ring */
1733                return -ENOMEM;
1734        }
1735        vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1736
1737        /* reset the comp ring */
1738        rq->comp_ring.next2proc = 0;
1739        memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1740               sizeof(struct Vmxnet3_RxCompDesc));
1741        rq->comp_ring.gen = VMXNET3_INIT_GEN;
1742
1743        /* reset rxctx */
1744        rq->rx_ctx.skb = NULL;
1745
1746        /* stats are not reset */
1747        return 0;
1748}
1749
1750
1751static int
1752vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1753{
1754        int i, err = 0;
1755
1756        for (i = 0; i < adapter->num_rx_queues; i++) {
1757                err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1758                if (unlikely(err)) {
1759                        dev_err(&adapter->netdev->dev, "%s: failed to "
1760                                "initialize rx queue%i\n",
1761                                adapter->netdev->name, i);
1762                        break;
1763                }
1764        }
1765        return err;
1766
1767}
1768
1769
1770static int
1771vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1772{
1773        int i;
1774        size_t sz;
1775        struct vmxnet3_rx_buf_info *bi;
1776
1777        for (i = 0; i < 2; i++) {
1778
1779                sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1780                rq->rx_ring[i].base = dma_alloc_coherent(
1781                                                &adapter->pdev->dev, sz,
1782                                                &rq->rx_ring[i].basePA,
1783                                                GFP_KERNEL);
1784                if (!rq->rx_ring[i].base) {
1785                        netdev_err(adapter->netdev,
1786                                   "failed to allocate rx ring %d\n", i);
1787                        goto err;
1788                }
1789        }
1790
1791        if ((adapter->rxdataring_enabled) && (rq->data_ring.desc_size != 0)) {
1792                sz = rq->rx_ring[0].size * rq->data_ring.desc_size;
1793                rq->data_ring.base =
1794                        dma_alloc_coherent(&adapter->pdev->dev, sz,
1795                                           &rq->data_ring.basePA,
1796                                           GFP_KERNEL);
1797                if (!rq->data_ring.base) {
1798                        netdev_err(adapter->netdev,
1799                                   "rx data ring will be disabled\n");
1800                        adapter->rxdataring_enabled = false;
1801                }
1802        } else {
1803                rq->data_ring.base = NULL;
1804                rq->data_ring.desc_size = 0;
1805        }
1806
1807        sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1808        rq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev, sz,
1809                                                &rq->comp_ring.basePA,
1810                                                GFP_KERNEL);
1811        if (!rq->comp_ring.base) {
1812                netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
1813                goto err;
1814        }
1815
1816        sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1817                                                   rq->rx_ring[1].size);
1818        bi = dma_alloc_coherent(&adapter->pdev->dev, sz, &rq->buf_info_pa,
1819                                GFP_KERNEL);
1820        if (!bi)
1821                goto err;
1822
1823        rq->buf_info[0] = bi;
1824        rq->buf_info[1] = bi + rq->rx_ring[0].size;
1825
1826        return 0;
1827
1828err:
1829        vmxnet3_rq_destroy(rq, adapter);
1830        return -ENOMEM;
1831}
1832
1833
1834static int
1835vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1836{
1837        int i, err = 0;
1838
1839        adapter->rxdataring_enabled = VMXNET3_VERSION_GE_3(adapter);
1840
1841        for (i = 0; i < adapter->num_rx_queues; i++) {
1842                err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1843                if (unlikely(err)) {
1844                        dev_err(&adapter->netdev->dev,
1845                                "%s: failed to create rx queue%i\n",
1846                                adapter->netdev->name, i);
1847                        goto err_out;
1848                }
1849        }
1850
1851        if (!adapter->rxdataring_enabled)
1852                vmxnet3_rq_destroy_all_rxdataring(adapter);
1853
1854        return err;
1855err_out:
1856        vmxnet3_rq_destroy_all(adapter);
1857        return err;
1858
1859}
1860
1861/* Multiple queue aware polling function for tx and rx */
1862
1863static int
1864vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1865{
1866        int rcd_done = 0, i;
1867        if (unlikely(adapter->shared->ecr))
1868                vmxnet3_process_events(adapter);
1869        for (i = 0; i < adapter->num_tx_queues; i++)
1870                vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1871
1872        for (i = 0; i < adapter->num_rx_queues; i++)
1873                rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1874                                                   adapter, budget);
1875        return rcd_done;
1876}
1877
1878
1879static int
1880vmxnet3_poll(struct napi_struct *napi, int budget)
1881{
1882        struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1883                                          struct vmxnet3_rx_queue, napi);
1884        int rxd_done;
1885
1886        rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1887
1888        if (rxd_done < budget) {
1889                napi_complete_done(napi, rxd_done);
1890                vmxnet3_enable_all_intrs(rx_queue->adapter);
1891        }
1892        return rxd_done;
1893}
1894
1895/*
1896 * NAPI polling function for MSI-X mode with multiple Rx queues
1897 * Returns the # of the NAPI credit consumed (# of rx descriptors processed)
1898 */
1899
1900static int
1901vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1902{
1903        struct vmxnet3_rx_queue *rq = container_of(napi,
1904                                                struct vmxnet3_rx_queue, napi);
1905        struct vmxnet3_adapter *adapter = rq->adapter;
1906        int rxd_done;
1907
1908        /* When sharing interrupt with corresponding tx queue, process
1909         * tx completions in that queue as well
1910         */
1911        if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1912                struct vmxnet3_tx_queue *tq =
1913                                &adapter->tx_queue[rq - adapter->rx_queue];
1914                vmxnet3_tq_tx_complete(tq, adapter);
1915        }
1916
1917        rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1918
1919        if (rxd_done < budget) {
1920                napi_complete_done(napi, rxd_done);
1921                vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1922        }
1923        return rxd_done;
1924}
1925
1926
1927#ifdef CONFIG_PCI_MSI
1928
1929/*
1930 * Handle completion interrupts on tx queues
1931 * Returns whether or not the intr is handled
1932 */
1933
1934static irqreturn_t
1935vmxnet3_msix_tx(int irq, void *data)
1936{
1937        struct vmxnet3_tx_queue *tq = data;
1938        struct vmxnet3_adapter *adapter = tq->adapter;
1939
1940        if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1941                vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
1942
1943        /* Handle the case where only one irq is allocate for all tx queues */
1944        if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1945                int i;
1946                for (i = 0; i < adapter->num_tx_queues; i++) {
1947                        struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
1948                        vmxnet3_tq_tx_complete(txq, adapter);
1949                }
1950        } else {
1951                vmxnet3_tq_tx_complete(tq, adapter);
1952        }
1953        vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
1954
1955        return IRQ_HANDLED;
1956}
1957
1958
1959/*
1960 * Handle completion interrupts on rx queues. Returns whether or not the
1961 * intr is handled
1962 */
1963
1964static irqreturn_t
1965vmxnet3_msix_rx(int irq, void *data)
1966{
1967        struct vmxnet3_rx_queue *rq = data;
1968        struct vmxnet3_adapter *adapter = rq->adapter;
1969
1970        /* disable intr if needed */
1971        if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1972                vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
1973        napi_schedule(&rq->napi);
1974
1975        return IRQ_HANDLED;
1976}
1977
1978/*
1979 *----------------------------------------------------------------------------
1980 *
1981 * vmxnet3_msix_event --
1982 *
1983 *    vmxnet3 msix event intr handler
1984 *
1985 * Result:
1986 *    whether or not the intr is handled
1987 *
1988 *----------------------------------------------------------------------------
1989 */
1990
1991static irqreturn_t
1992vmxnet3_msix_event(int irq, void *data)
1993{
1994        struct net_device *dev = data;
1995        struct vmxnet3_adapter *adapter = netdev_priv(dev);
1996
1997        /* disable intr if needed */
1998        if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1999                vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
2000
2001        if (adapter->shared->ecr)
2002                vmxnet3_process_events(adapter);
2003
2004        vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
2005
2006        return IRQ_HANDLED;
2007}
2008
2009#endif /* CONFIG_PCI_MSI  */
2010
2011
2012/* Interrupt handler for vmxnet3  */
2013static irqreturn_t
2014vmxnet3_intr(int irq, void *dev_id)
2015{
2016        struct net_device *dev = dev_id;
2017        struct vmxnet3_adapter *adapter = netdev_priv(dev);
2018
2019        if (adapter->intr.type == VMXNET3_IT_INTX) {
2020                u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
2021                if (unlikely(icr == 0))
2022                        /* not ours */
2023                        return IRQ_NONE;
2024        }
2025
2026
2027        /* disable intr if needed */
2028        if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
2029                vmxnet3_disable_all_intrs(adapter);
2030
2031        napi_schedule(&adapter->rx_queue[0].napi);
2032
2033        return IRQ_HANDLED;
2034}
2035
2036#ifdef CONFIG_NET_POLL_CONTROLLER
2037
2038/* netpoll callback. */
2039static void
2040vmxnet3_netpoll(struct net_device *netdev)
2041{
2042        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2043
2044        switch (adapter->intr.type) {
2045#ifdef CONFIG_PCI_MSI
2046        case VMXNET3_IT_MSIX: {
2047                int i;
2048                for (i = 0; i < adapter->num_rx_queues; i++)
2049                        vmxnet3_msix_rx(0, &adapter->rx_queue[i]);
2050                break;
2051        }
2052#endif
2053        case VMXNET3_IT_MSI:
2054        default:
2055                vmxnet3_intr(0, adapter->netdev);
2056                break;
2057        }
2058
2059}
2060#endif  /* CONFIG_NET_POLL_CONTROLLER */
2061
2062static int
2063vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
2064{
2065        struct vmxnet3_intr *intr = &adapter->intr;
2066        int err = 0, i;
2067        int vector = 0;
2068
2069#ifdef CONFIG_PCI_MSI
2070        if (adapter->intr.type == VMXNET3_IT_MSIX) {
2071                for (i = 0; i < adapter->num_tx_queues; i++) {
2072                        if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
2073                                sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
2074                                        adapter->netdev->name, vector);
2075                                err = request_irq(
2076                                              intr->msix_entries[vector].vector,
2077                                              vmxnet3_msix_tx, 0,
2078                                              adapter->tx_queue[i].name,
2079                                              &adapter->tx_queue[i]);
2080                        } else {
2081                                sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
2082                                        adapter->netdev->name, vector);
2083                        }
2084                        if (err) {
2085                                dev_err(&adapter->netdev->dev,
2086                                        "Failed to request irq for MSIX, %s, "
2087                                        "error %d\n",
2088                                        adapter->tx_queue[i].name, err);
2089                                return err;
2090                        }
2091
2092                        /* Handle the case where only 1 MSIx was allocated for
2093                         * all tx queues */
2094                        if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
2095                                for (; i < adapter->num_tx_queues; i++)
2096                                        adapter->tx_queue[i].comp_ring.intr_idx
2097                                                                = vector;
2098                                vector++;
2099                                break;
2100                        } else {
2101                                adapter->tx_queue[i].comp_ring.intr_idx
2102                                                                = vector++;
2103                        }
2104                }
2105                if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
2106                        vector = 0;
2107
2108                for (i = 0; i < adapter->num_rx_queues; i++) {
2109                        if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
2110                                sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
2111                                        adapter->netdev->name, vector);
2112                        else
2113                                sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
2114                                        adapter->netdev->name, vector);
2115                        err = request_irq(intr->msix_entries[vector].vector,
2116                                          vmxnet3_msix_rx, 0,
2117                                          adapter->rx_queue[i].name,
2118                                          &(adapter->rx_queue[i]));
2119                        if (err) {
2120                                netdev_err(adapter->netdev,
2121                                           "Failed to request irq for MSIX, "
2122                                           "%s, error %d\n",
2123                                           adapter->rx_queue[i].name, err);
2124                                return err;
2125                        }
2126
2127                        adapter->rx_queue[i].comp_ring.intr_idx = vector++;
2128                }
2129
2130                sprintf(intr->event_msi_vector_name, "%s-event-%d",
2131                        adapter->netdev->name, vector);
2132                err = request_irq(intr->msix_entries[vector].vector,
2133                                  vmxnet3_msix_event, 0,
2134                                  intr->event_msi_vector_name, adapter->netdev);
2135                intr->event_intr_idx = vector;
2136
2137        } else if (intr->type == VMXNET3_IT_MSI) {
2138                adapter->num_rx_queues = 1;
2139                err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
2140                                  adapter->netdev->name, adapter->netdev);
2141        } else {
2142#endif
2143                adapter->num_rx_queues = 1;
2144                err = request_irq(adapter->pdev->irq, vmxnet3_intr,
2145                                  IRQF_SHARED, adapter->netdev->name,
2146                                  adapter->netdev);
2147#ifdef CONFIG_PCI_MSI
2148        }
2149#endif
2150        intr->num_intrs = vector + 1;
2151        if (err) {
2152                netdev_err(adapter->netdev,
2153                           "Failed to request irq (intr type:%d), error %d\n",
2154                           intr->type, err);
2155        } else {
2156                /* Number of rx queues will not change after this */
2157                for (i = 0; i < adapter->num_rx_queues; i++) {
2158                        struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2159                        rq->qid = i;
2160                        rq->qid2 = i + adapter->num_rx_queues;
2161                        rq->dataRingQid = i + 2 * adapter->num_rx_queues;
2162                }
2163
2164                /* init our intr settings */
2165                for (i = 0; i < intr->num_intrs; i++)
2166                        intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
2167                if (adapter->intr.type != VMXNET3_IT_MSIX) {
2168                        adapter->intr.event_intr_idx = 0;
2169                        for (i = 0; i < adapter->num_tx_queues; i++)
2170                                adapter->tx_queue[i].comp_ring.intr_idx = 0;
2171                        adapter->rx_queue[0].comp_ring.intr_idx = 0;
2172                }
2173
2174                netdev_info(adapter->netdev,
2175                            "intr type %u, mode %u, %u vectors allocated\n",
2176                            intr->type, intr->mask_mode, intr->num_intrs);
2177        }
2178
2179        return err;
2180}
2181
2182
2183static void
2184vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
2185{
2186        struct vmxnet3_intr *intr = &adapter->intr;
2187        BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
2188
2189        switch (intr->type) {
2190#ifdef CONFIG_PCI_MSI
2191        case VMXNET3_IT_MSIX:
2192        {
2193                int i, vector = 0;
2194
2195                if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
2196                        for (i = 0; i < adapter->num_tx_queues; i++) {
2197                                free_irq(intr->msix_entries[vector++].vector,
2198                                         &(adapter->tx_queue[i]));
2199                                if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
2200                                        break;
2201                        }
2202                }
2203
2204                for (i = 0; i < adapter->num_rx_queues; i++) {
2205                        free_irq(intr->msix_entries[vector++].vector,
2206                                 &(adapter->rx_queue[i]));
2207                }
2208
2209                free_irq(intr->msix_entries[vector].vector,
2210                         adapter->netdev);
2211                BUG_ON(vector >= intr->num_intrs);
2212                break;
2213        }
2214#endif
2215        case VMXNET3_IT_MSI:
2216                free_irq(adapter->pdev->irq, adapter->netdev);
2217                break;
2218        case VMXNET3_IT_INTX:
2219                free_irq(adapter->pdev->irq, adapter->netdev);
2220                break;
2221        default:
2222                BUG();
2223        }
2224}
2225
2226
2227static void
2228vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
2229{
2230        u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2231        u16 vid;
2232
2233        /* allow untagged pkts */
2234        VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
2235
2236        for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
2237                VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
2238}
2239
2240
2241static int
2242vmxnet3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
2243{
2244        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2245
2246        if (!(netdev->flags & IFF_PROMISC)) {
2247                u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2248                unsigned long flags;
2249
2250                VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
2251                spin_lock_irqsave(&adapter->cmd_lock, flags);
2252                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2253                                       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2254                spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2255        }
2256
2257        set_bit(vid, adapter->active_vlans);
2258
2259        return 0;
2260}
2261
2262
2263static int
2264vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
2265{
2266        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2267
2268        if (!(netdev->flags & IFF_PROMISC)) {
2269                u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2270                unsigned long flags;
2271
2272                VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
2273                spin_lock_irqsave(&adapter->cmd_lock, flags);
2274                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2275                                       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2276                spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2277        }
2278
2279        clear_bit(vid, adapter->active_vlans);
2280
2281        return 0;
2282}
2283
2284
2285static u8 *
2286vmxnet3_copy_mc(struct net_device *netdev)
2287{
2288        u8 *buf = NULL;
2289        u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
2290
2291        /* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
2292        if (sz <= 0xffff) {
2293                /* We may be called with BH disabled */
2294                buf = kmalloc(sz, GFP_ATOMIC);
2295                if (buf) {
2296                        struct netdev_hw_addr *ha;
2297                        int i = 0;
2298
2299                        netdev_for_each_mc_addr(ha, netdev)
2300                                memcpy(buf + i++ * ETH_ALEN, ha->addr,
2301                                       ETH_ALEN);
2302                }
2303        }
2304        return buf;
2305}
2306
2307
2308static void
2309vmxnet3_set_mc(struct net_device *netdev)
2310{
2311        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2312        unsigned long flags;
2313        struct Vmxnet3_RxFilterConf *rxConf =
2314                                        &adapter->shared->devRead.rxFilterConf;
2315        u8 *new_table = NULL;
2316        dma_addr_t new_table_pa = 0;
2317        bool new_table_pa_valid = false;
2318        u32 new_mode = VMXNET3_RXM_UCAST;
2319
2320        if (netdev->flags & IFF_PROMISC) {
2321                u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2322                memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
2323
2324                new_mode |= VMXNET3_RXM_PROMISC;
2325        } else {
2326                vmxnet3_restore_vlan(adapter);
2327        }
2328
2329        if (netdev->flags & IFF_BROADCAST)
2330                new_mode |= VMXNET3_RXM_BCAST;
2331
2332        if (netdev->flags & IFF_ALLMULTI)
2333                new_mode |= VMXNET3_RXM_ALL_MULTI;
2334        else
2335                if (!netdev_mc_empty(netdev)) {
2336                        new_table = vmxnet3_copy_mc(netdev);
2337                        if (new_table) {
2338                                size_t sz = netdev_mc_count(netdev) * ETH_ALEN;
2339
2340                                rxConf->mfTableLen = cpu_to_le16(sz);
2341                                new_table_pa = dma_map_single(
2342                                                        &adapter->pdev->dev,
2343                                                        new_table,
2344                                                        sz,
2345                                                        PCI_DMA_TODEVICE);
2346                                if (!dma_mapping_error(&adapter->pdev->dev,
2347                                                       new_table_pa)) {
2348                                        new_mode |= VMXNET3_RXM_MCAST;
2349                                        new_table_pa_valid = true;
2350                                        rxConf->mfTablePA = cpu_to_le64(
2351                                                                new_table_pa);
2352                                }
2353                        }
2354                        if (!new_table_pa_valid) {
2355                                netdev_info(netdev,
2356                                            "failed to copy mcast list, setting ALL_MULTI\n");
2357                                new_mode |= VMXNET3_RXM_ALL_MULTI;
2358                        }
2359                }
2360
2361        if (!(new_mode & VMXNET3_RXM_MCAST)) {
2362                rxConf->mfTableLen = 0;
2363                rxConf->mfTablePA = 0;
2364        }
2365
2366        spin_lock_irqsave(&adapter->cmd_lock, flags);
2367        if (new_mode != rxConf->rxMode) {
2368                rxConf->rxMode = cpu_to_le32(new_mode);
2369                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2370                                       VMXNET3_CMD_UPDATE_RX_MODE);
2371                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2372                                       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2373        }
2374
2375        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2376                               VMXNET3_CMD_UPDATE_MAC_FILTERS);
2377        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2378
2379        if (new_table_pa_valid)
2380                dma_unmap_single(&adapter->pdev->dev, new_table_pa,
2381                                 rxConf->mfTableLen, PCI_DMA_TODEVICE);
2382        kfree(new_table);
2383}
2384
2385void
2386vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
2387{
2388        int i;
2389
2390        for (i = 0; i < adapter->num_rx_queues; i++)
2391                vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
2392}
2393
2394
2395/*
2396 *   Set up driver_shared based on settings in adapter.
2397 */
2398
2399static void
2400vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2401{
2402        struct Vmxnet3_DriverShared *shared = adapter->shared;
2403        struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2404        struct Vmxnet3_TxQueueConf *tqc;
2405        struct Vmxnet3_RxQueueConf *rqc;
2406        int i;
2407
2408        memset(shared, 0, sizeof(*shared));
2409
2410        /* driver settings */
2411        shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2412        devRead->misc.driverInfo.version = cpu_to_le32(
2413                                                VMXNET3_DRIVER_VERSION_NUM);
2414        devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2415                                VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2416        devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
2417        *((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2418                                *((u32 *)&devRead->misc.driverInfo.gos));
2419        devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2420        devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2421
2422        devRead->misc.ddPA = cpu_to_le64(adapter->adapter_pa);
2423        devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2424
2425        /* set up feature flags */
2426        if (adapter->netdev->features & NETIF_F_RXCSUM)
2427                devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2428
2429        if (adapter->netdev->features & NETIF_F_LRO) {
2430                devRead->misc.uptFeatures |= UPT1_F_LRO;
2431                devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2432        }
2433        if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
2434                devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2435
2436        devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2437        devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2438        devRead->misc.queueDescLen = cpu_to_le32(
2439                adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2440                adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2441
2442        /* tx queue settings */
2443        devRead->misc.numTxQueues =  adapter->num_tx_queues;
2444        for (i = 0; i < adapter->num_tx_queues; i++) {
2445                struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2446                BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2447                tqc = &adapter->tqd_start[i].conf;
2448                tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2449                tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2450                tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2451                tqc->ddPA           = cpu_to_le64(tq->buf_info_pa);
2452                tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2453                tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2454                tqc->txDataRingDescSize = cpu_to_le32(tq->txdata_desc_size);
2455                tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2456                tqc->ddLen          = cpu_to_le32(
2457                                        sizeof(struct vmxnet3_tx_buf_info) *
2458                                        tqc->txRingSize);
2459                tqc->intrIdx        = tq->comp_ring.intr_idx;
2460        }
2461
2462        /* rx queue settings */
2463        devRead->misc.numRxQueues = adapter->num_rx_queues;
2464        for (i = 0; i < adapter->num_rx_queues; i++) {
2465                struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2466                rqc = &adapter->rqd_start[i].conf;
2467                rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2468                rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2469                rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2470                rqc->ddPA            = cpu_to_le64(rq->buf_info_pa);
2471                rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2472                rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2473                rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2474                rqc->ddLen           = cpu_to_le32(
2475                                        sizeof(struct vmxnet3_rx_buf_info) *
2476                                        (rqc->rxRingSize[0] +
2477                                         rqc->rxRingSize[1]));
2478                rqc->intrIdx         = rq->comp_ring.intr_idx;
2479                if (VMXNET3_VERSION_GE_3(adapter)) {
2480                        rqc->rxDataRingBasePA =
2481                                cpu_to_le64(rq->data_ring.basePA);
2482                        rqc->rxDataRingDescSize =
2483                                cpu_to_le16(rq->data_ring.desc_size);
2484                }
2485        }
2486
2487#ifdef VMXNET3_RSS
2488        memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2489
2490        if (adapter->rss) {
2491                struct UPT1_RSSConf *rssConf = adapter->rss_conf;
2492
2493                devRead->misc.uptFeatures |= UPT1_F_RSS;
2494                devRead->misc.numRxQueues = adapter->num_rx_queues;
2495                rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2496                                    UPT1_RSS_HASH_TYPE_IPV4 |
2497                                    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2498                                    UPT1_RSS_HASH_TYPE_IPV6;
2499                rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2500                rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2501                rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2502                netdev_rss_key_fill(rssConf->hashKey, sizeof(rssConf->hashKey));
2503
2504                for (i = 0; i < rssConf->indTableSize; i++)
2505                        rssConf->indTable[i] = ethtool_rxfh_indir_default(
2506                                i, adapter->num_rx_queues);
2507
2508                devRead->rssConfDesc.confVer = 1;
2509                devRead->rssConfDesc.confLen = cpu_to_le32(sizeof(*rssConf));
2510                devRead->rssConfDesc.confPA =
2511                        cpu_to_le64(adapter->rss_conf_pa);
2512        }
2513
2514#endif /* VMXNET3_RSS */
2515
2516        /* intr settings */
2517        devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2518                                     VMXNET3_IMM_AUTO;
2519        devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2520        for (i = 0; i < adapter->intr.num_intrs; i++)
2521                devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2522
2523        devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
2524        devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2525
2526        /* rx filter settings */
2527        devRead->rxFilterConf.rxMode = 0;
2528        vmxnet3_restore_vlan(adapter);
2529        vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2530
2531        /* the rest are already zeroed */
2532}
2533
2534static void
2535vmxnet3_init_coalesce(struct vmxnet3_adapter *adapter)
2536{
2537        struct Vmxnet3_DriverShared *shared = adapter->shared;
2538        union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
2539        unsigned long flags;
2540
2541        if (!VMXNET3_VERSION_GE_3(adapter))
2542                return;
2543
2544        spin_lock_irqsave(&adapter->cmd_lock, flags);
2545        cmdInfo->varConf.confVer = 1;
2546        cmdInfo->varConf.confLen =
2547                cpu_to_le32(sizeof(*adapter->coal_conf));
2548        cmdInfo->varConf.confPA  = cpu_to_le64(adapter->coal_conf_pa);
2549
2550        if (adapter->default_coal_mode) {
2551                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2552                                       VMXNET3_CMD_GET_COALESCE);
2553        } else {
2554                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2555                                       VMXNET3_CMD_SET_COALESCE);
2556        }
2557
2558        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2559}
2560
2561int
2562vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2563{
2564        int err, i;
2565        u32 ret;
2566        unsigned long flags;
2567
2568        netdev_dbg(adapter->netdev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2569                " ring sizes %u %u %u\n", adapter->netdev->name,
2570                adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2571                adapter->tx_queue[0].tx_ring.size,
2572                adapter->rx_queue[0].rx_ring[0].size,
2573                adapter->rx_queue[0].rx_ring[1].size);
2574
2575        vmxnet3_tq_init_all(adapter);
2576        err = vmxnet3_rq_init_all(adapter);
2577        if (err) {
2578                netdev_err(adapter->netdev,
2579                           "Failed to init rx queue error %d\n", err);
2580                goto rq_err;
2581        }
2582
2583        err = vmxnet3_request_irqs(adapter);
2584        if (err) {
2585                netdev_err(adapter->netdev,
2586                           "Failed to setup irq for error %d\n", err);
2587                goto irq_err;
2588        }
2589
2590        vmxnet3_setup_driver_shared(adapter);
2591
2592        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2593                               adapter->shared_pa));
2594        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2595                               adapter->shared_pa));
2596        spin_lock_irqsave(&adapter->cmd_lock, flags);
2597        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2598                               VMXNET3_CMD_ACTIVATE_DEV);
2599        ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2600        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2601
2602        if (ret != 0) {
2603                netdev_err(adapter->netdev,
2604                           "Failed to activate dev: error %u\n", ret);
2605                err = -EINVAL;
2606                goto activate_err;
2607        }
2608
2609        vmxnet3_init_coalesce(adapter);
2610
2611        for (i = 0; i < adapter->num_rx_queues; i++) {
2612                VMXNET3_WRITE_BAR0_REG(adapter,
2613                                VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2614                                adapter->rx_queue[i].rx_ring[0].next2fill);
2615                VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2616                                (i * VMXNET3_REG_ALIGN)),
2617                                adapter->rx_queue[i].rx_ring[1].next2fill);
2618        }
2619
2620        /* Apply the rx filter settins last. */
2621        vmxnet3_set_mc(adapter->netdev);
2622
2623        /*
2624         * Check link state when first activating device. It will start the
2625         * tx queue if the link is up.
2626         */
2627        vmxnet3_check_link(adapter, true);
2628        for (i = 0; i < adapter->num_rx_queues; i++)
2629                napi_enable(&adapter->rx_queue[i].napi);
2630        vmxnet3_enable_all_intrs(adapter);
2631        clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2632        return 0;
2633
2634activate_err:
2635        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2636        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2637        vmxnet3_free_irqs(adapter);
2638irq_err:
2639rq_err:
2640        /* free up buffers we allocated */
2641        vmxnet3_rq_cleanup_all(adapter);
2642        return err;
2643}
2644
2645
2646void
2647vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2648{
2649        unsigned long flags;
2650        spin_lock_irqsave(&adapter->cmd_lock, flags);
2651        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2652        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2653}
2654
2655
2656int
2657vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2658{
2659        int i;
2660        unsigned long flags;
2661        if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
2662                return 0;
2663
2664
2665        spin_lock_irqsave(&adapter->cmd_lock, flags);
2666        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2667                               VMXNET3_CMD_QUIESCE_DEV);
2668        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2669        vmxnet3_disable_all_intrs(adapter);
2670
2671        for (i = 0; i < adapter->num_rx_queues; i++)
2672                napi_disable(&adapter->rx_queue[i].napi);
2673        netif_tx_disable(adapter->netdev);
2674        adapter->link_speed = 0;
2675        netif_carrier_off(adapter->netdev);
2676
2677        vmxnet3_tq_cleanup_all(adapter);
2678        vmxnet3_rq_cleanup_all(adapter);
2679        vmxnet3_free_irqs(adapter);
2680        return 0;
2681}
2682
2683
2684static void
2685vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2686{
2687        u32 tmp;
2688
2689        tmp = *(u32 *)mac;
2690        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2691
2692        tmp = (mac[5] << 8) | mac[4];
2693        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2694}
2695
2696
2697static int
2698vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2699{
2700        struct sockaddr *addr = p;
2701        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2702
2703        memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2704        vmxnet3_write_mac_addr(adapter, addr->sa_data);
2705
2706        return 0;
2707}
2708
2709
2710/* ==================== initialization and cleanup routines ============ */
2711
2712static int
2713vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter)
2714{
2715        int err;
2716        unsigned long mmio_start, mmio_len;
2717        struct pci_dev *pdev = adapter->pdev;
2718
2719        err = pci_enable_device(pdev);
2720        if (err) {
2721                dev_err(&pdev->dev, "Failed to enable adapter: error %d\n", err);
2722                return err;
2723        }
2724
2725        err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2726                                           vmxnet3_driver_name);
2727        if (err) {
2728                dev_err(&pdev->dev,
2729                        "Failed to request region for adapter: error %d\n", err);
2730                goto err_enable_device;
2731        }
2732
2733        pci_set_master(pdev);
2734
2735        mmio_start = pci_resource_start(pdev, 0);
2736        mmio_len = pci_resource_len(pdev, 0);
2737        adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2738        if (!adapter->hw_addr0) {
2739                dev_err(&pdev->dev, "Failed to map bar0\n");
2740                err = -EIO;
2741                goto err_ioremap;
2742        }
2743
2744        mmio_start = pci_resource_start(pdev, 1);
2745        mmio_len = pci_resource_len(pdev, 1);
2746        adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2747        if (!adapter->hw_addr1) {
2748                dev_err(&pdev->dev, "Failed to map bar1\n");
2749                err = -EIO;
2750                goto err_bar1;
2751        }
2752        return 0;
2753
2754err_bar1:
2755        iounmap(adapter->hw_addr0);
2756err_ioremap:
2757        pci_release_selected_regions(pdev, (1 << 2) - 1);
2758err_enable_device:
2759        pci_disable_device(pdev);
2760        return err;
2761}
2762
2763
2764static void
2765vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2766{
2767        BUG_ON(!adapter->pdev);
2768
2769        iounmap(adapter->hw_addr0);
2770        iounmap(adapter->hw_addr1);
2771        pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2772        pci_disable_device(adapter->pdev);
2773}
2774
2775
2776static void
2777vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2778{
2779        size_t sz, i, ring0_size, ring1_size, comp_size;
2780        if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2781                                    VMXNET3_MAX_ETH_HDR_SIZE) {
2782                adapter->skb_buf_size = adapter->netdev->mtu +
2783                                        VMXNET3_MAX_ETH_HDR_SIZE;
2784                if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2785                        adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2786
2787                adapter->rx_buf_per_pkt = 1;
2788        } else {
2789                adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2790                sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2791                                            VMXNET3_MAX_ETH_HDR_SIZE;
2792                adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2793        }
2794
2795        /*
2796         * for simplicity, force the ring0 size to be a multiple of
2797         * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2798         */
2799        sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2800        ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2801        ring0_size = (ring0_size + sz - 1) / sz * sz;
2802        ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2803                           sz * sz);
2804        ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2805        ring1_size = (ring1_size + sz - 1) / sz * sz;
2806        ring1_size = min_t(u32, ring1_size, VMXNET3_RX_RING2_MAX_SIZE /
2807                           sz * sz);
2808        comp_size = ring0_size + ring1_size;
2809
2810        for (i = 0; i < adapter->num_rx_queues; i++) {
2811                struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2812
2813                rq->rx_ring[0].size = ring0_size;
2814                rq->rx_ring[1].size = ring1_size;
2815                rq->comp_ring.size = comp_size;
2816        }
2817}
2818
2819
2820int
2821vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2822                      u32 rx_ring_size, u32 rx_ring2_size,
2823                      u16 txdata_desc_size, u16 rxdata_desc_size)
2824{
2825        int err = 0, i;
2826
2827        for (i = 0; i < adapter->num_tx_queues; i++) {
2828                struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2829                tq->tx_ring.size   = tx_ring_size;
2830                tq->data_ring.size = tx_ring_size;
2831                tq->comp_ring.size = tx_ring_size;
2832                tq->txdata_desc_size = txdata_desc_size;
2833                tq->shared = &adapter->tqd_start[i].ctrl;
2834                tq->stopped = true;
2835                tq->adapter = adapter;
2836                tq->qid = i;
2837                err = vmxnet3_tq_create(tq, adapter);
2838                /*
2839                 * Too late to change num_tx_queues. We cannot do away with
2840                 * lesser number of queues than what we asked for
2841                 */
2842                if (err)
2843                        goto queue_err;
2844        }
2845
2846        adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2847        adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2848        vmxnet3_adjust_rx_ring_size(adapter);
2849
2850        adapter->rxdataring_enabled = VMXNET3_VERSION_GE_3(adapter);
2851        for (i = 0; i < adapter->num_rx_queues; i++) {
2852                struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2853                /* qid and qid2 for rx queues will be assigned later when num
2854                 * of rx queues is finalized after allocating intrs */
2855                rq->shared = &adapter->rqd_start[i].ctrl;
2856                rq->adapter = adapter;
2857                rq->data_ring.desc_size = rxdata_desc_size;
2858                err = vmxnet3_rq_create(rq, adapter);
2859                if (err) {
2860                        if (i == 0) {
2861                                netdev_err(adapter->netdev,
2862                                           "Could not allocate any rx queues. "
2863                                           "Aborting.\n");
2864                                goto queue_err;
2865                        } else {
2866                                netdev_info(adapter->netdev,
2867                                            "Number of rx queues changed "
2868                                            "to : %d.\n", i);
2869                                adapter->num_rx_queues = i;
2870                                err = 0;
2871                                break;
2872                        }
2873                }
2874        }
2875
2876        if (!adapter->rxdataring_enabled)
2877                vmxnet3_rq_destroy_all_rxdataring(adapter);
2878
2879        return err;
2880queue_err:
2881        vmxnet3_tq_destroy_all(adapter);
2882        return err;
2883}
2884
2885static int
2886vmxnet3_open(struct net_device *netdev)
2887{
2888        struct vmxnet3_adapter *adapter;
2889        int err, i;
2890
2891        adapter = netdev_priv(netdev);
2892
2893        for (i = 0; i < adapter->num_tx_queues; i++)
2894                spin_lock_init(&adapter->tx_queue[i].tx_lock);
2895
2896        if (VMXNET3_VERSION_GE_3(adapter)) {
2897                unsigned long flags;
2898                u16 txdata_desc_size;
2899
2900                spin_lock_irqsave(&adapter->cmd_lock, flags);
2901                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2902                                       VMXNET3_CMD_GET_TXDATA_DESC_SIZE);
2903                txdata_desc_size = VMXNET3_READ_BAR1_REG(adapter,
2904                                                         VMXNET3_REG_CMD);
2905                spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2906
2907                if ((txdata_desc_size < VMXNET3_TXDATA_DESC_MIN_SIZE) ||
2908                    (txdata_desc_size > VMXNET3_TXDATA_DESC_MAX_SIZE) ||
2909                    (txdata_desc_size & VMXNET3_TXDATA_DESC_SIZE_MASK)) {
2910                        adapter->txdata_desc_size =
2911                                sizeof(struct Vmxnet3_TxDataDesc);
2912                } else {
2913                        adapter->txdata_desc_size = txdata_desc_size;
2914                }
2915        } else {
2916                adapter->txdata_desc_size = sizeof(struct Vmxnet3_TxDataDesc);
2917        }
2918
2919        err = vmxnet3_create_queues(adapter,
2920                                    adapter->tx_ring_size,
2921                                    adapter->rx_ring_size,
2922                                    adapter->rx_ring2_size,
2923                                    adapter->txdata_desc_size,
2924                                    adapter->rxdata_desc_size);
2925        if (err)
2926                goto queue_err;
2927
2928        err = vmxnet3_activate_dev(adapter);
2929        if (err)
2930                goto activate_err;
2931
2932        return 0;
2933
2934activate_err:
2935        vmxnet3_rq_destroy_all(adapter);
2936        vmxnet3_tq_destroy_all(adapter);
2937queue_err:
2938        return err;
2939}
2940
2941
2942static int
2943vmxnet3_close(struct net_device *netdev)
2944{
2945        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2946
2947        /*
2948         * Reset_work may be in the middle of resetting the device, wait for its
2949         * completion.
2950         */
2951        while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2952                usleep_range(1000, 2000);
2953
2954        vmxnet3_quiesce_dev(adapter);
2955
2956        vmxnet3_rq_destroy_all(adapter);
2957        vmxnet3_tq_destroy_all(adapter);
2958
2959        clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2960
2961
2962        return 0;
2963}
2964
2965
2966void
2967vmxnet3_force_close(struct vmxnet3_adapter *adapter)
2968{
2969        int i;
2970
2971        /*
2972         * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
2973         * vmxnet3_close() will deadlock.
2974         */
2975        BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
2976
2977        /* we need to enable NAPI, otherwise dev_close will deadlock */
2978        for (i = 0; i < adapter->num_rx_queues; i++)
2979                napi_enable(&adapter->rx_queue[i].napi);
2980        /*
2981         * Need to clear the quiesce bit to ensure that vmxnet3_close
2982         * can quiesce the device properly
2983         */
2984        clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2985        dev_close(adapter->netdev);
2986}
2987
2988
2989static int
2990vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
2991{
2992        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2993        int err = 0;
2994
2995        netdev->mtu = new_mtu;
2996
2997        /*
2998         * Reset_work may be in the middle of resetting the device, wait for its
2999         * completion.
3000         */
3001        while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
3002                usleep_range(1000, 2000);
3003
3004        if (netif_running(netdev)) {
3005                vmxnet3_quiesce_dev(adapter);
3006                vmxnet3_reset_dev(adapter);
3007
3008                /* we need to re-create the rx queue based on the new mtu */
3009                vmxnet3_rq_destroy_all(adapter);
3010                vmxnet3_adjust_rx_ring_size(adapter);
3011                err = vmxnet3_rq_create_all(adapter);
3012                if (err) {
3013                        netdev_err(netdev,
3014                                   "failed to re-create rx queues, "
3015                                   " error %d. Closing it.\n", err);
3016                        goto out;
3017                }
3018
3019                err = vmxnet3_activate_dev(adapter);
3020                if (err) {
3021                        netdev_err(netdev,
3022                                   "failed to re-activate, error %d. "
3023                                   "Closing it\n", err);
3024                        goto out;
3025                }
3026        }
3027
3028out:
3029        clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
3030        if (err)
3031                vmxnet3_force_close(adapter);
3032
3033        return err;
3034}
3035
3036
3037static void
3038vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
3039{
3040        struct net_device *netdev = adapter->netdev;
3041
3042        netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
3043                NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
3044                NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
3045                NETIF_F_LRO;
3046        if (dma64)
3047                netdev->hw_features |= NETIF_F_HIGHDMA;
3048        netdev->vlan_features = netdev->hw_features &
3049                                ~(NETIF_F_HW_VLAN_CTAG_TX |
3050                                  NETIF_F_HW_VLAN_CTAG_RX);
3051        netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
3052}
3053
3054
3055static void
3056vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
3057{
3058        u32 tmp;
3059
3060        tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
3061        *(u32 *)mac = tmp;
3062
3063        tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
3064        mac[4] = tmp & 0xff;
3065        mac[5] = (tmp >> 8) & 0xff;
3066}
3067
3068#ifdef CONFIG_PCI_MSI
3069
3070/*
3071 * Enable MSIx vectors.
3072 * Returns :
3073 *      VMXNET3_LINUX_MIN_MSIX_VECT when only minimum number of vectors required
3074 *       were enabled.
3075 *      number of vectors which were enabled otherwise (this number is greater
3076 *       than VMXNET3_LINUX_MIN_MSIX_VECT)
3077 */
3078
3079static int
3080vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter, int nvec)
3081{
3082        int ret = pci_enable_msix_range(adapter->pdev,
3083                                        adapter->intr.msix_entries, nvec, nvec);
3084
3085        if (ret == -ENOSPC && nvec > VMXNET3_LINUX_MIN_MSIX_VECT) {
3086                dev_err(&adapter->netdev->dev,
3087                        "Failed to enable %d MSI-X, trying %d\n",
3088                        nvec, VMXNET3_LINUX_MIN_MSIX_VECT);
3089
3090                ret = pci_enable_msix_range(adapter->pdev,
3091                                            adapter->intr.msix_entries,
3092                                            VMXNET3_LINUX_MIN_MSIX_VECT,
3093                                            VMXNET3_LINUX_MIN_MSIX_VECT);
3094        }
3095
3096        if (ret < 0) {
3097                dev_err(&adapter->netdev->dev,
3098                        "Failed to enable MSI-X, error: %d\n", ret);
3099        }
3100
3101        return ret;
3102}
3103
3104
3105#endif /* CONFIG_PCI_MSI */
3106
3107static void
3108vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
3109{
3110        u32 cfg;
3111        unsigned long flags;
3112
3113        /* intr settings */
3114        spin_lock_irqsave(&adapter->cmd_lock, flags);
3115        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3116                               VMXNET3_CMD_GET_CONF_INTR);
3117        cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
3118        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3119        adapter->intr.type = cfg & 0x3;
3120        adapter->intr.mask_mode = (cfg >> 2) & 0x3;
3121
3122        if (adapter->intr.type == VMXNET3_IT_AUTO) {
3123                adapter->intr.type = VMXNET3_IT_MSIX;
3124        }
3125
3126#ifdef CONFIG_PCI_MSI
3127        if (adapter->intr.type == VMXNET3_IT_MSIX) {
3128                int i, nvec;
3129
3130                nvec  = adapter->share_intr == VMXNET3_INTR_TXSHARE ?
3131                        1 : adapter->num_tx_queues;
3132                nvec += adapter->share_intr == VMXNET3_INTR_BUDDYSHARE ?
3133                        0 : adapter->num_rx_queues;
3134                nvec += 1;      /* for link event */
3135                nvec = nvec > VMXNET3_LINUX_MIN_MSIX_VECT ?
3136                       nvec : VMXNET3_LINUX_MIN_MSIX_VECT;
3137
3138                for (i = 0; i < nvec; i++)
3139                        adapter->intr.msix_entries[i].entry = i;
3140
3141                nvec = vmxnet3_acquire_msix_vectors(adapter, nvec);
3142                if (nvec < 0)
3143                        goto msix_err;
3144
3145                /* If we cannot allocate one MSIx vector per queue
3146                 * then limit the number of rx queues to 1
3147                 */
3148                if (nvec == VMXNET3_LINUX_MIN_MSIX_VECT) {
3149                        if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
3150                            || adapter->num_rx_queues != 1) {
3151                                adapter->share_intr = VMXNET3_INTR_TXSHARE;
3152                                netdev_err(adapter->netdev,
3153                                           "Number of rx queues : 1\n");
3154                                adapter->num_rx_queues = 1;
3155                        }
3156                }
3157
3158                adapter->intr.num_intrs = nvec;
3159                return;
3160
3161msix_err:
3162                /* If we cannot allocate MSIx vectors use only one rx queue */
3163                dev_info(&adapter->pdev->dev,
3164                         "Failed to enable MSI-X, error %d. "
3165                         "Limiting #rx queues to 1, try MSI.\n", nvec);
3166
3167                adapter->intr.type = VMXNET3_IT_MSI;
3168        }
3169
3170        if (adapter->intr.type == VMXNET3_IT_MSI) {
3171                if (!pci_enable_msi(adapter->pdev)) {
3172                        adapter->num_rx_queues = 1;
3173                        adapter->intr.num_intrs = 1;
3174                        return;
3175                }
3176        }
3177#endif /* CONFIG_PCI_MSI */
3178
3179        adapter->num_rx_queues = 1;
3180        dev_info(&adapter->netdev->dev,
3181                 "Using INTx interrupt, #Rx queues: 1.\n");
3182        adapter->intr.type = VMXNET3_IT_INTX;
3183
3184        /* INT-X related setting */
3185        adapter->intr.num_intrs = 1;
3186}
3187
3188
3189static void
3190vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
3191{
3192        if (adapter->intr.type == VMXNET3_IT_MSIX)
3193                pci_disable_msix(adapter->pdev);
3194        else if (adapter->intr.type == VMXNET3_IT_MSI)
3195                pci_disable_msi(adapter->pdev);
3196        else
3197                BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
3198}
3199
3200
3201static void
3202vmxnet3_tx_timeout(struct net_device *netdev)
3203{
3204        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3205        adapter->tx_timeout_count++;
3206
3207        netdev_err(adapter->netdev, "tx hang\n");
3208        schedule_work(&adapter->work);
3209}
3210
3211
3212static void
3213vmxnet3_reset_work(struct work_struct *data)
3214{
3215        struct vmxnet3_adapter *adapter;
3216
3217        adapter = container_of(data, struct vmxnet3_adapter, work);
3218
3219        /* if another thread is resetting the device, no need to proceed */
3220        if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
3221                return;
3222
3223        /* if the device is closed, we must leave it alone */
3224        rtnl_lock();
3225        if (netif_running(adapter->netdev)) {
3226                netdev_notice(adapter->netdev, "resetting\n");
3227                vmxnet3_quiesce_dev(adapter);
3228                vmxnet3_reset_dev(adapter);
3229                vmxnet3_activate_dev(adapter);
3230        } else {
3231                netdev_info(adapter->netdev, "already closed\n");
3232        }
3233        rtnl_unlock();
3234
3235        netif_wake_queue(adapter->netdev);
3236        clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
3237}
3238
3239
3240static int
3241vmxnet3_probe_device(struct pci_dev *pdev,
3242                     const struct pci_device_id *id)
3243{
3244        static const struct net_device_ops vmxnet3_netdev_ops = {
3245                .ndo_open = vmxnet3_open,
3246                .ndo_stop = vmxnet3_close,
3247                .ndo_start_xmit = vmxnet3_xmit_frame,
3248                .ndo_set_mac_address = vmxnet3_set_mac_addr,
3249                .ndo_change_mtu = vmxnet3_change_mtu,
3250                .ndo_fix_features = vmxnet3_fix_features,
3251                .ndo_set_features = vmxnet3_set_features,
3252                .ndo_get_stats64 = vmxnet3_get_stats64,
3253                .ndo_tx_timeout = vmxnet3_tx_timeout,
3254                .ndo_set_rx_mode = vmxnet3_set_mc,
3255                .ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
3256                .ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
3257#ifdef CONFIG_NET_POLL_CONTROLLER
3258                .ndo_poll_controller = vmxnet3_netpoll,
3259#endif
3260        };
3261        int err;
3262        bool dma64;
3263        u32 ver;
3264        struct net_device *netdev;
3265        struct vmxnet3_adapter *adapter;
3266        u8 mac[ETH_ALEN];
3267        int size;
3268        int num_tx_queues;
3269        int num_rx_queues;
3270
3271        if (!pci_msi_enabled())
3272                enable_mq = 0;
3273
3274#ifdef VMXNET3_RSS
3275        if (enable_mq)
3276                num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3277                                    (int)num_online_cpus());
3278        else
3279#endif
3280                num_rx_queues = 1;
3281        num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3282
3283        if (enable_mq)
3284                num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
3285                                    (int)num_online_cpus());
3286        else
3287                num_tx_queues = 1;
3288
3289        num_tx_queues = rounddown_pow_of_two(num_tx_queues);
3290        netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
3291                                   max(num_tx_queues, num_rx_queues));
3292        dev_info(&pdev->dev,
3293                 "# of Tx queues : %d, # of Rx queues : %d\n",
3294                 num_tx_queues, num_rx_queues);
3295
3296        if (!netdev)
3297                return -ENOMEM;
3298
3299        pci_set_drvdata(pdev, netdev);
3300        adapter = netdev_priv(netdev);
3301        adapter->netdev = netdev;
3302        adapter->pdev = pdev;
3303
3304        adapter->tx_ring_size = VMXNET3_DEF_TX_RING_SIZE;
3305        adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE;
3306        adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE;
3307
3308        if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
3309                if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
3310                        dev_err(&pdev->dev,
3311                                "pci_set_consistent_dma_mask failed\n");
3312                        err = -EIO;
3313                        goto err_set_mask;
3314                }
3315                dma64 = true;
3316        } else {
3317                if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
3318                        dev_err(&pdev->dev,
3319                                "pci_set_dma_mask failed\n");
3320                        err = -EIO;
3321                        goto err_set_mask;
3322                }
3323                dma64 = false;
3324        }
3325
3326        spin_lock_init(&adapter->cmd_lock);
3327        adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
3328                                             sizeof(struct vmxnet3_adapter),
3329                                             PCI_DMA_TODEVICE);
3330        if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) {
3331                dev_err(&pdev->dev, "Failed to map dma\n");
3332                err = -EFAULT;
3333                goto err_set_mask;
3334        }
3335        adapter->shared = dma_alloc_coherent(
3336                                &adapter->pdev->dev,
3337                                sizeof(struct Vmxnet3_DriverShared),
3338                                &adapter->shared_pa, GFP_KERNEL);
3339        if (!adapter->shared) {
3340                dev_err(&pdev->dev, "Failed to allocate memory\n");
3341                err = -ENOMEM;
3342                goto err_alloc_shared;
3343        }
3344
3345        adapter->num_rx_queues = num_rx_queues;
3346        adapter->num_tx_queues = num_tx_queues;
3347        adapter->rx_buf_per_pkt = 1;
3348
3349        size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3350        size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
3351        adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size,
3352                                                &adapter->queue_desc_pa,
3353                                                GFP_KERNEL);
3354
3355        if (!adapter->tqd_start) {
3356                dev_err(&pdev->dev, "Failed to allocate memory\n");
3357                err = -ENOMEM;
3358                goto err_alloc_queue_desc;
3359        }
3360        adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
3361                                                            adapter->num_tx_queues);
3362
3363        adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev,
3364                                              sizeof(struct Vmxnet3_PMConf),
3365                                              &adapter->pm_conf_pa,
3366                                              GFP_KERNEL);
3367        if (adapter->pm_conf == NULL) {
3368                err = -ENOMEM;
3369                goto err_alloc_pm;
3370        }
3371
3372#ifdef VMXNET3_RSS
3373
3374        adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev,
3375                                               sizeof(struct UPT1_RSSConf),
3376                                               &adapter->rss_conf_pa,
3377                                               GFP_KERNEL);
3378        if (adapter->rss_conf == NULL) {
3379                err = -ENOMEM;
3380                goto err_alloc_rss;
3381        }
3382#endif /* VMXNET3_RSS */
3383
3384        err = vmxnet3_alloc_pci_resources(adapter);
3385        if (err < 0)
3386                goto err_alloc_pci;
3387
3388        ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
3389        if (ver & (1 << VMXNET3_REV_3)) {
3390                VMXNET3_WRITE_BAR1_REG(adapter,
3391                                       VMXNET3_REG_VRRS,
3392                                       1 << VMXNET3_REV_3);
3393                adapter->version = VMXNET3_REV_3 + 1;
3394        } else if (ver & (1 << VMXNET3_REV_2)) {
3395                VMXNET3_WRITE_BAR1_REG(adapter,
3396                                       VMXNET3_REG_VRRS,
3397                                       1 << VMXNET3_REV_2);
3398                adapter->version = VMXNET3_REV_2 + 1;
3399        } else if (ver & (1 << VMXNET3_REV_1)) {
3400                VMXNET3_WRITE_BAR1_REG(adapter,
3401                                       VMXNET3_REG_VRRS,
3402                                       1 << VMXNET3_REV_1);
3403                adapter->version = VMXNET3_REV_1 + 1;
3404        } else {
3405                dev_err(&pdev->dev,
3406                        "Incompatible h/w version (0x%x) for adapter\n", ver);
3407                err = -EBUSY;
3408                goto err_ver;
3409        }
3410        dev_dbg(&pdev->dev, "Using device version %d\n", adapter->version);
3411
3412        ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
3413        if (ver & 1) {
3414                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
3415        } else {
3416                dev_err(&pdev->dev,
3417                        "Incompatible upt version (0x%x) for adapter\n", ver);
3418                err = -EBUSY;
3419                goto err_ver;
3420        }
3421
3422        if (VMXNET3_VERSION_GE_3(adapter)) {
3423                adapter->coal_conf =
3424                        dma_alloc_coherent(&adapter->pdev->dev,
3425                                           sizeof(struct Vmxnet3_CoalesceScheme)
3426                                           ,
3427                                           &adapter->coal_conf_pa,
3428                                           GFP_KERNEL);
3429                if (!adapter->coal_conf) {
3430                        err = -ENOMEM;
3431                        goto err_ver;
3432                }
3433                adapter->coal_conf->coalMode = VMXNET3_COALESCE_DISABLED;
3434                adapter->default_coal_mode = true;
3435        }
3436
3437        SET_NETDEV_DEV(netdev, &pdev->dev);
3438        vmxnet3_declare_features(adapter, dma64);
3439
3440        adapter->rxdata_desc_size = VMXNET3_VERSION_GE_3(adapter) ?
3441                VMXNET3_DEF_RXDATA_DESC_SIZE : 0;
3442
3443        if (adapter->num_tx_queues == adapter->num_rx_queues)
3444                adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
3445        else
3446                adapter->share_intr = VMXNET3_INTR_DONTSHARE;
3447
3448        vmxnet3_alloc_intr_resources(adapter);
3449
3450#ifdef VMXNET3_RSS
3451        if (adapter->num_rx_queues > 1 &&
3452            adapter->intr.type == VMXNET3_IT_MSIX) {
3453                adapter->rss = true;
3454                netdev->hw_features |= NETIF_F_RXHASH;
3455                netdev->features |= NETIF_F_RXHASH;
3456                dev_dbg(&pdev->dev, "RSS is enabled.\n");
3457        } else {
3458                adapter->rss = false;
3459        }
3460#endif
3461
3462        vmxnet3_read_mac_addr(adapter, mac);
3463        memcpy(netdev->dev_addr,  mac, netdev->addr_len);
3464
3465        netdev->netdev_ops = &vmxnet3_netdev_ops;
3466        vmxnet3_set_ethtool_ops(netdev);
3467        netdev->watchdog_timeo = 5 * HZ;
3468
3469        /* MTU range: 60 - 9000 */
3470        netdev->min_mtu = VMXNET3_MIN_MTU;
3471        netdev->max_mtu = VMXNET3_MAX_MTU;
3472
3473        INIT_WORK(&adapter->work, vmxnet3_reset_work);
3474        set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3475
3476        if (adapter->intr.type == VMXNET3_IT_MSIX) {
3477                int i;
3478                for (i = 0; i < adapter->num_rx_queues; i++) {
3479                        netif_napi_add(adapter->netdev,
3480                                       &adapter->rx_queue[i].napi,
3481                                       vmxnet3_poll_rx_only, 64);
3482                }
3483        } else {
3484                netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
3485                               vmxnet3_poll, 64);
3486        }
3487
3488        netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
3489        netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
3490
3491        netif_carrier_off(netdev);
3492        err = register_netdev(netdev);
3493
3494        if (err) {
3495                dev_err(&pdev->dev, "Failed to register adapter\n");
3496                goto err_register;
3497        }
3498
3499        vmxnet3_check_link(adapter, false);
3500        return 0;
3501
3502err_register:
3503        if (VMXNET3_VERSION_GE_3(adapter)) {
3504                dma_free_coherent(&adapter->pdev->dev,
3505                                  sizeof(struct Vmxnet3_CoalesceScheme),
3506                                  adapter->coal_conf, adapter->coal_conf_pa);
3507        }
3508        vmxnet3_free_intr_resources(adapter);
3509err_ver:
3510        vmxnet3_free_pci_resources(adapter);
3511err_alloc_pci:
3512#ifdef VMXNET3_RSS
3513        dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3514                          adapter->rss_conf, adapter->rss_conf_pa);
3515err_alloc_rss:
3516#endif
3517        dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3518                          adapter->pm_conf, adapter->pm_conf_pa);
3519err_alloc_pm:
3520        dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3521                          adapter->queue_desc_pa);
3522err_alloc_queue_desc:
3523        dma_free_coherent(&adapter->pdev->dev,
3524                          sizeof(struct Vmxnet3_DriverShared),
3525                          adapter->shared, adapter->shared_pa);
3526err_alloc_shared:
3527        dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3528                         sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3529err_set_mask:
3530        free_netdev(netdev);
3531        return err;
3532}
3533
3534
3535static void
3536vmxnet3_remove_device(struct pci_dev *pdev)
3537{
3538        struct net_device *netdev = pci_get_drvdata(pdev);
3539        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3540        int size = 0;
3541        int num_rx_queues;
3542
3543#ifdef VMXNET3_RSS
3544        if (enable_mq)
3545                num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3546                                    (int)num_online_cpus());
3547        else
3548#endif
3549                num_rx_queues = 1;
3550        num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3551
3552        cancel_work_sync(&adapter->work);
3553
3554        unregister_netdev(netdev);
3555
3556        vmxnet3_free_intr_resources(adapter);
3557        vmxnet3_free_pci_resources(adapter);
3558        if (VMXNET3_VERSION_GE_3(adapter)) {
3559                dma_free_coherent(&adapter->pdev->dev,
3560                                  sizeof(struct Vmxnet3_CoalesceScheme),
3561                                  adapter->coal_conf, adapter->coal_conf_pa);
3562        }
3563#ifdef VMXNET3_RSS
3564        dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3565                          adapter->rss_conf, adapter->rss_conf_pa);
3566#endif
3567        dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3568                          adapter->pm_conf, adapter->pm_conf_pa);
3569
3570        size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3571        size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3572        dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3573                          adapter->queue_desc_pa);
3574        dma_free_coherent(&adapter->pdev->dev,
3575                          sizeof(struct Vmxnet3_DriverShared),
3576                          adapter->shared, adapter->shared_pa);
3577        dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3578                         sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3579        free_netdev(netdev);
3580}
3581
3582static void vmxnet3_shutdown_device(struct pci_dev *pdev)
3583{
3584        struct net_device *netdev = pci_get_drvdata(pdev);
3585        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3586        unsigned long flags;
3587
3588        /* Reset_work may be in the middle of resetting the device, wait for its
3589         * completion.
3590         */
3591        while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
3592                usleep_range(1000, 2000);
3593
3594        if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED,
3595                             &adapter->state)) {
3596                clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
3597                return;
3598        }
3599        spin_lock_irqsave(&adapter->cmd_lock, flags);
3600        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3601                               VMXNET3_CMD_QUIESCE_DEV);
3602        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3603        vmxnet3_disable_all_intrs(adapter);
3604
3605        clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
3606}
3607
3608
3609#ifdef CONFIG_PM
3610
3611static int
3612vmxnet3_suspend(struct device *device)
3613{
3614        struct pci_dev *pdev = to_pci_dev(device);
3615        struct net_device *netdev = pci_get_drvdata(pdev);
3616        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3617        struct Vmxnet3_PMConf *pmConf;
3618        struct ethhdr *ehdr;
3619        struct arphdr *ahdr;
3620        u8 *arpreq;
3621        struct in_device *in_dev;
3622        struct in_ifaddr *ifa;
3623        unsigned long flags;
3624        int i = 0;
3625
3626        if (!netif_running(netdev))
3627                return 0;
3628
3629        for (i = 0; i < adapter->num_rx_queues; i++)
3630                napi_disable(&adapter->rx_queue[i].napi);
3631
3632        vmxnet3_disable_all_intrs(adapter);
3633        vmxnet3_free_irqs(adapter);
3634        vmxnet3_free_intr_resources(adapter);
3635
3636        netif_device_detach(netdev);
3637        netif_tx_stop_all_queues(netdev);
3638
3639        /* Create wake-up filters. */
3640        pmConf = adapter->pm_conf;
3641        memset(pmConf, 0, sizeof(*pmConf));
3642
3643        if (adapter->wol & WAKE_UCAST) {
3644                pmConf->filters[i].patternSize = ETH_ALEN;
3645                pmConf->filters[i].maskSize = 1;
3646                memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3647                pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
3648
3649                pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3650                i++;
3651        }
3652
3653        if (adapter->wol & WAKE_ARP) {
3654                rcu_read_lock();
3655
3656                in_dev = __in_dev_get_rcu(netdev);
3657                if (!in_dev) {
3658                        rcu_read_unlock();
3659                        goto skip_arp;
3660                }
3661
3662                ifa = rcu_dereference(in_dev->ifa_list);
3663                if (!ifa) {
3664                        rcu_read_unlock();
3665                        goto skip_arp;
3666                }
3667
3668                pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header*/
3669                        sizeof(struct arphdr) +         /* ARP header */
3670                        2 * ETH_ALEN +          /* 2 Ethernet addresses*/
3671                        2 * sizeof(u32);        /*2 IPv4 addresses */
3672                pmConf->filters[i].maskSize =
3673                        (pmConf->filters[i].patternSize - 1) / 8 + 1;
3674
3675                /* ETH_P_ARP in Ethernet header. */
3676                ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3677                ehdr->h_proto = htons(ETH_P_ARP);
3678
3679                /* ARPOP_REQUEST in ARP header. */
3680                ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3681                ahdr->ar_op = htons(ARPOP_REQUEST);
3682                arpreq = (u8 *)(ahdr + 1);
3683
3684                /* The Unicast IPv4 address in 'tip' field. */
3685                arpreq += 2 * ETH_ALEN + sizeof(u32);
3686                *(__be32 *)arpreq = ifa->ifa_address;
3687
3688                rcu_read_unlock();
3689
3690                /* The mask for the relevant bits. */
3691                pmConf->filters[i].mask[0] = 0x00;
3692                pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3693                pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3694                pmConf->filters[i].mask[3] = 0x00;
3695                pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3696                pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3697
3698                pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3699                i++;
3700        }
3701
3702skip_arp:
3703        if (adapter->wol & WAKE_MAGIC)
3704                pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3705
3706        pmConf->numFilters = i;
3707
3708        adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3709        adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3710                                                                  *pmConf));
3711        adapter->shared->devRead.pmConfDesc.confPA =
3712                cpu_to_le64(adapter->pm_conf_pa);
3713
3714        spin_lock_irqsave(&adapter->cmd_lock, flags);
3715        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3716                               VMXNET3_CMD_UPDATE_PMCFG);
3717        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3718
3719        pci_save_state(pdev);
3720        pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3721                        adapter->wol);
3722        pci_disable_device(pdev);
3723        pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3724
3725        return 0;
3726}
3727
3728
3729static int
3730vmxnet3_resume(struct device *device)
3731{
3732        int err;
3733        unsigned long flags;
3734        struct pci_dev *pdev = to_pci_dev(device);
3735        struct net_device *netdev = pci_get_drvdata(pdev);
3736        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3737
3738        if (!netif_running(netdev))
3739                return 0;
3740
3741        pci_set_power_state(pdev, PCI_D0);
3742        pci_restore_state(pdev);
3743        err = pci_enable_device_mem(pdev);
3744        if (err != 0)
3745                return err;
3746
3747        pci_enable_wake(pdev, PCI_D0, 0);
3748
3749        vmxnet3_alloc_intr_resources(adapter);
3750
3751        /* During hibernate and suspend, device has to be reinitialized as the
3752         * device state need not be preserved.
3753         */
3754
3755        /* Need not check adapter state as other reset tasks cannot run during
3756         * device resume.
3757         */
3758        spin_lock_irqsave(&adapter->cmd_lock, flags);
3759        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3760                               VMXNET3_CMD_QUIESCE_DEV);
3761        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3762        vmxnet3_tq_cleanup_all(adapter);
3763        vmxnet3_rq_cleanup_all(adapter);
3764
3765        vmxnet3_reset_dev(adapter);
3766        err = vmxnet3_activate_dev(adapter);
3767        if (err != 0) {
3768                netdev_err(netdev,
3769                           "failed to re-activate on resume, error: %d", err);
3770                vmxnet3_force_close(adapter);
3771                return err;
3772        }
3773        netif_device_attach(netdev);
3774
3775        return 0;
3776}
3777
3778static const struct dev_pm_ops vmxnet3_pm_ops = {
3779        .suspend = vmxnet3_suspend,
3780        .resume = vmxnet3_resume,
3781        .freeze = vmxnet3_suspend,
3782        .restore = vmxnet3_resume,
3783};
3784#endif
3785
3786static struct pci_driver vmxnet3_driver = {
3787        .name           = vmxnet3_driver_name,
3788        .id_table       = vmxnet3_pciid_table,
3789        .probe          = vmxnet3_probe_device,
3790        .remove         = vmxnet3_remove_device,
3791        .shutdown       = vmxnet3_shutdown_device,
3792#ifdef CONFIG_PM
3793        .driver.pm      = &vmxnet3_pm_ops,
3794#endif
3795};
3796
3797
3798static int __init
3799vmxnet3_init_module(void)
3800{
3801        pr_info("%s - version %s\n", VMXNET3_DRIVER_DESC,
3802                VMXNET3_DRIVER_VERSION_REPORT);
3803        return pci_register_driver(&vmxnet3_driver);
3804}
3805
3806module_init(vmxnet3_init_module);
3807
3808
3809static void
3810vmxnet3_exit_module(void)
3811{
3812        pci_unregister_driver(&vmxnet3_driver);
3813}
3814
3815module_exit(vmxnet3_exit_module);
3816
3817MODULE_AUTHOR("VMware, Inc.");
3818MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3819MODULE_LICENSE("GPL v2");
3820MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);
3821