linux/drivers/net/vmxnet3/vmxnet3_drv.c
   1/*
   2 * Linux driver for VMware's vmxnet3 ethernet NIC.
   3 *
   4 * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License as published by the
   8 * Free Software Foundation; version 2 of the License and no later version.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  13 * NON INFRINGEMENT. See the GNU General Public License for more
  14 * details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 *
  20 * The full GNU General Public License is included in this distribution in
  21 * the file called "COPYING".
  22 *
  23 * Maintained by: pv-drivers@vmware.com
  24 *
  25 */
  26
  27#include <linux/module.h>
  28#include <net/ip6_checksum.h>
  29
  30#include "vmxnet3_int.h"
  31
  32char vmxnet3_driver_name[] = "vmxnet3";
  33#define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"
  34
  35/*
  36 * PCI Device ID Table
  37 * Last entry must be all 0s
  38 */
  39static const struct pci_device_id vmxnet3_pciid_table[] = {
  40        {PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
  41        {0}
  42};
  43
  44MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
  45
  46static int enable_mq = 1;
  47
  48static void
  49vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);
  50
  51/*
  52 *    Enable/Disable the given intr
  53 */
  54static void
  55vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
  56{
  57        VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
  58}
  59
  60
  61static void
  62vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
  63{
  64        VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
  65}
  66
  67
  68/*
  69 *    Enable/Disable all intrs used by the device
  70 */
  71static void
  72vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
  73{
  74        int i;
  75
  76        for (i = 0; i < adapter->intr.num_intrs; i++)
  77                vmxnet3_enable_intr(adapter, i);
  78        adapter->shared->devRead.intrConf.intrCtrl &=
  79                                        cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
  80}
  81
  82
  83static void
  84vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
  85{
  86        int i;
  87
  88        adapter->shared->devRead.intrConf.intrCtrl |=
  89                                        cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
  90        for (i = 0; i < adapter->intr.num_intrs; i++)
  91                vmxnet3_disable_intr(adapter, i);
  92}
  93
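/*
 * Each interrupt vector has its own mask register: the IMR registers sit in
 * BAR0 at a stride of 8 bytes, and writing 1 masks the vector while writing 0
 * unmasks it.  Note the ordering above: the DISABLE_ALL bit in the shared
 * intrConf.intrCtrl word is set before the per-vector masks are written, and
 * cleared only after every vector has been unmasked, so the shared flag stays
 * conservative while the masks are in flux.
 *
 * For illustration, this per-vector masking is the usual way to bracket NAPI
 * polling; a hypothetical MSI-X handler sketch (the rq fields are assumed
 * from vmxnet3_int.h, which is not part of this excerpt):
 *
 *	static irqreturn_t example_msix_rx(int irq, void *data)
 *	{
 *		struct vmxnet3_rx_queue *rq = data;
 *		struct vmxnet3_adapter *adapter = rq->adapter;
 *
 *		// mask only this queue's vector, then let NAPI poll;
 *		// the poll routine re-enables the vector when it is done
 *		vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
 *		napi_schedule(&rq->napi);
 *		return IRQ_HANDLED;
 *	}
 */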
  94
  95static void
  96vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
  97{
  98        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
  99}
 100
 101
 102static bool
 103vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 104{
 105        return tq->stopped;
 106}
 107
 108
 109static void
 110vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 111{
 112        tq->stopped = false;
 113        netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
 114}
 115
 116
 117static void
 118vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 119{
 120        tq->stopped = false;
 121        netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
 122}
 123
 124
 125static void
 126vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 127{
 128        tq->stopped = true;
 129        tq->num_stop++;
 130        netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
 131}
 132
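/*
 * The subqueue number handed to netif_{start,wake,stop}_subqueue() above is
 * recovered by pointer arithmetic: adapter->tx_queue is an array of tx
 * queues, so "tq - adapter->tx_queue" is the element index of the queue being
 * operated on.  A minimal standalone sketch of the idiom (illustrative only):
 *
 *	struct q { int id; };
 *
 *	static unsigned int q_index(const struct q *base, const struct q *elem)
 *	{
 *		// subtracting pointers into the same array yields the number
 *		// of elements between them, not a byte offset
 *		return elem - base;
 *	}
 *	// q_index(queues, &queues[2]) == 2
 */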
 133
 134/*
 135 * Check the link state. This may start or stop the tx queue.
 136 */
 137static void
 138vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
 139{
 140        u32 ret;
 141        int i;
 142        unsigned long flags;
 143
 144        spin_lock_irqsave(&adapter->cmd_lock, flags);
 145        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
 146        ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
 147        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
 148
 149        adapter->link_speed = ret >> 16;
 150        if (ret & 1) { /* Link is up. */
 151                netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n",
 152                            adapter->link_speed);
 153                netif_carrier_on(adapter->netdev);
 154
 155                if (affectTxQueue) {
 156                        for (i = 0; i < adapter->num_tx_queues; i++)
 157                                vmxnet3_tq_start(&adapter->tx_queue[i],
 158                                                 adapter);
 159                }
 160        } else {
 161                netdev_info(adapter->netdev, "NIC Link is Down\n");
 162                netif_carrier_off(adapter->netdev);
 163
 164                if (affectTxQueue) {
 165                        for (i = 0; i < adapter->num_tx_queues; i++)
 166                                vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
 167                }
 168        }
 169}
 170
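/*
 * vmxnet3_check_link() above issues VMXNET3_CMD_GET_LINK and decodes the
 * 32-bit value it then reads back from the CMD register: bit 0 is the
 * link-up flag and the upper 16 bits carry the speed in Mbps.  The decoding
 * in isolation (illustrative sketch only):
 *
 *	static void decode_get_link(u32 ret, bool *up, u32 *speed_mbps)
 *	{
 *		*up = ret & 1;			// bit 0: link state
 *		*speed_mbps = ret >> 16;	// bits 31..16: speed in Mbps
 *	}
 */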
 171static void
 172vmxnet3_process_events(struct vmxnet3_adapter *adapter)
 173{
 174        int i;
 175        unsigned long flags;
 176        u32 events = le32_to_cpu(adapter->shared->ecr);
 177        if (!events)
 178                return;
 179
 180        vmxnet3_ack_events(adapter, events);
 181
 182        /* Check if link state has changed */
 183        if (events & VMXNET3_ECR_LINK)
 184                vmxnet3_check_link(adapter, true);
 185
 186        /* Check if there is an error on xmit/recv queues */
 187        if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
 188                spin_lock_irqsave(&adapter->cmd_lock, flags);
 189                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
 190                                       VMXNET3_CMD_GET_QUEUE_STATUS);
 191                spin_unlock_irqrestore(&adapter->cmd_lock, flags);
 192
 193                for (i = 0; i < adapter->num_tx_queues; i++)
 194                        if (adapter->tqd_start[i].status.stopped)
 195                                dev_err(&adapter->netdev->dev,
 196                                        "%s: tq[%d] error 0x%x\n",
 197                                        adapter->netdev->name, i, le32_to_cpu(
 198                                        adapter->tqd_start[i].status.error));
 199                for (i = 0; i < adapter->num_rx_queues; i++)
 200                        if (adapter->rqd_start[i].status.stopped)
 201                                dev_err(&adapter->netdev->dev,
 202                                        "%s: rq[%d] error 0x%x\n",
 203                                        adapter->netdev->name, i,
 204                                        adapter->rqd_start[i].status.error);
 205
 206                schedule_work(&adapter->work);
 207        }
 208}
 209
 210#ifdef __BIG_ENDIAN_BITFIELD
 211/*
 212 * The device expects the bitfields in shared structures to be written in
  213 * little endian. When the CPU is big endian, the following routines are used to
  214 * read from and write to that ABI correctly.
  215 * The general technique used here is: double-word bitfields are defined in the
  216 * opposite order for big endian architectures. Before the driver reads them, the
  217 * complete double word is translated using le32_to_cpu. Similarly, after the
  218 * driver writes into the bitfields, cpu_to_le32 is used to translate the double
  219 * words into the required format.
 220 * In order to avoid touching bits in shared structure more than once, temporary
 221 * descriptors are used. These are passed as srcDesc to following functions.
 222 */
 223static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
 224                                struct Vmxnet3_RxDesc *dstDesc)
 225{
 226        u32 *src = (u32 *)srcDesc + 2;
 227        u32 *dst = (u32 *)dstDesc + 2;
 228        dstDesc->addr = le64_to_cpu(srcDesc->addr);
 229        *dst = le32_to_cpu(*src);
 230        dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
 231}
 232
 233static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
 234                               struct Vmxnet3_TxDesc *dstDesc)
 235{
 236        int i;
 237        u32 *src = (u32 *)(srcDesc + 1);
 238        u32 *dst = (u32 *)(dstDesc + 1);
 239
 240        /* Working backwards so that the gen bit is set at the end. */
 241        for (i = 2; i > 0; i--) {
 242                src--;
 243                dst--;
 244                *dst = cpu_to_le32(*src);
 245        }
 246}
 247
 248
 249static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
 250                                struct Vmxnet3_RxCompDesc *dstDesc)
 251{
 252        int i = 0;
 253        u32 *src = (u32 *)srcDesc;
 254        u32 *dst = (u32 *)dstDesc;
 255        for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
 256                *dst = le32_to_cpu(*src);
 257                src++;
 258                dst++;
 259        }
 260}
 261
 262
 263/* Used to read bitfield values from double words. */
 264static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
 265{
 266        u32 temp = le32_to_cpu(*bitfield);
 267        u32 mask = ((1 << size) - 1) << pos;
 268        temp &= mask;
 269        temp >>= pos;
 270        return temp;
 271}
 272
 273
 274
 275#endif  /* __BIG_ENDIAN_BITFIELD */
 276
 277#ifdef __BIG_ENDIAN_BITFIELD
 278
 279#   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
 280                        txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
 281                        VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
 282#   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
 283                        txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
 284                        VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
 285#   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
 286                        VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
 287                        VMXNET3_TCD_GEN_SIZE)
 288#   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
 289                        VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
 290#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
 291                        (dstrcd) = (tmp); \
 292                        vmxnet3_RxCompToCPU((rcd), (tmp)); \
 293                } while (0)
 294#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
 295                        (dstrxd) = (tmp); \
 296                        vmxnet3_RxDescToCPU((rxd), (tmp)); \
 297                } while (0)
 298
 299#else
 300
 301#   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
 302#   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
 303#   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
 304#   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
 305#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
 306#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)
 307
 308#endif /* __BIG_ENDIAN_BITFIELD  */
 309
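/*
 * On little-endian builds the accessor macros above read the descriptor
 * bitfields directly; on big-endian builds the whole 32-bit word is
 * byte-swapped first and the field is then carved out with a plain
 * mask-and-shift, exactly as get_bitfield32() does.  A standalone sketch of
 * that extraction (illustrative only):
 *
 *	static u32 extract_field(__le32 raw, u32 pos, u32 size)
 *	{
 *		u32 dword = le32_to_cpu(raw);		// fix byte order once
 *		u32 mask = ((1U << size) - 1) << pos;	// isolate the field
 *
 *		return (dword & mask) >> pos;
 *	}
 *	// applied to the dword selected by VMXNET3_TXD_GEN_DWORD_SHIFT, this
 *	// yields the same result as VMXNET3_TXDESC_GET_GEN() above
 */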
 310
 311static void
 312vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
 313                     struct pci_dev *pdev)
 314{
 315        if (tbi->map_type == VMXNET3_MAP_SINGLE)
 316                dma_unmap_single(&pdev->dev, tbi->dma_addr, tbi->len,
 317                                 PCI_DMA_TODEVICE);
 318        else if (tbi->map_type == VMXNET3_MAP_PAGE)
 319                dma_unmap_page(&pdev->dev, tbi->dma_addr, tbi->len,
 320                               PCI_DMA_TODEVICE);
 321        else
 322                BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
 323
 324        tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
 325}
 326
 327
 328static int
 329vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
 330                  struct pci_dev *pdev, struct vmxnet3_adapter *adapter)
 331{
 332        struct sk_buff *skb;
 333        int entries = 0;
 334
 335        /* no out of order completion */
 336        BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
 337        BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
 338
 339        skb = tq->buf_info[eop_idx].skb;
 340        BUG_ON(skb == NULL);
 341        tq->buf_info[eop_idx].skb = NULL;
 342
 343        VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
 344
 345        while (tq->tx_ring.next2comp != eop_idx) {
 346                vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
 347                                     pdev);
 348
 349                /* update next2comp w/o tx_lock. Since we are marking more,
  350                 * not fewer, tx ring entries available, the worst case is
 351                 * that the tx routine incorrectly re-queues a pkt due to
 352                 * insufficient tx ring entries.
 353                 */
 354                vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
 355                entries++;
 356        }
 357
 358        dev_kfree_skb_any(skb);
 359        return entries;
 360}
 361
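/*
 * vmxnet3_unmap_pkt() above walks the tx ring from the packet's SOP entry up
 * to and including its EOP entry, unmapping each buffer and advancing
 * next2comp as it goes.  Ring indices simply wrap modulo the ring size; a
 * minimal sketch of the increment performed by VMXNET3_INC_RING_IDX_ONLY()
 * (illustrative only, assuming the macro is a plain wrap-around increment
 * that leaves the gen bit untouched):
 *
 *	static void ring_idx_inc(u32 *idx, u32 ring_size)
 *	{
 *		if (++(*idx) == ring_size)
 *			*idx = 0;	// wrap back to the first descriptor
 *	}
 */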
 362
 363static int
 364vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
 365                        struct vmxnet3_adapter *adapter)
 366{
 367        int completed = 0;
 368        union Vmxnet3_GenericDesc *gdesc;
 369
 370        gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
 371        while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
 372                /* Prevent any &gdesc->tcd field from being (speculatively)
 373                 * read before (&gdesc->tcd)->gen is read.
 374                 */
 375                dma_rmb();
 376
 377                completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
 378                                               &gdesc->tcd), tq, adapter->pdev,
 379                                               adapter);
 380
 381                vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
 382                gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
 383        }
 384
 385        if (completed) {
 386                spin_lock(&tq->tx_lock);
 387                if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
 388                             vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
 389                             VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
 390                             netif_carrier_ok(adapter->netdev))) {
 391                        vmxnet3_tq_wake(tq, adapter);
 392                }
 393                spin_unlock(&tq->tx_lock);
 394        }
 395        return completed;
 396}
 397
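/*
 * The completion ring above uses a "gen" (generation) bit instead of
 * producer/consumer counters: a descriptor belongs to the driver only while
 * its gen bit matches the ring's current gen, and the ring's gen is flipped
 * each time next2proc wraps, so entries left over from the previous lap are
 * never mistaken for fresh completions.  A standalone sketch of the idea
 * (illustrative only, with a hypothetical entry type):
 *
 *	struct comp_entry { u32 gen; u32 payload; };
 *
 *	static void consume_ring(struct comp_entry *ring, u32 size,
 *				 u32 *next2proc, u32 *cur_gen)
 *	{
 *		while (ring[*next2proc].gen == *cur_gen) {
 *			// ... process ring[*next2proc].payload here ...
 *			if (++(*next2proc) == size) {
 *				*next2proc = 0;
 *				*cur_gen ^= 1;	// new lap: expected gen flips
 *			}
 *		}
 *	}
 */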
 398
 399static void
 400vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
 401                   struct vmxnet3_adapter *adapter)
 402{
 403        int i;
 404
 405        while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
 406                struct vmxnet3_tx_buf_info *tbi;
 407
 408                tbi = tq->buf_info + tq->tx_ring.next2comp;
 409
 410                vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
 411                if (tbi->skb) {
 412                        dev_kfree_skb_any(tbi->skb);
 413                        tbi->skb = NULL;
 414                }
 415                vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
 416        }
 417
 418        /* sanity check, verify all buffers are indeed unmapped and freed */
 419        for (i = 0; i < tq->tx_ring.size; i++) {
 420                BUG_ON(tq->buf_info[i].skb != NULL ||
 421                       tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
 422        }
 423
 424        tq->tx_ring.gen = VMXNET3_INIT_GEN;
 425        tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
 426
 427        tq->comp_ring.gen = VMXNET3_INIT_GEN;
 428        tq->comp_ring.next2proc = 0;
 429}
 430
 431
 432static void
 433vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
 434                   struct vmxnet3_adapter *adapter)
 435{
 436        if (tq->tx_ring.base) {
 437                dma_free_coherent(&adapter->pdev->dev, tq->tx_ring.size *
 438                                  sizeof(struct Vmxnet3_TxDesc),
 439                                  tq->tx_ring.base, tq->tx_ring.basePA);
 440                tq->tx_ring.base = NULL;
 441        }
 442        if (tq->data_ring.base) {
 443                dma_free_coherent(&adapter->pdev->dev,
 444                                  tq->data_ring.size * tq->txdata_desc_size,
 445                                  tq->data_ring.base, tq->data_ring.basePA);
 446                tq->data_ring.base = NULL;
 447        }
 448        if (tq->comp_ring.base) {
 449                dma_free_coherent(&adapter->pdev->dev, tq->comp_ring.size *
 450                                  sizeof(struct Vmxnet3_TxCompDesc),
 451                                  tq->comp_ring.base, tq->comp_ring.basePA);
 452                tq->comp_ring.base = NULL;
 453        }
 454        if (tq->buf_info) {
 455                dma_free_coherent(&adapter->pdev->dev,
 456                                  tq->tx_ring.size * sizeof(tq->buf_info[0]),
 457                                  tq->buf_info, tq->buf_info_pa);
 458                tq->buf_info = NULL;
 459        }
 460}
 461
 462
 463/* Destroy all tx queues */
 464void
 465vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
 466{
 467        int i;
 468
 469        for (i = 0; i < adapter->num_tx_queues; i++)
 470                vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
 471}
 472
 473
 474static void
 475vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
 476                struct vmxnet3_adapter *adapter)
 477{
 478        int i;
 479
 480        /* reset the tx ring contents to 0 and reset the tx ring states */
 481        memset(tq->tx_ring.base, 0, tq->tx_ring.size *
 482               sizeof(struct Vmxnet3_TxDesc));
 483        tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
 484        tq->tx_ring.gen = VMXNET3_INIT_GEN;
 485
 486        memset(tq->data_ring.base, 0,
 487               tq->data_ring.size * tq->txdata_desc_size);
 488
 489        /* reset the tx comp ring contents to 0 and reset comp ring states */
 490        memset(tq->comp_ring.base, 0, tq->comp_ring.size *
 491               sizeof(struct Vmxnet3_TxCompDesc));
 492        tq->comp_ring.next2proc = 0;
 493        tq->comp_ring.gen = VMXNET3_INIT_GEN;
 494
 495        /* reset the bookkeeping data */
 496        memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
 497        for (i = 0; i < tq->tx_ring.size; i++)
 498                tq->buf_info[i].map_type = VMXNET3_MAP_NONE;
 499
 500        /* stats are not reset */
 501}
 502
 503
 504static int
 505vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
 506                  struct vmxnet3_adapter *adapter)
 507{
 508        size_t sz;
 509
 510        BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
 511               tq->comp_ring.base || tq->buf_info);
 512
 513        tq->tx_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
 514                        tq->tx_ring.size * sizeof(struct Vmxnet3_TxDesc),
 515                        &tq->tx_ring.basePA, GFP_KERNEL);
 516        if (!tq->tx_ring.base) {
 517                netdev_err(adapter->netdev, "failed to allocate tx ring\n");
 518                goto err;
 519        }
 520
 521        tq->data_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
 522                        tq->data_ring.size * tq->txdata_desc_size,
 523                        &tq->data_ring.basePA, GFP_KERNEL);
 524        if (!tq->data_ring.base) {
 525                netdev_err(adapter->netdev, "failed to allocate tx data ring\n");
 526                goto err;
 527        }
 528
 529        tq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
 530                        tq->comp_ring.size * sizeof(struct Vmxnet3_TxCompDesc),
 531                        &tq->comp_ring.basePA, GFP_KERNEL);
 532        if (!tq->comp_ring.base) {
 533                netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
 534                goto err;
 535        }
 536
 537        sz = tq->tx_ring.size * sizeof(tq->buf_info[0]);
 538        tq->buf_info = dma_alloc_coherent(&adapter->pdev->dev, sz,
 539                                          &tq->buf_info_pa, GFP_KERNEL);
 540        if (!tq->buf_info)
 541                goto err;
 542
 543        return 0;
 544
 545err:
 546        vmxnet3_tq_destroy(tq, adapter);
 547        return -ENOMEM;
 548}
 549
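/*
 * vmxnet3_tq_create() above funnels every allocation failure to a single
 * "err" label and calls vmxnet3_tq_destroy(), which is safe on a partially
 * constructed queue because it frees only the rings whose base pointers are
 * non-NULL and resets them afterwards.  The same unwind pattern in a minimal
 * standalone sketch (illustrative only):
 *
 *	struct two_bufs { void *a; void *b; };
 *
 *	static void two_bufs_destroy(struct two_bufs *t)
 *	{
 *		kfree(t->a);		// kfree(NULL) is a no-op
 *		t->a = NULL;
 *		kfree(t->b);
 *		t->b = NULL;
 *	}
 *
 *	static int two_bufs_create(struct two_bufs *t, size_t len)
 *	{
 *		t->a = kzalloc(len, GFP_KERNEL);
 *		if (!t->a)
 *			goto err;
 *		t->b = kzalloc(len, GFP_KERNEL);
 *		if (!t->b)
 *			goto err;
 *		return 0;
 *	err:
 *		two_bufs_destroy(t);	// frees whatever was allocated
 *		return -ENOMEM;
 *	}
 */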
 550static void
 551vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
 552{
 553        int i;
 554
 555        for (i = 0; i < adapter->num_tx_queues; i++)
 556                vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
 557}
 558
 559/*
 560 *    starting from ring->next2fill, allocate rx buffers for the given ring
 561 *    of the rx queue and update the rx desc. stop after @num_to_alloc buffers
 562 *    are allocated or allocation fails
 563 */
 564
 565static int
 566vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
 567                        int num_to_alloc, struct vmxnet3_adapter *adapter)
 568{
 569        int num_allocated = 0;
 570        struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
 571        struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
 572        u32 val;
 573
 574        while (num_allocated <= num_to_alloc) {
 575                struct vmxnet3_rx_buf_info *rbi;
 576                union Vmxnet3_GenericDesc *gd;
 577
 578                rbi = rbi_base + ring->next2fill;
 579                gd = ring->base + ring->next2fill;
 580
 581                if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
 582                        if (rbi->skb == NULL) {
 583                                rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
 584                                                                       rbi->len,
 585                                                                       GFP_KERNEL);
 586                                if (unlikely(rbi->skb == NULL)) {
 587                                        rq->stats.rx_buf_alloc_failure++;
 588                                        break;
 589                                }
 590
 591                                rbi->dma_addr = dma_map_single(
 592                                                &adapter->pdev->dev,
 593                                                rbi->skb->data, rbi->len,
 594                                                PCI_DMA_FROMDEVICE);
 595                                if (dma_mapping_error(&adapter->pdev->dev,
 596                                                      rbi->dma_addr)) {
 597                                        dev_kfree_skb_any(rbi->skb);
 598                                        rq->stats.rx_buf_alloc_failure++;
 599                                        break;
 600                                }
 601                        } else {
 602                                /* rx buffer skipped by the device */
 603                        }
 604                        val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
 605                } else {
 606                        BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
 607                               rbi->len  != PAGE_SIZE);
 608
 609                        if (rbi->page == NULL) {
 610                                rbi->page = alloc_page(GFP_ATOMIC);
 611                                if (unlikely(rbi->page == NULL)) {
 612                                        rq->stats.rx_buf_alloc_failure++;
 613                                        break;
 614                                }
 615                                rbi->dma_addr = dma_map_page(
 616                                                &adapter->pdev->dev,
 617                                                rbi->page, 0, PAGE_SIZE,
 618                                                PCI_DMA_FROMDEVICE);
 619                                if (dma_mapping_error(&adapter->pdev->dev,
 620                                                      rbi->dma_addr)) {
 621                                        put_page(rbi->page);
 622                                        rq->stats.rx_buf_alloc_failure++;
 623                                        break;
 624                                }
 625                        } else {
 626                                /* rx buffers skipped by the device */
 627                        }
 628                        val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
 629                }
 630
 631                gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
 632                gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
 633                                           | val | rbi->len);
 634
  635                /* Fill the last buffer but don't mark it ready, or else the
 636                 * device will think that the queue is full */
 637                if (num_allocated == num_to_alloc)
 638                        break;
 639
 640                gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
 641                num_allocated++;
 642                vmxnet3_cmd_ring_adv_next2fill(ring);
 643        }
 644
 645        netdev_dbg(adapter->netdev,
 646                "alloc_rx_buf: %d allocated, next2fill %u, next2comp %u\n",
 647                num_allocated, ring->next2fill, ring->next2comp);
 648
 649        /* so that the device can distinguish a full ring and an empty ring */
 650        BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);
 651
 652        return num_allocated;
 653}
 654
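/*
 * Note how the loop above fills the last requested buffer but does not hand
 * it to the device or advance next2fill past it: next2fill == next2comp is
 * reserved to mean "ring empty", so one descriptor is always held back to
 * keep a completely full ring distinguishable from an empty one (this is
 * what the BUG_ON above asserts).  The same convention as a standalone
 * sketch (illustrative only):
 *
 *	static bool ring_empty(u32 next2fill, u32 next2comp)
 *	{
 *		return next2fill == next2comp;
 *	}
 *
 *	static bool ring_full(u32 next2fill, u32 next2comp, u32 size)
 *	{
 *		// "full" is declared one slot early; otherwise the indices
 *		// would collide and a full ring would look empty
 *		return (next2fill + 1) % size == next2comp;
 *	}
 */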
 655
 656static void
 657vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
 658                    struct vmxnet3_rx_buf_info *rbi)
 659{
 660        skb_frag_t *frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags;
 661
 662        BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
 663
 664        __skb_frag_set_page(frag, rbi->page);
 665        skb_frag_off_set(frag, 0);
 666        skb_frag_size_set(frag, rcd->len);
 667        skb->data_len += rcd->len;
 668        skb->truesize += PAGE_SIZE;
 669        skb_shinfo(skb)->nr_frags++;
 670}
 671
 672
 673static int
 674vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
 675                struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
 676                struct vmxnet3_adapter *adapter)
 677{
 678        u32 dw2, len;
 679        unsigned long buf_offset;
 680        int i;
 681        union Vmxnet3_GenericDesc *gdesc;
 682        struct vmxnet3_tx_buf_info *tbi = NULL;
 683
 684        BUG_ON(ctx->copy_size > skb_headlen(skb));
 685
 686        /* use the previous gen bit for the SOP desc */
 687        dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
 688
 689        ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
 690        gdesc = ctx->sop_txd; /* both loops below can be skipped */
 691
 692        /* no need to map the buffer if headers are copied */
 693        if (ctx->copy_size) {
 694                ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
 695                                        tq->tx_ring.next2fill *
 696                                        tq->txdata_desc_size);
 697                ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
 698                ctx->sop_txd->dword[3] = 0;
 699
 700                tbi = tq->buf_info + tq->tx_ring.next2fill;
 701                tbi->map_type = VMXNET3_MAP_NONE;
 702
 703                netdev_dbg(adapter->netdev,
 704                        "txd[%u]: 0x%Lx 0x%x 0x%x\n",
 705                        tq->tx_ring.next2fill,
 706                        le64_to_cpu(ctx->sop_txd->txd.addr),
 707                        ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
 708                vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
 709
 710                /* use the right gen for non-SOP desc */
 711                dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
 712        }
 713
 714        /* linear part can use multiple tx desc if it's big */
 715        len = skb_headlen(skb) - ctx->copy_size;
 716        buf_offset = ctx->copy_size;
 717        while (len) {
 718                u32 buf_size;
 719
 720                if (len < VMXNET3_MAX_TX_BUF_SIZE) {
 721                        buf_size = len;
 722                        dw2 |= len;
 723                } else {
 724                        buf_size = VMXNET3_MAX_TX_BUF_SIZE;
 725                        /* spec says that for TxDesc.len, 0 == 2^14 */
 726                }
 727
 728                tbi = tq->buf_info + tq->tx_ring.next2fill;
 729                tbi->map_type = VMXNET3_MAP_SINGLE;
 730                tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
 731                                skb->data + buf_offset, buf_size,
 732                                PCI_DMA_TODEVICE);
 733                if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
 734                        return -EFAULT;
 735
 736                tbi->len = buf_size;
 737
 738                gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
 739                BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
 740
 741                gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
 742                gdesc->dword[2] = cpu_to_le32(dw2);
 743                gdesc->dword[3] = 0;
 744
 745                netdev_dbg(adapter->netdev,
 746                        "txd[%u]: 0x%Lx 0x%x 0x%x\n",
 747                        tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
 748                        le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
 749                vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
 750                dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
 751
 752                len -= buf_size;
 753                buf_offset += buf_size;
 754        }
 755
 756        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 757                const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 758                u32 buf_size;
 759
 760                buf_offset = 0;
 761                len = skb_frag_size(frag);
 762                while (len) {
 763                        tbi = tq->buf_info + tq->tx_ring.next2fill;
 764                        if (len < VMXNET3_MAX_TX_BUF_SIZE) {
 765                                buf_size = len;
 766                                dw2 |= len;
 767                        } else {
 768                                buf_size = VMXNET3_MAX_TX_BUF_SIZE;
 769                                /* spec says that for TxDesc.len, 0 == 2^14 */
 770                        }
 771                        tbi->map_type = VMXNET3_MAP_PAGE;
 772                        tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
 773                                                         buf_offset, buf_size,
 774                                                         DMA_TO_DEVICE);
 775                        if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
 776                                return -EFAULT;
 777
 778                        tbi->len = buf_size;
 779
 780                        gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
 781                        BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
 782
 783                        gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
 784                        gdesc->dword[2] = cpu_to_le32(dw2);
 785                        gdesc->dword[3] = 0;
 786
 787                        netdev_dbg(adapter->netdev,
 788                                "txd[%u]: 0x%llx %u %u\n",
 789                                tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
 790                                le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
 791                        vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
 792                        dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
 793
 794                        len -= buf_size;
 795                        buf_offset += buf_size;
 796                }
 797        }
 798
 799        ctx->eop_txd = gdesc;
 800
 801        /* set the last buf_info for the pkt */
 802        tbi->skb = skb;
 803        tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
 804
 805        return 0;
 806}
 807
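/*
 * vmxnet3_map_pkt() writes the SOP descriptor with the *previous* generation
 * bit (tq->tx_ring.gen ^ 0x1) while every following descriptor gets the
 * current gen, so the device ignores the whole chain until the caller flips
 * the SOP gen bit as its very last step (see vmxnet3_tq_xmit() below).  That
 * flip publishes the fully built packet atomically.  The publish sequence in
 * miniature (illustrative only; sop_dword2 stands for the SOP dword[2]):
 *
 *	// 1. fill addr/len/flags of every descriptor in the chain
 *	// 2. make those writes visible before the ownership flip
 *	dma_wmb();
 *	// 3. hand the chain to the device by toggling the SOP gen bit
 *	sop_dword2 = cpu_to_le32(le32_to_cpu(sop_dword2) ^ VMXNET3_TXD_GEN);
 */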
 808
 809/* Init all tx queues */
 810static void
 811vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
 812{
 813        int i;
 814
 815        for (i = 0; i < adapter->num_tx_queues; i++)
 816                vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
 817}
 818
 819
 820/*
 821 *    parse relevant protocol headers:
 822 *      For a tso pkt, relevant headers are L2/3/4 including options
 823 *      For a pkt requesting csum offloading, they are L2/3 and may include L4
 824 *      if it's a TCP/UDP pkt
 825 *
 826 * Returns:
  827 *    -1:  an error occurred during parsing
 828 *     0:  protocol headers parsed, but too big to be copied
 829 *     1:  protocol headers parsed and copied
 830 *
 831 * Other effects:
 832 *    1. related *ctx fields are updated.
 833 *    2. ctx->copy_size is # of bytes copied
 834 *    3. the portion to be copied is guaranteed to be in the linear part
 835 *
 836 */
 837static int
 838vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 839                  struct vmxnet3_tx_ctx *ctx,
 840                  struct vmxnet3_adapter *adapter)
 841{
 842        u8 protocol = 0;
 843
 844        if (ctx->mss) { /* TSO */
 845                if (VMXNET3_VERSION_GE_4(adapter) && skb->encapsulation) {
 846                        ctx->l4_offset = skb_inner_transport_offset(skb);
 847                        ctx->l4_hdr_size = inner_tcp_hdrlen(skb);
 848                        ctx->copy_size = ctx->l4_offset + ctx->l4_hdr_size;
 849                } else {
 850                        ctx->l4_offset = skb_transport_offset(skb);
 851                        ctx->l4_hdr_size = tcp_hdrlen(skb);
 852                        ctx->copy_size = ctx->l4_offset + ctx->l4_hdr_size;
 853                }
 854        } else {
 855                if (skb->ip_summed == CHECKSUM_PARTIAL) {
 856                        /* For encap packets, skb_checksum_start_offset refers
 857                         * to inner L4 offset. Thus, below works for encap as
 858                         * well as non-encap case
 859                         */
 860                        ctx->l4_offset = skb_checksum_start_offset(skb);
 861
 862                        if (VMXNET3_VERSION_GE_4(adapter) &&
 863                            skb->encapsulation) {
 864                                struct iphdr *iph = inner_ip_hdr(skb);
 865
 866                                if (iph->version == 4) {
 867                                        protocol = iph->protocol;
 868                                } else {
 869                                        const struct ipv6hdr *ipv6h;
 870
 871                                        ipv6h = inner_ipv6_hdr(skb);
 872                                        protocol = ipv6h->nexthdr;
 873                                }
 874                        } else {
 875                                if (ctx->ipv4) {
 876                                        const struct iphdr *iph = ip_hdr(skb);
 877
 878                                        protocol = iph->protocol;
 879                                } else if (ctx->ipv6) {
 880                                        const struct ipv6hdr *ipv6h;
 881
 882                                        ipv6h = ipv6_hdr(skb);
 883                                        protocol = ipv6h->nexthdr;
 884                                }
 885                        }
 886
 887                        switch (protocol) {
 888                        case IPPROTO_TCP:
 889                                ctx->l4_hdr_size = skb->encapsulation ? inner_tcp_hdrlen(skb) :
 890                                                   tcp_hdrlen(skb);
 891                                break;
 892                        case IPPROTO_UDP:
 893                                ctx->l4_hdr_size = sizeof(struct udphdr);
 894                                break;
 895                        default:
 896                                ctx->l4_hdr_size = 0;
 897                                break;
 898                        }
 899
 900                        ctx->copy_size = min(ctx->l4_offset +
 901                                         ctx->l4_hdr_size, skb->len);
 902                } else {
 903                        ctx->l4_offset = 0;
 904                        ctx->l4_hdr_size = 0;
 905                        /* copy as much as allowed */
 906                        ctx->copy_size = min_t(unsigned int,
 907                                               tq->txdata_desc_size,
 908                                               skb_headlen(skb));
 909                }
 910
 911                if (skb->len <= VMXNET3_HDR_COPY_SIZE)
 912                        ctx->copy_size = skb->len;
 913
 914                /* make sure headers are accessible directly */
 915                if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
 916                        goto err;
 917        }
 918
 919        if (unlikely(ctx->copy_size > tq->txdata_desc_size)) {
 920                tq->stats.oversized_hdr++;
 921                ctx->copy_size = 0;
 922                return 0;
 923        }
 924
 925        return 1;
 926err:
 927        return -1;
 928}
 929
 930/*
 931 *    copy relevant protocol headers to the transmit ring:
 932 *      For a tso pkt, relevant headers are L2/3/4 including options
 933 *      For a pkt requesting csum offloading, they are L2/3 and may include L4
 934 *      if it's a TCP/UDP pkt
 935 *
 936 *
 937 *    Note that this requires that vmxnet3_parse_hdr be called first to set the
  938 *      appropriate bits in ctx
 939 */
 940static void
 941vmxnet3_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 942                 struct vmxnet3_tx_ctx *ctx,
 943                 struct vmxnet3_adapter *adapter)
 944{
 945        struct Vmxnet3_TxDataDesc *tdd;
 946
 947        tdd = (struct Vmxnet3_TxDataDesc *)((u8 *)tq->data_ring.base +
 948                                            tq->tx_ring.next2fill *
 949                                            tq->txdata_desc_size);
 950
 951        memcpy(tdd->data, skb->data, ctx->copy_size);
 952        netdev_dbg(adapter->netdev,
 953                "copy %u bytes to dataRing[%u]\n",
 954                ctx->copy_size, tq->tx_ring.next2fill);
 955}
 956
 957
 958static void
 959vmxnet3_prepare_inner_tso(struct sk_buff *skb,
 960                          struct vmxnet3_tx_ctx *ctx)
 961{
 962        struct tcphdr *tcph = inner_tcp_hdr(skb);
 963        struct iphdr *iph = inner_ip_hdr(skb);
 964
 965        if (iph->version == 4) {
 966                iph->check = 0;
 967                tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
 968                                                 IPPROTO_TCP, 0);
 969        } else {
 970                struct ipv6hdr *iph = inner_ipv6_hdr(skb);
 971
 972                tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
 973                                               IPPROTO_TCP, 0);
 974        }
 975}
 976
 977static void
 978vmxnet3_prepare_tso(struct sk_buff *skb,
 979                    struct vmxnet3_tx_ctx *ctx)
 980{
 981        struct tcphdr *tcph = tcp_hdr(skb);
 982
 983        if (ctx->ipv4) {
 984                struct iphdr *iph = ip_hdr(skb);
 985
 986                iph->check = 0;
 987                tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
 988                                                 IPPROTO_TCP, 0);
 989        } else if (ctx->ipv6) {
 990                tcp_v6_gso_csum_prep(skb);
 991        }
 992}
 993
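/*
 * For TSO the two helpers above zero the IP header checksum and seed
 * tcph->check with the ones'-complement sum of just the pseudo header
 * (addresses, protocol, zero length); the device can then finish the
 * checksum per segment.  The underlying primitive is the standard 16-bit
 * ones'-complement (Internet) checksum; a standalone sketch of that folding
 * (illustrative only, not the kernel's optimized csum helpers):
 *
 *	static u16 csum16(const u8 *data, size_t len)
 *	{
 *		u32 sum = 0;
 *		size_t i;
 *
 *		for (i = 0; i + 1 < len; i += 2)
 *			sum += (data[i] << 8) | data[i + 1];
 *		if (len & 1)			// pad an odd trailing byte
 *			sum += data[len - 1] << 8;
 *		while (sum >> 16)		// fold carries back in
 *			sum = (sum & 0xffff) + (sum >> 16);
 *		return (u16)~sum;
 *	}
 */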
 994static int txd_estimate(const struct sk_buff *skb)
 995{
 996        int count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
 997        int i;
 998
 999        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1000                const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1001
1002                count += VMXNET3_TXD_NEEDED(skb_frag_size(frag));
1003        }
1004        return count;
1005}
1006
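/*
 * A single tx descriptor can cover at most VMXNET3_MAX_TX_BUF_SIZE bytes (the
 * "0 == 2^14" length encoding noted in vmxnet3_map_pkt()), so the linear head
 * and each page fragment may need several descriptors.  VMXNET3_TXD_NEEDED()
 * is in effect a ceiling division by that limit, and the extra "+ 1" leaves
 * room for the SOP descriptor that points at headers copied into the data
 * ring.  The estimate as a standalone sketch (illustrative only, assuming a
 * 16 KB per-descriptor limit):
 *
 *	#define EXAMPLE_MAX_TX_BUF_SIZE	(1 << 14)
 *
 *	static u32 example_txd_needed(u32 len)
 *	{
 *		// ceiling division: descriptors required to cover len bytes
 *		return (len + EXAMPLE_MAX_TX_BUF_SIZE - 1) /
 *		       EXAMPLE_MAX_TX_BUF_SIZE;
 *	}
 *	// example_txd_needed(16384) == 1, example_txd_needed(16385) == 2
 */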
1007/*
1008 * Transmits a pkt thru a given tq
1009 * Returns:
 1010 *    NETDEV_TX_OK:      descriptors are set up successfully
 1011 *    NETDEV_TX_OK:      an error occurred and the pkt was dropped
1012 *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
1013 *
1014 * Side-effects:
1015 *    1. tx ring may be changed
1016 *    2. tq stats may be updated accordingly
1017 *    3. shared->txNumDeferred may be updated
1018 */
1019
1020static int
1021vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
1022                struct vmxnet3_adapter *adapter, struct net_device *netdev)
1023{
1024        int ret;
1025        u32 count;
1026        int num_pkts;
1027        int tx_num_deferred;
1028        unsigned long flags;
1029        struct vmxnet3_tx_ctx ctx;
1030        union Vmxnet3_GenericDesc *gdesc;
1031#ifdef __BIG_ENDIAN_BITFIELD
1032        /* Use temporary descriptor to avoid touching bits multiple times */
1033        union Vmxnet3_GenericDesc tempTxDesc;
1034#endif
1035
1036        count = txd_estimate(skb);
1037
1038        ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));
1039        ctx.ipv6 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IPV6));
1040
1041        ctx.mss = skb_shinfo(skb)->gso_size;
1042        if (ctx.mss) {
1043                if (skb_header_cloned(skb)) {
1044                        if (unlikely(pskb_expand_head(skb, 0, 0,
1045                                                      GFP_ATOMIC) != 0)) {
1046                                tq->stats.drop_tso++;
1047                                goto drop_pkt;
1048                        }
1049                        tq->stats.copy_skb_header++;
1050                }
1051                if (skb->encapsulation) {
1052                        vmxnet3_prepare_inner_tso(skb, &ctx);
1053                } else {
1054                        vmxnet3_prepare_tso(skb, &ctx);
1055                }
1056        } else {
1057                if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
1058
1059                        /* non-tso pkts must not use more than
1060                         * VMXNET3_MAX_TXD_PER_PKT entries
1061                         */
1062                        if (skb_linearize(skb) != 0) {
1063                                tq->stats.drop_too_many_frags++;
1064                                goto drop_pkt;
1065                        }
1066                        tq->stats.linearized++;
1067
1068                        /* recalculate the # of descriptors to use */
1069                        count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
1070                }
1071        }
1072
1073        ret = vmxnet3_parse_hdr(skb, tq, &ctx, adapter);
1074        if (ret >= 0) {
1075                BUG_ON(ret <= 0 && ctx.copy_size != 0);
1076                /* hdrs parsed, check against other limits */
1077                if (ctx.mss) {
1078                        if (unlikely(ctx.l4_offset + ctx.l4_hdr_size >
1079                                     VMXNET3_MAX_TX_BUF_SIZE)) {
1080                                tq->stats.drop_oversized_hdr++;
1081                                goto drop_pkt;
1082                        }
1083                } else {
1084                        if (skb->ip_summed == CHECKSUM_PARTIAL) {
1085                                if (unlikely(ctx.l4_offset +
1086                                             skb->csum_offset >
1087                                             VMXNET3_MAX_CSUM_OFFSET)) {
1088                                        tq->stats.drop_oversized_hdr++;
1089                                        goto drop_pkt;
1090                                }
1091                        }
1092                }
1093        } else {
1094                tq->stats.drop_hdr_inspect_err++;
1095                goto drop_pkt;
1096        }
1097
1098        spin_lock_irqsave(&tq->tx_lock, flags);
1099
1100        if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
1101                tq->stats.tx_ring_full++;
1102                netdev_dbg(adapter->netdev,
1103                        "tx queue stopped on %s, next2comp %u"
1104                        " next2fill %u\n", adapter->netdev->name,
1105                        tq->tx_ring.next2comp, tq->tx_ring.next2fill);
1106
1107                vmxnet3_tq_stop(tq, adapter);
1108                spin_unlock_irqrestore(&tq->tx_lock, flags);
1109                return NETDEV_TX_BUSY;
1110        }
1111
1112
1113        vmxnet3_copy_hdr(skb, tq, &ctx, adapter);
1114
1115        /* fill tx descs related to addr & len */
1116        if (vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter))
1117                goto unlock_drop_pkt;
1118
1119        /* setup the EOP desc */
1120        ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
1121
1122        /* setup the SOP desc */
1123#ifdef __BIG_ENDIAN_BITFIELD
1124        gdesc = &tempTxDesc;
1125        gdesc->dword[2] = ctx.sop_txd->dword[2];
1126        gdesc->dword[3] = ctx.sop_txd->dword[3];
1127#else
1128        gdesc = ctx.sop_txd;
1129#endif
1130        tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred);
1131        if (ctx.mss) {
1132                if (VMXNET3_VERSION_GE_4(adapter) && skb->encapsulation) {
1133                        gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size;
1134                        gdesc->txd.om = VMXNET3_OM_ENCAP;
1135                        gdesc->txd.msscof = ctx.mss;
1136
1137                        if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)
1138                                gdesc->txd.oco = 1;
1139                } else {
1140                        gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size;
1141                        gdesc->txd.om = VMXNET3_OM_TSO;
1142                        gdesc->txd.msscof = ctx.mss;
1143                }
1144                num_pkts = (skb->len - gdesc->txd.hlen + ctx.mss - 1) / ctx.mss;
1145        } else {
1146                if (skb->ip_summed == CHECKSUM_PARTIAL) {
1147                        if (VMXNET3_VERSION_GE_4(adapter) &&
1148                            skb->encapsulation) {
1149                                gdesc->txd.hlen = ctx.l4_offset +
1150                                                  ctx.l4_hdr_size;
1151                                gdesc->txd.om = VMXNET3_OM_ENCAP;
1152                                gdesc->txd.msscof = 0;          /* Reserved */
1153                        } else {
1154                                gdesc->txd.hlen = ctx.l4_offset;
1155                                gdesc->txd.om = VMXNET3_OM_CSUM;
1156                                gdesc->txd.msscof = ctx.l4_offset +
1157                                                    skb->csum_offset;
1158                        }
1159                } else {
1160                        gdesc->txd.om = 0;
1161                        gdesc->txd.msscof = 0;
1162                }
1163                num_pkts = 1;
1164        }
1165        le32_add_cpu(&tq->shared->txNumDeferred, num_pkts);
1166        tx_num_deferred += num_pkts;
1167
1168        if (skb_vlan_tag_present(skb)) {
1169                gdesc->txd.ti = 1;
1170                gdesc->txd.tci = skb_vlan_tag_get(skb);
1171        }
1172
1173        /* Ensure that the write to (&gdesc->txd)->gen will be observed after
1174         * all other writes to &gdesc->txd.
1175         */
1176        dma_wmb();
1177
1178        /* finally flips the GEN bit of the SOP desc. */
1179        gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
1180                                                  VMXNET3_TXD_GEN);
1181#ifdef __BIG_ENDIAN_BITFIELD
1182        /* Finished updating in bitfields of Tx Desc, so write them in original
1183         * place.
1184         */
1185        vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
1186                           (struct Vmxnet3_TxDesc *)ctx.sop_txd);
1187        gdesc = ctx.sop_txd;
1188#endif
1189        netdev_dbg(adapter->netdev,
1190                "txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
1191                (u32)(ctx.sop_txd -
1192                tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
1193                le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));
1194
1195        spin_unlock_irqrestore(&tq->tx_lock, flags);
1196
1197        if (tx_num_deferred >= le32_to_cpu(tq->shared->txThreshold)) {
1198                tq->shared->txNumDeferred = 0;
1199                VMXNET3_WRITE_BAR0_REG(adapter,
1200                                       VMXNET3_REG_TXPROD + tq->qid * 8,
1201                                       tq->tx_ring.next2fill);
1202        }
1203
1204        return NETDEV_TX_OK;
1205
1206unlock_drop_pkt:
1207        spin_unlock_irqrestore(&tq->tx_lock, flags);
1208drop_pkt:
1209        tq->stats.drop_total++;
1210        dev_kfree_skb_any(skb);
1211        return NETDEV_TX_OK;
1212}
1213
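/*
 * vmxnet3_tq_xmit() above does not ring the TXPROD doorbell for every packet.
 * It adds the packet count to the shared txNumDeferred counter and writes the
 * register only once the deferred total reaches the txThreshold value in the
 * shared queue-control area, batching the comparatively expensive MMIO writes
 * across packets.  The decision in isolation (illustrative only):
 *
 *	static bool should_ring_doorbell(u32 tx_num_deferred, u32 threshold)
 *	{
 *		// ring (and reset the deferred count) only when enough
 *		// packets have been queued since the last doorbell write
 *		return tx_num_deferred >= threshold;
 *	}
 */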
1214
1215static netdev_tx_t
1216vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1217{
1218        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1219
1220        BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
1221        return vmxnet3_tq_xmit(skb,
1222                               &adapter->tx_queue[skb->queue_mapping],
1223                               adapter, netdev);
1224}
1225
1226
1227static void
1228vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
1229                struct sk_buff *skb,
1230                union Vmxnet3_GenericDesc *gdesc)
1231{
1232        if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
1233                if (gdesc->rcd.v4 &&
1234                    (le32_to_cpu(gdesc->dword[3]) &
1235                     VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) {
1236                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1237                        WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) &&
1238                                     !(le32_to_cpu(gdesc->dword[0]) &
1239                                     (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
1240                        WARN_ON_ONCE(gdesc->rcd.frg &&
1241                                     !(le32_to_cpu(gdesc->dword[0]) &
1242                                     (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
1243                } else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) &
1244                                             (1 << VMXNET3_RCD_TUC_SHIFT))) {
1245                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1246                        WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) &&
1247                                     !(le32_to_cpu(gdesc->dword[0]) &
1248                                     (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
1249                        WARN_ON_ONCE(gdesc->rcd.frg &&
1250                                     !(le32_to_cpu(gdesc->dword[0]) &
1251                                     (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
1252                } else {
1253                        if (gdesc->rcd.csum) {
1254                                skb->csum = htons(gdesc->rcd.csum);
1255                                skb->ip_summed = CHECKSUM_PARTIAL;
1256                        } else {
1257                                skb_checksum_none_assert(skb);
1258                        }
1259                }
1260        } else {
1261                skb_checksum_none_assert(skb);
1262        }
1263}
1264
1265
1266static void
1267vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
1268                 struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
1269{
1270        rq->stats.drop_err++;
1271        if (!rcd->fcs)
1272                rq->stats.drop_fcs++;
1273
1274        rq->stats.drop_total++;
1275
1276        /*
1277         * We do not unmap and chain the rx buffer to the skb.
1278         * We basically pretend this buffer is not used and will be recycled
1279         * by vmxnet3_rq_alloc_rx_buf()
1280         */
1281
1282        /*
1283         * ctx->skb may be NULL if this is the first and the only one
1284         * desc for the pkt
1285         */
1286        if (ctx->skb)
1287                dev_kfree_skb_irq(ctx->skb);
1288
1289        ctx->skb = NULL;
1290}
1291
1292
1293static u32
1294vmxnet3_get_hdr_len(struct vmxnet3_adapter *adapter, struct sk_buff *skb,
1295                    union Vmxnet3_GenericDesc *gdesc)
1296{
1297        u32 hlen, maplen;
1298        union {
1299                void *ptr;
1300                struct ethhdr *eth;
1301                struct vlan_ethhdr *veth;
1302                struct iphdr *ipv4;
1303                struct ipv6hdr *ipv6;
1304                struct tcphdr *tcp;
1305        } hdr;
1306        BUG_ON(gdesc->rcd.tcp == 0);
1307
1308        maplen = skb_headlen(skb);
1309        if (unlikely(sizeof(struct iphdr) + sizeof(struct tcphdr) > maplen))
1310                return 0;
1311
1312        if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
1313            skb->protocol == cpu_to_be16(ETH_P_8021AD))
1314                hlen = sizeof(struct vlan_ethhdr);
1315        else
1316                hlen = sizeof(struct ethhdr);
1317
1318        hdr.eth = eth_hdr(skb);
1319        if (gdesc->rcd.v4) {
1320                BUG_ON(hdr.eth->h_proto != htons(ETH_P_IP) &&
1321                       hdr.veth->h_vlan_encapsulated_proto != htons(ETH_P_IP));
1322                hdr.ptr += hlen;
1323                BUG_ON(hdr.ipv4->protocol != IPPROTO_TCP);
1324                hlen = hdr.ipv4->ihl << 2;
1325                hdr.ptr += hdr.ipv4->ihl << 2;
1326        } else if (gdesc->rcd.v6) {
1327                BUG_ON(hdr.eth->h_proto != htons(ETH_P_IPV6) &&
1328                       hdr.veth->h_vlan_encapsulated_proto != htons(ETH_P_IPV6));
1329                hdr.ptr += hlen;
1330                /* Use an estimated value, since we also need to handle
1331                 * TSO case.
1332                 */
1333                if (hdr.ipv6->nexthdr != IPPROTO_TCP)
1334                        return sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1335                hlen = sizeof(struct ipv6hdr);
1336                hdr.ptr += sizeof(struct ipv6hdr);
1337        } else {
1338                /* Non-IP pkt, dont estimate header length */
1339                return 0;
1340        }
1341
1342        if (hlen + sizeof(struct tcphdr) > maplen)
1343                return 0;
1344
1345        return (hlen + (hdr.tcp->doff << 2));
1346}
1347
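/*
 * vmxnet3_get_hdr_len() above walks the packet headers with a union of
 * differently typed pointers that all alias one cursor: advancing hdr.ptr
 * moves the cursor, and the matching typed member (hdr.eth, hdr.ipv4,
 * hdr.tcp, ...) then reads the current layer without repeated casts.  The
 * idiom in isolation (illustrative only):
 *
 *	union cursor {
 *		void *ptr;
 *		struct ethhdr *eth;
 *		struct iphdr *ip;
 *	};
 *
 *	static u8 ip_proto_of(void *l2_start)
 *	{
 *		union cursor c = { .ptr = l2_start };
 *
 *		// void * arithmetic, as with hdr.ptr above (GNU C extension)
 *		c.ptr += sizeof(struct ethhdr);	// step over the L2 header
 *		return c.ip->protocol;		// read the L3 protocol field
 *	}
 */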
1348static int
1349vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1350                       struct vmxnet3_adapter *adapter, int quota)
1351{
1352        static const u32 rxprod_reg[2] = {
1353                VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
1354        };
1355        u32 num_pkts = 0;
1356        bool skip_page_frags = false;
1357        struct Vmxnet3_RxCompDesc *rcd;
1358        struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
1359        u16 segCnt = 0, mss = 0;
1360#ifdef __BIG_ENDIAN_BITFIELD
1361        struct Vmxnet3_RxDesc rxCmdDesc;
1362        struct Vmxnet3_RxCompDesc rxComp;
1363#endif
1364        vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
1365                          &rxComp);
1366        while (rcd->gen == rq->comp_ring.gen) {
1367                struct vmxnet3_rx_buf_info *rbi;
1368                struct sk_buff *skb, *new_skb = NULL;
1369                struct page *new_page = NULL;
1370                dma_addr_t new_dma_addr;
1371                int num_to_alloc;
1372                struct Vmxnet3_RxDesc *rxd;
1373                u32 idx, ring_idx;
1374                struct vmxnet3_cmd_ring *ring = NULL;
1375                if (num_pkts >= quota) {
1376                        /* we may stop even before we see the EOP desc of
1377                         * the current pkt
1378                         */
1379                        break;
1380                }
1381
1382                /* Prevent any rcd field from being (speculatively) read before
1383                 * rcd->gen is read.
1384                 */
1385                dma_rmb();
1386
1387                BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 &&
1388                       rcd->rqID != rq->dataRingQid);
1389                idx = rcd->rxdIdx;
1390                ring_idx = VMXNET3_GET_RING_IDX(adapter, rcd->rqID);
1391                ring = rq->rx_ring + ring_idx;
1392                vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
1393                                  &rxCmdDesc);
1394                rbi = rq->buf_info[ring_idx] + idx;
1395
1396                BUG_ON(rxd->addr != rbi->dma_addr ||
1397                       rxd->len != rbi->len);
1398
1399                if (unlikely(rcd->eop && rcd->err)) {
1400                        vmxnet3_rx_error(rq, rcd, ctx, adapter);
1401                        goto rcd_done;
1402                }
1403
1404                if (rcd->sop) { /* first buf of the pkt */
1405                        bool rxDataRingUsed;
1406                        u16 len;
1407
1408                        BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
1409                               (rcd->rqID != rq->qid &&
1410                                rcd->rqID != rq->dataRingQid));
1411
1412                        BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
1413                        BUG_ON(ctx->skb != NULL || rbi->skb == NULL);
1414
1415                        if (unlikely(rcd->len == 0)) {
1416                                /* Pretend the rx buffer is skipped. */
1417                                BUG_ON(!(rcd->sop && rcd->eop));
1418                                netdev_dbg(adapter->netdev,
1419                                        "rxRing[%u][%u] 0 length\n",
1420                                        ring_idx, idx);
1421                                goto rcd_done;
1422                        }
1423
1424                        skip_page_frags = false;
1425                        ctx->skb = rbi->skb;
1426
1427                        rxDataRingUsed =
1428                                VMXNET3_RX_DATA_RING(adapter, rcd->rqID);
1429                        len = rxDataRingUsed ? rcd->len : rbi->len;
1430                        new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
1431                                                            len);
1432                        if (new_skb == NULL) {
1433                                /* Skb allocation failed, do not hand over this
1434                                 * skb to the stack. Reuse it. Drop the existing pkt.
1435                                 */
1436                                rq->stats.rx_buf_alloc_failure++;
1437                                ctx->skb = NULL;
1438                                rq->stats.drop_total++;
1439                                skip_page_frags = true;
1440                                goto rcd_done;
1441                        }
1442
1443                        if (rxDataRingUsed) {
1444                                size_t sz;
1445
1446                                BUG_ON(rcd->len > rq->data_ring.desc_size);
1447
1448                                ctx->skb = new_skb;
1449                                sz = rcd->rxdIdx * rq->data_ring.desc_size;
1450                                memcpy(new_skb->data,
1451                                       &rq->data_ring.base[sz], rcd->len);
1452                        } else {
1453                                ctx->skb = rbi->skb;
1454
1455                                new_dma_addr =
1456                                        dma_map_single(&adapter->pdev->dev,
1457                                                       new_skb->data, rbi->len,
1458                                                       PCI_DMA_FROMDEVICE);
1459                                if (dma_mapping_error(&adapter->pdev->dev,
1460                                                      new_dma_addr)) {
1461                                        dev_kfree_skb(new_skb);
1462                                        /* Skb allocation failed, do not
1463                                         * hand over this skb to the stack. Reuse
1464                                         * it. Drop the existing pkt.
1465                                         */
1466                                        rq->stats.rx_buf_alloc_failure++;
1467                                        ctx->skb = NULL;
1468                                        rq->stats.drop_total++;
1469                                        skip_page_frags = true;
1470                                        goto rcd_done;
1471                                }
1472
1473                                dma_unmap_single(&adapter->pdev->dev,
1474                                                 rbi->dma_addr,
1475                                                 rbi->len,
1476                                                 PCI_DMA_FROMDEVICE);
1477
1478                                /* Immediate refill */
1479                                rbi->skb = new_skb;
1480                                rbi->dma_addr = new_dma_addr;
1481                                rxd->addr = cpu_to_le64(rbi->dma_addr);
1482                                rxd->len = rbi->len;
1483                        }
1484
1485#ifdef VMXNET3_RSS
1486                        if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
1487                            (adapter->netdev->features & NETIF_F_RXHASH))
1488                                skb_set_hash(ctx->skb,
1489                                             le32_to_cpu(rcd->rssHash),
1490                                             PKT_HASH_TYPE_L3);
1491#endif
1492                        skb_put(ctx->skb, rcd->len);
1493
1494                        if (VMXNET3_VERSION_GE_2(adapter) &&
1495                            rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
1496                                struct Vmxnet3_RxCompDescExt *rcdlro;
1497                                rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd;
1498
1499                                segCnt = rcdlro->segCnt;
1500                                WARN_ON_ONCE(segCnt == 0);
1501                                mss = rcdlro->mss;
1502                                if (unlikely(segCnt <= 1))
1503                                        segCnt = 0;
1504                        } else {
1505                                segCnt = 0;
1506                        }
1507                } else {
1508                        BUG_ON(ctx->skb == NULL && !skip_page_frags);
1509
1510                        /* a non-SOP buffer must be type 1 (body) in most cases */
1511                        BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
1512                        BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1513
1514                        /* If an sop buffer was dropped, skip all
1515                         * following non-sop fragments. They will be reused.
1516                         */
1517                        if (skip_page_frags)
1518                                goto rcd_done;
1519
1520                        if (rcd->len) {
1521                                new_page = alloc_page(GFP_ATOMIC);
1522                                /* Replacement page frag could not be allocated.
1523                                 * Reuse this page. Drop the pkt and free the
1524                                 * skb which contained this page as a frag. Skip
1525                                 * processing all the following non-sop frags.
1526                                 */
1527                                if (unlikely(!new_page)) {
1528                                        rq->stats.rx_buf_alloc_failure++;
1529                                        dev_kfree_skb(ctx->skb);
1530                                        ctx->skb = NULL;
1531                                        skip_page_frags = true;
1532                                        goto rcd_done;
1533                                }
1534                                new_dma_addr = dma_map_page(&adapter->pdev->dev,
1535                                                            new_page,
1536                                                            0, PAGE_SIZE,
1537                                                            PCI_DMA_FROMDEVICE);
1538                                if (dma_mapping_error(&adapter->pdev->dev,
1539                                                      new_dma_addr)) {
1540                                        put_page(new_page);
1541                                        rq->stats.rx_buf_alloc_failure++;
1542                                        dev_kfree_skb(ctx->skb);
1543                                        ctx->skb = NULL;
1544                                        skip_page_frags = true;
1545                                        goto rcd_done;
1546                                }
1547
1548                                dma_unmap_page(&adapter->pdev->dev,
1549                                               rbi->dma_addr, rbi->len,
1550                                               PCI_DMA_FROMDEVICE);
1551
1552                                vmxnet3_append_frag(ctx->skb, rcd, rbi);
1553
1554                                /* Immediate refill */
1555                                rbi->page = new_page;
1556                                rbi->dma_addr = new_dma_addr;
1557                                rxd->addr = cpu_to_le64(rbi->dma_addr);
1558                                rxd->len = rbi->len;
1559                        }
1560                }
1561
1562
1563                skb = ctx->skb;
1564                if (rcd->eop) {
1565                        u32 mtu = adapter->netdev->mtu;
1566                        skb->len += skb->data_len;
1567
1568                        vmxnet3_rx_csum(adapter, skb,
1569                                        (union Vmxnet3_GenericDesc *)rcd);
1570                        skb->protocol = eth_type_trans(skb, adapter->netdev);
1571                        if (!rcd->tcp ||
1572                            !(adapter->netdev->features & NETIF_F_LRO))
1573                                goto not_lro;
1574
1575                        if (segCnt != 0 && mss != 0) {
1576                                skb_shinfo(skb)->gso_type = rcd->v4 ?
1577                                        SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
1578                                skb_shinfo(skb)->gso_size = mss;
1579                                skb_shinfo(skb)->gso_segs = segCnt;
1580                        } else if (segCnt != 0 || skb->len > mtu) {
1581                                u32 hlen;
1582
1583                                hlen = vmxnet3_get_hdr_len(adapter, skb,
1584                                        (union Vmxnet3_GenericDesc *)rcd);
1585                                if (hlen == 0)
1586                                        goto not_lro;
1587
1588                                skb_shinfo(skb)->gso_type =
1589                                        rcd->v4 ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
1590                                if (segCnt != 0) {
1591                                        skb_shinfo(skb)->gso_segs = segCnt;
1592                                        skb_shinfo(skb)->gso_size =
1593                                                DIV_ROUND_UP(skb->len -
1594                                                        hlen, segCnt);
1595                                } else {
1596                                        skb_shinfo(skb)->gso_size = mtu - hlen;
1597                                }
1598                        }
1599not_lro:
1600                        if (unlikely(rcd->ts))
1601                                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci);
1602
1603                        if (adapter->netdev->features & NETIF_F_LRO)
1604                                netif_receive_skb(skb);
1605                        else
1606                                napi_gro_receive(&rq->napi, skb);
1607
1608                        ctx->skb = NULL;
1609                        num_pkts++;
1610                }
1611
1612rcd_done:
1613                /* device may have skipped some rx descs */
1614                ring->next2comp = idx;
1615                num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
1616                ring = rq->rx_ring + ring_idx;
1617
1618                /* Ensure that the writes to rxd->gen bits will be observed
1619                 * after all other writes to rxd objects.
1620                 */
1621                dma_wmb();
1622
1623                while (num_to_alloc) {
1624                        vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
1625                                          &rxCmdDesc);
1626                        BUG_ON(!rxd->addr);
1627
1628                        /* Recv desc is ready to be used by the device */
1629                        rxd->gen = ring->gen;
1630                        vmxnet3_cmd_ring_adv_next2fill(ring);
1631                        num_to_alloc--;
1632                }
1633
1634                /* if needed, update the register */
1635                if (unlikely(rq->shared->updateRxProd)) {
1636                        VMXNET3_WRITE_BAR0_REG(adapter,
1637                                               rxprod_reg[ring_idx] + rq->qid * 8,
1638                                               ring->next2fill);
1639                }
1640
1641                vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
1642                vmxnet3_getRxComp(rcd,
1643                                  &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
1644        }
1645
1646        return num_pkts;
1647}
1648
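/*
 * Editor's illustrative sketch (not part of the upstream driver): the
 * completion-ring loop above relies on a generation bit rather than a
 * head/tail register pair.  A descriptor belongs to the driver only while
 * its gen field matches the ring's current gen value, and the expected gen
 * is flipped each time the consumer index wraps, which is essentially what
 * vmxnet3_comp_ring_adv_next2proc() does.  The struct and helper below are
 * hypothetical simplifications, not driver symbols.
 */
struct example_comp_ring {
        u32 next2proc;  /* next completion descriptor to inspect */
        u32 size;       /* number of descriptors in the ring */
        u8  gen;        /* gen value the driver currently expects */
};

static inline void example_comp_ring_advance(struct example_comp_ring *ring)
{
        if (++ring->next2proc == ring->size) {
                ring->next2proc = 0;
                ring->gen ^= 1;         /* flip the expected gen on wrap */
        }
}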
1649
1650static void
1651vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
1652                   struct vmxnet3_adapter *adapter)
1653{
1654        u32 i, ring_idx;
1655        struct Vmxnet3_RxDesc *rxd;
1656
1657        for (ring_idx = 0; ring_idx < 2; ring_idx++) {
1658                for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
1659#ifdef __BIG_ENDIAN_BITFIELD
1660                        struct Vmxnet3_RxDesc rxDesc;
1661#endif
1662                        vmxnet3_getRxDesc(rxd,
1663                                &rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
1664
1665                        if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
1666                                        rq->buf_info[ring_idx][i].skb) {
1667                                dma_unmap_single(&adapter->pdev->dev, rxd->addr,
1668                                                 rxd->len, PCI_DMA_FROMDEVICE);
1669                                dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
1670                                rq->buf_info[ring_idx][i].skb = NULL;
1671                        } else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
1672                                        rq->buf_info[ring_idx][i].page) {
1673                                dma_unmap_page(&adapter->pdev->dev, rxd->addr,
1674                                               rxd->len, PCI_DMA_FROMDEVICE);
1675                                put_page(rq->buf_info[ring_idx][i].page);
1676                                rq->buf_info[ring_idx][i].page = NULL;
1677                        }
1678                }
1679
1680                rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
1681                rq->rx_ring[ring_idx].next2fill =
1682                                        rq->rx_ring[ring_idx].next2comp = 0;
1683        }
1684
1685        rq->comp_ring.gen = VMXNET3_INIT_GEN;
1686        rq->comp_ring.next2proc = 0;
1687}
1688
1689
1690static void
1691vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
1692{
1693        int i;
1694
1695        for (i = 0; i < adapter->num_rx_queues; i++)
1696                vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
1697}
1698
1699
1700static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1701                               struct vmxnet3_adapter *adapter)
1702{
1703        int i;
1704        int j;
1705
1706        /* all rx buffers must have already been freed */
1707        for (i = 0; i < 2; i++) {
1708                if (rq->buf_info[i]) {
1709                        for (j = 0; j < rq->rx_ring[i].size; j++)
1710                                BUG_ON(rq->buf_info[i][j].page != NULL);
1711                }
1712        }
1713
1714
1715        for (i = 0; i < 2; i++) {
1716                if (rq->rx_ring[i].base) {
1717                        dma_free_coherent(&adapter->pdev->dev,
1718                                          rq->rx_ring[i].size
1719                                          * sizeof(struct Vmxnet3_RxDesc),
1720                                          rq->rx_ring[i].base,
1721                                          rq->rx_ring[i].basePA);
1722                        rq->rx_ring[i].base = NULL;
1723                }
1724        }
1725
1726        if (rq->data_ring.base) {
1727                dma_free_coherent(&adapter->pdev->dev,
1728                                  rq->rx_ring[0].size * rq->data_ring.desc_size,
1729                                  rq->data_ring.base, rq->data_ring.basePA);
1730                rq->data_ring.base = NULL;
1731        }
1732
1733        if (rq->comp_ring.base) {
1734                dma_free_coherent(&adapter->pdev->dev, rq->comp_ring.size
1735                                  * sizeof(struct Vmxnet3_RxCompDesc),
1736                                  rq->comp_ring.base, rq->comp_ring.basePA);
1737                rq->comp_ring.base = NULL;
1738        }
1739
1740        if (rq->buf_info[0]) {
1741                size_t sz = sizeof(struct vmxnet3_rx_buf_info) *
1742                        (rq->rx_ring[0].size + rq->rx_ring[1].size);
1743                dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
1744                                  rq->buf_info_pa);
1745                rq->buf_info[0] = rq->buf_info[1] = NULL;
1746        }
1747}
1748
1749static void
1750vmxnet3_rq_destroy_all_rxdataring(struct vmxnet3_adapter *adapter)
1751{
1752        int i;
1753
1754        for (i = 0; i < adapter->num_rx_queues; i++) {
1755                struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
1756
1757                if (rq->data_ring.base) {
1758                        dma_free_coherent(&adapter->pdev->dev,
1759                                          (rq->rx_ring[0].size *
1760                                          rq->data_ring.desc_size),
1761                                          rq->data_ring.base,
1762                                          rq->data_ring.basePA);
1763                        rq->data_ring.base = NULL;
1764                        rq->data_ring.desc_size = 0;
1765                }
1766        }
1767}
1768
1769static int
1770vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1771                struct vmxnet3_adapter  *adapter)
1772{
1773        int i;
1774
1775        /* initialize buf_info */
1776        for (i = 0; i < rq->rx_ring[0].size; i++) {
1777
1778                /* 1st buf for a pkt is skbuff */
1779                if (i % adapter->rx_buf_per_pkt == 0) {
1780                        rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1781                        rq->buf_info[0][i].len = adapter->skb_buf_size;
1782                } else { /* subsequent bufs for a pkt are frags */
1783                        rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1784                        rq->buf_info[0][i].len = PAGE_SIZE;
1785                }
1786        }
1787        for (i = 0; i < rq->rx_ring[1].size; i++) {
1788                rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1789                rq->buf_info[1][i].len = PAGE_SIZE;
1790        }
1791
1792        /* reset internal state and allocate buffers for both rings */
1793        for (i = 0; i < 2; i++) {
1794                rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1795
1796                memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1797                       sizeof(struct Vmxnet3_RxDesc));
1798                rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1799        }
1800        if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1801                                    adapter) == 0) {
1802                /* the 1st ring must have at least 1 rx buffer */
1803                return -ENOMEM;
1804        }
1805        vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1806
1807        /* reset the comp ring */
1808        rq->comp_ring.next2proc = 0;
1809        memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1810               sizeof(struct Vmxnet3_RxCompDesc));
1811        rq->comp_ring.gen = VMXNET3_INIT_GEN;
1812
1813        /* reset rxctx */
1814        rq->rx_ctx.skb = NULL;
1815
1816        /* stats are not reset */
1817        return 0;
1818}
1819
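/*
 * Editor's illustrative sketch (not part of the upstream driver): on rx
 * ring 0, every rx_buf_per_pkt-th slot is backed by an skb (which receives
 * the start of a packet) and the slots in between are backed by whole pages
 * (which receive the remaining fragments of a large packet); ring 1 holds
 * pages only.  The hypothetical helper below restates the layout rule used
 * in the initialization loop above.
 */
static inline bool example_ring0_slot_is_skb(u32 slot, u32 rx_buf_per_pkt)
{
        /* e.g. rx_buf_per_pkt == 3: slots 0, 3, 6, ... get skbs */
        return (slot % rx_buf_per_pkt) == 0;
}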
1820
1821static int
1822vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1823{
1824        int i, err = 0;
1825
1826        for (i = 0; i < adapter->num_rx_queues; i++) {
1827                err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1828                if (unlikely(err)) {
1829                        dev_err(&adapter->netdev->dev, "%s: failed to "
1830                                "initialize rx queue%i\n",
1831                                adapter->netdev->name, i);
1832                        break;
1833                }
1834        }
1835        return err;
1836
1837}
1838
1839
1840static int
1841vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1842{
1843        int i;
1844        size_t sz;
1845        struct vmxnet3_rx_buf_info *bi;
1846
1847        for (i = 0; i < 2; i++) {
1848
1849                sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1850                rq->rx_ring[i].base = dma_alloc_coherent(
1851                                                &adapter->pdev->dev, sz,
1852                                                &rq->rx_ring[i].basePA,
1853                                                GFP_KERNEL);
1854                if (!rq->rx_ring[i].base) {
1855                        netdev_err(adapter->netdev,
1856                                   "failed to allocate rx ring %d\n", i);
1857                        goto err;
1858                }
1859        }
1860
1861        if ((adapter->rxdataring_enabled) && (rq->data_ring.desc_size != 0)) {
1862                sz = rq->rx_ring[0].size * rq->data_ring.desc_size;
1863                rq->data_ring.base =
1864                        dma_alloc_coherent(&adapter->pdev->dev, sz,
1865                                           &rq->data_ring.basePA,
1866                                           GFP_KERNEL);
1867                if (!rq->data_ring.base) {
1868                        netdev_err(adapter->netdev,
1869                                   "rx data ring will be disabled\n");
1870                        adapter->rxdataring_enabled = false;
1871                }
1872        } else {
1873                rq->data_ring.base = NULL;
1874                rq->data_ring.desc_size = 0;
1875        }
1876
1877        sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1878        rq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev, sz,
1879                                                &rq->comp_ring.basePA,
1880                                                GFP_KERNEL);
1881        if (!rq->comp_ring.base) {
1882                netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
1883                goto err;
1884        }
1885
1886        sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1887                                                   rq->rx_ring[1].size);
1888        bi = dma_alloc_coherent(&adapter->pdev->dev, sz, &rq->buf_info_pa,
1889                                GFP_KERNEL);
1890        if (!bi)
1891                goto err;
1892
1893        rq->buf_info[0] = bi;
1894        rq->buf_info[1] = bi + rq->rx_ring[0].size;
1895
1896        return 0;
1897
1898err:
1899        vmxnet3_rq_destroy(rq, adapter);
1900        return -ENOMEM;
1901}
1902
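/*
 * Editor's illustrative sketch (not part of the upstream driver): the
 * buf_info arrays for both rx rings come out of one coherent DMA allocation;
 * buf_info[1] simply points rq->rx_ring[0].size entries past buf_info[0].
 * The hypothetical helper below restates the size of that single allocation.
 */
static inline size_t example_rx_buf_info_bytes(size_t ring0_size,
                                               size_t ring1_size)
{
        return sizeof(struct vmxnet3_rx_buf_info) * (ring0_size + ring1_size);
}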
1903
1904static int
1905vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1906{
1907        int i, err = 0;
1908
1909        adapter->rxdataring_enabled = VMXNET3_VERSION_GE_3(adapter);
1910
1911        for (i = 0; i < adapter->num_rx_queues; i++) {
1912                err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1913                if (unlikely(err)) {
1914                        dev_err(&adapter->netdev->dev,
1915                                "%s: failed to create rx queue%i\n",
1916                                adapter->netdev->name, i);
1917                        goto err_out;
1918                }
1919        }
1920
1921        if (!adapter->rxdataring_enabled)
1922                vmxnet3_rq_destroy_all_rxdataring(adapter);
1923
1924        return err;
1925err_out:
1926        vmxnet3_rq_destroy_all(adapter);
1927        return err;
1928
1929}
1930
1931/* Multiple queue aware polling function for tx and rx */
1932
1933static int
1934vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1935{
1936        int rcd_done = 0, i;
1937        if (unlikely(adapter->shared->ecr))
1938                vmxnet3_process_events(adapter);
1939        for (i = 0; i < adapter->num_tx_queues; i++)
1940                vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1941
1942        for (i = 0; i < adapter->num_rx_queues; i++)
1943                rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1944                                                   adapter, budget);
1945        return rcd_done;
1946}
1947
1948
1949static int
1950vmxnet3_poll(struct napi_struct *napi, int budget)
1951{
1952        struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1953                                          struct vmxnet3_rx_queue, napi);
1954        int rxd_done;
1955
1956        rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1957
1958        if (rxd_done < budget) {
1959                napi_complete_done(napi, rxd_done);
1960                vmxnet3_enable_all_intrs(rx_queue->adapter);
1961        }
1962        return rxd_done;
1963}
1964
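/*
 * Editor's illustrative sketch (not part of the upstream driver): the NAPI
 * contract followed here is that a poll routine may consume at most 'budget'
 * descriptors.  Returning less than the budget means the queue is drained,
 * so the poll routine completes NAPI and re-arms its interrupt; returning
 * exactly the budget keeps the queue in polled mode.  Hypothetical
 * restatement of that decision:
 */
static inline bool example_napi_should_rearm(int processed, int budget)
{
        return processed < budget;      /* drained: complete NAPI, enable intr */
}
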
1965/*
1966 * NAPI polling function for MSI-X mode with multiple Rx queues
1967 * Returns the # of the NAPI credit consumed (# of rx descriptors processed)
1968 */
1969
1970static int
1971vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1972{
1973        struct vmxnet3_rx_queue *rq = container_of(napi,
1974                                                struct vmxnet3_rx_queue, napi);
1975        struct vmxnet3_adapter *adapter = rq->adapter;
1976        int rxd_done;
1977
1978        /* When sharing interrupt with corresponding tx queue, process
1979         * tx completions in that queue as well
1980         */
1981        if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1982                struct vmxnet3_tx_queue *tq =
1983                                &adapter->tx_queue[rq - adapter->rx_queue];
1984                vmxnet3_tq_tx_complete(tq, adapter);
1985        }
1986
1987        rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1988
1989        if (rxd_done < budget) {
1990                napi_complete_done(napi, rxd_done);
1991                vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1992        }
1993        return rxd_done;
1994}
1995
1996
1997#ifdef CONFIG_PCI_MSI
1998
1999/*
2000 * Handle completion interrupts on tx queues
2001 * Returns whether or not the intr is handled
2002 */
2003
2004static irqreturn_t
2005vmxnet3_msix_tx(int irq, void *data)
2006{
2007        struct vmxnet3_tx_queue *tq = data;
2008        struct vmxnet3_adapter *adapter = tq->adapter;
2009
2010        if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
2011                vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
2012
2013        /* Handle the case where only one irq is allocated for all tx queues */
2014        if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
2015                int i;
2016                for (i = 0; i < adapter->num_tx_queues; i++) {
2017                        struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
2018                        vmxnet3_tq_tx_complete(txq, adapter);
2019                }
2020        } else {
2021                vmxnet3_tq_tx_complete(tq, adapter);
2022        }
2023        vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
2024
2025        return IRQ_HANDLED;
2026}
2027
2028
2029/*
2030 * Handle completion interrupts on rx queues. Returns whether or not the
2031 * intr is handled
2032 */
2033
2034static irqreturn_t
2035vmxnet3_msix_rx(int irq, void *data)
2036{
2037        struct vmxnet3_rx_queue *rq = data;
2038        struct vmxnet3_adapter *adapter = rq->adapter;
2039
2040        /* disable intr if needed */
2041        if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
2042                vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
2043        napi_schedule(&rq->napi);
2044
2045        return IRQ_HANDLED;
2046}
2047
2048/*
2049 *----------------------------------------------------------------------------
2050 *
2051 * vmxnet3_msix_event --
2052 *
2053 *    vmxnet3 msix event intr handler
2054 *
2055 * Result:
2056 *    whether or not the intr is handled
2057 *
2058 *----------------------------------------------------------------------------
2059 */
2060
2061static irqreturn_t
2062vmxnet3_msix_event(int irq, void *data)
2063{
2064        struct net_device *dev = data;
2065        struct vmxnet3_adapter *adapter = netdev_priv(dev);
2066
2067        /* disable intr if needed */
2068        if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
2069                vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
2070
2071        if (adapter->shared->ecr)
2072                vmxnet3_process_events(adapter);
2073
2074        vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
2075
2076        return IRQ_HANDLED;
2077}
2078
2079#endif /* CONFIG_PCI_MSI  */
2080
2081
2082/* Interrupt handler for vmxnet3  */
2083static irqreturn_t
2084vmxnet3_intr(int irq, void *dev_id)
2085{
2086        struct net_device *dev = dev_id;
2087        struct vmxnet3_adapter *adapter = netdev_priv(dev);
2088
2089        if (adapter->intr.type == VMXNET3_IT_INTX) {
2090                u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
2091                if (unlikely(icr == 0))
2092                        /* not ours */
2093                        return IRQ_NONE;
2094        }
2095
2096
2097        /* disable intr if needed */
2098        if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
2099                vmxnet3_disable_all_intrs(adapter);
2100
2101        napi_schedule(&adapter->rx_queue[0].napi);
2102
2103        return IRQ_HANDLED;
2104}
2105
2106#ifdef CONFIG_NET_POLL_CONTROLLER
2107
2108/* netpoll callback. */
2109static void
2110vmxnet3_netpoll(struct net_device *netdev)
2111{
2112        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2113
2114        switch (adapter->intr.type) {
2115#ifdef CONFIG_PCI_MSI
2116        case VMXNET3_IT_MSIX: {
2117                int i;
2118                for (i = 0; i < adapter->num_rx_queues; i++)
2119                        vmxnet3_msix_rx(0, &adapter->rx_queue[i]);
2120                break;
2121        }
2122#endif
2123        case VMXNET3_IT_MSI:
2124        default:
2125                vmxnet3_intr(0, adapter->netdev);
2126                break;
2127        }
2128
2129}
2130#endif  /* CONFIG_NET_POLL_CONTROLLER */
2131
2132static int
2133vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
2134{
2135        struct vmxnet3_intr *intr = &adapter->intr;
2136        int err = 0, i;
2137        int vector = 0;
2138
2139#ifdef CONFIG_PCI_MSI
2140        if (adapter->intr.type == VMXNET3_IT_MSIX) {
2141                for (i = 0; i < adapter->num_tx_queues; i++) {
2142                        if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
2143                                sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
2144                                        adapter->netdev->name, vector);
2145                                err = request_irq(
2146                                              intr->msix_entries[vector].vector,
2147                                              vmxnet3_msix_tx, 0,
2148                                              adapter->tx_queue[i].name,
2149                                              &adapter->tx_queue[i]);
2150                        } else {
2151                                sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
2152                                        adapter->netdev->name, vector);
2153                        }
2154                        if (err) {
2155                                dev_err(&adapter->netdev->dev,
2156                                        "Failed to request irq for MSIX, %s, "
2157                                        "error %d\n",
2158                                        adapter->tx_queue[i].name, err);
2159                                return err;
2160                        }
2161
2162                        /* Handle the case where only 1 MSI-X vector was allocated for
2163                         * all tx queues */
2164                        if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
2165                                for (; i < adapter->num_tx_queues; i++)
2166                                        adapter->tx_queue[i].comp_ring.intr_idx
2167                                                                = vector;
2168                                vector++;
2169                                break;
2170                        } else {
2171                                adapter->tx_queue[i].comp_ring.intr_idx
2172                                                                = vector++;
2173                        }
2174                }
2175                if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
2176                        vector = 0;
2177
2178                for (i = 0; i < adapter->num_rx_queues; i++) {
2179                        if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
2180                                sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
2181                                        adapter->netdev->name, vector);
2182                        else
2183                                sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
2184                                        adapter->netdev->name, vector);
2185                        err = request_irq(intr->msix_entries[vector].vector,
2186                                          vmxnet3_msix_rx, 0,
2187                                          adapter->rx_queue[i].name,
2188                                          &(adapter->rx_queue[i]));
2189                        if (err) {
2190                                netdev_err(adapter->netdev,
2191                                           "Failed to request irq for MSIX, "
2192                                           "%s, error %d\n",
2193                                           adapter->rx_queue[i].name, err);
2194                                return err;
2195                        }
2196
2197                        adapter->rx_queue[i].comp_ring.intr_idx = vector++;
2198                }
2199
2200                sprintf(intr->event_msi_vector_name, "%s-event-%d",
2201                        adapter->netdev->name, vector);
2202                err = request_irq(intr->msix_entries[vector].vector,
2203                                  vmxnet3_msix_event, 0,
2204                                  intr->event_msi_vector_name, adapter->netdev);
2205                intr->event_intr_idx = vector;
2206
2207        } else if (intr->type == VMXNET3_IT_MSI) {
2208                adapter->num_rx_queues = 1;
2209                err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
2210                                  adapter->netdev->name, adapter->netdev);
2211        } else {
2212#endif
2213                adapter->num_rx_queues = 1;
2214                err = request_irq(adapter->pdev->irq, vmxnet3_intr,
2215                                  IRQF_SHARED, adapter->netdev->name,
2216                                  adapter->netdev);
2217#ifdef CONFIG_PCI_MSI
2218        }
2219#endif
2220        intr->num_intrs = vector + 1;
2221        if (err) {
2222                netdev_err(adapter->netdev,
2223                           "Failed to request irq (intr type:%d), error %d\n",
2224                           intr->type, err);
2225        } else {
2226                /* Number of rx queues will not change after this */
2227                for (i = 0; i < adapter->num_rx_queues; i++) {
2228                        struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2229                        rq->qid = i;
2230                        rq->qid2 = i + adapter->num_rx_queues;
2231                        rq->dataRingQid = i + 2 * adapter->num_rx_queues;
2232                }
2233
2234                /* init our intr settings */
2235                for (i = 0; i < intr->num_intrs; i++)
2236                        intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
2237                if (adapter->intr.type != VMXNET3_IT_MSIX) {
2238                        adapter->intr.event_intr_idx = 0;
2239                        for (i = 0; i < adapter->num_tx_queues; i++)
2240                                adapter->tx_queue[i].comp_ring.intr_idx = 0;
2241                        adapter->rx_queue[0].comp_ring.intr_idx = 0;
2242                }
2243
2244                netdev_info(adapter->netdev,
2245                            "intr type %u, mode %u, %u vectors allocated\n",
2246                            intr->type, intr->mask_mode, intr->num_intrs);
2247        }
2248
2249        return err;
2250}
2251
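/*
 * Editor's illustrative sketch (not part of the upstream driver): in the
 * plain MSI-X case (no TXSHARE or BUDDYSHARE interrupt sharing) the vectors
 * are handed out in order (one per tx queue, then one per rx queue, then a
 * final one for events), which is why num_intrs ends up as the event vector
 * index plus one.  Hypothetical restatement under that assumption:
 */
static inline int example_msix_vector_count(int num_tx_queues,
                                            int num_rx_queues)
{
        return num_tx_queues + num_rx_queues + 1;       /* +1 for event intr */
}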
2252
2253static void
2254vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
2255{
2256        struct vmxnet3_intr *intr = &adapter->intr;
2257        BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
2258
2259        switch (intr->type) {
2260#ifdef CONFIG_PCI_MSI
2261        case VMXNET3_IT_MSIX:
2262        {
2263                int i, vector = 0;
2264
2265                if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
2266                        for (i = 0; i < adapter->num_tx_queues; i++) {
2267                                free_irq(intr->msix_entries[vector++].vector,
2268                                         &(adapter->tx_queue[i]));
2269                                if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
2270                                        break;
2271                        }
2272                }
2273
2274                for (i = 0; i < adapter->num_rx_queues; i++) {
2275                        free_irq(intr->msix_entries[vector++].vector,
2276                                 &(adapter->rx_queue[i]));
2277                }
2278
2279                free_irq(intr->msix_entries[vector].vector,
2280                         adapter->netdev);
2281                BUG_ON(vector >= intr->num_intrs);
2282                break;
2283        }
2284#endif
2285        case VMXNET3_IT_MSI:
2286                free_irq(adapter->pdev->irq, adapter->netdev);
2287                break;
2288        case VMXNET3_IT_INTX:
2289                free_irq(adapter->pdev->irq, adapter->netdev);
2290                break;
2291        default:
2292                BUG();
2293        }
2294}
2295
2296
2297static void
2298vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
2299{
2300        u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2301        u16 vid;
2302
2303        /* allow untagged pkts */
2304        VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
2305
2306        for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
2307                VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
2308}
2309
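/*
 * Editor's illustrative sketch (not part of the upstream driver): the VLAN
 * filter table written above is a 4096-bit bitmap stored as u32 words, so
 * VLAN id 'vid' lives in word vid / 32 at bit vid % 32, which is essentially
 * what the VMXNET3_SET_VFTABLE_ENTRY() macro encodes.  Hypothetical
 * restatement:
 */
static inline void example_vft_set(u32 *vfTable, u16 vid)
{
        vfTable[vid >> 5] |= 1U << (vid & 31);
}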
2310
2311static int
2312vmxnet3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
2313{
2314        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2315
2316        if (!(netdev->flags & IFF_PROMISC)) {
2317                u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2318                unsigned long flags;
2319
2320                VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
2321                spin_lock_irqsave(&adapter->cmd_lock, flags);
2322                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2323                                       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2324                spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2325        }
2326
2327        set_bit(vid, adapter->active_vlans);
2328
2329        return 0;
2330}
2331
2332
2333static int
2334vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
2335{
2336        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2337
2338        if (!(netdev->flags & IFF_PROMISC)) {
2339                u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2340                unsigned long flags;
2341
2342                VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
2343                spin_lock_irqsave(&adapter->cmd_lock, flags);
2344                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2345                                       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2346                spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2347        }
2348
2349        clear_bit(vid, adapter->active_vlans);
2350
2351        return 0;
2352}
2353
2354
2355static u8 *
2356vmxnet3_copy_mc(struct net_device *netdev)
2357{
2358        u8 *buf = NULL;
2359        u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
2360
2361        /* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
2362        if (sz <= 0xffff) {
2363                /* We may be called with BH disabled */
2364                buf = kmalloc(sz, GFP_ATOMIC);
2365                if (buf) {
2366                        struct netdev_hw_addr *ha;
2367                        int i = 0;
2368
2369                        netdev_for_each_mc_addr(ha, netdev)
2370                                memcpy(buf + i++ * ETH_ALEN, ha->addr,
2371                                       ETH_ALEN);
2372                }
2373        }
2374        return buf;
2375}
2376
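/*
 * Editor's illustrative sketch (not part of the upstream driver): because
 * mfTableLen is a u16, the multicast table copied above can describe at most
 * 0xffff / ETH_ALEN (10922) addresses; for larger lists vmxnet3_set_mc()
 * falls back to ALL_MULTI mode.  Hypothetical restatement of the size check:
 */
static inline bool example_mc_list_fits(u32 mc_count)
{
        return mc_count * ETH_ALEN <= 0xffff;
}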
2377
2378static void
2379vmxnet3_set_mc(struct net_device *netdev)
2380{
2381        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2382        unsigned long flags;
2383        struct Vmxnet3_RxFilterConf *rxConf =
2384                                        &adapter->shared->devRead.rxFilterConf;
2385        u8 *new_table = NULL;
2386        dma_addr_t new_table_pa = 0;
2387        bool new_table_pa_valid = false;
2388        u32 new_mode = VMXNET3_RXM_UCAST;
2389
2390        if (netdev->flags & IFF_PROMISC) {
2391                u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2392                memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
2393
2394                new_mode |= VMXNET3_RXM_PROMISC;
2395        } else {
2396                vmxnet3_restore_vlan(adapter);
2397        }
2398
2399        if (netdev->flags & IFF_BROADCAST)
2400                new_mode |= VMXNET3_RXM_BCAST;
2401
2402        if (netdev->flags & IFF_ALLMULTI)
2403                new_mode |= VMXNET3_RXM_ALL_MULTI;
2404        else
2405                if (!netdev_mc_empty(netdev)) {
2406                        new_table = vmxnet3_copy_mc(netdev);
2407                        if (new_table) {
2408                                size_t sz = netdev_mc_count(netdev) * ETH_ALEN;
2409
2410                                rxConf->mfTableLen = cpu_to_le16(sz);
2411                                new_table_pa = dma_map_single(
2412                                                        &adapter->pdev->dev,
2413                                                        new_table,
2414                                                        sz,
2415                                                        PCI_DMA_TODEVICE);
2416                                if (!dma_mapping_error(&adapter->pdev->dev,
2417                                                       new_table_pa)) {
2418                                        new_mode |= VMXNET3_RXM_MCAST;
2419                                        new_table_pa_valid = true;
2420                                        rxConf->mfTablePA = cpu_to_le64(
2421                                                                new_table_pa);
2422                                }
2423                        }
2424                        if (!new_table_pa_valid) {
2425                                netdev_info(netdev,
2426                                            "failed to copy mcast list, setting ALL_MULTI\n");
2427                                new_mode |= VMXNET3_RXM_ALL_MULTI;
2428                        }
2429                }
2430
2431        if (!(new_mode & VMXNET3_RXM_MCAST)) {
2432                rxConf->mfTableLen = 0;
2433                rxConf->mfTablePA = 0;
2434        }
2435
2436        spin_lock_irqsave(&adapter->cmd_lock, flags);
2437        if (new_mode != rxConf->rxMode) {
2438                rxConf->rxMode = cpu_to_le32(new_mode);
2439                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2440                                       VMXNET3_CMD_UPDATE_RX_MODE);
2441                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2442                                       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2443        }
2444
2445        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2446                               VMXNET3_CMD_UPDATE_MAC_FILTERS);
2447        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2448
2449        if (new_table_pa_valid)
2450                dma_unmap_single(&adapter->pdev->dev, new_table_pa,
2451                                 rxConf->mfTableLen, PCI_DMA_TODEVICE);
2452        kfree(new_table);
2453}
2454
2455void
2456vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
2457{
2458        int i;
2459
2460        for (i = 0; i < adapter->num_rx_queues; i++)
2461                vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
2462}
2463
2464
2465/*
2466 *   Set up driver_shared based on settings in adapter.
2467 */
2468
2469static void
2470vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2471{
2472        struct Vmxnet3_DriverShared *shared = adapter->shared;
2473        struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2474        struct Vmxnet3_TxQueueConf *tqc;
2475        struct Vmxnet3_RxQueueConf *rqc;
2476        int i;
2477
2478        memset(shared, 0, sizeof(*shared));
2479
2480        /* driver settings */
2481        shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2482        devRead->misc.driverInfo.version = cpu_to_le32(
2483                                                VMXNET3_DRIVER_VERSION_NUM);
2484        devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2485                                VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2486        devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
2487        *((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2488                                *((u32 *)&devRead->misc.driverInfo.gos));
2489        devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2490        devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2491
2492        devRead->misc.ddPA = cpu_to_le64(adapter->adapter_pa);
2493        devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2494
2495        /* set up feature flags */
2496        if (adapter->netdev->features & NETIF_F_RXCSUM)
2497                devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2498
2499        if (adapter->netdev->features & NETIF_F_LRO) {
2500                devRead->misc.uptFeatures |= UPT1_F_LRO;
2501                devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2502        }
2503        if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
2504                devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2505
2506        if (adapter->netdev->features & (NETIF_F_GSO_UDP_TUNNEL |
2507                                         NETIF_F_GSO_UDP_TUNNEL_CSUM))
2508                devRead->misc.uptFeatures |= UPT1_F_RXINNEROFLD;
2509
2510        devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2511        devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2512        devRead->misc.queueDescLen = cpu_to_le32(
2513                adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2514                adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2515
2516        /* tx queue settings */
2517        devRead->misc.numTxQueues =  adapter->num_tx_queues;
2518        for (i = 0; i < adapter->num_tx_queues; i++) {
2519                struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2520                BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2521                tqc = &adapter->tqd_start[i].conf;
2522                tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2523                tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2524                tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2525                tqc->ddPA           = cpu_to_le64(tq->buf_info_pa);
2526                tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2527                tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2528                tqc->txDataRingDescSize = cpu_to_le32(tq->txdata_desc_size);
2529                tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2530                tqc->ddLen          = cpu_to_le32(
2531                                        sizeof(struct vmxnet3_tx_buf_info) *
2532                                        tqc->txRingSize);
2533                tqc->intrIdx        = tq->comp_ring.intr_idx;
2534        }
2535
2536        /* rx queue settings */
2537        devRead->misc.numRxQueues = adapter->num_rx_queues;
2538        for (i = 0; i < adapter->num_rx_queues; i++) {
2539                struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2540                rqc = &adapter->rqd_start[i].conf;
2541                rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2542                rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2543                rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2544                rqc->ddPA            = cpu_to_le64(rq->buf_info_pa);
2545                rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2546                rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2547                rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2548                rqc->ddLen           = cpu_to_le32(
2549                                        sizeof(struct vmxnet3_rx_buf_info) *
2550                                        (rqc->rxRingSize[0] +
2551                                         rqc->rxRingSize[1]));
2552                rqc->intrIdx         = rq->comp_ring.intr_idx;
2553                if (VMXNET3_VERSION_GE_3(adapter)) {
2554                        rqc->rxDataRingBasePA =
2555                                cpu_to_le64(rq->data_ring.basePA);
2556                        rqc->rxDataRingDescSize =
2557                                cpu_to_le16(rq->data_ring.desc_size);
2558                }
2559        }
2560
2561#ifdef VMXNET3_RSS
2562        memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2563
2564        if (adapter->rss) {
2565                struct UPT1_RSSConf *rssConf = adapter->rss_conf;
2566
2567                devRead->misc.uptFeatures |= UPT1_F_RSS;
2568                devRead->misc.numRxQueues = adapter->num_rx_queues;
2569                rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2570                                    UPT1_RSS_HASH_TYPE_IPV4 |
2571                                    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2572                                    UPT1_RSS_HASH_TYPE_IPV6;
2573                rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2574                rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2575                rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2576                netdev_rss_key_fill(rssConf->hashKey, sizeof(rssConf->hashKey));
2577
2578                for (i = 0; i < rssConf->indTableSize; i++)
2579                        rssConf->indTable[i] = ethtool_rxfh_indir_default(
2580                                i, adapter->num_rx_queues);
2581
2582                devRead->rssConfDesc.confVer = 1;
2583                devRead->rssConfDesc.confLen = cpu_to_le32(sizeof(*rssConf));
2584                devRead->rssConfDesc.confPA =
2585                        cpu_to_le64(adapter->rss_conf_pa);
2586        }
2587
2588#endif /* VMXNET3_RSS */
2589
2590        /* intr settings */
2591        devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2592                                     VMXNET3_IMM_AUTO;
2593        devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2594        for (i = 0; i < adapter->intr.num_intrs; i++)
2595                devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2596
2597        devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
2598        devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2599
2600        /* rx filter settings */
2601        devRead->rxFilterConf.rxMode = 0;
2602        vmxnet3_restore_vlan(adapter);
2603        vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2604
2605        /* the rest are already zeroed */
2606}
2607
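/*
 * Editor's illustrative sketch (not part of the upstream driver):
 * ethtool_rxfh_indir_default(), used above to seed the RSS indirection
 * table, essentially spreads table entries round-robin across the rx queues.
 * A hypothetical, equivalent computation:
 */
static inline u32 example_rss_indir_default(u32 index, u32 n_rx_queues)
{
        return index % n_rx_queues;     /* rx queue for indirection slot 'index' */
}
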
2608static void
2609vmxnet3_init_coalesce(struct vmxnet3_adapter *adapter)
2610{
2611        struct Vmxnet3_DriverShared *shared = adapter->shared;
2612        union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
2613        unsigned long flags;
2614
2615        if (!VMXNET3_VERSION_GE_3(adapter))
2616                return;
2617
2618        spin_lock_irqsave(&adapter->cmd_lock, flags);
2619        cmdInfo->varConf.confVer = 1;
2620        cmdInfo->varConf.confLen =
2621                cpu_to_le32(sizeof(*adapter->coal_conf));
2622        cmdInfo->varConf.confPA  = cpu_to_le64(adapter->coal_conf_pa);
2623
2624        if (adapter->default_coal_mode) {
2625                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2626                                       VMXNET3_CMD_GET_COALESCE);
2627        } else {
2628                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2629                                       VMXNET3_CMD_SET_COALESCE);
2630        }
2631
2632        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2633}
2634
2635static void
2636vmxnet3_init_rssfields(struct vmxnet3_adapter *adapter)
2637{
2638        struct Vmxnet3_DriverShared *shared = adapter->shared;
2639        union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
2640        unsigned long flags;
2641
2642        if (!VMXNET3_VERSION_GE_4(adapter))
2643                return;
2644
2645        spin_lock_irqsave(&adapter->cmd_lock, flags);
2646
2647        if (adapter->default_rss_fields) {
2648                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2649                                       VMXNET3_CMD_GET_RSS_FIELDS);
2650                adapter->rss_fields =
2651                        VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2652        } else {
2653                cmdInfo->setRssFields = adapter->rss_fields;
2654                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2655                                       VMXNET3_CMD_SET_RSS_FIELDS);
2656                /* Not all requested RSS may get applied, so get and
2657                 * cache what was actually applied.
2658                 */
2659                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2660                                       VMXNET3_CMD_GET_RSS_FIELDS);
2661                adapter->rss_fields =
2662                        VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2663        }
2664
2665        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2666}
2667
2668int
2669vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2670{
2671        int err, i;
2672        u32 ret;
2673        unsigned long flags;
2674
2675        netdev_dbg(adapter->netdev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2676                " ring sizes %u %u %u\n", adapter->netdev->name,
2677                adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2678                adapter->tx_queue[0].tx_ring.size,
2679                adapter->rx_queue[0].rx_ring[0].size,
2680                adapter->rx_queue[0].rx_ring[1].size);
2681
2682        vmxnet3_tq_init_all(adapter);
2683        err = vmxnet3_rq_init_all(adapter);
2684        if (err) {
2685                netdev_err(adapter->netdev,
2686                           "Failed to init rx queue: error %d\n", err);
2687                goto rq_err;
2688        }
2689
2690        err = vmxnet3_request_irqs(adapter);
2691        if (err) {
2692                netdev_err(adapter->netdev,
2693                           "Failed to set up irqs: error %d\n", err);
2694                goto irq_err;
2695        }
2696
2697        vmxnet3_setup_driver_shared(adapter);
2698
2699        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2700                               adapter->shared_pa));
2701        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2702                               adapter->shared_pa));
2703        spin_lock_irqsave(&adapter->cmd_lock, flags);
2704        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2705                               VMXNET3_CMD_ACTIVATE_DEV);
2706        ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2707        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2708
2709        if (ret != 0) {
2710                netdev_err(adapter->netdev,
2711                           "Failed to activate dev: error %u\n", ret);
2712                err = -EINVAL;
2713                goto activate_err;
2714        }
2715
2716        vmxnet3_init_coalesce(adapter);
2717        vmxnet3_init_rssfields(adapter);
2718
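            /* Tell the device how many rx descriptors are initially available by
             * writing each ring's next2fill index to its RXPROD/RXPROD2 register.
             */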
2719        for (i = 0; i < adapter->num_rx_queues; i++) {
2720                VMXNET3_WRITE_BAR0_REG(adapter,
2721                                VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2722                                adapter->rx_queue[i].rx_ring[0].next2fill);
2723                VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2724                                (i * VMXNET3_REG_ALIGN)),
2725                                adapter->rx_queue[i].rx_ring[1].next2fill);
2726        }
2727
2728        /* Apply the rx filter settings last. */
2729        vmxnet3_set_mc(adapter->netdev);
2730
2731        /*
2732         * Check link state when first activating device. It will start the
2733         * tx queue if the link is up.
2734         */
2735        vmxnet3_check_link(adapter, true);
2736        for (i = 0; i < adapter->num_rx_queues; i++)
2737                napi_enable(&adapter->rx_queue[i].napi);
2738        vmxnet3_enable_all_intrs(adapter);
2739        clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2740        return 0;
2741
2742activate_err:
2743        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2744        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2745        vmxnet3_free_irqs(adapter);
2746irq_err:
2747rq_err:
2748        /* free up buffers we allocated */
2749        vmxnet3_rq_cleanup_all(adapter);
2750        return err;
2751}
2752
2753
2754void
2755vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2756{
2757        unsigned long flags;
2758        spin_lock_irqsave(&adapter->cmd_lock, flags);
2759        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2760        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2761}
2762
2763
2764int
2765vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2766{
2767        int i;
2768        unsigned long flags;
2769        if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
2770                return 0;
2771
2772
2773        spin_lock_irqsave(&adapter->cmd_lock, flags);
2774        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2775                               VMXNET3_CMD_QUIESCE_DEV);
2776        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2777        vmxnet3_disable_all_intrs(adapter);
2778
2779        for (i = 0; i < adapter->num_rx_queues; i++)
2780                napi_disable(&adapter->rx_queue[i].napi);
2781        netif_tx_disable(adapter->netdev);
2782        adapter->link_speed = 0;
2783        netif_carrier_off(adapter->netdev);
2784
2785        vmxnet3_tq_cleanup_all(adapter);
2786        vmxnet3_rq_cleanup_all(adapter);
2787        vmxnet3_free_irqs(adapter);
2788        return 0;
2789}
2790
2791
2792static void
2793vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2794{
2795        u32 tmp;
2796
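            /* The first four bytes of the MAC address go into MACL; the
             * remaining two bytes go into MACH.
             */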
2797        tmp = *(u32 *)mac;
2798        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2799
2800        tmp = (mac[5] << 8) | mac[4];
2801        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2802}
2803
2804
2805static int
2806vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2807{
2808        struct sockaddr *addr = p;
2809        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2810
2811        memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2812        vmxnet3_write_mac_addr(adapter, addr->sa_data);
2813
2814        return 0;
2815}
2816
2817
2818/* ==================== initialization and cleanup routines ============ */
2819
2820static int
2821vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter)
2822{
2823        int err;
2824        unsigned long mmio_start, mmio_len;
2825        struct pci_dev *pdev = adapter->pdev;
2826
2827        err = pci_enable_device(pdev);
2828        if (err) {
2829                dev_err(&pdev->dev, "Failed to enable adapter: error %d\n", err);
2830                return err;
2831        }
2832
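            /* (1 << 2) - 1 == 0x3, i.e. request only the first two BARs (BAR0 and BAR1). */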
2833        err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2834                                           vmxnet3_driver_name);
2835        if (err) {
2836                dev_err(&pdev->dev,
2837                        "Failed to request region for adapter: error %d\n", err);
2838                goto err_enable_device;
2839        }
2840
2841        pci_set_master(pdev);
2842
2843        mmio_start = pci_resource_start(pdev, 0);
2844        mmio_len = pci_resource_len(pdev, 0);
2845        adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2846        if (!adapter->hw_addr0) {
2847                dev_err(&pdev->dev, "Failed to map bar0\n");
2848                err = -EIO;
2849                goto err_ioremap;
2850        }
2851
2852        mmio_start = pci_resource_start(pdev, 1);
2853        mmio_len = pci_resource_len(pdev, 1);
2854        adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2855        if (!adapter->hw_addr1) {
2856                dev_err(&pdev->dev, "Failed to map bar1\n");
2857                err = -EIO;
2858                goto err_bar1;
2859        }
2860        return 0;
2861
2862err_bar1:
2863        iounmap(adapter->hw_addr0);
2864err_ioremap:
2865        pci_release_selected_regions(pdev, (1 << 2) - 1);
2866err_enable_device:
2867        pci_disable_device(pdev);
2868        return err;
2869}
2870
2871
2872static void
2873vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2874{
2875        BUG_ON(!adapter->pdev);
2876
2877        iounmap(adapter->hw_addr0);
2878        iounmap(adapter->hw_addr1);
2879        pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2880        pci_disable_device(adapter->pdev);
2881}
2882
2883
2884static void
2885vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2886{
2887        size_t sz, i, ring0_size, ring1_size, comp_size;
2888        if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2889                                    VMXNET3_MAX_ETH_HDR_SIZE) {
2890                adapter->skb_buf_size = adapter->netdev->mtu +
2891                                        VMXNET3_MAX_ETH_HDR_SIZE;
2892                if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2893                        adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2894
2895                adapter->rx_buf_per_pkt = 1;
2896        } else {
2897                adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2898                sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2899                                            VMXNET3_MAX_ETH_HDR_SIZE;
2900                adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2901        }
2902
2903        /*
2904         * for simplicity, force the ring0 size to be a multiple of
2905         * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2906         */
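            /* For illustration (assuming, e.g., rx_buf_per_pkt = 3 and
             * VMXNET3_RING_SIZE_ALIGN = 32): sz = 96, so a requested ring0
             * size of 1000 would be rounded up to 1056.
             */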
2907        sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2908        ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2909        ring0_size = (ring0_size + sz - 1) / sz * sz;
2910        ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2911                           sz * sz);
2912        ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2913        ring1_size = (ring1_size + sz - 1) / sz * sz;
2914        ring1_size = min_t(u32, ring1_size, VMXNET3_RX_RING2_MAX_SIZE /
2915                           sz * sz);
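            /* The completion ring must have room for a completion from every
             * descriptor in both rx rings, hence ring0_size + ring1_size.
             */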
2916        comp_size = ring0_size + ring1_size;
2917
2918        for (i = 0; i < adapter->num_rx_queues; i++) {
2919                struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2920
2921                rq->rx_ring[0].size = ring0_size;
2922                rq->rx_ring[1].size = ring1_size;
2923                rq->comp_ring.size = comp_size;
2924        }
2925}
2926
2927
2928int
2929vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2930                      u32 rx_ring_size, u32 rx_ring2_size,
2931                      u16 txdata_desc_size, u16 rxdata_desc_size)
2932{
2933        int err = 0, i;
2934
2935        for (i = 0; i < adapter->num_tx_queues; i++) {
2936                struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2937                tq->tx_ring.size   = tx_ring_size;
2938                tq->data_ring.size = tx_ring_size;
2939                tq->comp_ring.size = tx_ring_size;
2940                tq->txdata_desc_size = txdata_desc_size;
2941                tq->shared = &adapter->tqd_start[i].ctrl;
2942                tq->stopped = true;
2943                tq->adapter = adapter;
2944                tq->qid = i;
2945                err = vmxnet3_tq_create(tq, adapter);
2946                /*
2947                 * Too late to change num_tx_queues. We cannot make do
2948                 * with fewer queues than we asked for.
2949                 */
2950                if (err)
2951                        goto queue_err;
2952        }
2953
2954        adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2955        adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2956        vmxnet3_adjust_rx_ring_size(adapter);
2957
2958        adapter->rxdataring_enabled = VMXNET3_VERSION_GE_3(adapter);
2959        for (i = 0; i < adapter->num_rx_queues; i++) {
2960                struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2961                /* qid and qid2 for rx queues will be assigned later, when the
2962                 * number of rx queues is finalized after allocating intrs */
2963                rq->shared = &adapter->rqd_start[i].ctrl;
2964                rq->adapter = adapter;
2965                rq->data_ring.desc_size = rxdata_desc_size;
2966                err = vmxnet3_rq_create(rq, adapter);
2967                if (err) {
2968                        if (i == 0) {
2969                                netdev_err(adapter->netdev,
2970                                           "Could not allocate any rx queues. "
2971                                           "Aborting.\n");
2972                                goto queue_err;
2973                        } else {
2974                                netdev_info(adapter->netdev,
2975                                            "Number of rx queues changed "
2976                                            "to : %d.\n", i);
2977                                adapter->num_rx_queues = i;
2978                                err = 0;
2979                                break;
2980                        }
2981                }
2982        }
2983
2984        if (!adapter->rxdataring_enabled)
2985                vmxnet3_rq_destroy_all_rxdataring(adapter);
2986
2987        return err;
2988queue_err:
2989        vmxnet3_tq_destroy_all(adapter);
2990        return err;
2991}
2992
2993static int
2994vmxnet3_open(struct net_device *netdev)
2995{
2996        struct vmxnet3_adapter *adapter;
2997        int err, i;
2998
2999        adapter = netdev_priv(netdev);
3000
3001        for (i = 0; i < adapter->num_tx_queues; i++)
3002                spin_lock_init(&adapter->tx_queue[i].tx_lock);
3003
3004        if (VMXNET3_VERSION_GE_3(adapter)) {
3005                unsigned long flags;
3006                u16 txdata_desc_size;
3007
3008                spin_lock_irqsave(&adapter->cmd_lock, flags);
3009                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3010                                       VMXNET3_CMD_GET_TXDATA_DESC_SIZE);
3011                txdata_desc_size = VMXNET3_READ_BAR1_REG(adapter,
3012                                                         VMXNET3_REG_CMD);
3013                spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3014
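                    /* Fall back to the default data-descriptor size if the device
                     * reports a value that is out of range or misaligned.
                     */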
3015                if ((txdata_desc_size < VMXNET3_TXDATA_DESC_MIN_SIZE) ||
3016                    (txdata_desc_size > VMXNET3_TXDATA_DESC_MAX_SIZE) ||
3017                    (txdata_desc_size & VMXNET3_TXDATA_DESC_SIZE_MASK)) {
3018                        adapter->txdata_desc_size =
3019                                sizeof(struct Vmxnet3_TxDataDesc);
3020                } else {
3021                        adapter->txdata_desc_size = txdata_desc_size;
3022                }
3023        } else {
3024                adapter->txdata_desc_size = sizeof(struct Vmxnet3_TxDataDesc);
3025        }
3026
3027        err = vmxnet3_create_queues(adapter,
3028                                    adapter->tx_ring_size,
3029                                    adapter->rx_ring_size,
3030                                    adapter->rx_ring2_size,
3031                                    adapter->txdata_desc_size,
3032                                    adapter->rxdata_desc_size);
3033        if (err)
3034                goto queue_err;
3035
3036        err = vmxnet3_activate_dev(adapter);
3037        if (err)
3038                goto activate_err;
3039
3040        return 0;
3041
3042activate_err:
3043        vmxnet3_rq_destroy_all(adapter);
3044        vmxnet3_tq_destroy_all(adapter);
3045queue_err:
3046        return err;
3047}
3048
3049
3050static int
3051vmxnet3_close(struct net_device *netdev)
3052{
3053        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3054
3055        /*
3056         * Reset_work may be in the middle of resetting the device, wait for its
3057         * completion.
3058         */
3059        while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
3060                usleep_range(1000, 2000);
3061
3062        vmxnet3_quiesce_dev(adapter);
3063
3064        vmxnet3_rq_destroy_all(adapter);
3065        vmxnet3_tq_destroy_all(adapter);
3066
3067        clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
3068
3069
3070        return 0;
3071}
3072
3073
3074void
3075vmxnet3_force_close(struct vmxnet3_adapter *adapter)
3076{
3077        int i;
3078
3079        /*
3080         * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
3081         * vmxnet3_close() will deadlock.
3082         */
3083        BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
3084
3085        /* we need to enable NAPI, otherwise dev_close will deadlock */
3086        for (i = 0; i < adapter->num_rx_queues; i++)
3087                napi_enable(&adapter->rx_queue[i].napi);
3088        /*
3089         * Need to clear the quiesce bit to ensure that vmxnet3_close
3090         * can quiesce the device properly
3091         */
3092        clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3093        dev_close(adapter->netdev);
3094}
3095
3096
3097static int
3098vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
3099{
3100        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3101        int err = 0;
3102
3103        netdev->mtu = new_mtu;
3104
3105        /*
3106         * Reset_work may be in the middle of resetting the device, wait for its
3107         * completion.
3108         */
3109        while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
3110                usleep_range(1000, 2000);
3111
3112        if (netif_running(netdev)) {
3113                vmxnet3_quiesce_dev(adapter);
3114                vmxnet3_reset_dev(adapter);
3115
3116                /* we need to re-create the rx queue based on the new mtu */
3117                vmxnet3_rq_destroy_all(adapter);
3118                vmxnet3_adjust_rx_ring_size(adapter);
3119                err = vmxnet3_rq_create_all(adapter);
3120                if (err) {
3121                        netdev_err(netdev,
3122                                   "failed to re-create rx queues, "
3123                                   "error %d. Closing it.\n", err);
3124                        goto out;
3125                }
3126
3127                err = vmxnet3_activate_dev(adapter);
3128                if (err) {
3129                        netdev_err(netdev,
3130                                   "failed to re-activate, error %d. "
3131                                   "Closing it.\n", err);
3132                        goto out;
3133                }
3134        }
3135
3136out:
3137        clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
3138        if (err)
3139                vmxnet3_force_close(adapter);
3140
3141        return err;
3142}
3143
3144
3145static void
3146vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
3147{
3148        struct net_device *netdev = adapter->netdev;
3149
3150        netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
3151                NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
3152                NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
3153                NETIF_F_LRO;
3154
3155        if (VMXNET3_VERSION_GE_4(adapter)) {
3156                netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
3157                                NETIF_F_GSO_UDP_TUNNEL_CSUM;
3158
3159                netdev->hw_enc_features = NETIF_F_SG | NETIF_F_RXCSUM |
3160                        NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
3161                        NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
3162                        NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL |
3163                        NETIF_F_GSO_UDP_TUNNEL_CSUM;
3164        }
3165
3166        if (dma64)
3167                netdev->hw_features |= NETIF_F_HIGHDMA;
3168        netdev->vlan_features = netdev->hw_features &
3169                                ~(NETIF_F_HW_VLAN_CTAG_TX |
3170                                  NETIF_F_HW_VLAN_CTAG_RX);
3171        netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
3172}
3173
3174
3175static void
3176vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
3177{
3178        u32 tmp;
3179
3180        tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
3181        *(u32 *)mac = tmp;
3182
3183        tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
3184        mac[4] = tmp & 0xff;
3185        mac[5] = (tmp >> 8) & 0xff;
3186}
3187
3188#ifdef CONFIG_PCI_MSI
3189
3190/*
3191 * Enable MSI-X vectors.
3192 * Returns:
3193 *      VMXNET3_LINUX_MIN_MSIX_VECT when only the minimum number of
3194 *       required vectors could be enabled,
3195 *      the number of vectors enabled otherwise (greater than
3196 *       VMXNET3_LINUX_MIN_MSIX_VECT), or a negative error code on failure.
3197 */
3198
3199static int
3200vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter, int nvec)
3201{
3202        int ret = pci_enable_msix_range(adapter->pdev,
3203                                        adapter->intr.msix_entries, nvec, nvec);
3204
3205        if (ret == -ENOSPC && nvec > VMXNET3_LINUX_MIN_MSIX_VECT) {
3206                dev_err(&adapter->netdev->dev,
3207                        "Failed to enable %d MSI-X, trying %d\n",
3208                        nvec, VMXNET3_LINUX_MIN_MSIX_VECT);
3209
3210                ret = pci_enable_msix_range(adapter->pdev,
3211                                            adapter->intr.msix_entries,
3212                                            VMXNET3_LINUX_MIN_MSIX_VECT,
3213                                            VMXNET3_LINUX_MIN_MSIX_VECT);
3214        }
3215
3216        if (ret < 0) {
3217                dev_err(&adapter->netdev->dev,
3218                        "Failed to enable MSI-X, error: %d\n", ret);
3219        }
3220
3221        return ret;
3222}
3223
3224
3225#endif /* CONFIG_PCI_MSI */
3226
3227static void
3228vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
3229{
3230        u32 cfg;
3231        unsigned long flags;
3232
3233        /* intr settings */
3234        spin_lock_irqsave(&adapter->cmd_lock, flags);
3235        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3236                               VMXNET3_CMD_GET_CONF_INTR);
3237        cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
3238        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3239        adapter->intr.type = cfg & 0x3;
3240        adapter->intr.mask_mode = (cfg >> 2) & 0x3;
3241
3242        if (adapter->intr.type == VMXNET3_IT_AUTO) {
3243                adapter->intr.type = VMXNET3_IT_MSIX;
3244        }
3245
3246#ifdef CONFIG_PCI_MSI
3247        if (adapter->intr.type == VMXNET3_IT_MSIX) {
3248                int i, nvec;
3249
3250                nvec  = adapter->share_intr == VMXNET3_INTR_TXSHARE ?
3251                        1 : adapter->num_tx_queues;
3252                nvec += adapter->share_intr == VMXNET3_INTR_BUDDYSHARE ?
3253                        0 : adapter->num_rx_queues;
3254                nvec += 1;      /* for link event */
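                    /* e.g. with 4 tx and 4 rx queues and VMXNET3_INTR_BUDDYSHARE
                     * (each tx/rx pair shares a vector): nvec = 4 + 0 + 1 = 5.
                     */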
3255                nvec = nvec > VMXNET3_LINUX_MIN_MSIX_VECT ?
3256                       nvec : VMXNET3_LINUX_MIN_MSIX_VECT;
3257
3258                for (i = 0; i < nvec; i++)
3259                        adapter->intr.msix_entries[i].entry = i;
3260
3261                nvec = vmxnet3_acquire_msix_vectors(adapter, nvec);
3262                if (nvec < 0)
3263                        goto msix_err;
3264
3265                /* If we cannot allocate one MSIx vector per queue
3266                 * then limit the number of rx queues to 1
3267                 */
3268                if (nvec == VMXNET3_LINUX_MIN_MSIX_VECT) {
3269                        if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
3270                            || adapter->num_rx_queues != 1) {
3271                                adapter->share_intr = VMXNET3_INTR_TXSHARE;
3272                                netdev_err(adapter->netdev,
3273                                           "Number of rx queues : 1\n");
3274                                adapter->num_rx_queues = 1;
3275                        }
3276                }
3277
3278                adapter->intr.num_intrs = nvec;
3279                return;
3280
3281msix_err:
3282                /* If we cannot allocate MSIx vectors use only one rx queue */
3283                dev_info(&adapter->pdev->dev,
3284                         "Failed to enable MSI-X, error %d. "
3285                         "Limiting #rx queues to 1, trying MSI.\n", nvec);
3286
3287                adapter->intr.type = VMXNET3_IT_MSI;
3288        }
3289
3290        if (adapter->intr.type == VMXNET3_IT_MSI) {
3291                if (!pci_enable_msi(adapter->pdev)) {
3292                        adapter->num_rx_queues = 1;
3293                        adapter->intr.num_intrs = 1;
3294                        return;
3295                }
3296        }
3297#endif /* CONFIG_PCI_MSI */
3298
3299        adapter->num_rx_queues = 1;
3300        dev_info(&adapter->netdev->dev,
3301                 "Using INTx interrupt, #Rx queues: 1.\n");
3302        adapter->intr.type = VMXNET3_IT_INTX;
3303
3304        /* INT-X related setting */
3305        adapter->intr.num_intrs = 1;
3306}
3307
3308
3309static void
3310vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
3311{
3312        if (adapter->intr.type == VMXNET3_IT_MSIX)
3313                pci_disable_msix(adapter->pdev);
3314        else if (adapter->intr.type == VMXNET3_IT_MSI)
3315                pci_disable_msi(adapter->pdev);
3316        else
3317                BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
3318}
3319
3320
3321static void
3322vmxnet3_tx_timeout(struct net_device *netdev, unsigned int txqueue)
3323{
3324        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3325        adapter->tx_timeout_count++;
3326
3327        netdev_err(adapter->netdev, "tx hang\n");
3328        schedule_work(&adapter->work);
3329}
3330
3331
3332static void
3333vmxnet3_reset_work(struct work_struct *data)
3334{
3335        struct vmxnet3_adapter *adapter;
3336
3337        adapter = container_of(data, struct vmxnet3_adapter, work);
3338
3339        /* if another thread is resetting the device, no need to proceed */
3340        if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
3341                return;
3342
3343        /* if the device is closed, we must leave it alone */
3344        rtnl_lock();
3345        if (netif_running(adapter->netdev)) {
3346                netdev_notice(adapter->netdev, "resetting\n");
3347                vmxnet3_quiesce_dev(adapter);
3348                vmxnet3_reset_dev(adapter);
3349                vmxnet3_activate_dev(adapter);
3350        } else {
3351                netdev_info(adapter->netdev, "already closed\n");
3352        }
3353        rtnl_unlock();
3354
3355        netif_wake_queue(adapter->netdev);
3356        clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
3357}
3358
3359
3360static int
3361vmxnet3_probe_device(struct pci_dev *pdev,
3362                     const struct pci_device_id *id)
3363{
3364        static const struct net_device_ops vmxnet3_netdev_ops = {
3365                .ndo_open = vmxnet3_open,
3366                .ndo_stop = vmxnet3_close,
3367                .ndo_start_xmit = vmxnet3_xmit_frame,
3368                .ndo_set_mac_address = vmxnet3_set_mac_addr,
3369                .ndo_change_mtu = vmxnet3_change_mtu,
3370                .ndo_fix_features = vmxnet3_fix_features,
3371                .ndo_set_features = vmxnet3_set_features,
3372                .ndo_features_check = vmxnet3_features_check,
3373                .ndo_get_stats64 = vmxnet3_get_stats64,
3374                .ndo_tx_timeout = vmxnet3_tx_timeout,
3375                .ndo_set_rx_mode = vmxnet3_set_mc,
3376                .ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
3377                .ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
3378#ifdef CONFIG_NET_POLL_CONTROLLER
3379                .ndo_poll_controller = vmxnet3_netpoll,
3380#endif
3381        };
3382        int err;
3383        bool dma64;
3384        u32 ver;
3385        struct net_device *netdev;
3386        struct vmxnet3_adapter *adapter;
3387        u8 mac[ETH_ALEN];
3388        int size;
3389        int num_tx_queues;
3390        int num_rx_queues;
3391
3392        if (!pci_msi_enabled())
3393                enable_mq = 0;
3394
3395#ifdef VMXNET3_RSS
3396        if (enable_mq)
3397                num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3398                                    (int)num_online_cpus());
3399        else
3400#endif
3401                num_rx_queues = 1;
3402        num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3403
3404        if (enable_mq)
3405                num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
3406                                    (int)num_online_cpus());
3407        else
3408                num_tx_queues = 1;
3409
3410        num_tx_queues = rounddown_pow_of_two(num_tx_queues);
3411        netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
3412                                   max(num_tx_queues, num_rx_queues));
3413        dev_info(&pdev->dev,
3414                 "# of Tx queues : %d, # of Rx queues : %d\n",
3415                 num_tx_queues, num_rx_queues);
3416
3417        if (!netdev)
3418                return -ENOMEM;
3419
3420        pci_set_drvdata(pdev, netdev);
3421        adapter = netdev_priv(netdev);
3422        adapter->netdev = netdev;
3423        adapter->pdev = pdev;
3424
3425        adapter->tx_ring_size = VMXNET3_DEF_TX_RING_SIZE;
3426        adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE;
3427        adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE;
3428
3429        if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
3430                if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
3431                        dev_err(&pdev->dev,
3432                                "pci_set_consistent_dma_mask failed\n");
3433                        err = -EIO;
3434                        goto err_set_mask;
3435                }
3436                dma64 = true;
3437        } else {
3438                if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
3439                        dev_err(&pdev->dev,
3440                                "pci_set_dma_mask failed\n");
3441                        err = -EIO;
3442                        goto err_set_mask;
3443                }
3444                dma64 = false;
3445        }
3446
3447        spin_lock_init(&adapter->cmd_lock);
3448        adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
3449                                             sizeof(struct vmxnet3_adapter),
3450                                             PCI_DMA_TODEVICE);
3451        if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) {
3452                dev_err(&pdev->dev, "Failed to map dma\n");
3453                err = -EFAULT;
3454                goto err_set_mask;
3455        }
3456        adapter->shared = dma_alloc_coherent(
3457                                &adapter->pdev->dev,
3458                                sizeof(struct Vmxnet3_DriverShared),
3459                                &adapter->shared_pa, GFP_KERNEL);
3460        if (!adapter->shared) {
3461                dev_err(&pdev->dev, "Failed to allocate memory\n");
3462                err = -ENOMEM;
3463                goto err_alloc_shared;
3464        }
3465
3466        adapter->num_rx_queues = num_rx_queues;
3467        adapter->num_tx_queues = num_tx_queues;
3468        adapter->rx_buf_per_pkt = 1;
3469
3470        size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3471        size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
3472        adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size,
3473                                                &adapter->queue_desc_pa,
3474                                                GFP_KERNEL);
3475
3476        if (!adapter->tqd_start) {
3477                dev_err(&pdev->dev, "Failed to allocate memory\n");
3478                err = -ENOMEM;
3479                goto err_alloc_queue_desc;
3480        }
3481        adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
3482                                                            adapter->num_tx_queues);
3483
3484        adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev,
3485                                              sizeof(struct Vmxnet3_PMConf),
3486                                              &adapter->pm_conf_pa,
3487                                              GFP_KERNEL);
3488        if (adapter->pm_conf == NULL) {
3489                err = -ENOMEM;
3490                goto err_alloc_pm;
3491        }
3492
3493#ifdef VMXNET3_RSS
3494
3495        adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev,
3496                                               sizeof(struct UPT1_RSSConf),
3497                                               &adapter->rss_conf_pa,
3498                                               GFP_KERNEL);
3499        if (adapter->rss_conf == NULL) {
3500                err = -ENOMEM;
3501                goto err_alloc_rss;
3502        }
3503#endif /* VMXNET3_RSS */
3504
3505        err = vmxnet3_alloc_pci_resources(adapter);
3506        if (err < 0)
3507                goto err_alloc_pci;
3508
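            /* Negotiate the device revision: VRRS advertises the revisions the
             * device supports, and the driver acknowledges the highest revision
             * it also supports by writing that single bit back.
             */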
3509        ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
3510        if (ver & (1 << VMXNET3_REV_4)) {
3511                VMXNET3_WRITE_BAR1_REG(adapter,
3512                                       VMXNET3_REG_VRRS,
3513                                       1 << VMXNET3_REV_4);
3514                adapter->version = VMXNET3_REV_4 + 1;
3515        } else if (ver & (1 << VMXNET3_REV_3)) {
3516                VMXNET3_WRITE_BAR1_REG(adapter,
3517                                       VMXNET3_REG_VRRS,
3518                                       1 << VMXNET3_REV_3);
3519                adapter->version = VMXNET3_REV_3 + 1;
3520        } else if (ver & (1 << VMXNET3_REV_2)) {
3521                VMXNET3_WRITE_BAR1_REG(adapter,
3522                                       VMXNET3_REG_VRRS,
3523                                       1 << VMXNET3_REV_2);
3524                adapter->version = VMXNET3_REV_2 + 1;
3525        } else if (ver & (1 << VMXNET3_REV_1)) {
3526                VMXNET3_WRITE_BAR1_REG(adapter,
3527                                       VMXNET3_REG_VRRS,
3528                                       1 << VMXNET3_REV_1);
3529                adapter->version = VMXNET3_REV_1 + 1;
3530        } else {
3531                dev_err(&pdev->dev,
3532                        "Incompatible h/w version (0x%x) for adapter\n", ver);
3533                err = -EBUSY;
3534                goto err_ver;
3535        }
3536        dev_dbg(&pdev->dev, "Using device version %d\n", adapter->version);
3537
3538        ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
3539        if (ver & 1) {
3540                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
3541        } else {
3542                dev_err(&pdev->dev,
3543                        "Incompatible upt version (0x%x) for adapter\n", ver);
3544                err = -EBUSY;
3545                goto err_ver;
3546        }
3547
3548        if (VMXNET3_VERSION_GE_3(adapter)) {
3549                adapter->coal_conf =
3550                        dma_alloc_coherent(&adapter->pdev->dev,
3551                                           sizeof(struct Vmxnet3_CoalesceScheme),
3553                                           &adapter->coal_conf_pa,
3554                                           GFP_KERNEL);
3555                if (!adapter->coal_conf) {
3556                        err = -ENOMEM;
3557                        goto err_ver;
3558                }
3559                adapter->coal_conf->coalMode = VMXNET3_COALESCE_DISABLED;
3560                adapter->default_coal_mode = true;
3561        }
3562
3563        if (VMXNET3_VERSION_GE_4(adapter)) {
3564                adapter->default_rss_fields = true;
3565                adapter->rss_fields = VMXNET3_RSS_FIELDS_DEFAULT;
3566        }
3567
3568        SET_NETDEV_DEV(netdev, &pdev->dev);
3569        vmxnet3_declare_features(adapter, dma64);
3570
3571        adapter->rxdata_desc_size = VMXNET3_VERSION_GE_3(adapter) ?
3572                VMXNET3_DEF_RXDATA_DESC_SIZE : 0;
3573
3574        if (adapter->num_tx_queues == adapter->num_rx_queues)
3575                adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
3576        else
3577                adapter->share_intr = VMXNET3_INTR_DONTSHARE;
3578
3579        vmxnet3_alloc_intr_resources(adapter);
3580
3581#ifdef VMXNET3_RSS
3582        if (adapter->num_rx_queues > 1 &&
3583            adapter->intr.type == VMXNET3_IT_MSIX) {
3584                adapter->rss = true;
3585                netdev->hw_features |= NETIF_F_RXHASH;
3586                netdev->features |= NETIF_F_RXHASH;
3587                dev_dbg(&pdev->dev, "RSS is enabled.\n");
3588        } else {
3589                adapter->rss = false;
3590        }
3591#endif
3592
3593        vmxnet3_read_mac_addr(adapter, mac);
3594        memcpy(netdev->dev_addr,  mac, netdev->addr_len);
3595
3596        netdev->netdev_ops = &vmxnet3_netdev_ops;
3597        vmxnet3_set_ethtool_ops(netdev);
3598        netdev->watchdog_timeo = 5 * HZ;
3599
3600        /* MTU range: 60 - 9000 */
3601        netdev->min_mtu = VMXNET3_MIN_MTU;
3602        netdev->max_mtu = VMXNET3_MAX_MTU;
3603
3604        INIT_WORK(&adapter->work, vmxnet3_reset_work);
3605        set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3606
3607        if (adapter->intr.type == VMXNET3_IT_MSIX) {
3608                int i;
3609                for (i = 0; i < adapter->num_rx_queues; i++) {
3610                        netif_napi_add(adapter->netdev,
3611                                       &adapter->rx_queue[i].napi,
3612                                       vmxnet3_poll_rx_only, 64);
3613                }
3614        } else {
3615                netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
3616                               vmxnet3_poll, 64);
3617        }
3618
3619        netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
3620        netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
3621
3622        netif_carrier_off(netdev);
3623        err = register_netdev(netdev);
3624
3625        if (err) {
3626                dev_err(&pdev->dev, "Failed to register adapter\n");
3627                goto err_register;
3628        }
3629
3630        vmxnet3_check_link(adapter, false);
3631        return 0;
3632
3633err_register:
3634        if (VMXNET3_VERSION_GE_3(adapter)) {
3635                dma_free_coherent(&adapter->pdev->dev,
3636                                  sizeof(struct Vmxnet3_CoalesceScheme),
3637                                  adapter->coal_conf, adapter->coal_conf_pa);
3638        }
3639        vmxnet3_free_intr_resources(adapter);
3640err_ver:
3641        vmxnet3_free_pci_resources(adapter);
3642err_alloc_pci:
3643#ifdef VMXNET3_RSS
3644        dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3645                          adapter->rss_conf, adapter->rss_conf_pa);
3646err_alloc_rss:
3647#endif
3648        dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3649                          adapter->pm_conf, adapter->pm_conf_pa);
3650err_alloc_pm:
3651        dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3652                          adapter->queue_desc_pa);
3653err_alloc_queue_desc:
3654        dma_free_coherent(&adapter->pdev->dev,
3655                          sizeof(struct Vmxnet3_DriverShared),
3656                          adapter->shared, adapter->shared_pa);
3657err_alloc_shared:
3658        dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3659                         sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3660err_set_mask:
3661        free_netdev(netdev);
3662        return err;
3663}
3664
3665
3666static void
3667vmxnet3_remove_device(struct pci_dev *pdev)
3668{
3669        struct net_device *netdev = pci_get_drvdata(pdev);
3670        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3671        int size = 0;
3672        int num_rx_queues;
3673
3674#ifdef VMXNET3_RSS
3675        if (enable_mq)
3676                num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3677                                    (int)num_online_cpus());
3678        else
3679#endif
3680                num_rx_queues = 1;
3681        num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3682
3683        cancel_work_sync(&adapter->work);
3684
3685        unregister_netdev(netdev);
3686
3687        vmxnet3_free_intr_resources(adapter);
3688        vmxnet3_free_pci_resources(adapter);
3689        if (VMXNET3_VERSION_GE_3(adapter)) {
3690                dma_free_coherent(&adapter->pdev->dev,
3691                                  sizeof(struct Vmxnet3_CoalesceScheme),
3692                                  adapter->coal_conf, adapter->coal_conf_pa);
3693        }
3694#ifdef VMXNET3_RSS
3695        dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3696                          adapter->rss_conf, adapter->rss_conf_pa);
3697#endif
3698        dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3699                          adapter->pm_conf, adapter->pm_conf_pa);
3700
3701        size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3702        size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3703        dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3704                          adapter->queue_desc_pa);
3705        dma_free_coherent(&adapter->pdev->dev,
3706                          sizeof(struct Vmxnet3_DriverShared),
3707                          adapter->shared, adapter->shared_pa);
3708        dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3709                         sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3710        free_netdev(netdev);
3711}
3712
3713static void vmxnet3_shutdown_device(struct pci_dev *pdev)
3714{
3715        struct net_device *netdev = pci_get_drvdata(pdev);
3716        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3717        unsigned long flags;
3718
3719        /* Reset_work may be in the middle of resetting the device, wait for its
3720         * completion.
3721         */
3722        while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
3723                usleep_range(1000, 2000);
3724
3725        if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED,
3726                             &adapter->state)) {
3727                clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
3728                return;
3729        }
3730        spin_lock_irqsave(&adapter->cmd_lock, flags);
3731        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3732                               VMXNET3_CMD_QUIESCE_DEV);
3733        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3734        vmxnet3_disable_all_intrs(adapter);
3735
3736        clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
3737}
3738
3739
3740#ifdef CONFIG_PM
3741
3742static int
3743vmxnet3_suspend(struct device *device)
3744{
3745        struct pci_dev *pdev = to_pci_dev(device);
3746        struct net_device *netdev = pci_get_drvdata(pdev);
3747        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3748        struct Vmxnet3_PMConf *pmConf;
3749        struct ethhdr *ehdr;
3750        struct arphdr *ahdr;
3751        u8 *arpreq;
3752        struct in_device *in_dev;
3753        struct in_ifaddr *ifa;
3754        unsigned long flags;
3755        int i = 0;
3756
3757        if (!netif_running(netdev))
3758                return 0;
3759
3760        for (i = 0; i < adapter->num_rx_queues; i++)
3761                napi_disable(&adapter->rx_queue[i].napi);
3762
3763        vmxnet3_disable_all_intrs(adapter);
3764        vmxnet3_free_irqs(adapter);
3765        vmxnet3_free_intr_resources(adapter);
3766
3767        netif_device_detach(netdev);
3768        netif_tx_stop_all_queues(netdev);
3769
3770        /* Create wake-up filters. */
3771        pmConf = adapter->pm_conf;
3772        memset(pmConf, 0, sizeof(*pmConf));
3773
3774        if (adapter->wol & WAKE_UCAST) {
3775                pmConf->filters[i].patternSize = ETH_ALEN;
3776                pmConf->filters[i].maskSize = 1;
3777                memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3778                pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
3779
3780                pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3781                i++;
3782        }
3783
3784        if (adapter->wol & WAKE_ARP) {
3785                rcu_read_lock();
3786
3787                in_dev = __in_dev_get_rcu(netdev);
3788                if (!in_dev) {
3789                        rcu_read_unlock();
3790                        goto skip_arp;
3791                }
3792
3793                ifa = rcu_dereference(in_dev->ifa_list);
3794                if (!ifa) {
3795                        rcu_read_unlock();
3796                        goto skip_arp;
3797                }
3798
3799                pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header*/
3800                        sizeof(struct arphdr) +         /* ARP header */
3801                        2 * ETH_ALEN +          /* 2 Ethernet addresses*/
3802                        2 * sizeof(u32);        /*2 IPv4 addresses */
3803                pmConf->filters[i].maskSize =
3804                        (pmConf->filters[i].patternSize - 1) / 8 + 1;
3805
3806                /* ETH_P_ARP in Ethernet header. */
3807                ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3808                ehdr->h_proto = htons(ETH_P_ARP);
3809
3810                /* ARPOP_REQUEST in ARP header. */
3811                ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3812                ahdr->ar_op = htons(ARPOP_REQUEST);
3813                arpreq = (u8 *)(ahdr + 1);
3814
3815                /* The Unicast IPv4 address in 'tip' field. */
3816                arpreq += 2 * ETH_ALEN + sizeof(u32);
3817                *(__be32 *)arpreq = ifa->ifa_address;
3818
3819                rcu_read_unlock();
3820
3821                /* The mask for the relevant bits. */
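                    /* Each mask bit enables one byte of the pattern: mask[1] = 0x30
                     * covers bytes 12-13 (the EtherType), mask[2] = 0x30 covers
                     * bytes 20-21 (the ARP opcode), and mask[4]/mask[5] cover
                     * bytes 38-41 (the target IP address).
                     */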
3822                pmConf->filters[i].mask[0] = 0x00;
3823                pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3824                pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3825                pmConf->filters[i].mask[3] = 0x00;
3826                pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3827                pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3828
3829                pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3830                i++;
3831        }
3832
3833skip_arp:
3834        if (adapter->wol & WAKE_MAGIC)
3835                pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3836
3837        pmConf->numFilters = i;
3838
3839        adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3840        adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3841                                                                  *pmConf));
3842        adapter->shared->devRead.pmConfDesc.confPA =
3843                cpu_to_le64(adapter->pm_conf_pa);
3844
3845        spin_lock_irqsave(&adapter->cmd_lock, flags);
3846        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3847                               VMXNET3_CMD_UPDATE_PMCFG);
3848        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3849
3850        pci_save_state(pdev);
3851        pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3852                        adapter->wol);
3853        pci_disable_device(pdev);
3854        pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3855
3856        return 0;
3857}
3858
3859
3860static int
3861vmxnet3_resume(struct device *device)
3862{
3863        int err;
3864        unsigned long flags;
3865        struct pci_dev *pdev = to_pci_dev(device);
3866        struct net_device *netdev = pci_get_drvdata(pdev);
3867        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3868
3869        if (!netif_running(netdev))
3870                return 0;
3871
3872        pci_set_power_state(pdev, PCI_D0);
3873        pci_restore_state(pdev);
3874        err = pci_enable_device_mem(pdev);
3875        if (err != 0)
3876                return err;
3877
3878        pci_enable_wake(pdev, PCI_D0, 0);
3879
3880        vmxnet3_alloc_intr_resources(adapter);
3881
3882        /* During hibernate and suspend, the device has to be reinitialized,
3883         * as its state is not guaranteed to be preserved.
3884         */
3885
3886        /* No need to check the adapter state here: other reset tasks cannot
3887         * run during device resume.
3888         */
3889        spin_lock_irqsave(&adapter->cmd_lock, flags);
3890        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3891                               VMXNET3_CMD_QUIESCE_DEV);
3892        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3893        vmxnet3_tq_cleanup_all(adapter);
3894        vmxnet3_rq_cleanup_all(adapter);
3895
3896        vmxnet3_reset_dev(adapter);
3897        err = vmxnet3_activate_dev(adapter);
3898        if (err != 0) {
3899                netdev_err(netdev,
3900                           "failed to re-activate on resume, error: %d\n", err);
3901                vmxnet3_force_close(adapter);
3902                return err;
3903        }
3904        netif_device_attach(netdev);
3905
3906        return 0;
3907}
3908
3909static const struct dev_pm_ops vmxnet3_pm_ops = {
3910        .suspend = vmxnet3_suspend,
3911        .resume = vmxnet3_resume,
3912        .freeze = vmxnet3_suspend,
3913        .restore = vmxnet3_resume,
3914};
3915#endif
3916
3917static struct pci_driver vmxnet3_driver = {
3918        .name           = vmxnet3_driver_name,
3919        .id_table       = vmxnet3_pciid_table,
3920        .probe          = vmxnet3_probe_device,
3921        .remove         = vmxnet3_remove_device,
3922        .shutdown       = vmxnet3_shutdown_device,
3923#ifdef CONFIG_PM
3924        .driver.pm      = &vmxnet3_pm_ops,
3925#endif
3926};
3927
3928
3929static int __init
3930vmxnet3_init_module(void)
3931{
3932        pr_info("%s - version %s\n", VMXNET3_DRIVER_DESC,
3933                VMXNET3_DRIVER_VERSION_REPORT);
3934        return pci_register_driver(&vmxnet3_driver);
3935}
3936
3937module_init(vmxnet3_init_module);
3938
3939
3940static void
3941vmxnet3_exit_module(void)
3942{
3943        pci_unregister_driver(&vmxnet3_driver);
3944}
3945
3946module_exit(vmxnet3_exit_module);
3947
3948MODULE_AUTHOR("VMware, Inc.");
3949MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3950MODULE_LICENSE("GPL v2");
3951MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);
3952