linux/drivers/net/vmxnet3/vmxnet3_drv.c
   1/*
   2 * Linux driver for VMware's vmxnet3 ethernet NIC.
   3 *
   4 * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License as published by the
   8 * Free Software Foundation; version 2 of the License and no later version.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  13 * NON INFRINGEMENT. See the GNU General Public License for more
  14 * details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 *
  20 * The full GNU General Public License is included in this distribution in
  21 * the file called "COPYING".
  22 *
  23 * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com>
  24 *
  25 */
  26
  27#include <linux/module.h>
  28#include <net/ip6_checksum.h>
  29
  30#include "vmxnet3_int.h"
  31
  32char vmxnet3_driver_name[] = "vmxnet3";
  33#define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver"
  34
  35/*
  36 * PCI Device ID Table
  37 * Last entry must be all 0s
  38 */
  39static DEFINE_PCI_DEVICE_TABLE(vmxnet3_pciid_table) = {
  40        {PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
  41        {0}
  42};
  43
  44MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
  45
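/*
 * enable_mq selects whether the driver attempts to bring up multiple
 * tx/rx queues; it is consulted later in the device probe path (not
 * shown in this excerpt).
 */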
  46static int enable_mq = 1;
  47
  48static void
  49vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);
  50
  51/*
  52 *    Enable/Disable the given intr
  53 */
  54static void
  55vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
  56{
  57        VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0);
  58}
  59
  60
  61static void
  62vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx)
  63{
  64        VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1);
  65}
  66
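/*
 * The per-vector IMR registers live 8 bytes apart in BAR0; writing 0
 * unmasks the corresponding interrupt and writing 1 masks it, which is
 * all the two helpers above do.
 */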
  67
  68/*
  69 *    Enable/Disable all intrs used by the device
  70 */
  71static void
  72vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
  73{
  74        int i;
  75
  76        for (i = 0; i < adapter->intr.num_intrs; i++)
  77                vmxnet3_enable_intr(adapter, i);
  78        adapter->shared->devRead.intrConf.intrCtrl &=
  79                                        cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
  80}
  81
  82
  83static void
  84vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
  85{
  86        int i;
  87
  88        adapter->shared->devRead.intrConf.intrCtrl |=
  89                                        cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
  90        for (i = 0; i < adapter->intr.num_intrs; i++)
  91                vmxnet3_disable_intr(adapter, i);
  92}
  93
  94
  95static void
  96vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
  97{
  98        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events);
  99}
 100
 101
 102static bool
 103vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 104{
 105        return tq->stopped;
 106}
 107
 108
 109static void
 110vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 111{
 112        tq->stopped = false;
 113        netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
 114}
 115
 116
 117static void
 118vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 119{
 120        tq->stopped = false;
 121        netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
 122}
 123
 124
 125static void
 126vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 127{
 128        tq->stopped = true;
 129        tq->num_stop++;
 130        netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
 131}
 132
 133
 134/*
 135 * Check the link state. This may start or stop the tx queue.
 136 */
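/*
 * GET_LINK returns the link state in bit 0 of the command register and
 * the link speed, in Mbps, in the upper 16 bits; both are decoded below.
 */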
 137static void
 138vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
 139{
 140        u32 ret;
 141        int i;
 142        unsigned long flags;
 143
 144        spin_lock_irqsave(&adapter->cmd_lock, flags);
 145        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
 146        ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
 147        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
 148
 149        adapter->link_speed = ret >> 16;
 150        if (ret & 1) { /* Link is up. */
 151                netdev_info(adapter->netdev, "NIC Link is Up %d Mbps\n",
 152                            adapter->link_speed);
 153                netif_carrier_on(adapter->netdev);
 154
 155                if (affectTxQueue) {
 156                        for (i = 0; i < adapter->num_tx_queues; i++)
 157                                vmxnet3_tq_start(&adapter->tx_queue[i],
 158                                                 adapter);
 159                }
 160        } else {
 161                netdev_info(adapter->netdev, "NIC Link is Down\n");
 162                netif_carrier_off(adapter->netdev);
 163
 164                if (affectTxQueue) {
 165                        for (i = 0; i < adapter->num_tx_queues; i++)
 166                                vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
 167                }
 168        }
 169}
 170
 171static void
 172vmxnet3_process_events(struct vmxnet3_adapter *adapter)
 173{
 174        int i;
 175        unsigned long flags;
 176        u32 events = le32_to_cpu(adapter->shared->ecr);
 177        if (!events)
 178                return;
 179
 180        vmxnet3_ack_events(adapter, events);
 181
 182        /* Check if link state has changed */
 183        if (events & VMXNET3_ECR_LINK)
 184                vmxnet3_check_link(adapter, true);
 185
 186        /* Check if there is an error on xmit/recv queues */
 187        if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
 188                spin_lock_irqsave(&adapter->cmd_lock, flags);
 189                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
 190                                       VMXNET3_CMD_GET_QUEUE_STATUS);
 191                spin_unlock_irqrestore(&adapter->cmd_lock, flags);
 192
 193                for (i = 0; i < adapter->num_tx_queues; i++)
 194                        if (adapter->tqd_start[i].status.stopped)
 195                                dev_err(&adapter->netdev->dev,
 196                                        "%s: tq[%d] error 0x%x\n",
 197                                        adapter->netdev->name, i, le32_to_cpu(
 198                                        adapter->tqd_start[i].status.error));
 199                for (i = 0; i < adapter->num_rx_queues; i++)
 200                        if (adapter->rqd_start[i].status.stopped)
 201                                dev_err(&adapter->netdev->dev,
 202                                        "%s: rq[%d] error 0x%x\n",
 203                                        adapter->netdev->name, i,
 204                                        adapter->rqd_start[i].status.error);
 205
 206                schedule_work(&adapter->work);
 207        }
 208}
 209
 210#ifdef __BIG_ENDIAN_BITFIELD
 211/*
 212 * The device expects the bitfields in shared structures to be written in
 213 * little endian. When the CPU is big endian, the following routines are
 214 * used to read from and write to the ABI correctly.
 215 * The general technique used here is: double-word bitfields are defined in
 216 * the opposite order for big endian architectures. Before the driver reads
 217 * them, the complete double word is translated using le32_to_cpu. Similarly,
 218 * after the driver writes into the bitfields, cpu_to_le32 translates the
 219 * double words back into the required format.
 220 * To avoid touching bits in the shared structures more than once, temporary
 221 * descriptors are used. These are passed as srcDesc to the functions below.
 222 */
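/*
 * A minimal sketch of the idea, assuming the generic descriptor layout used
 * below: the dword holding the bitfields is converted as a whole,
 *
 *     u32 dw2 = le32_to_cpu(((const __le32 *)txd)[2]);
 *
 * the fields are then extracted from (or composed into) dw2, and a single
 * cpu_to_le32() writes it back, so no bit in the shared ring is touched
 * twice.
 */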
 223static void vmxnet3_RxDescToCPU(const struct Vmxnet3_RxDesc *srcDesc,
 224                                struct Vmxnet3_RxDesc *dstDesc)
 225{
 226        u32 *src = (u32 *)srcDesc + 2;
 227        u32 *dst = (u32 *)dstDesc + 2;
 228        dstDesc->addr = le64_to_cpu(srcDesc->addr);
 229        *dst = le32_to_cpu(*src);
 230        dstDesc->ext1 = le32_to_cpu(srcDesc->ext1);
 231}
 232
 233static void vmxnet3_TxDescToLe(const struct Vmxnet3_TxDesc *srcDesc,
 234                               struct Vmxnet3_TxDesc *dstDesc)
 235{
 236        int i;
 237        u32 *src = (u32 *)(srcDesc + 1);
 238        u32 *dst = (u32 *)(dstDesc + 1);
 239
 240        /* Working backwards so that the gen bit is set at the end. */
 241        for (i = 2; i > 0; i--) {
 242                src--;
 243                dst--;
 244                *dst = cpu_to_le32(*src);
 245        }
 246}
 247
 248
 249static void vmxnet3_RxCompToCPU(const struct Vmxnet3_RxCompDesc *srcDesc,
 250                                struct Vmxnet3_RxCompDesc *dstDesc)
 251{
 252        int i = 0;
 253        u32 *src = (u32 *)srcDesc;
 254        u32 *dst = (u32 *)dstDesc;
 255        for (i = 0; i < sizeof(struct Vmxnet3_RxCompDesc) / sizeof(u32); i++) {
 256                *dst = le32_to_cpu(*src);
 257                src++;
 258                dst++;
 259        }
 260}
 261
 262
 263/* Used to read bitfield values from double words. */
 264static u32 get_bitfield32(const __le32 *bitfield, u32 pos, u32 size)
 265{
 266        u32 temp = le32_to_cpu(*bitfield);
 267        u32 mask = ((1 << size) - 1) << pos;
 268        temp &= mask;
 269        temp >>= pos;
 270        return temp;
 271}
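
/*
 * The accessor macros below call this with (pos, size) pairs such as
 * (VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE) to pull a single flag,
 * e.g. the descriptor generation bit, out of an already byte-swapped dword.
 */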
 272
 273
 274
 275#endif  /* __BIG_ENDIAN_BITFIELD */
 276
 277#ifdef __BIG_ENDIAN_BITFIELD
 278
 279#   define VMXNET3_TXDESC_GET_GEN(txdesc) get_bitfield32(((const __le32 *) \
 280                        txdesc) + VMXNET3_TXD_GEN_DWORD_SHIFT, \
 281                        VMXNET3_TXD_GEN_SHIFT, VMXNET3_TXD_GEN_SIZE)
 282#   define VMXNET3_TXDESC_GET_EOP(txdesc) get_bitfield32(((const __le32 *) \
 283                        txdesc) + VMXNET3_TXD_EOP_DWORD_SHIFT, \
 284                        VMXNET3_TXD_EOP_SHIFT, VMXNET3_TXD_EOP_SIZE)
 285#   define VMXNET3_TCD_GET_GEN(tcd) get_bitfield32(((const __le32 *)tcd) + \
 286                        VMXNET3_TCD_GEN_DWORD_SHIFT, VMXNET3_TCD_GEN_SHIFT, \
 287                        VMXNET3_TCD_GEN_SIZE)
 288#   define VMXNET3_TCD_GET_TXIDX(tcd) get_bitfield32((const __le32 *)tcd, \
 289                        VMXNET3_TCD_TXIDX_SHIFT, VMXNET3_TCD_TXIDX_SIZE)
 290#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) do { \
 291                        (dstrcd) = (tmp); \
 292                        vmxnet3_RxCompToCPU((rcd), (tmp)); \
 293                } while (0)
 294#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) do { \
 295                        (dstrxd) = (tmp); \
 296                        vmxnet3_RxDescToCPU((rxd), (tmp)); \
 297                } while (0)
 298
 299#else
 300
 301#   define VMXNET3_TXDESC_GET_GEN(txdesc) ((txdesc)->gen)
 302#   define VMXNET3_TXDESC_GET_EOP(txdesc) ((txdesc)->eop)
 303#   define VMXNET3_TCD_GET_GEN(tcd) ((tcd)->gen)
 304#   define VMXNET3_TCD_GET_TXIDX(tcd) ((tcd)->txdIdx)
 305#   define vmxnet3_getRxComp(dstrcd, rcd, tmp) (dstrcd) = (rcd)
 306#   define vmxnet3_getRxDesc(dstrxd, rxd, tmp) (dstrxd) = (rxd)
 307
 308#endif /* __BIG_ENDIAN_BITFIELD  */
 309
 310
 311static void
 312vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
 313                     struct pci_dev *pdev)
 314{
 315        if (tbi->map_type == VMXNET3_MAP_SINGLE)
 316                dma_unmap_single(&pdev->dev, tbi->dma_addr, tbi->len,
 317                                 PCI_DMA_TODEVICE);
 318        else if (tbi->map_type == VMXNET3_MAP_PAGE)
 319                dma_unmap_page(&pdev->dev, tbi->dma_addr, tbi->len,
 320                               PCI_DMA_TODEVICE);
 321        else
 322                BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
 323
 324        tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */
 325}
 326
 327
 328static int
 329vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq,
 330                  struct pci_dev *pdev, struct vmxnet3_adapter *adapter)
 331{
 332        struct sk_buff *skb;
 333        int entries = 0;
 334
 335        /* no out of order completion */
 336        BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp);
 337        BUG_ON(VMXNET3_TXDESC_GET_EOP(&(tq->tx_ring.base[eop_idx].txd)) != 1);
 338
 339        skb = tq->buf_info[eop_idx].skb;
 340        BUG_ON(skb == NULL);
 341        tq->buf_info[eop_idx].skb = NULL;
 342
 343        VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size);
 344
 345        while (tq->tx_ring.next2comp != eop_idx) {
 346                vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp,
 347                                     pdev);
 348
 349                /* update next2comp w/o tx_lock. Since we are marking more,
 350                 * not fewer, tx ring entries as available, the worst case is
 351                 * that the tx routine incorrectly re-queues a pkt due to
 352                 * insufficient tx ring entries.
 353                 */
 354                vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
 355                entries++;
 356        }
 357
 358        dev_kfree_skb_any(skb);
 359        return entries;
 360}
 361
 362
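/*
 * Completion entries are owned by the driver only while their gen field
 * matches comp_ring.gen; vmxnet3_comp_ring_adv_next2proc() (vmxnet3_int.h)
 * flips comp_ring.gen each time next2proc wraps around the ring.
 */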
 363static int
 364vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq,
 365                        struct vmxnet3_adapter *adapter)
 366{
 367        int completed = 0;
 368        union Vmxnet3_GenericDesc *gdesc;
 369
 370        gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
 371        while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) {
 372                completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX(
 373                                               &gdesc->tcd), tq, adapter->pdev,
 374                                               adapter);
 375
 376                vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring);
 377                gdesc = tq->comp_ring.base + tq->comp_ring.next2proc;
 378        }
 379
 380        if (completed) {
 381                spin_lock(&tq->tx_lock);
 382                if (unlikely(vmxnet3_tq_stopped(tq, adapter) &&
 383                             vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) >
 384                             VMXNET3_WAKE_QUEUE_THRESHOLD(tq) &&
 385                             netif_carrier_ok(adapter->netdev))) {
 386                        vmxnet3_tq_wake(tq, adapter);
 387                }
 388                spin_unlock(&tq->tx_lock);
 389        }
 390        return completed;
 391}
 392
 393
 394static void
 395vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
 396                   struct vmxnet3_adapter *adapter)
 397{
 398        int i;
 399
 400        while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
 401                struct vmxnet3_tx_buf_info *tbi;
 402
 403                tbi = tq->buf_info + tq->tx_ring.next2comp;
 404
 405                vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
 406                if (tbi->skb) {
 407                        dev_kfree_skb_any(tbi->skb);
 408                        tbi->skb = NULL;
 409                }
 410                vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring);
 411        }
 412
 413        /* sanity check, verify all buffers are indeed unmapped and freed */
 414        for (i = 0; i < tq->tx_ring.size; i++) {
 415                BUG_ON(tq->buf_info[i].skb != NULL ||
 416                       tq->buf_info[i].map_type != VMXNET3_MAP_NONE);
 417        }
 418
 419        tq->tx_ring.gen = VMXNET3_INIT_GEN;
 420        tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
 421
 422        tq->comp_ring.gen = VMXNET3_INIT_GEN;
 423        tq->comp_ring.next2proc = 0;
 424}
 425
 426
 427static void
 428vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
 429                   struct vmxnet3_adapter *adapter)
 430{
 431        if (tq->tx_ring.base) {
 432                dma_free_coherent(&adapter->pdev->dev, tq->tx_ring.size *
 433                                  sizeof(struct Vmxnet3_TxDesc),
 434                                  tq->tx_ring.base, tq->tx_ring.basePA);
 435                tq->tx_ring.base = NULL;
 436        }
 437        if (tq->data_ring.base) {
 438                dma_free_coherent(&adapter->pdev->dev, tq->data_ring.size *
 439                                  sizeof(struct Vmxnet3_TxDataDesc),
 440                                  tq->data_ring.base, tq->data_ring.basePA);
 441                tq->data_ring.base = NULL;
 442        }
 443        if (tq->comp_ring.base) {
 444                dma_free_coherent(&adapter->pdev->dev, tq->comp_ring.size *
 445                                  sizeof(struct Vmxnet3_TxCompDesc),
 446                                  tq->comp_ring.base, tq->comp_ring.basePA);
 447                tq->comp_ring.base = NULL;
 448        }
 449        if (tq->buf_info) {
 450                dma_free_coherent(&adapter->pdev->dev,
 451                                  tq->tx_ring.size * sizeof(tq->buf_info[0]),
 452                                  tq->buf_info, tq->buf_info_pa);
 453                tq->buf_info = NULL;
 454        }
 455}
 456
 457
 458/* Destroy all tx queues */
 459void
 460vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
 461{
 462        int i;
 463
 464        for (i = 0; i < adapter->num_tx_queues; i++)
 465                vmxnet3_tq_destroy(&adapter->tx_queue[i], adapter);
 466}
 467
 468
 469static void
 470vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
 471                struct vmxnet3_adapter *adapter)
 472{
 473        int i;
 474
 475        /* reset the tx ring contents to 0 and reset the tx ring states */
 476        memset(tq->tx_ring.base, 0, tq->tx_ring.size *
 477               sizeof(struct Vmxnet3_TxDesc));
 478        tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0;
 479        tq->tx_ring.gen = VMXNET3_INIT_GEN;
 480
 481        memset(tq->data_ring.base, 0, tq->data_ring.size *
 482               sizeof(struct Vmxnet3_TxDataDesc));
 483
 484        /* reset the tx comp ring contents to 0 and reset comp ring states */
 485        memset(tq->comp_ring.base, 0, tq->comp_ring.size *
 486               sizeof(struct Vmxnet3_TxCompDesc));
 487        tq->comp_ring.next2proc = 0;
 488        tq->comp_ring.gen = VMXNET3_INIT_GEN;
 489
 490        /* reset the bookkeeping data */
 491        memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size);
 492        for (i = 0; i < tq->tx_ring.size; i++)
 493                tq->buf_info[i].map_type = VMXNET3_MAP_NONE;
 494
 495        /* stats are not reset */
 496}
 497
 498
 499static int
 500vmxnet3_tq_create(struct vmxnet3_tx_queue *tq,
 501                  struct vmxnet3_adapter *adapter)
 502{
 503        size_t sz;
 504
 505        BUG_ON(tq->tx_ring.base || tq->data_ring.base ||
 506               tq->comp_ring.base || tq->buf_info);
 507
 508        tq->tx_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
 509                        tq->tx_ring.size * sizeof(struct Vmxnet3_TxDesc),
 510                        &tq->tx_ring.basePA, GFP_KERNEL);
 511        if (!tq->tx_ring.base) {
 512                netdev_err(adapter->netdev, "failed to allocate tx ring\n");
 513                goto err;
 514        }
 515
 516        tq->data_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
 517                        tq->data_ring.size * sizeof(struct Vmxnet3_TxDataDesc),
 518                        &tq->data_ring.basePA, GFP_KERNEL);
 519        if (!tq->data_ring.base) {
 520                netdev_err(adapter->netdev, "failed to allocate data ring\n");
 521                goto err;
 522        }
 523
 524        tq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev,
 525                        tq->comp_ring.size * sizeof(struct Vmxnet3_TxCompDesc),
 526                        &tq->comp_ring.basePA, GFP_KERNEL);
 527        if (!tq->comp_ring.base) {
 528                netdev_err(adapter->netdev, "failed to allocate tx comp ring\n");
 529                goto err;
 530        }
 531
 532        sz = tq->tx_ring.size * sizeof(tq->buf_info[0]);
 533        tq->buf_info = dma_zalloc_coherent(&adapter->pdev->dev, sz,
 534                                           &tq->buf_info_pa, GFP_KERNEL);
 535        if (!tq->buf_info)
 536                goto err;
 537
 538        return 0;
 539
 540err:
 541        vmxnet3_tq_destroy(tq, adapter);
 542        return -ENOMEM;
 543}
 544
 545static void
 546vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
 547{
 548        int i;
 549
 550        for (i = 0; i < adapter->num_tx_queues; i++)
 551                vmxnet3_tq_cleanup(&adapter->tx_queue[i], adapter);
 552}
 553
 554/*
 555 *    starting from ring->next2fill, allocate rx buffers for the given ring
 556 *    of the rx queue and update the rx desc. stop after @num_to_alloc buffers
 557 *    are allocated or allocation fails
 558 */
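/*
 *    note that the final buffer requested is filled in but not marked ready
 *    for the device, so next2fill never catches up with next2comp and the
 *    device can still tell a full ring from an empty one
 */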
 559
 560static int
 561vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
 562                        int num_to_alloc, struct vmxnet3_adapter *adapter)
 563{
 564        int num_allocated = 0;
 565        struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx];
 566        struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
 567        u32 val;
 568
 569        while (num_allocated <= num_to_alloc) {
 570                struct vmxnet3_rx_buf_info *rbi;
 571                union Vmxnet3_GenericDesc *gd;
 572
 573                rbi = rbi_base + ring->next2fill;
 574                gd = ring->base + ring->next2fill;
 575
 576                if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
 577                        if (rbi->skb == NULL) {
 578                                rbi->skb = __netdev_alloc_skb_ip_align(adapter->netdev,
 579                                                                       rbi->len,
 580                                                                       GFP_KERNEL);
 581                                if (unlikely(rbi->skb == NULL)) {
 582                                        rq->stats.rx_buf_alloc_failure++;
 583                                        break;
 584                                }
 585
 586                                rbi->dma_addr = dma_map_single(
 587                                                &adapter->pdev->dev,
 588                                                rbi->skb->data, rbi->len,
 589                                                PCI_DMA_FROMDEVICE);
 590                        } else {
 591                                /* rx buffer skipped by the device */
 592                        }
 593                        val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT;
 594                } else {
 595                        BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE ||
 596                               rbi->len  != PAGE_SIZE);
 597
 598                        if (rbi->page == NULL) {
 599                                rbi->page = alloc_page(GFP_ATOMIC);
 600                                if (unlikely(rbi->page == NULL)) {
 601                                        rq->stats.rx_buf_alloc_failure++;
 602                                        break;
 603                                }
 604                                rbi->dma_addr = dma_map_page(
 605                                                &adapter->pdev->dev,
 606                                                rbi->page, 0, PAGE_SIZE,
 607                                                PCI_DMA_FROMDEVICE);
 608                        } else {
 609                                /* rx buffers skipped by the device */
 610                        }
 611                        val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
 612                }
 613
 614                BUG_ON(rbi->dma_addr == 0);
 615                gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
 616                gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
 617                                           | val | rbi->len);
 618
 619                /* Fill the last buffer but don't mark it ready, or else the
 620                 * device will think that the queue is full */
 621                if (num_allocated == num_to_alloc)
 622                        break;
 623
 624                gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
 625                num_allocated++;
 626                vmxnet3_cmd_ring_adv_next2fill(ring);
 627        }
 628
 629        netdev_dbg(adapter->netdev,
 630                "alloc_rx_buf: %d allocated, next2fill %u, next2comp %u\n",
 631                num_allocated, ring->next2fill, ring->next2comp);
 632
 633        /* so that the device can distinguish a full ring and an empty ring */
 634        BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp);
 635
 636        return num_allocated;
 637}
 638
 639
 640static void
 641vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd,
 642                    struct vmxnet3_rx_buf_info *rbi)
 643{
 644        struct skb_frag_struct *frag = skb_shinfo(skb)->frags +
 645                skb_shinfo(skb)->nr_frags;
 646
 647        BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
 648
 649        __skb_frag_set_page(frag, rbi->page);
 650        frag->page_offset = 0;
 651        skb_frag_size_set(frag, rcd->len);
 652        skb->data_len += rcd->len;
 653        skb->truesize += PAGE_SIZE;
 654        skb_shinfo(skb)->nr_frags++;
 655}
 656
 657
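/*
 * Note on ordering: the SOP descriptor is written here with the inverted
 * gen bit and is only flipped to the current gen at the very end of
 * vmxnet3_tq_xmit(), so the device never sees a partially built chain.
 */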
 658static void
 659vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
 660                struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
 661                struct vmxnet3_adapter *adapter)
 662{
 663        u32 dw2, len;
 664        unsigned long buf_offset;
 665        int i;
 666        union Vmxnet3_GenericDesc *gdesc;
 667        struct vmxnet3_tx_buf_info *tbi = NULL;
 668
 669        BUG_ON(ctx->copy_size > skb_headlen(skb));
 670
 671        /* use the previous gen bit for the SOP desc */
 672        dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
 673
 674        ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
 675        gdesc = ctx->sop_txd; /* both loops below can be skipped */
 676
 677        /* no need to map the buffer if headers are copied */
 678        if (ctx->copy_size) {
 679                ctx->sop_txd->txd.addr = cpu_to_le64(tq->data_ring.basePA +
 680                                        tq->tx_ring.next2fill *
 681                                        sizeof(struct Vmxnet3_TxDataDesc));
 682                ctx->sop_txd->dword[2] = cpu_to_le32(dw2 | ctx->copy_size);
 683                ctx->sop_txd->dword[3] = 0;
 684
 685                tbi = tq->buf_info + tq->tx_ring.next2fill;
 686                tbi->map_type = VMXNET3_MAP_NONE;
 687
 688                netdev_dbg(adapter->netdev,
 689                        "txd[%u]: 0x%Lx 0x%x 0x%x\n",
 690                        tq->tx_ring.next2fill,
 691                        le64_to_cpu(ctx->sop_txd->txd.addr),
 692                        ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]);
 693                vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
 694
 695                /* use the right gen for non-SOP desc */
 696                dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
 697        }
 698
 699        /* linear part can use multiple tx desc if it's big */
 700        len = skb_headlen(skb) - ctx->copy_size;
 701        buf_offset = ctx->copy_size;
 702        while (len) {
 703                u32 buf_size;
 704
 705                if (len < VMXNET3_MAX_TX_BUF_SIZE) {
 706                        buf_size = len;
 707                        dw2 |= len;
 708                } else {
 709                        buf_size = VMXNET3_MAX_TX_BUF_SIZE;
 710                        /* spec says that for TxDesc.len, 0 == 2^14 */
 711                }
 712
 713                tbi = tq->buf_info + tq->tx_ring.next2fill;
 714                tbi->map_type = VMXNET3_MAP_SINGLE;
 715                tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
 716                                skb->data + buf_offset, buf_size,
 717                                PCI_DMA_TODEVICE);
 718
 719                tbi->len = buf_size;
 720
 721                gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
 722                BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
 723
 724                gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
 725                gdesc->dword[2] = cpu_to_le32(dw2);
 726                gdesc->dword[3] = 0;
 727
 728                netdev_dbg(adapter->netdev,
 729                        "txd[%u]: 0x%Lx 0x%x 0x%x\n",
 730                        tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
 731                        le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
 732                vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
 733                dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
 734
 735                len -= buf_size;
 736                buf_offset += buf_size;
 737        }
 738
 739        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 740                const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
 741                u32 buf_size;
 742
 743                buf_offset = 0;
 744                len = skb_frag_size(frag);
 745                while (len) {
 746                        tbi = tq->buf_info + tq->tx_ring.next2fill;
 747                        if (len < VMXNET3_MAX_TX_BUF_SIZE) {
 748                                buf_size = len;
 749                                dw2 |= len;
 750                        } else {
 751                                buf_size = VMXNET3_MAX_TX_BUF_SIZE;
 752                                /* spec says that for TxDesc.len, 0 == 2^14 */
 753                        }
 754                        tbi->map_type = VMXNET3_MAP_PAGE;
 755                        tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
 756                                                         buf_offset, buf_size,
 757                                                         DMA_TO_DEVICE);
 758
 759                        tbi->len = buf_size;
 760
 761                        gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
 762                        BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
 763
 764                        gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
 765                        gdesc->dword[2] = cpu_to_le32(dw2);
 766                        gdesc->dword[3] = 0;
 767
 768                        netdev_dbg(adapter->netdev,
 769                                "txd[%u]: 0x%llx %u %u\n",
 770                                tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr),
 771                                le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]);
 772                        vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring);
 773                        dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT;
 774
 775                        len -= buf_size;
 776                        buf_offset += buf_size;
 777                }
 778        }
 779
 780        ctx->eop_txd = gdesc;
 781
 782        /* set the last buf_info for the pkt */
 783        tbi->skb = skb;
 784        tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
 785}
 786
 787
 788/* Init all tx queues */
 789static void
 790vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
 791{
 792        int i;
 793
 794        for (i = 0; i < adapter->num_tx_queues; i++)
 795                vmxnet3_tq_init(&adapter->tx_queue[i], adapter);
 796}
 797
 798
 799/*
 800 *    parse and copy relevant protocol headers:
 801 *      For a tso pkt, relevant headers are L2/3/4 including options
 802 *      For a pkt requesting csum offloading, they are L2/3 and may include L4
 803 *      if it's a TCP/UDP pkt
 804 *
 805 * Returns:
 806 *    -1:  error happens during parsing
 807 *     0:  protocol headers parsed, but too big to be copied
 808 *     1:  protocol headers parsed and copied
 809 *
 810 * Other effects:
 811 *    1. related *ctx fields are updated.
 812 *    2. ctx->copy_size is # of bytes copied
 813 *    3. the portion copied is guaranteed to be in the linear part
 814 *
 815 */
 816static int
 817vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 818                           struct vmxnet3_tx_ctx *ctx,
 819                           struct vmxnet3_adapter *adapter)
 820{
 821        struct Vmxnet3_TxDataDesc *tdd;
 822
 823        if (ctx->mss) { /* TSO */
 824                ctx->eth_ip_hdr_size = skb_transport_offset(skb);
 825                ctx->l4_hdr_size = tcp_hdrlen(skb);
 826                ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
 827        } else {
 828                if (skb->ip_summed == CHECKSUM_PARTIAL) {
 829                        ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
 830
 831                        if (ctx->ipv4) {
 832                                const struct iphdr *iph = ip_hdr(skb);
 833
 834                                if (iph->protocol == IPPROTO_TCP)
 835                                        ctx->l4_hdr_size = tcp_hdrlen(skb);
 836                                else if (iph->protocol == IPPROTO_UDP)
 837                                        ctx->l4_hdr_size = sizeof(struct udphdr);
 838                                else
 839                                        ctx->l4_hdr_size = 0;
 840                        } else {
 841                                /* for simplicity, don't copy L4 headers */
 842                                ctx->l4_hdr_size = 0;
 843                        }
 844                        ctx->copy_size = min(ctx->eth_ip_hdr_size +
 845                                         ctx->l4_hdr_size, skb->len);
 846                } else {
 847                        ctx->eth_ip_hdr_size = 0;
 848                        ctx->l4_hdr_size = 0;
 849                        /* copy as much as allowed */
 850                        ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE,
 851                                             skb_headlen(skb));
 852                }
 853
 854                /* make sure headers are accessible directly */
 855                if (unlikely(!pskb_may_pull(skb, ctx->copy_size)))
 856                        goto err;
 857        }
 858
 859        if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) {
 860                tq->stats.oversized_hdr++;
 861                ctx->copy_size = 0;
 862                return 0;
 863        }
 864
 865        tdd = tq->data_ring.base + tq->tx_ring.next2fill;
 866
 867        memcpy(tdd->data, skb->data, ctx->copy_size);
 868        netdev_dbg(adapter->netdev,
 869                "copy %u bytes to dataRing[%u]\n",
 870                ctx->copy_size, tq->tx_ring.next2fill);
 871        return 1;
 872
 873err:
 874        return -1;
 875}
 876
 877
 878static void
 879vmxnet3_prepare_tso(struct sk_buff *skb,
 880                    struct vmxnet3_tx_ctx *ctx)
 881{
 882        struct tcphdr *tcph = tcp_hdr(skb);
 883
 884        if (ctx->ipv4) {
 885                struct iphdr *iph = ip_hdr(skb);
 886
 887                iph->check = 0;
 888                tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
 889                                                 IPPROTO_TCP, 0);
 890        } else {
 891                struct ipv6hdr *iph = ipv6_hdr(skb);
 892
 893                tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
 894                                               IPPROTO_TCP, 0);
 895        }
 896}
 897
 898static int txd_estimate(const struct sk_buff *skb)
 899{
 900        int count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
 901        int i;
 902
 903        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 904                const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
 905
 906                count += VMXNET3_TXD_NEEDED(skb_frag_size(frag));
 907        }
 908        return count;
 909}
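
/*
 * VMXNET3_TXD_NEEDED() (vmxnet3_int.h) is essentially
 * DIV_ROUND_UP(size, VMXNET3_MAX_TX_BUF_SIZE), since one tx descriptor can
 * cover at most VMXNET3_MAX_TX_BUF_SIZE bytes; the extra "+ 1" above leaves
 * room for the header descriptor that points into the tx data ring.
 */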
 910
 911/*
 912 * Transmits a pkt through a given tq
 913 * Returns:
 914 *    NETDEV_TX_OK:      descriptors are setup successfully
 915 *    NETDEV_TX_OK:      error occurred, the pkt is dropped
 916 *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
 917 *
 918 * Side-effects:
 919 *    1. tx ring may be changed
 920 *    2. tq stats may be updated accordingly
 921 *    3. shared->txNumDeferred may be updated
 922 */
 923
 924static int
 925vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 926                struct vmxnet3_adapter *adapter, struct net_device *netdev)
 927{
 928        int ret;
 929        u32 count;
 930        unsigned long flags;
 931        struct vmxnet3_tx_ctx ctx;
 932        union Vmxnet3_GenericDesc *gdesc;
 933#ifdef __BIG_ENDIAN_BITFIELD
 934        /* Use temporary descriptor to avoid touching bits multiple times */
 935        union Vmxnet3_GenericDesc tempTxDesc;
 936#endif
 937
 938        count = txd_estimate(skb);
 939
 940        ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));
 941
 942        ctx.mss = skb_shinfo(skb)->gso_size;
 943        if (ctx.mss) {
 944                if (skb_header_cloned(skb)) {
 945                        if (unlikely(pskb_expand_head(skb, 0, 0,
 946                                                      GFP_ATOMIC) != 0)) {
 947                                tq->stats.drop_tso++;
 948                                goto drop_pkt;
 949                        }
 950                        tq->stats.copy_skb_header++;
 951                }
 952                vmxnet3_prepare_tso(skb, &ctx);
 953        } else {
 954                if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
 955
 956                        /* non-tso pkts must not use more than
 957                         * VMXNET3_MAX_TXD_PER_PKT entries
 958                         */
 959                        if (skb_linearize(skb) != 0) {
 960                                tq->stats.drop_too_many_frags++;
 961                                goto drop_pkt;
 962                        }
 963                        tq->stats.linearized++;
 964
 965                        /* recalculate the # of descriptors to use */
 966                        count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1;
 967                }
 968        }
 969
 970        spin_lock_irqsave(&tq->tx_lock, flags);
 971
 972        if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
 973                tq->stats.tx_ring_full++;
 974                netdev_dbg(adapter->netdev,
 975                        "tx queue stopped on %s, next2comp %u"
 976                        " next2fill %u\n", adapter->netdev->name,
 977                        tq->tx_ring.next2comp, tq->tx_ring.next2fill);
 978
 979                vmxnet3_tq_stop(tq, adapter);
 980                spin_unlock_irqrestore(&tq->tx_lock, flags);
 981                return NETDEV_TX_BUSY;
 982        }
 983
 984
 985        ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
 986        if (ret >= 0) {
 987                BUG_ON(ret <= 0 && ctx.copy_size != 0);
 988                /* hdrs parsed, check against other limits */
 989                if (ctx.mss) {
 990                        if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
 991                                     VMXNET3_MAX_TX_BUF_SIZE)) {
 992                                goto hdr_too_big;
 993                        }
 994                } else {
 995                        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 996                                if (unlikely(ctx.eth_ip_hdr_size +
 997                                             skb->csum_offset >
 998                                             VMXNET3_MAX_CSUM_OFFSET)) {
 999                                        goto hdr_too_big;
1000                                }
1001                        }
1002                }
1003        } else {
1004                tq->stats.drop_hdr_inspect_err++;
1005                goto unlock_drop_pkt;
1006        }
1007
1008        /* fill tx descs related to addr & len */
1009        vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);
1010
1011        /* setup the EOP desc */
1012        ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
1013
1014        /* setup the SOP desc */
1015#ifdef __BIG_ENDIAN_BITFIELD
1016        gdesc = &tempTxDesc;
1017        gdesc->dword[2] = ctx.sop_txd->dword[2];
1018        gdesc->dword[3] = ctx.sop_txd->dword[3];
1019#else
1020        gdesc = ctx.sop_txd;
1021#endif
1022        if (ctx.mss) {
1023                gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
1024                gdesc->txd.om = VMXNET3_OM_TSO;
1025                gdesc->txd.msscof = ctx.mss;
1026                le32_add_cpu(&tq->shared->txNumDeferred, (skb->len -
1027                             gdesc->txd.hlen + ctx.mss - 1) / ctx.mss);
1028        } else {
1029                if (skb->ip_summed == CHECKSUM_PARTIAL) {
1030                        gdesc->txd.hlen = ctx.eth_ip_hdr_size;
1031                        gdesc->txd.om = VMXNET3_OM_CSUM;
1032                        gdesc->txd.msscof = ctx.eth_ip_hdr_size +
1033                                            skb->csum_offset;
1034                } else {
1035                        gdesc->txd.om = 0;
1036                        gdesc->txd.msscof = 0;
1037                }
1038                le32_add_cpu(&tq->shared->txNumDeferred, 1);
1039        }
1040
1041        if (vlan_tx_tag_present(skb)) {
1042                gdesc->txd.ti = 1;
1043                gdesc->txd.tci = vlan_tx_tag_get(skb);
1044        }
1045
1046        /* finally flips the GEN bit of the SOP desc. */
1047        gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
1048                                                  VMXNET3_TXD_GEN);
1049#ifdef __BIG_ENDIAN_BITFIELD
1050        /* Finished updating in bitfields of Tx Desc, so write them in original
1051         * place.
1052         */
1053        vmxnet3_TxDescToLe((struct Vmxnet3_TxDesc *)gdesc,
1054                           (struct Vmxnet3_TxDesc *)ctx.sop_txd);
1055        gdesc = ctx.sop_txd;
1056#endif
1057        netdev_dbg(adapter->netdev,
1058                "txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
1059                (u32)(ctx.sop_txd -
1060                tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
1061                le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));
1062
1063        spin_unlock_irqrestore(&tq->tx_lock, flags);
1064
1065        if (le32_to_cpu(tq->shared->txNumDeferred) >=
1066                                        le32_to_cpu(tq->shared->txThreshold)) {
1067                tq->shared->txNumDeferred = 0;
1068                VMXNET3_WRITE_BAR0_REG(adapter,
1069                                       VMXNET3_REG_TXPROD + tq->qid * 8,
1070                                       tq->tx_ring.next2fill);
1071        }
1072
1073        return NETDEV_TX_OK;
1074
1075hdr_too_big:
1076        tq->stats.drop_oversized_hdr++;
1077unlock_drop_pkt:
1078        spin_unlock_irqrestore(&tq->tx_lock, flags);
1079drop_pkt:
1080        tq->stats.drop_total++;
1081        dev_kfree_skb(skb);
1082        return NETDEV_TX_OK;
1083}
1084
1085
1086static netdev_tx_t
1087vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
1088{
1089        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1090
1091        BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
1092        return vmxnet3_tq_xmit(skb,
1093                               &adapter->tx_queue[skb->queue_mapping],
1094                               adapter, netdev);
1095}
1096
1097
1098static void
1099vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
1100                struct sk_buff *skb,
1101                union Vmxnet3_GenericDesc *gdesc)
1102{
1103        if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
1104                /* typical case: TCP/UDP over IP and both csums are correct */
1105                if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) ==
1106                                                        VMXNET3_RCD_CSUM_OK) {
1107                        skb->ip_summed = CHECKSUM_UNNECESSARY;
1108                        BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
1109                        BUG_ON(!(gdesc->rcd.v4  || gdesc->rcd.v6));
1110                        BUG_ON(gdesc->rcd.frg);
1111                } else {
1112                        if (gdesc->rcd.csum) {
1113                                skb->csum = htons(gdesc->rcd.csum);
1114                                skb->ip_summed = CHECKSUM_PARTIAL;
1115                        } else {
1116                                skb_checksum_none_assert(skb);
1117                        }
1118                }
1119        } else {
1120                skb_checksum_none_assert(skb);
1121        }
1122}
1123
1124
1125static void
1126vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd,
1127                 struct vmxnet3_rx_ctx *ctx,  struct vmxnet3_adapter *adapter)
1128{
1129        rq->stats.drop_err++;
1130        if (!rcd->fcs)
1131                rq->stats.drop_fcs++;
1132
1133        rq->stats.drop_total++;
1134
1135        /*
1136         * We do not unmap and chain the rx buffer to the skb.
1137         * We basically pretend this buffer is not used and will be recycled
1138         * by vmxnet3_rq_alloc_rx_buf()
1139         */
1140
1141        /*
1142         * ctx->skb may be NULL if this is the first and the only one
1143         * desc for the pkt
1144         */
1145        if (ctx->skb)
1146                dev_kfree_skb_irq(ctx->skb);
1147
1148        ctx->skb = NULL;
1149}
1150
1151
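/*
 * rcd->rqID below selects which of the queue's two command rings the
 * completed buffer came from: IDs below num_rx_queues map to ring 0, the
 * rest to ring 1. A packet is assembled by taking the skb from its head
 * buffer and attaching any body (page) buffers to it as frags.
 */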
1152static int
1153vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
1154                       struct vmxnet3_adapter *adapter, int quota)
1155{
1156        static const u32 rxprod_reg[2] = {
1157                VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
1158        };
1159        u32 num_rxd = 0;
1160        bool skip_page_frags = false;
1161        struct Vmxnet3_RxCompDesc *rcd;
1162        struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
1163#ifdef __BIG_ENDIAN_BITFIELD
1164        struct Vmxnet3_RxDesc rxCmdDesc;
1165        struct Vmxnet3_RxCompDesc rxComp;
1166#endif
1167        vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
1168                          &rxComp);
1169        while (rcd->gen == rq->comp_ring.gen) {
1170                struct vmxnet3_rx_buf_info *rbi;
1171                struct sk_buff *skb, *new_skb = NULL;
1172                struct page *new_page = NULL;
1173                int num_to_alloc;
1174                struct Vmxnet3_RxDesc *rxd;
1175                u32 idx, ring_idx;
1176                struct vmxnet3_cmd_ring *ring = NULL;
1177                if (num_rxd >= quota) {
1178                        /* we may stop even before we see the EOP desc of
1179                         * the current pkt
1180                         */
1181                        break;
1182                }
1183                num_rxd++;
1184                BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
1185                idx = rcd->rxdIdx;
1186                ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
1187                ring = rq->rx_ring + ring_idx;
1188                vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
1189                                  &rxCmdDesc);
1190                rbi = rq->buf_info[ring_idx] + idx;
1191
1192                BUG_ON(rxd->addr != rbi->dma_addr ||
1193                       rxd->len != rbi->len);
1194
1195                if (unlikely(rcd->eop && rcd->err)) {
1196                        vmxnet3_rx_error(rq, rcd, ctx, adapter);
1197                        goto rcd_done;
1198                }
1199
1200                if (rcd->sop) { /* first buf of the pkt */
1201                        BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD ||
1202                               rcd->rqID != rq->qid);
1203
1204                        BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB);
1205                        BUG_ON(ctx->skb != NULL || rbi->skb == NULL);
1206
1207                        if (unlikely(rcd->len == 0)) {
1208                                /* Pretend the rx buffer is skipped. */
1209                                BUG_ON(!(rcd->sop && rcd->eop));
1210                                netdev_dbg(adapter->netdev,
1211                                        "rxRing[%u][%u] 0 length\n",
1212                                        ring_idx, idx);
1213                                goto rcd_done;
1214                        }
1215
1216                        skip_page_frags = false;
1217                        ctx->skb = rbi->skb;
1218                        new_skb = netdev_alloc_skb_ip_align(adapter->netdev,
1219                                                            rbi->len);
1220                        if (new_skb == NULL) {
1221                                /* Skb allocation failed, do not handover this
1222                                 * skb to stack. Reuse it. Drop the existing pkt
1223                                 */
1224                                rq->stats.rx_buf_alloc_failure++;
1225                                ctx->skb = NULL;
1226                                rq->stats.drop_total++;
1227                                skip_page_frags = true;
1228                                goto rcd_done;
1229                        }
1230
1231                        dma_unmap_single(&adapter->pdev->dev, rbi->dma_addr,
1232                                         rbi->len,
1233                                         PCI_DMA_FROMDEVICE);
1234
1235#ifdef VMXNET3_RSS
1236                        if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
1237                            (adapter->netdev->features & NETIF_F_RXHASH))
1238                                ctx->skb->rxhash = le32_to_cpu(rcd->rssHash);
1239#endif
1240                        skb_put(ctx->skb, rcd->len);
1241
1242                        /* Immediate refill */
1243                        rbi->skb = new_skb;
1244                        rbi->dma_addr = dma_map_single(&adapter->pdev->dev,
1245                                                       rbi->skb->data, rbi->len,
1246                                                       PCI_DMA_FROMDEVICE);
1247                        rxd->addr = cpu_to_le64(rbi->dma_addr);
1248                        rxd->len = rbi->len;
1249
1250                } else {
1251                        BUG_ON(ctx->skb == NULL && !skip_page_frags);
1252
1253                        /* non SOP buffer must be type 1 in most cases */
1254                        BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
1255                        BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
1256
1257                        /* If an sop buffer was dropped, skip all
1258                         * following non-sop fragments. They will be reused.
1259                         */
1260                        if (skip_page_frags)
1261                                goto rcd_done;
1262
1263                        new_page = alloc_page(GFP_ATOMIC);
1264                        if (unlikely(new_page == NULL)) {
1265                                /* Replacement page frag could not be allocated.
1266                                 * Reuse this page. Drop the pkt and free the
1267                                 * skb which contained this page as a frag. Skip
1268                                 * processing all the following non-sop frags.
1269                                 */
1270                                rq->stats.rx_buf_alloc_failure++;
1271                                dev_kfree_skb(ctx->skb);
1272                                ctx->skb = NULL;
1273                                skip_page_frags = true;
1274                                goto rcd_done;
1275                        }
1276
1277                        if (rcd->len) {
1278                                dma_unmap_page(&adapter->pdev->dev,
1279                                               rbi->dma_addr, rbi->len,
1280                                               PCI_DMA_FROMDEVICE);
1281
1282                                vmxnet3_append_frag(ctx->skb, rcd, rbi);
1283                        }
1284
1285                        /* Immediate refill */
1286                        rbi->page = new_page;
1287                        rbi->dma_addr = dma_map_page(&adapter->pdev->dev,
1288                                                     rbi->page,
1289                                                     0, PAGE_SIZE,
1290                                                     PCI_DMA_FROMDEVICE);
1291                        rxd->addr = cpu_to_le64(rbi->dma_addr);
1292                        rxd->len = rbi->len;
1293                }
1294
1295
1296                skb = ctx->skb;
1297                if (rcd->eop) {
1298                        skb->len += skb->data_len;
1299
1300                        vmxnet3_rx_csum(adapter, skb,
1301                                        (union Vmxnet3_GenericDesc *)rcd);
1302                        skb->protocol = eth_type_trans(skb, adapter->netdev);
1303
1304                        if (unlikely(rcd->ts))
1305                                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rcd->tci);
1306
1307                        if (adapter->netdev->features & NETIF_F_LRO)
1308                                netif_receive_skb(skb);
1309                        else
1310                                napi_gro_receive(&rq->napi, skb);
1311
1312                        ctx->skb = NULL;
1313                }
1314
1315rcd_done:
1316                /* device may have skipped some rx descs */
1317                ring->next2comp = idx;
1318                num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
1319                ring = rq->rx_ring + ring_idx;
1320                while (num_to_alloc) {
1321                        vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
1322                                          &rxCmdDesc);
1323                        BUG_ON(!rxd->addr);
1324
1325                        /* Recv desc is ready to be used by the device */
1326                        rxd->gen = ring->gen;
1327                        vmxnet3_cmd_ring_adv_next2fill(ring);
1328                        num_to_alloc--;
1329                }
1330
1331                /* if needed, update the register */
1332                if (unlikely(rq->shared->updateRxProd)) {
1333                        VMXNET3_WRITE_BAR0_REG(adapter,
1334                                               rxprod_reg[ring_idx] + rq->qid * 8,
1335                                               ring->next2fill);
1336                }
1337
1338                vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
1339                vmxnet3_getRxComp(rcd,
1340                                  &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, &rxComp);
1341        }
1342
1343        return num_rxd;
1344}
1345
1346
1347static void
1348vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
1349                   struct vmxnet3_adapter *adapter)
1350{
1351        u32 i, ring_idx;
1352        struct Vmxnet3_RxDesc *rxd;
1353
1354        for (ring_idx = 0; ring_idx < 2; ring_idx++) {
1355                for (i = 0; i < rq->rx_ring[ring_idx].size; i++) {
1356#ifdef __BIG_ENDIAN_BITFIELD
1357                        struct Vmxnet3_RxDesc rxDesc;
1358#endif
1359                        vmxnet3_getRxDesc(rxd,
1360                                &rq->rx_ring[ring_idx].base[i].rxd, &rxDesc);
1361
1362                        if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
1363                                        rq->buf_info[ring_idx][i].skb) {
1364                                dma_unmap_single(&adapter->pdev->dev, rxd->addr,
1365                                                 rxd->len, PCI_DMA_FROMDEVICE);
1366                                dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
1367                                rq->buf_info[ring_idx][i].skb = NULL;
1368                        } else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
1369                                        rq->buf_info[ring_idx][i].page) {
1370                                dma_unmap_page(&adapter->pdev->dev, rxd->addr,
1371                                               rxd->len, PCI_DMA_FROMDEVICE);
1372                                put_page(rq->buf_info[ring_idx][i].page);
1373                                rq->buf_info[ring_idx][i].page = NULL;
1374                        }
1375                }
1376
1377                rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN;
1378                rq->rx_ring[ring_idx].next2fill =
1379                                        rq->rx_ring[ring_idx].next2comp = 0;
1380        }
1381
1382        rq->comp_ring.gen = VMXNET3_INIT_GEN;
1383        rq->comp_ring.next2proc = 0;
1384}
1385
1386
1387static void
1388vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
1389{
1390        int i;
1391
1392        for (i = 0; i < adapter->num_rx_queues; i++)
1393                vmxnet3_rq_cleanup(&adapter->rx_queue[i], adapter);
1394}
1395
1396
1397static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
1398                               struct vmxnet3_adapter *adapter)
1399{
1400        int i;
1401        int j;
1402
1403        /* all rx buffers must have already been freed */
1404        for (i = 0; i < 2; i++) {
1405                if (rq->buf_info[i]) {
1406                        for (j = 0; j < rq->rx_ring[i].size; j++)
1407                                BUG_ON(rq->buf_info[i][j].page != NULL);
1408                }
1409        }
1410
1411
1412        for (i = 0; i < 2; i++) {
1413                if (rq->rx_ring[i].base) {
1414                        dma_free_coherent(&adapter->pdev->dev,
1415                                          rq->rx_ring[i].size
1416                                          * sizeof(struct Vmxnet3_RxDesc),
1417                                          rq->rx_ring[i].base,
1418                                          rq->rx_ring[i].basePA);
1419                        rq->rx_ring[i].base = NULL;
1420                }
1421                rq->buf_info[i] = NULL;
1422        }
1423
1424        if (rq->comp_ring.base) {
1425                dma_free_coherent(&adapter->pdev->dev, rq->comp_ring.size
1426                                  * sizeof(struct Vmxnet3_RxCompDesc),
1427                                  rq->comp_ring.base, rq->comp_ring.basePA);
1428                rq->comp_ring.base = NULL;
1429        }
1430
1431        if (rq->buf_info[0]) {
1432                size_t sz = sizeof(struct vmxnet3_rx_buf_info) *
1433                        (rq->rx_ring[0].size + rq->rx_ring[1].size);
1434                dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
1435                                  rq->buf_info_pa);
1436        }
1437}
1438
1439
1440static int
1441vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
1442                struct vmxnet3_adapter  *adapter)
1443{
1444        int i;
1445
1446        /* initialize buf_info */
1447        for (i = 0; i < rq->rx_ring[0].size; i++) {
1448
1449                /* 1st buf for a pkt is skbuff */
1450                if (i % adapter->rx_buf_per_pkt == 0) {
1451                        rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB;
1452                        rq->buf_info[0][i].len = adapter->skb_buf_size;
1453                } else { /* subsequent bufs for a pkt are frags */
1454                        rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE;
1455                        rq->buf_info[0][i].len = PAGE_SIZE;
1456                }
1457        }
1458        for (i = 0; i < rq->rx_ring[1].size; i++) {
1459                rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE;
1460                rq->buf_info[1][i].len = PAGE_SIZE;
1461        }
1462
1463        /* reset internal state and allocate buffers for both rings */
1464        for (i = 0; i < 2; i++) {
1465                rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0;
1466
1467                memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
1468                       sizeof(struct Vmxnet3_RxDesc));
1469                rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
1470        }
1471        if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
1472                                    adapter) == 0) {
1473                /* we need at least 1 rx buffer for the 1st ring */
1474                return -ENOMEM;
1475        }
1476        vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter);
1477
1478        /* reset the comp ring */
1479        rq->comp_ring.next2proc = 0;
1480        memset(rq->comp_ring.base, 0, rq->comp_ring.size *
1481               sizeof(struct Vmxnet3_RxCompDesc));
1482        rq->comp_ring.gen = VMXNET3_INIT_GEN;
1483
1484        /* reset rxctx */
1485        rq->rx_ctx.skb = NULL;
1486
1487        /* stats are not reset */
1488        return 0;
1489}
1490
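/*
 * A small standalone sketch (not part of the driver) of the ring-0 buffer
 * layout initialized above: every rx_buf_per_pkt-th slot holds a full skb
 * buffer for the packet head, and the slots in between hold page-sized
 * buffers for the rest of a large packet.  The TOY_* constants and sample
 * sizes are made up for illustration.
 */
#if 0
#include <stdio.h>

#define TOY_RING0_SIZE  9
#define TOY_PAGE_SIZE   4096

enum toy_buf_type { TOY_BUF_SKB, TOY_BUF_PAGE };

int main(void)
{
        unsigned int rx_buf_per_pkt = 3;        /* e.g. jumbo MTU */
        unsigned int skb_buf_size = 1536;
        unsigned int i;

        for (i = 0; i < TOY_RING0_SIZE; i++) {
                enum toy_buf_type t = (i % rx_buf_per_pkt == 0) ?
                                       TOY_BUF_SKB : TOY_BUF_PAGE;
                unsigned int len = (t == TOY_BUF_SKB) ? skb_buf_size
                                                      : TOY_PAGE_SIZE;
                printf("slot %u: %s, len %u\n", i,
                       t == TOY_BUF_SKB ? "skb" : "page", len);
        }
        return 0;
}
#endif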
1491
1492static int
1493vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
1494{
1495        int i, err = 0;
1496
1497        for (i = 0; i < adapter->num_rx_queues; i++) {
1498                err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
1499                if (unlikely(err)) {
1500                        dev_err(&adapter->netdev->dev, "%s: failed to "
1501                                "initialize rx queue%i\n",
1502                                adapter->netdev->name, i);
1503                        break;
1504                }
1505        }
1506        return err;
1507
1508}
1509
1510
1511static int
1512vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
1513{
1514        int i;
1515        size_t sz;
1516        struct vmxnet3_rx_buf_info *bi;
1517
1518        for (i = 0; i < 2; i++) {
1519
1520                sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc);
1521                rq->rx_ring[i].base = dma_alloc_coherent(
1522                                                &adapter->pdev->dev, sz,
1523                                                &rq->rx_ring[i].basePA,
1524                                                GFP_KERNEL);
1525                if (!rq->rx_ring[i].base) {
1526                        netdev_err(adapter->netdev,
1527                                   "failed to allocate rx ring %d\n", i);
1528                        goto err;
1529                }
1530        }
1531
1532        sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc);
1533        rq->comp_ring.base = dma_alloc_coherent(&adapter->pdev->dev, sz,
1534                                                &rq->comp_ring.basePA,
1535                                                GFP_KERNEL);
1536        if (!rq->comp_ring.base) {
1537                netdev_err(adapter->netdev, "failed to allocate rx comp ring\n");
1538                goto err;
1539        }
1540
1541        sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
1542                                                   rq->rx_ring[1].size);
1543        bi = dma_zalloc_coherent(&adapter->pdev->dev, sz, &rq->buf_info_pa,
1544                                 GFP_KERNEL);
1545        if (!bi)
1546                goto err;
1547
1548        rq->buf_info[0] = bi;
1549        rq->buf_info[1] = bi + rq->rx_ring[0].size;
1550
1551        return 0;
1552
1553err:
1554        vmxnet3_rq_destroy(rq, adapter);
1555        return -ENOMEM;
1556}
1557
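/*
 * A standalone sketch (not part of the driver) of the buf_info carve-up
 * done in vmxnet3_rq_create() above: one allocation sized for both rings,
 * with buf_info[1] pointing just past ring 0's entries.  Plain malloc()
 * stands in for dma_zalloc_coherent(), and struct toy_buf_info is a
 * hypothetical stand-in for struct vmxnet3_rx_buf_info.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_buf_info {
        void *skb_or_page;
        unsigned int len;
};

int main(void)
{
        size_t ring0_size = 256, ring1_size = 128;
        size_t sz = sizeof(struct toy_buf_info) * (ring0_size + ring1_size);
        struct toy_buf_info *bi = malloc(sz);
        struct toy_buf_info *buf_info[2];

        if (!bi)
                return 1;
        memset(bi, 0, sz);                      /* the real call zeroes too */

        buf_info[0] = bi;                       /* ring 0 entries */
        buf_info[1] = bi + ring0_size;          /* ring 1 entries */

        printf("ring 1 entries start %zu bytes into the block\n",
               (size_t)((char *)buf_info[1] - (char *)buf_info[0]));
        free(bi);                               /* one allocation, one free */
        return 0;
}
#endif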
1558
1559static int
1560vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
1561{
1562        int i, err = 0;
1563
1564        for (i = 0; i < adapter->num_rx_queues; i++) {
1565                err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
1566                if (unlikely(err)) {
1567                        dev_err(&adapter->netdev->dev,
1568                                "%s: failed to create rx queue%i\n",
1569                                adapter->netdev->name, i);
1570                        goto err_out;
1571                }
1572        }
1573        return err;
1574err_out:
1575        vmxnet3_rq_destroy_all(adapter);
1576        return err;
1577
1578}
1579
1580/* Multiple queue aware polling function for tx and rx */
1581
1582static int
1583vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
1584{
1585        int rcd_done = 0, i;
1586        if (unlikely(adapter->shared->ecr))
1587                vmxnet3_process_events(adapter);
1588        for (i = 0; i < adapter->num_tx_queues; i++)
1589                vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
1590
1591        for (i = 0; i < adapter->num_rx_queues; i++)
1592                rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
1593                                                   adapter, budget);
1594        return rcd_done;
1595}
1596
1597
1598static int
1599vmxnet3_poll(struct napi_struct *napi, int budget)
1600{
1601        struct vmxnet3_rx_queue *rx_queue = container_of(napi,
1602                                          struct vmxnet3_rx_queue, napi);
1603        int rxd_done;
1604
1605        rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
1606
1607        if (rxd_done < budget) {
1608                napi_complete(napi);
1609                vmxnet3_enable_all_intrs(rx_queue->adapter);
1610        }
1611        return rxd_done;
1612}
1613
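/*
 * A standalone sketch (not part of the driver) of the NAPI budget contract
 * followed by vmxnet3_poll() above: consume at most `budget` completions,
 * and only when the queue runs dry before the budget is exhausted may the
 * poller re-enable interrupts and return a value smaller than the budget.
 * toy_rx_complete() and toy_enable_irq() are hypothetical stand-ins.
 */
#if 0
#include <stdio.h>

static int pending = 37;                /* pretend completions are waiting */

static int toy_rx_complete(int budget)
{
        int done = pending < budget ? pending : budget;

        pending -= done;
        return done;
}

static void toy_enable_irq(void)
{
        printf("interrupts re-enabled\n");
}

static int toy_poll(int budget)
{
        int done = toy_rx_complete(budget);

        if (done < budget)              /* ring empty: leave polling mode */
                toy_enable_irq();
        return done;
}

int main(void)
{
        int done;

        do {
                done = toy_poll(16);
                printf("poll consumed %d of 16\n", done);
        } while (done == 16);           /* stay in polling mode while busy */
        return 0;
}
#endif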
1614/*
1615 * NAPI polling function for MSI-X mode with multiple Rx queues
1616 * Returns the # of the NAPI credit consumed (# of rx descriptors processed)
1617 */
1618
1619static int
1620vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
1621{
1622        struct vmxnet3_rx_queue *rq = container_of(napi,
1623                                                struct vmxnet3_rx_queue, napi);
1624        struct vmxnet3_adapter *adapter = rq->adapter;
1625        int rxd_done;
1626
1627        /* When sharing interrupt with corresponding tx queue, process
1628         * tx completions in that queue as well
1629         */
1630        if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
1631                struct vmxnet3_tx_queue *tq =
1632                                &adapter->tx_queue[rq - adapter->rx_queue];
1633                vmxnet3_tq_tx_complete(tq, adapter);
1634        }
1635
1636        rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
1637
1638        if (rxd_done < budget) {
1639                napi_complete(napi);
1640                vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
1641        }
1642        return rxd_done;
1643}
1644
1645
1646#ifdef CONFIG_PCI_MSI
1647
1648/*
1649 * Handle completion interrupts on tx queues
1650 * Returns whether or not the intr is handled
1651 */
1652
1653static irqreturn_t
1654vmxnet3_msix_tx(int irq, void *data)
1655{
1656        struct vmxnet3_tx_queue *tq = data;
1657        struct vmxnet3_adapter *adapter = tq->adapter;
1658
1659        if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1660                vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
1661
1662        /* Handle the case where only one irq is allocated for all tx queues */
1663        if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1664                int i;
1665                for (i = 0; i < adapter->num_tx_queues; i++) {
1666                        struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
1667                        vmxnet3_tq_tx_complete(txq, adapter);
1668                }
1669        } else {
1670                vmxnet3_tq_tx_complete(tq, adapter);
1671        }
1672        vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
1673
1674        return IRQ_HANDLED;
1675}
1676
1677
1678/*
1679 * Handle completion interrupts on rx queues. Returns whether or not the
1680 * intr is handled
1681 */
1682
1683static irqreturn_t
1684vmxnet3_msix_rx(int irq, void *data)
1685{
1686        struct vmxnet3_rx_queue *rq = data;
1687        struct vmxnet3_adapter *adapter = rq->adapter;
1688
1689        /* disable intr if needed */
1690        if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1691                vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
1692        napi_schedule(&rq->napi);
1693
1694        return IRQ_HANDLED;
1695}
1696
1697/*
1698 *----------------------------------------------------------------------------
1699 *
1700 * vmxnet3_msix_event --
1701 *
1702 *    vmxnet3 msix event intr handler
1703 *
1704 * Result:
1705 *    whether or not the intr is handled
1706 *
1707 *----------------------------------------------------------------------------
1708 */
1709
1710static irqreturn_t
1711vmxnet3_msix_event(int irq, void *data)
1712{
1713        struct net_device *dev = data;
1714        struct vmxnet3_adapter *adapter = netdev_priv(dev);
1715
1716        /* disable intr if needed */
1717        if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1718                vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
1719
1720        if (adapter->shared->ecr)
1721                vmxnet3_process_events(adapter);
1722
1723        vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
1724
1725        return IRQ_HANDLED;
1726}
1727
1728#endif /* CONFIG_PCI_MSI  */
1729
1730
1731/* Interrupt handler for vmxnet3  */
1732static irqreturn_t
1733vmxnet3_intr(int irq, void *dev_id)
1734{
1735        struct net_device *dev = dev_id;
1736        struct vmxnet3_adapter *adapter = netdev_priv(dev);
1737
1738        if (adapter->intr.type == VMXNET3_IT_INTX) {
1739                u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
1740                if (unlikely(icr == 0))
1741                        /* not ours */
1742                        return IRQ_NONE;
1743        }
1744
1745
1746        /* disable intr if needed */
1747        if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1748                vmxnet3_disable_all_intrs(adapter);
1749
1750        napi_schedule(&adapter->rx_queue[0].napi);
1751
1752        return IRQ_HANDLED;
1753}
1754
1755#ifdef CONFIG_NET_POLL_CONTROLLER
1756
1757/* netpoll callback. */
1758static void
1759vmxnet3_netpoll(struct net_device *netdev)
1760{
1761        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1762
1763        if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
1764                vmxnet3_disable_all_intrs(adapter);
1765
1766        vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size);
1767        vmxnet3_enable_all_intrs(adapter);
1768
1769}
1770#endif  /* CONFIG_NET_POLL_CONTROLLER */
1771
1772static int
1773vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
1774{
1775        struct vmxnet3_intr *intr = &adapter->intr;
1776        int err = 0, i;
1777        int vector = 0;
1778
1779#ifdef CONFIG_PCI_MSI
1780        if (adapter->intr.type == VMXNET3_IT_MSIX) {
1781                for (i = 0; i < adapter->num_tx_queues; i++) {
1782                        if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1783                                sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
1784                                        adapter->netdev->name, vector);
1785                                err = request_irq(
1786                                              intr->msix_entries[vector].vector,
1787                                              vmxnet3_msix_tx, 0,
1788                                              adapter->tx_queue[i].name,
1789                                              &adapter->tx_queue[i]);
1790                        } else {
1791                                sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
1792                                        adapter->netdev->name, vector);
1793                        }
1794                        if (err) {
1795                                dev_err(&adapter->netdev->dev,
1796                                        "Failed to request irq for MSIX, %s, "
1797                                        "error %d\n",
1798                                        adapter->tx_queue[i].name, err);
1799                                return err;
1800                        }
1801
1802                        /* Handle the case where only 1 MSIx was allocated for
1803                         * all tx queues */
1804                        if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
1805                                for (; i < adapter->num_tx_queues; i++)
1806                                        adapter->tx_queue[i].comp_ring.intr_idx
1807                                                                = vector;
1808                                vector++;
1809                                break;
1810                        } else {
1811                                adapter->tx_queue[i].comp_ring.intr_idx
1812                                                                = vector++;
1813                        }
1814                }
1815                if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
1816                        vector = 0;
1817
1818                for (i = 0; i < adapter->num_rx_queues; i++) {
1819                        if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
1820                                sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
1821                                        adapter->netdev->name, vector);
1822                        else
1823                                sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
1824                                        adapter->netdev->name, vector);
1825                        err = request_irq(intr->msix_entries[vector].vector,
1826                                          vmxnet3_msix_rx, 0,
1827                                          adapter->rx_queue[i].name,
1828                                          &(adapter->rx_queue[i]));
1829                        if (err) {
1830                                netdev_err(adapter->netdev,
1831                                           "Failed to request irq for MSIX, "
1832                                           "%s, error %d\n",
1833                                           adapter->rx_queue[i].name, err);
1834                                return err;
1835                        }
1836
1837                        adapter->rx_queue[i].comp_ring.intr_idx = vector++;
1838                }
1839
1840                sprintf(intr->event_msi_vector_name, "%s-event-%d",
1841                        adapter->netdev->name, vector);
1842                err = request_irq(intr->msix_entries[vector].vector,
1843                                  vmxnet3_msix_event, 0,
1844                                  intr->event_msi_vector_name, adapter->netdev);
1845                intr->event_intr_idx = vector;
1846
1847        } else if (intr->type == VMXNET3_IT_MSI) {
1848                adapter->num_rx_queues = 1;
1849                err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
1850                                  adapter->netdev->name, adapter->netdev);
1851        } else {
1852#endif
1853                adapter->num_rx_queues = 1;
1854                err = request_irq(adapter->pdev->irq, vmxnet3_intr,
1855                                  IRQF_SHARED, adapter->netdev->name,
1856                                  adapter->netdev);
1857#ifdef CONFIG_PCI_MSI
1858        }
1859#endif
1860        intr->num_intrs = vector + 1;
1861        if (err) {
1862                netdev_err(adapter->netdev,
1863                           "Failed to request irq (intr type:%d), error %d\n",
1864                           intr->type, err);
1865        } else {
1866                /* Number of rx queues will not change after this */
1867                for (i = 0; i < adapter->num_rx_queues; i++) {
1868                        struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
1869                        rq->qid = i;
1870                        rq->qid2 = i + adapter->num_rx_queues;
1871                }
1872
1873
1874
1875                /* init our intr settings */
1876                for (i = 0; i < intr->num_intrs; i++)
1877                        intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
1878                if (adapter->intr.type != VMXNET3_IT_MSIX) {
1879                        adapter->intr.event_intr_idx = 0;
1880                        for (i = 0; i < adapter->num_tx_queues; i++)
1881                                adapter->tx_queue[i].comp_ring.intr_idx = 0;
1882                        adapter->rx_queue[0].comp_ring.intr_idx = 0;
1883                }
1884
1885                netdev_info(adapter->netdev,
1886                            "intr type %u, mode %u, %u vectors allocated\n",
1887                            intr->type, intr->mask_mode, intr->num_intrs);
1888        }
1889
1890        return err;
1891}
1892
1893
1894static void
1895vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
1896{
1897        struct vmxnet3_intr *intr = &adapter->intr;
1898        BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
1899
1900        switch (intr->type) {
1901#ifdef CONFIG_PCI_MSI
1902        case VMXNET3_IT_MSIX:
1903        {
1904                int i, vector = 0;
1905
1906                if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
1907                        for (i = 0; i < adapter->num_tx_queues; i++) {
1908                                free_irq(intr->msix_entries[vector++].vector,
1909                                         &(adapter->tx_queue[i]));
1910                                if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
1911                                        break;
1912                        }
1913                }
1914
1915                for (i = 0; i < adapter->num_rx_queues; i++) {
1916                        free_irq(intr->msix_entries[vector++].vector,
1917                                 &(adapter->rx_queue[i]));
1918                }
1919
1920                free_irq(intr->msix_entries[vector].vector,
1921                         adapter->netdev);
1922                BUG_ON(vector >= intr->num_intrs);
1923                break;
1924        }
1925#endif
1926        case VMXNET3_IT_MSI:
1927                free_irq(adapter->pdev->irq, adapter->netdev);
1928                break;
1929        case VMXNET3_IT_INTX:
1930                free_irq(adapter->pdev->irq, adapter->netdev);
1931                break;
1932        default:
1933                BUG();
1934        }
1935}
1936
1937
1938static void
1939vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
1940{
1941        u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1942        u16 vid;
1943
1944        /* allow untagged pkts */
1945        VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
1946
1947        for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
1948                VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1949}
1950
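/*
 * A standalone sketch (not part of the driver) of one plausible layout for
 * the VLAN filter table restored above: a bitmap with one bit per VLAN ID
 * held in an array of 32-bit words.  The real VMXNET3_SET/CLEAR_VFTABLE_ENTRY
 * macros are defined in the driver headers and may differ in detail.
 */
#if 0
#include <stdio.h>

#define TOY_VLAN_N_VID  4096
#define TOY_VFT_SIZE    (TOY_VLAN_N_VID / 32)

static unsigned int vfTable[TOY_VFT_SIZE];

static void toy_set_vid(unsigned int vid)
{
        vfTable[vid >> 5] |= 1u << (vid & 31);
}

static void toy_clear_vid(unsigned int vid)
{
        vfTable[vid >> 5] &= ~(1u << (vid & 31));
}

int main(void)
{
        toy_set_vid(0);                 /* allow untagged frames */
        toy_set_vid(100);
        toy_clear_vid(100);
        printf("word 0 = %#x, word 3 = %#x\n", vfTable[0], vfTable[3]);
        return 0;
}
#endif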
1951
1952static int
1953vmxnet3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1954{
1955        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1956
1957        if (!(netdev->flags & IFF_PROMISC)) {
1958                u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1959                unsigned long flags;
1960
1961                VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
1962                spin_lock_irqsave(&adapter->cmd_lock, flags);
1963                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1964                                       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1965                spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1966        }
1967
1968        set_bit(vid, adapter->active_vlans);
1969
1970        return 0;
1971}
1972
1973
1974static int
1975vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
1976{
1977        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
1978
1979        if (!(netdev->flags & IFF_PROMISC)) {
1980                u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
1981                unsigned long flags;
1982
1983                VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
1984                spin_lock_irqsave(&adapter->cmd_lock, flags);
1985                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
1986                                       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
1987                spin_unlock_irqrestore(&adapter->cmd_lock, flags);
1988        }
1989
1990        clear_bit(vid, adapter->active_vlans);
1991
1992        return 0;
1993}
1994
1995
1996static u8 *
1997vmxnet3_copy_mc(struct net_device *netdev)
1998{
1999        u8 *buf = NULL;
2000        u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
2001
2002        /* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
2003        if (sz <= 0xffff) {
2004                /* We may be called with BH disabled */
2005                buf = kmalloc(sz, GFP_ATOMIC);
2006                if (buf) {
2007                        struct netdev_hw_addr *ha;
2008                        int i = 0;
2009
2010                        netdev_for_each_mc_addr(ha, netdev)
2011                                memcpy(buf + i++ * ETH_ALEN, ha->addr,
2012                                       ETH_ALEN);
2013                }
2014        }
2015        return buf;
2016}
2017
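/*
 * A standalone sketch (not part of the driver) of what vmxnet3_copy_mc()
 * above produces: the multicast list flattened into one contiguous buffer
 * of 6-byte addresses that the device can DMA, with the total length
 * required to fit the 16-bit mfTableLen field.  The sample addresses are
 * arbitrary.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TOY_ETH_ALEN 6

int main(void)
{
        static const unsigned char mc_list[][TOY_ETH_ALEN] = {
                { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 },
                { 0x01, 0x00, 0x5e, 0x00, 0x00, 0xfb },
        };
        size_t count = sizeof(mc_list) / sizeof(mc_list[0]);
        unsigned int sz = (unsigned int)(count * TOY_ETH_ALEN);
        unsigned char *buf;
        size_t i;

        if (sz > 0xffff)                /* would not fit in a u16 length */
                return 1;

        buf = malloc(sz);
        if (!buf)
                return 1;
        for (i = 0; i < count; i++)
                memcpy(buf + i * TOY_ETH_ALEN, mc_list[i], TOY_ETH_ALEN);

        printf("copied %zu addresses, %u bytes\n", count, sz);
        free(buf);
        return 0;
}
#endif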
2018
2019static void
2020vmxnet3_set_mc(struct net_device *netdev)
2021{
2022        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2023        unsigned long flags;
2024        struct Vmxnet3_RxFilterConf *rxConf =
2025                                        &adapter->shared->devRead.rxFilterConf;
2026        u8 *new_table = NULL;
2027        dma_addr_t new_table_pa = 0;
2028        u32 new_mode = VMXNET3_RXM_UCAST;
2029
2030        if (netdev->flags & IFF_PROMISC) {
2031                u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
2032                memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
2033
2034                new_mode |= VMXNET3_RXM_PROMISC;
2035        } else {
2036                vmxnet3_restore_vlan(adapter);
2037        }
2038
2039        if (netdev->flags & IFF_BROADCAST)
2040                new_mode |= VMXNET3_RXM_BCAST;
2041
2042        if (netdev->flags & IFF_ALLMULTI)
2043                new_mode |= VMXNET3_RXM_ALL_MULTI;
2044        else
2045                if (!netdev_mc_empty(netdev)) {
2046                        new_table = vmxnet3_copy_mc(netdev);
2047                        if (new_table) {
2048                                new_mode |= VMXNET3_RXM_MCAST;
2049                                rxConf->mfTableLen = cpu_to_le16(
2050                                        netdev_mc_count(netdev) * ETH_ALEN);
2051                                new_table_pa = dma_map_single(
2052                                                        &adapter->pdev->dev,
2053                                                        new_table,
2054                                                        rxConf->mfTableLen,
2055                                                        PCI_DMA_TODEVICE);
2056                                rxConf->mfTablePA = cpu_to_le64(new_table_pa);
2057                        } else {
2058                                netdev_info(netdev, "failed to copy mcast list"
2059                                            ", setting ALL_MULTI\n");
2060                                new_mode |= VMXNET3_RXM_ALL_MULTI;
2061                        }
2062                }
2063
2064
2065        if (!(new_mode & VMXNET3_RXM_MCAST)) {
2066                rxConf->mfTableLen = 0;
2067                rxConf->mfTablePA = 0;
2068        }
2069
2070        spin_lock_irqsave(&adapter->cmd_lock, flags);
2071        if (new_mode != rxConf->rxMode) {
2072                rxConf->rxMode = cpu_to_le32(new_mode);
2073                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2074                                       VMXNET3_CMD_UPDATE_RX_MODE);
2075                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2076                                       VMXNET3_CMD_UPDATE_VLAN_FILTERS);
2077        }
2078
2079        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2080                               VMXNET3_CMD_UPDATE_MAC_FILTERS);
2081        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2082
2083        if (new_table) {
2084                dma_unmap_single(&adapter->pdev->dev, new_table_pa,
2085                                 rxConf->mfTableLen, PCI_DMA_TODEVICE);
2086                kfree(new_table);
2087        }
2088}
2089
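/*
 * A standalone sketch (not part of the driver) mirroring the rx-mode
 * selection in vmxnet3_set_mc() above: unicast is always accepted,
 * broadcast/promiscuous/all-multi follow the netdev flags, and a populated
 * multicast list adds MCAST, falling back to ALL_MULTI when the list cannot
 * be copied.  The TOY_RXM_* values are made up for the example.
 */
#if 0
#include <stdbool.h>
#include <stdio.h>

#define TOY_RXM_UCAST           0x01
#define TOY_RXM_MCAST           0x02
#define TOY_RXM_BCAST           0x04
#define TOY_RXM_ALL_MULTI       0x08
#define TOY_RXM_PROMISC         0x10

static unsigned int toy_rx_mode(bool promisc, bool bcast, bool allmulti,
                                bool have_mc_list, bool mc_copy_ok)
{
        unsigned int mode = TOY_RXM_UCAST;

        if (promisc)
                mode |= TOY_RXM_PROMISC;
        if (bcast)
                mode |= TOY_RXM_BCAST;
        if (allmulti)
                mode |= TOY_RXM_ALL_MULTI;
        else if (have_mc_list)
                mode |= mc_copy_ok ? TOY_RXM_MCAST : TOY_RXM_ALL_MULTI;

        return mode;
}

int main(void)
{
        printf("typical mode: %#x\n",
               toy_rx_mode(false, true, false, true, true));
        printf("copy failed:  %#x\n",
               toy_rx_mode(false, true, false, true, false));
        return 0;
}
#endif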
2090void
2091vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
2092{
2093        int i;
2094
2095        for (i = 0; i < adapter->num_rx_queues; i++)
2096                vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
2097}
2098
2099
2100/*
2101 *   Set up driver_shared based on settings in adapter.
2102 */
2103
2104static void
2105vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
2106{
2107        struct Vmxnet3_DriverShared *shared = adapter->shared;
2108        struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
2109        struct Vmxnet3_TxQueueConf *tqc;
2110        struct Vmxnet3_RxQueueConf *rqc;
2111        int i;
2112
2113        memset(shared, 0, sizeof(*shared));
2114
2115        /* driver settings */
2116        shared->magic = cpu_to_le32(VMXNET3_REV1_MAGIC);
2117        devRead->misc.driverInfo.version = cpu_to_le32(
2118                                                VMXNET3_DRIVER_VERSION_NUM);
2119        devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ?
2120                                VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64);
2121        devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX;
2122        *((u32 *)&devRead->misc.driverInfo.gos) = cpu_to_le32(
2123                                *((u32 *)&devRead->misc.driverInfo.gos));
2124        devRead->misc.driverInfo.vmxnet3RevSpt = cpu_to_le32(1);
2125        devRead->misc.driverInfo.uptVerSpt = cpu_to_le32(1);
2126
2127        devRead->misc.ddPA = cpu_to_le64(adapter->adapter_pa);
2128        devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
2129
2130        /* set up feature flags */
2131        if (adapter->netdev->features & NETIF_F_RXCSUM)
2132                devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
2133
2134        if (adapter->netdev->features & NETIF_F_LRO) {
2135                devRead->misc.uptFeatures |= UPT1_F_LRO;
2136                devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
2137        }
2138        if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
2139                devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
2140
2141        devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
2142        devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
2143        devRead->misc.queueDescLen = cpu_to_le32(
2144                adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
2145                adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
2146
2147        /* tx queue settings */
2148        devRead->misc.numTxQueues =  adapter->num_tx_queues;
2149        for (i = 0; i < adapter->num_tx_queues; i++) {
2150                struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2151                BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
2152                tqc = &adapter->tqd_start[i].conf;
2153                tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
2154                tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
2155                tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
2156                tqc->ddPA           = cpu_to_le64(tq->buf_info_pa);
2157                tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
2158                tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
2159                tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
2160                tqc->ddLen          = cpu_to_le32(
2161                                        sizeof(struct vmxnet3_tx_buf_info) *
2162                                        tqc->txRingSize);
2163                tqc->intrIdx        = tq->comp_ring.intr_idx;
2164        }
2165
2166        /* rx queue settings */
2167        devRead->misc.numRxQueues = adapter->num_rx_queues;
2168        for (i = 0; i < adapter->num_rx_queues; i++) {
2169                struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2170                rqc = &adapter->rqd_start[i].conf;
2171                rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
2172                rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
2173                rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
2174                rqc->ddPA            = cpu_to_le64(rq->buf_info_pa);
2175                rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
2176                rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
2177                rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
2178                rqc->ddLen           = cpu_to_le32(
2179                                        sizeof(struct vmxnet3_rx_buf_info) *
2180                                        (rqc->rxRingSize[0] +
2181                                         rqc->rxRingSize[1]));
2182                rqc->intrIdx         = rq->comp_ring.intr_idx;
2183        }
2184
2185#ifdef VMXNET3_RSS
2186        memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
2187
2188        if (adapter->rss) {
2189                struct UPT1_RSSConf *rssConf = adapter->rss_conf;
2190                static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
2191                        0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
2192                        0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
2193                        0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
2194                        0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
2195                        0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
2196                };
2197
2198                devRead->misc.uptFeatures |= UPT1_F_RSS;
2199                devRead->misc.numRxQueues = adapter->num_rx_queues;
2200                rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
2201                                    UPT1_RSS_HASH_TYPE_IPV4 |
2202                                    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
2203                                    UPT1_RSS_HASH_TYPE_IPV6;
2204                rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
2205                rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
2206                rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
2207                memcpy(rssConf->hashKey, rss_key, sizeof(rss_key));
2208
2209                for (i = 0; i < rssConf->indTableSize; i++)
2210                        rssConf->indTable[i] = ethtool_rxfh_indir_default(
2211                                i, adapter->num_rx_queues);
2212
2213                devRead->rssConfDesc.confVer = 1;
2214                devRead->rssConfDesc.confLen = cpu_to_le32(sizeof(*rssConf));
2215                devRead->rssConfDesc.confPA =
2216                        cpu_to_le64(adapter->rss_conf_pa);
2217        }
2218
2219#endif /* VMXNET3_RSS */
2220
2221        /* intr settings */
2222        devRead->intrConf.autoMask = adapter->intr.mask_mode ==
2223                                     VMXNET3_IMM_AUTO;
2224        devRead->intrConf.numIntrs = adapter->intr.num_intrs;
2225        for (i = 0; i < adapter->intr.num_intrs; i++)
2226                devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
2227
2228        devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
2229        devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
2230
2231        /* rx filter settings */
2232        devRead->rxFilterConf.rxMode = 0;
2233        vmxnet3_restore_vlan(adapter);
2234        vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
2235
2236        /* the rest are already zeroed */
2237}
2238
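/*
 * A standalone sketch (not part of the driver) of the default RSS
 * indirection table fill used above; ethtool_rxfh_indir_default() amounts
 * to a round-robin spread of hash buckets over the rx queues.  The table
 * size and queue count below are example values only.
 */
#if 0
#include <stdio.h>

#define TOY_IND_TABLE_SIZE      16

int main(void)
{
        unsigned int num_rx_queues = 4;
        unsigned char ind_table[TOY_IND_TABLE_SIZE];
        unsigned int i;

        for (i = 0; i < TOY_IND_TABLE_SIZE; i++)
                ind_table[i] = i % num_rx_queues;       /* round robin */

        for (i = 0; i < TOY_IND_TABLE_SIZE; i++)
                printf("hash bucket %2u -> rx queue %u\n", i, ind_table[i]);
        return 0;
}
#endif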
2239
2240int
2241vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
2242{
2243        int err, i;
2244        u32 ret;
2245        unsigned long flags;
2246
2247        netdev_dbg(adapter->netdev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
2248                " ring sizes %u %u %u\n", adapter->netdev->name,
2249                adapter->skb_buf_size, adapter->rx_buf_per_pkt,
2250                adapter->tx_queue[0].tx_ring.size,
2251                adapter->rx_queue[0].rx_ring[0].size,
2252                adapter->rx_queue[0].rx_ring[1].size);
2253
2254        vmxnet3_tq_init_all(adapter);
2255        err = vmxnet3_rq_init_all(adapter);
2256        if (err) {
2257                netdev_err(adapter->netdev,
2258                           "Failed to init rx queue error %d\n", err);
2259                goto rq_err;
2260        }
2261
2262        err = vmxnet3_request_irqs(adapter);
2263        if (err) {
2264                netdev_err(adapter->netdev,
2265                           "Failed to setup irq for error %d\n", err);
2266                goto irq_err;
2267        }
2268
2269        vmxnet3_setup_driver_shared(adapter);
2270
2271        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, VMXNET3_GET_ADDR_LO(
2272                               adapter->shared_pa));
2273        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
2274                               adapter->shared_pa));
2275        spin_lock_irqsave(&adapter->cmd_lock, flags);
2276        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2277                               VMXNET3_CMD_ACTIVATE_DEV);
2278        ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2279        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2280
2281        if (ret != 0) {
2282                netdev_err(adapter->netdev,
2283                           "Failed to activate dev: error %u\n", ret);
2284                err = -EINVAL;
2285                goto activate_err;
2286        }
2287
2288        for (i = 0; i < adapter->num_rx_queues; i++) {
2289                VMXNET3_WRITE_BAR0_REG(adapter,
2290                                VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
2291                                adapter->rx_queue[i].rx_ring[0].next2fill);
2292                VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
2293                                (i * VMXNET3_REG_ALIGN)),
2294                                adapter->rx_queue[i].rx_ring[1].next2fill);
2295        }
2296
2297        /* Apply the rx filter settings last. */
2298        vmxnet3_set_mc(adapter->netdev);
2299
2300        /*
2301         * Check link state when first activating device. It will start the
2302         * tx queue if the link is up.
2303         */
2304        vmxnet3_check_link(adapter, true);
2305        for (i = 0; i < adapter->num_rx_queues; i++)
2306                napi_enable(&adapter->rx_queue[i].napi);
2307        vmxnet3_enable_all_intrs(adapter);
2308        clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
2309        return 0;
2310
2311activate_err:
2312        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0);
2313        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0);
2314        vmxnet3_free_irqs(adapter);
2315irq_err:
2316rq_err:
2317        /* free up buffers we allocated */
2318        vmxnet3_rq_cleanup_all(adapter);
2319        return err;
2320}
2321
2322
2323void
2324vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
2325{
2326        unsigned long flags;
2327        spin_lock_irqsave(&adapter->cmd_lock, flags);
2328        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
2329        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2330}
2331
2332
2333int
2334vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
2335{
2336        int i;
2337        unsigned long flags;
2338        if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
2339                return 0;
2340
2341
2342        spin_lock_irqsave(&adapter->cmd_lock, flags);
2343        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2344                               VMXNET3_CMD_QUIESCE_DEV);
2345        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2346        vmxnet3_disable_all_intrs(adapter);
2347
2348        for (i = 0; i < adapter->num_rx_queues; i++)
2349                napi_disable(&adapter->rx_queue[i].napi);
2350        netif_tx_disable(adapter->netdev);
2351        adapter->link_speed = 0;
2352        netif_carrier_off(adapter->netdev);
2353
2354        vmxnet3_tq_cleanup_all(adapter);
2355        vmxnet3_rq_cleanup_all(adapter);
2356        vmxnet3_free_irqs(adapter);
2357        return 0;
2358}
2359
2360
2361static void
2362vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2363{
2364        u32 tmp;
2365
2366        tmp = *(u32 *)mac;
2367        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp);
2368
2369        tmp = (mac[5] << 8) | mac[4];
2370        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp);
2371}
2372
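/*
 * A standalone sketch (not part of the driver) of how the 6-byte station
 * address maps onto the two registers written above: the first four bytes
 * form MACL and the last two the low half of MACH.  The explicit shifts
 * assume the device takes the bytes in little-endian order, matching the
 * *(u32 *)mac access on a little-endian host; the sample address is
 * arbitrary.
 */
#if 0
#include <stdio.h>

int main(void)
{
        const unsigned char mac[6] = { 0x00, 0x0c, 0x29, 0xaa, 0xbb, 0xcc };
        unsigned int macl, mach;

        macl = (unsigned int)mac[0]       | (unsigned int)mac[1] << 8 |
               (unsigned int)mac[2] << 16 | (unsigned int)mac[3] << 24;
        mach = (unsigned int)mac[4]       | (unsigned int)mac[5] << 8;

        printf("MACL = 0x%08x, MACH = 0x%08x\n", macl, mach);
        return 0;
}
#endif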
2373
2374static int
2375vmxnet3_set_mac_addr(struct net_device *netdev, void *p)
2376{
2377        struct sockaddr *addr = p;
2378        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2379
2380        memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2381        vmxnet3_write_mac_addr(adapter, addr->sa_data);
2382
2383        return 0;
2384}
2385
2386
2387/* ==================== initialization and cleanup routines ============ */
2388
2389static int
2390vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64)
2391{
2392        int err;
2393        unsigned long mmio_start, mmio_len;
2394        struct pci_dev *pdev = adapter->pdev;
2395
2396        err = pci_enable_device(pdev);
2397        if (err) {
2398                dev_err(&pdev->dev, "Failed to enable adapter: error %d\n", err);
2399                return err;
2400        }
2401
2402        if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
2403                if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
2404                        dev_err(&pdev->dev,
2405                                "pci_set_consistent_dma_mask failed\n");
2406                        err = -EIO;
2407                        goto err_set_mask;
2408                }
2409                *dma64 = true;
2410        } else {
2411                if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
2412                        dev_err(&pdev->dev,
2413                                "pci_set_dma_mask failed\n");
2414                        err = -EIO;
2415                        goto err_set_mask;
2416                }
2417                *dma64 = false;
2418        }
2419
2420        err = pci_request_selected_regions(pdev, (1 << 2) - 1,
2421                                           vmxnet3_driver_name);
2422        if (err) {
2423                dev_err(&pdev->dev,
2424                        "Failed to request region for adapter: error %d\n", err);
2425                goto err_set_mask;
2426        }
2427
2428        pci_set_master(pdev);
2429
2430        mmio_start = pci_resource_start(pdev, 0);
2431        mmio_len = pci_resource_len(pdev, 0);
2432        adapter->hw_addr0 = ioremap(mmio_start, mmio_len);
2433        if (!adapter->hw_addr0) {
2434                dev_err(&pdev->dev, "Failed to map bar0\n");
2435                err = -EIO;
2436                goto err_ioremap;
2437        }
2438
2439        mmio_start = pci_resource_start(pdev, 1);
2440        mmio_len = pci_resource_len(pdev, 1);
2441        adapter->hw_addr1 = ioremap(mmio_start, mmio_len);
2442        if (!adapter->hw_addr1) {
2443                dev_err(&pdev->dev, "Failed to map bar1\n");
2444                err = -EIO;
2445                goto err_bar1;
2446        }
2447        return 0;
2448
2449err_bar1:
2450        iounmap(adapter->hw_addr0);
2451err_ioremap:
2452        pci_release_selected_regions(pdev, (1 << 2) - 1);
2453err_set_mask:
2454        pci_disable_device(pdev);
2455        return err;
2456}
2457
2458
2459static void
2460vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
2461{
2462        BUG_ON(!adapter->pdev);
2463
2464        iounmap(adapter->hw_addr0);
2465        iounmap(adapter->hw_addr1);
2466        pci_release_selected_regions(adapter->pdev, (1 << 2) - 1);
2467        pci_disable_device(adapter->pdev);
2468}
2469
2470
2471static void
2472vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
2473{
2474        size_t sz, i, ring0_size, ring1_size, comp_size;
2475        struct vmxnet3_rx_queue *rq = &adapter->rx_queue[0];
2476
2477
2478        if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
2479                                    VMXNET3_MAX_ETH_HDR_SIZE) {
2480                adapter->skb_buf_size = adapter->netdev->mtu +
2481                                        VMXNET3_MAX_ETH_HDR_SIZE;
2482                if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE)
2483                        adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE;
2484
2485                adapter->rx_buf_per_pkt = 1;
2486        } else {
2487                adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE;
2488                sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE +
2489                                            VMXNET3_MAX_ETH_HDR_SIZE;
2490                adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE;
2491        }
2492
2493        /*
2494         * for simplicity, force the ring0 size to be a multiple of
2495         * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
2496         */
2497        sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
2498        ring0_size = adapter->rx_queue[0].rx_ring[0].size;
2499        ring0_size = (ring0_size + sz - 1) / sz * sz;
2500        ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
2501                           sz * sz);
2502        ring1_size = adapter->rx_queue[0].rx_ring[1].size;
2503        comp_size = ring0_size + ring1_size;
2504
2505        for (i = 0; i < adapter->num_rx_queues; i++) {
2506                rq = &adapter->rx_queue[i];
2507                rq->rx_ring[0].size = ring0_size;
2508                rq->rx_ring[1].size = ring1_size;
2509                rq->comp_ring.size = comp_size;
2510        }
2511}
2512
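/*
 * A standalone sketch (not part of the driver) of the ring-0 sizing rule
 * applied above: round the requested size up to a multiple of
 * rx_buf_per_pkt * ring alignment, then clamp to the largest such multiple
 * under the ring maximum.  The TOY_* constants are example values, not the
 * driver's.
 */
#if 0
#include <stdio.h>

#define TOY_RING_SIZE_ALIGN     32
#define TOY_RX_RING_MAX_SIZE    4096

int main(void)
{
        unsigned int rx_buf_per_pkt = 3;        /* e.g. jumbo MTU */
        unsigned int requested = 1000;
        unsigned int sz = rx_buf_per_pkt * TOY_RING_SIZE_ALIGN;
        unsigned int ring0;

        ring0 = (requested + sz - 1) / sz * sz;         /* round up */
        if (ring0 > TOY_RX_RING_MAX_SIZE / sz * sz)     /* clamp */
                ring0 = TOY_RX_RING_MAX_SIZE / sz * sz;

        printf("requested %u -> ring0 size %u (multiple of %u)\n",
               requested, ring0, sz);
        return 0;
}
#endif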
2513
2514int
2515vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
2516                      u32 rx_ring_size, u32 rx_ring2_size)
2517{
2518        int err = 0, i;
2519
2520        for (i = 0; i < adapter->num_tx_queues; i++) {
2521                struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
2522                tq->tx_ring.size   = tx_ring_size;
2523                tq->data_ring.size = tx_ring_size;
2524                tq->comp_ring.size = tx_ring_size;
2525                tq->shared = &adapter->tqd_start[i].ctrl;
2526                tq->stopped = true;
2527                tq->adapter = adapter;
2528                tq->qid = i;
2529                err = vmxnet3_tq_create(tq, adapter);
2530                /*
2531                 * Too late to change num_tx_queues. We cannot make do with
2532                 * fewer queues than we asked for
2533                 */
2534                if (err)
2535                        goto queue_err;
2536        }
2537
2538        adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
2539        adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
2540        vmxnet3_adjust_rx_ring_size(adapter);
2541        for (i = 0; i < adapter->num_rx_queues; i++) {
2542                struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
2543                /* qid and qid2 for rx queues will be assigned later when num
2544                 * of rx queues is finalized after allocating intrs */
2545                rq->shared = &adapter->rqd_start[i].ctrl;
2546                rq->adapter = adapter;
2547                err = vmxnet3_rq_create(rq, adapter);
2548                if (err) {
2549                        if (i == 0) {
2550                                netdev_err(adapter->netdev,
2551                                           "Could not allocate any rx queues. "
2552                                           "Aborting.\n");
2553                                goto queue_err;
2554                        } else {
2555                                netdev_info(adapter->netdev,
2556                                            "Number of rx queues changed "
2557                                            "to : %d.\n", i);
2558                                adapter->num_rx_queues = i;
2559                                err = 0;
2560                                break;
2561                        }
2562                }
2563        }
2564        return err;
2565queue_err:
2566        vmxnet3_tq_destroy_all(adapter);
2567        return err;
2568}
2569
2570static int
2571vmxnet3_open(struct net_device *netdev)
2572{
2573        struct vmxnet3_adapter *adapter;
2574        int err, i;
2575
2576        adapter = netdev_priv(netdev);
2577
2578        for (i = 0; i < adapter->num_tx_queues; i++)
2579                spin_lock_init(&adapter->tx_queue[i].tx_lock);
2580
2581        err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE,
2582                                    VMXNET3_DEF_RX_RING_SIZE,
2583                                    VMXNET3_DEF_RX_RING_SIZE);
2584        if (err)
2585                goto queue_err;
2586
2587        err = vmxnet3_activate_dev(adapter);
2588        if (err)
2589                goto activate_err;
2590
2591        return 0;
2592
2593activate_err:
2594        vmxnet3_rq_destroy_all(adapter);
2595        vmxnet3_tq_destroy_all(adapter);
2596queue_err:
2597        return err;
2598}
2599
2600
2601static int
2602vmxnet3_close(struct net_device *netdev)
2603{
2604        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2605
2606        /*
2607         * Reset_work may be in the middle of resetting the device, wait for its
2608         * completion.
2609         */
2610        while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2611                msleep(1);
2612
2613        vmxnet3_quiesce_dev(adapter);
2614
2615        vmxnet3_rq_destroy_all(adapter);
2616        vmxnet3_tq_destroy_all(adapter);
2617
2618        clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2619
2620
2621        return 0;
2622}
2623
2624
2625void
2626vmxnet3_force_close(struct vmxnet3_adapter *adapter)
2627{
2628        int i;
2629
2630        /*
2631         * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
2632         * vmxnet3_close() will deadlock.
2633         */
2634        BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
2635
2636        /* we need to enable NAPI, otherwise dev_close will deadlock */
2637        for (i = 0; i < adapter->num_rx_queues; i++)
2638                napi_enable(&adapter->rx_queue[i].napi);
2639        dev_close(adapter->netdev);
2640}
2641
2642
2643static int
2644vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
2645{
2646        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2647        int err = 0;
2648
2649        if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU)
2650                return -EINVAL;
2651
2652        netdev->mtu = new_mtu;
2653
2654        /*
2655         * Reset_work may be in the middle of resetting the device, wait for its
2656         * completion.
2657         */
2658        while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2659                msleep(1);
2660
2661        if (netif_running(netdev)) {
2662                vmxnet3_quiesce_dev(adapter);
2663                vmxnet3_reset_dev(adapter);
2664
2665                /* we need to re-create the rx queue based on the new mtu */
2666                vmxnet3_rq_destroy_all(adapter);
2667                vmxnet3_adjust_rx_ring_size(adapter);
2668                err = vmxnet3_rq_create_all(adapter);
2669                if (err) {
2670                        netdev_err(netdev,
2671                                   "failed to re-create rx queues, "
2672                                   " error %d. Closing it.\n", err);
2673                        goto out;
2674                }
2675
2676                err = vmxnet3_activate_dev(adapter);
2677                if (err) {
2678                        netdev_err(netdev,
2679                                   "failed to re-activate, error %d. "
2680                                   "Closing it\n", err);
2681                        goto out;
2682                }
2683        }
2684
2685out:
2686        clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2687        if (err)
2688                vmxnet3_force_close(adapter);
2689
2690        return err;
2691}
2692
2693
2694static void
2695vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
2696{
2697        struct net_device *netdev = adapter->netdev;
2698
2699        netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
2700                NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2701                NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
2702                NETIF_F_LRO;
2703        if (dma64)
2704                netdev->hw_features |= NETIF_F_HIGHDMA;
2705        netdev->vlan_features = netdev->hw_features &
2706                                ~(NETIF_F_HW_VLAN_CTAG_TX |
2707                                  NETIF_F_HW_VLAN_CTAG_RX);
2708        netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
2709}
2710
2711
2712static void
2713vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
2714{
2715        u32 tmp;
2716
2717        tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL);
2718        *(u32 *)mac = tmp;
2719
2720        tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH);
2721        mac[4] = tmp & 0xff;
2722        mac[5] = (tmp >> 8) & 0xff;
2723}
2724
2725#ifdef CONFIG_PCI_MSI
2726
2727/*
2728 * Enable MSIx vectors.
2729 * Returns :
2730 *      0 on successful enabling of required vectors,
2731 *      VMXNET3_LINUX_MIN_MSIX_VECT when only minimum number of vectors required
2732 *       could be enabled.
2733 *      number of vectors which can be enabled otherwise (this number is smaller
2734 *       than VMXNET3_LINUX_MIN_MSIX_VECT)
2735 */
2736
2737static int
2738vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter,
2739                             int vectors)
2740{
2741        int err = 0, vector_threshold;
2742        vector_threshold = VMXNET3_LINUX_MIN_MSIX_VECT;
2743
2744        while (vectors >= vector_threshold) {
2745                err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
2746                                      vectors);
2747                if (!err) {
2748                        adapter->intr.num_intrs = vectors;
2749                        return 0;
2750                } else if (err < 0) {
2751                        dev_err(&adapter->netdev->dev,
2752                                   "Failed to enable MSI-X, error: %d\n", err);
2753                        vectors = 0;
2754                } else if (err < vector_threshold) {
2755                        break;
2756                } else {
2757                        /* If we fail to enable the required number of MSI-X
2758                         * vectors, try enabling the minimum number required.
2759                         */
2760                        dev_err(&adapter->netdev->dev,
2761                                "Failed to enable %d MSI-X, trying %d instead\n",
2762                                    vectors, vector_threshold);
2763                        vectors = vector_threshold;
2764                }
2765        }
2766
2767        dev_info(&adapter->pdev->dev,
2768                 "Number of MSI-X interrupts which can be allocated "
2769                 "is lower than min threshold required.\n");
2770        return err;
2771}
2772
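/*
 * A standalone sketch (not part of the driver) of the retry policy above,
 * following the old pci_enable_msix() contract: 0 means success, a negative
 * value is a hard error, and a positive value reports how many vectors
 * could have been allocated.  toy_try_enable() and the TOY_* constants are
 * hypothetical stand-ins.
 */
#if 0
#include <stdio.h>

#define TOY_MIN_VECT    2               /* stand-in minimum vector count */
#define TOY_AVAILABLE   3               /* pretend 3 vectors exist */

static int toy_try_enable(int want)
{
        if (want <= TOY_AVAILABLE)
                return 0;               /* success */
        return TOY_AVAILABLE;           /* only this many are possible */
}

int main(void)
{
        int want = 9, err = 0;

        while (want >= TOY_MIN_VECT) {
                err = toy_try_enable(want);
                if (!err) {
                        printf("enabled %d vectors\n", want);
                        return 0;
                } else if (err < 0) {
                        want = 0;               /* hard failure, give up */
                } else if (err < TOY_MIN_VECT) {
                        break;                  /* even the minimum won't fit */
                } else {
                        printf("%d not available, retrying with %d\n",
                               want, TOY_MIN_VECT);
                        want = TOY_MIN_VECT;
                }
        }
        printf("fewer than %d vectors available\n", TOY_MIN_VECT);
        return err;
}
#endif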
2773
2774#endif /* CONFIG_PCI_MSI */
2775
2776static void
2777vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
2778{
2779        u32 cfg;
2780        unsigned long flags;
2781
2782        /* intr settings */
2783        spin_lock_irqsave(&adapter->cmd_lock, flags);
2784        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
2785                               VMXNET3_CMD_GET_CONF_INTR);
2786        cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
2787        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
2788        adapter->intr.type = cfg & 0x3;
2789        adapter->intr.mask_mode = (cfg >> 2) & 0x3;
2790
2791        if (adapter->intr.type == VMXNET3_IT_AUTO) {
2792                adapter->intr.type = VMXNET3_IT_MSIX;
2793        }
2794
2795#ifdef CONFIG_PCI_MSI
2796        if (adapter->intr.type == VMXNET3_IT_MSIX) {
2797                int vector, err = 0;
2798
2799                adapter->intr.num_intrs = (adapter->share_intr ==
2800                                           VMXNET3_INTR_TXSHARE) ? 1 :
2801                                           adapter->num_tx_queues;
2802                adapter->intr.num_intrs += (adapter->share_intr ==
2803                                           VMXNET3_INTR_BUDDYSHARE) ? 0 :
2804                                           adapter->num_rx_queues;
2805                adapter->intr.num_intrs += 1;           /* for link event */
2806
2807                adapter->intr.num_intrs = (adapter->intr.num_intrs >
2808                                           VMXNET3_LINUX_MIN_MSIX_VECT
2809                                           ? adapter->intr.num_intrs :
2810                                           VMXNET3_LINUX_MIN_MSIX_VECT);
2811
2812                for (vector = 0; vector < adapter->intr.num_intrs; vector++)
2813                        adapter->intr.msix_entries[vector].entry = vector;
2814
2815                err = vmxnet3_acquire_msix_vectors(adapter,
2816                                                   adapter->intr.num_intrs);
2817                /* If we cannot allocate one MSI-X vector per queue,
2818                 * limit the number of rx queues to 1.
2819                 */
2820                if (err == VMXNET3_LINUX_MIN_MSIX_VECT) {
2821                        if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
2822                            || adapter->num_rx_queues != 1) {
2823                                adapter->share_intr = VMXNET3_INTR_TXSHARE;
2824                                netdev_err(adapter->netdev,
2825                                           "Limiting number of rx queues to 1\n");
2826                                adapter->num_rx_queues = 1;
2827                                adapter->intr.num_intrs =
2828                                                VMXNET3_LINUX_MIN_MSIX_VECT;
2829                        }
2830                        return;
2831                }
2832                if (!err)
2833                        return;
2834
2835                /* If we cannot allocate MSI-X vectors, use only one rx queue */
2836                dev_info(&adapter->pdev->dev,
2837                         "Failed to enable MSI-X, error %d. "
2838                         "Limiting #rx queues to 1, trying MSI instead.\n", err);
2839
2840                adapter->intr.type = VMXNET3_IT_MSI;
2841        }
2842
2843        if (adapter->intr.type == VMXNET3_IT_MSI) {
2844                int err;
2845                err = pci_enable_msi(adapter->pdev);
2846                if (!err) {
2847                        adapter->num_rx_queues = 1;
2848                        adapter->intr.num_intrs = 1;
2849                        return;
2850                }
2851        }
2852#endif /* CONFIG_PCI_MSI */
2853
2854        adapter->num_rx_queues = 1;
2855        dev_info(&adapter->netdev->dev,
2856                 "Using INTx interrupt, #Rx queues: 1.\n");
2857        adapter->intr.type = VMXNET3_IT_INTX;
2858
2859        /* INTx related setting */
2860        adapter->intr.num_intrs = 1;
2861}
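
/*
 * Worked example of the vector accounting above, assuming 4 tx and 4 rx
 * queues:
 *
 *	VMXNET3_INTR_DONTSHARE:   4 (tx) + 4 (rx) + 1 (event) = 9 vectors
 *	VMXNET3_INTR_BUDDYSHARE:  4 (tx/rx pairs)  + 1 (event) = 5 vectors
 *	VMXNET3_INTR_TXSHARE:     1 (all tx) + 4 (rx) + 1 (event) = 6 vectors
 *
 * with VMXNET3_LINUX_MIN_MSIX_VECT as the floor.  If MSI-X cannot be
 * enabled at all, the function falls back to MSI and finally to INTx,
 * limiting the device to a single rx queue in both cases.
 */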
2862
2863
2864static void
2865vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
2866{
2867        if (adapter->intr.type == VMXNET3_IT_MSIX)
2868                pci_disable_msix(adapter->pdev);
2869        else if (adapter->intr.type == VMXNET3_IT_MSI)
2870                pci_disable_msi(adapter->pdev);
2871        else
2872                BUG_ON(adapter->intr.type != VMXNET3_IT_INTX);
2873}
2874
2875
2876static void
2877vmxnet3_tx_timeout(struct net_device *netdev)
2878{
2879        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
2880        adapter->tx_timeout_count++;
2881
2882        netdev_err(adapter->netdev, "tx hang\n");
2883        schedule_work(&adapter->work);
2884        netif_wake_queue(adapter->netdev);
2885}
2886
2887
2888static void
2889vmxnet3_reset_work(struct work_struct *data)
2890{
2891        struct vmxnet3_adapter *adapter;
2892
2893        adapter = container_of(data, struct vmxnet3_adapter, work);
2894
2895        /* if another thread is resetting the device, no need to proceed */
2896        if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
2897                return;
2898
2899        /* if the device is closed, we must leave it alone */
2900        rtnl_lock();
2901        if (netif_running(adapter->netdev)) {
2902                netdev_notice(adapter->netdev, "resetting\n");
2903                vmxnet3_quiesce_dev(adapter);
2904                vmxnet3_reset_dev(adapter);
2905                vmxnet3_activate_dev(adapter);
2906        } else {
2907                netdev_info(adapter->netdev, "already closed\n");
2908        }
2909        rtnl_unlock();
2910
2911        clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
2912}
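
/*
 * Resets triggered from other contexts in the driver funnel through this
 * work item; vmxnet3_tx_timeout() above, for example, only does
 *
 *	schedule_work(&adapter->work);
 *
 * and the test_and_set_bit() on VMXNET3_STATE_BIT_RESETTING collapses
 * back-to-back triggers into a single quiesce/reset/activate cycle.
 */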
2913
2914
2915static int
2916vmxnet3_probe_device(struct pci_dev *pdev,
2917                     const struct pci_device_id *id)
2918{
2919        static const struct net_device_ops vmxnet3_netdev_ops = {
2920                .ndo_open = vmxnet3_open,
2921                .ndo_stop = vmxnet3_close,
2922                .ndo_start_xmit = vmxnet3_xmit_frame,
2923                .ndo_set_mac_address = vmxnet3_set_mac_addr,
2924                .ndo_change_mtu = vmxnet3_change_mtu,
2925                .ndo_set_features = vmxnet3_set_features,
2926                .ndo_get_stats64 = vmxnet3_get_stats64,
2927                .ndo_tx_timeout = vmxnet3_tx_timeout,
2928                .ndo_set_rx_mode = vmxnet3_set_mc,
2929                .ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
2930                .ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
2931#ifdef CONFIG_NET_POLL_CONTROLLER
2932                .ndo_poll_controller = vmxnet3_netpoll,
2933#endif
2934        };
2935        int err;
2936        bool dma64 = false; /* silence a spurious "uninitialized" warning */
2937        u32 ver;
2938        struct net_device *netdev;
2939        struct vmxnet3_adapter *adapter;
2940        u8 mac[ETH_ALEN];
2941        int size;
2942        int num_tx_queues;
2943        int num_rx_queues;
2944
2945        if (!pci_msi_enabled())
2946                enable_mq = 0;
2947
2948#ifdef VMXNET3_RSS
2949        if (enable_mq)
2950                num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
2951                                    (int)num_online_cpus());
2952        else
2953#endif
2954                num_rx_queues = 1;
2955        num_rx_queues = rounddown_pow_of_two(num_rx_queues);
2956
2957        if (enable_mq)
2958                num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
2959                                    (int)num_online_cpus());
2960        else
2961                num_tx_queues = 1;
2962
2963        num_tx_queues = rounddown_pow_of_two(num_tx_queues);
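        /* Example of the queue sizing above: with 6 online CPUs and a
         * device maximum of at least 6 queues, min() gives 6 and
         * rounddown_pow_of_two(6) gives 4, so 4 tx and 4 rx queues are
         * requested; with enable_mq == 0 both counts stay at 1.
         */
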
2964        netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
2965                                   max(num_tx_queues, num_rx_queues));
2966        dev_info(&pdev->dev,
2967                 "# of Tx queues : %d, # of Rx queues : %d\n",
2968                 num_tx_queues, num_rx_queues);
2969
2970        if (!netdev)
2971                return -ENOMEM;
2972
2973        pci_set_drvdata(pdev, netdev);
2974        adapter = netdev_priv(netdev);
2975        adapter->netdev = netdev;
2976        adapter->pdev = pdev;
2977
2978        spin_lock_init(&adapter->cmd_lock);
2979        adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
2980                                             sizeof(struct vmxnet3_adapter),
2981                                             PCI_DMA_TODEVICE);
2982        adapter->shared = dma_alloc_coherent(
2983                                &adapter->pdev->dev,
2984                                sizeof(struct Vmxnet3_DriverShared),
2985                                &adapter->shared_pa, GFP_KERNEL);
2986        if (!adapter->shared) {
2987                dev_err(&pdev->dev, "Failed to allocate memory\n");
2988                err = -ENOMEM;
2989                goto err_alloc_shared;
2990        }
2991
2992        adapter->num_rx_queues = num_rx_queues;
2993        adapter->num_tx_queues = num_tx_queues;
2994        adapter->rx_buf_per_pkt = 1;
2995
2996        size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
2997        size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
2998        adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size,
2999                                                &adapter->queue_desc_pa,
3000                                                GFP_KERNEL);
3001
3002        if (!adapter->tqd_start) {
3003                dev_err(&pdev->dev, "Failed to allocate memory\n");
3004                err = -ENOMEM;
3005                goto err_alloc_queue_desc;
3006        }
3007        adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
3008                                                            adapter->num_tx_queues);
3009
3010        adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev,
3011                                              sizeof(struct Vmxnet3_PMConf),
3012                                              &adapter->pm_conf_pa,
3013                                              GFP_KERNEL);
3014        if (adapter->pm_conf == NULL) {
3015                err = -ENOMEM;
3016                goto err_alloc_pm;
3017        }
3018
3019#ifdef VMXNET3_RSS
3020
3021        adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev,
3022                                               sizeof(struct UPT1_RSSConf),
3023                                               &adapter->rss_conf_pa,
3024                                               GFP_KERNEL);
3025        if (adapter->rss_conf == NULL) {
3026                err = -ENOMEM;
3027                goto err_alloc_rss;
3028        }
3029#endif /* VMXNET3_RSS */
3030
3031        err = vmxnet3_alloc_pci_resources(adapter, &dma64);
3032        if (err < 0)
3033                goto err_alloc_pci;
3034
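        /* Version handshake: VRRS and UVRS expose bitmaps of the hardware
         * and UPT revisions the device supports (bit 0 == revision 1);
         * the driver acknowledges the revision it selects by writing that
         * bit back.
         */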
3035        ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
3036        if (ver & 1) {
3037                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1);
3038        } else {
3039                dev_err(&pdev->dev,
3040                        "Incompatible h/w version (0x%x) for adapter\n", ver);
3041                err = -EBUSY;
3042                goto err_ver;
3043        }
3044
3045        ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS);
3046        if (ver & 1) {
3047                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1);
3048        } else {
3049                dev_err(&pdev->dev,
3050                        "Incompatible upt version (0x%x) for adapter\n", ver);
3051                err = -EBUSY;
3052                goto err_ver;
3053        }
3054
3055        SET_NETDEV_DEV(netdev, &pdev->dev);
3056        vmxnet3_declare_features(adapter, dma64);
3057
3058        if (adapter->num_tx_queues == adapter->num_rx_queues)
3059                adapter->share_intr = VMXNET3_INTR_BUDDYSHARE;
3060        else
3061                adapter->share_intr = VMXNET3_INTR_DONTSHARE;
3062
3063        vmxnet3_alloc_intr_resources(adapter);
3064
3065#ifdef VMXNET3_RSS
3066        if (adapter->num_rx_queues > 1 &&
3067            adapter->intr.type == VMXNET3_IT_MSIX) {
3068                adapter->rss = true;
3069                netdev->hw_features |= NETIF_F_RXHASH;
3070                netdev->features |= NETIF_F_RXHASH;
3071                dev_dbg(&pdev->dev, "RSS is enabled.\n");
3072        } else {
3073                adapter->rss = false;
3074        }
3075#endif
3076
3077        vmxnet3_read_mac_addr(adapter, mac);
3078        memcpy(netdev->dev_addr,  mac, netdev->addr_len);
3079
3080        netdev->netdev_ops = &vmxnet3_netdev_ops;
3081        vmxnet3_set_ethtool_ops(netdev);
3082        netdev->watchdog_timeo = 5 * HZ;
3083
3084        INIT_WORK(&adapter->work, vmxnet3_reset_work);
3085        set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
3086
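        /* With MSI-X each rx queue has its own vector, so each rx queue gets
         * a dedicated NAPI context that polls rx completions only; with MSI
         * or INTx a single NAPI context on rx queue 0 services the device.
         */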
3087        if (adapter->intr.type == VMXNET3_IT_MSIX) {
3088                int i;
3089                for (i = 0; i < adapter->num_rx_queues; i++) {
3090                        netif_napi_add(adapter->netdev,
3091                                       &adapter->rx_queue[i].napi,
3092                                       vmxnet3_poll_rx_only, 64);
3093                }
3094        } else {
3095                netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
3096                               vmxnet3_poll, 64);
3097        }
3098
3099        netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
3100        netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
3101
3102        netif_carrier_off(netdev);
3103        err = register_netdev(netdev);
3104
3105        if (err) {
3106                dev_err(&pdev->dev, "Failed to register adapter\n");
3107                goto err_register;
3108        }
3109
3110        vmxnet3_check_link(adapter, false);
3111        return 0;
3112
3113err_register:
3114        vmxnet3_free_intr_resources(adapter);
3115err_ver:
3116        vmxnet3_free_pci_resources(adapter);
3117err_alloc_pci:
3118#ifdef VMXNET3_RSS
3119        dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3120                          adapter->rss_conf, adapter->rss_conf_pa);
3121err_alloc_rss:
3122#endif
3123        dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3124                          adapter->pm_conf, adapter->pm_conf_pa);
3125err_alloc_pm:
3126        dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3127                          adapter->queue_desc_pa);
3128err_alloc_queue_desc:
3129        dma_free_coherent(&adapter->pdev->dev,
3130                          sizeof(struct Vmxnet3_DriverShared),
3131                          adapter->shared, adapter->shared_pa);
3132err_alloc_shared:
3133        dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3134                         sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3135        pci_set_drvdata(pdev, NULL);
3136        free_netdev(netdev);
3137        return err;
3138}
3139
3140
3141static void
3142vmxnet3_remove_device(struct pci_dev *pdev)
3143{
3144        struct net_device *netdev = pci_get_drvdata(pdev);
3145        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3146        int size = 0;
3147        int num_rx_queues;
3148
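        /* Recompute num_rx_queues exactly as vmxnet3_probe_device() did so
         * that the queue descriptor area freed below is the same size as
         * the one allocated at probe time.
         */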
3149#ifdef VMXNET3_RSS
3150        if (enable_mq)
3151                num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
3152                                    (int)num_online_cpus());
3153        else
3154#endif
3155                num_rx_queues = 1;
3156        num_rx_queues = rounddown_pow_of_two(num_rx_queues);
3157
3158        cancel_work_sync(&adapter->work);
3159
3160        unregister_netdev(netdev);
3161
3162        vmxnet3_free_intr_resources(adapter);
3163        vmxnet3_free_pci_resources(adapter);
3164#ifdef VMXNET3_RSS
3165        dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
3166                          adapter->rss_conf, adapter->rss_conf_pa);
3167#endif
3168        dma_free_coherent(&adapter->pdev->dev, sizeof(struct Vmxnet3_PMConf),
3169                          adapter->pm_conf, adapter->pm_conf_pa);
3170
3171        size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
3172        size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
3173        dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
3174                          adapter->queue_desc_pa);
3175        dma_free_coherent(&adapter->pdev->dev,
3176                          sizeof(struct Vmxnet3_DriverShared),
3177                          adapter->shared, adapter->shared_pa);
3178        dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
3179                         sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
3180        free_netdev(netdev);
3181}
3182
3183
3184#ifdef CONFIG_PM
3185
3186static int
3187vmxnet3_suspend(struct device *device)
3188{
3189        struct pci_dev *pdev = to_pci_dev(device);
3190        struct net_device *netdev = pci_get_drvdata(pdev);
3191        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3192        struct Vmxnet3_PMConf *pmConf;
3193        struct ethhdr *ehdr;
3194        struct arphdr *ahdr;
3195        u8 *arpreq;
3196        struct in_device *in_dev;
3197        struct in_ifaddr *ifa;
3198        unsigned long flags;
3199        int i = 0;
3200
3201        if (!netif_running(netdev))
3202                return 0;
3203
3204        for (i = 0; i < adapter->num_rx_queues; i++)
3205                napi_disable(&adapter->rx_queue[i].napi);
3206
3207        vmxnet3_disable_all_intrs(adapter);
3208        vmxnet3_free_irqs(adapter);
3209        vmxnet3_free_intr_resources(adapter);
3210
3211        netif_device_detach(netdev);
3212        netif_tx_stop_all_queues(netdev);
3213
3214        /* Create wake-up filters. */
3215        pmConf = adapter->pm_conf;
3216        memset(pmConf, 0, sizeof(*pmConf));
3217
3218        if (adapter->wol & WAKE_UCAST) {
3219                pmConf->filters[i].patternSize = ETH_ALEN;
3220                pmConf->filters[i].maskSize = 1;
3221                memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
3222                pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
3223
3224                pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3225                i++;
3226        }
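        /* Each bit of a filter's mask[] covers one byte of its pattern[]:
         * 0x3F == 0b00111111 marks the first ETH_ALEN (6) pattern bytes,
         * i.e. the unicast MAC copied in above, as significant.
         */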
3227
3228        if (adapter->wol & WAKE_ARP) {
3229                in_dev = in_dev_get(netdev);
3230                if (!in_dev)
3231                        goto skip_arp;
3232
3233                ifa = (struct in_ifaddr *)in_dev->ifa_list;
3234                if (!ifa)
3235                        goto skip_arp;
3236
3237                pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header */
3238                        sizeof(struct arphdr) +         /* ARP header */
3239                        2 * ETH_ALEN +          /* 2 Ethernet addresses */
3240                        2 * sizeof(u32);        /* 2 IPv4 addresses */
3241                pmConf->filters[i].maskSize =
3242                        (pmConf->filters[i].patternSize - 1) / 8 + 1;
3243
3244                /* ETH_P_ARP in Ethernet header. */
3245                ehdr = (struct ethhdr *)pmConf->filters[i].pattern;
3246                ehdr->h_proto = htons(ETH_P_ARP);
3247
3248                /* ARPOP_REQUEST in ARP header. */
3249                ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN];
3250                ahdr->ar_op = htons(ARPOP_REQUEST);
3251                arpreq = (u8 *)(ahdr + 1);
3252
3253                /* The Unicast IPv4 address in 'tip' field. */
3254                arpreq += 2 * ETH_ALEN + sizeof(u32);
3255                *(u32 *)arpreq = ifa->ifa_address;
3256
3257                /* The mask for the relevant bits. */
3258                pmConf->filters[i].mask[0] = 0x00;
3259                pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */
3260                pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */
3261                pmConf->filters[i].mask[3] = 0x00;
3262                pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */
3263                pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
3264                in_dev_put(in_dev);
3265
3266                pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
3267                i++;
3268        }
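        /* Pattern byte offsets behind the masks above (one mask bit per
         * pattern byte): bytes 12-13 hold the Ethernet type (mask[1] = 0x30),
         * bytes 20-21 the ARP opcode (mask[2] = 0x30), and bytes 38-41 the
         * target IPv4 address (mask[4] = 0xC0, mask[5] = 0x03).
         */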
3269
3270skip_arp:
3271        if (adapter->wol & WAKE_MAGIC)
3272                pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
3273
3274        pmConf->numFilters = i;
3275
3276        adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3277        adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3278                                                                  *pmConf));
3279        adapter->shared->devRead.pmConfDesc.confPA =
3280                cpu_to_le64(adapter->pm_conf_pa);
3281
3282        spin_lock_irqsave(&adapter->cmd_lock, flags);
3283        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3284                               VMXNET3_CMD_UPDATE_PMCFG);
3285        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3286
3287        pci_save_state(pdev);
3288        pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
3289                        adapter->wol);
3290        pci_disable_device(pdev);
3291        pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND));
3292
3293        return 0;
3294}
3295
3296
3297static int
3298vmxnet3_resume(struct device *device)
3299{
3300        int err, i = 0;
3301        unsigned long flags;
3302        struct pci_dev *pdev = to_pci_dev(device);
3303        struct net_device *netdev = pci_get_drvdata(pdev);
3304        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
3305        struct Vmxnet3_PMConf *pmConf;
3306
3307        if (!netif_running(netdev))
3308                return 0;
3309
3310        /* Destroy wake-up filters. */
3311        pmConf = adapter->pm_conf;
3312        memset(pmConf, 0, sizeof(*pmConf));
3313
3314        adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
3315        adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
3316                                                                  *pmConf));
3317        adapter->shared->devRead.pmConfDesc.confPA =
3318                cpu_to_le64(adapter->pm_conf_pa);
3319
3320        netif_device_attach(netdev);
3321        pci_set_power_state(pdev, PCI_D0);
3322        pci_restore_state(pdev);
3323        err = pci_enable_device_mem(pdev);
3324        if (err != 0)
3325                return err;
3326
3327        pci_enable_wake(pdev, PCI_D0, 0);
3328
3329        spin_lock_irqsave(&adapter->cmd_lock, flags);
3330        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
3331                               VMXNET3_CMD_UPDATE_PMCFG);
3332        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
3333        vmxnet3_alloc_intr_resources(adapter);
3334        vmxnet3_request_irqs(adapter);
3335        for (i = 0; i < adapter->num_rx_queues; i++)
3336                napi_enable(&adapter->rx_queue[i].napi);
3337        vmxnet3_enable_all_intrs(adapter);
3338
3339        return 0;
3340}
3341
3342static const struct dev_pm_ops vmxnet3_pm_ops = {
3343        .suspend = vmxnet3_suspend,
3344        .resume = vmxnet3_resume,
3345};
3346#endif
3347
3348static struct pci_driver vmxnet3_driver = {
3349        .name           = vmxnet3_driver_name,
3350        .id_table       = vmxnet3_pciid_table,
3351        .probe          = vmxnet3_probe_device,
3352        .remove         = vmxnet3_remove_device,
3353#ifdef CONFIG_PM
3354        .driver.pm      = &vmxnet3_pm_ops,
3355#endif
3356};
3357
3358
3359static int __init
3360vmxnet3_init_module(void)
3361{
3362        pr_info("%s - version %s\n", VMXNET3_DRIVER_DESC,
3363                VMXNET3_DRIVER_VERSION_REPORT);
3364        return pci_register_driver(&vmxnet3_driver);
3365}
3366
3367module_init(vmxnet3_init_module);
3368
3369
3370static void
3371vmxnet3_exit_module(void)
3372{
3373        pci_unregister_driver(&vmxnet3_driver);
3374}
3375
3376module_exit(vmxnet3_exit_module);
3377
3378MODULE_AUTHOR("VMware, Inc.");
3379MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC);
3380MODULE_LICENSE("GPL v2");
3381MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING);
3382