linux/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
<<
>>
Prefs
   1// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
   2/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
   3
   4/*
   5 * nfp_net_common.c
   6 * Netronome network device driver: Common functions between PF and VF
   7 * Authors: Jakub Kicinski <jakub.kicinski@netronome.com>
   8 *          Jason McMullan <jason.mcmullan@netronome.com>
   9 *          Rolf Neugebauer <rolf.neugebauer@netronome.com>
  10 *          Brad Petrus <brad.petrus@netronome.com>
  11 *          Chris Telfer <chris.telfer@netronome.com>
  12 */
  13
  14#include <linux/bitfield.h>
  15#include <linux/bpf.h>
  16#include <linux/bpf_trace.h>
  17#include <linux/module.h>
  18#include <linux/kernel.h>
  19#include <linux/init.h>
  20#include <linux/fs.h>
  21#include <linux/netdevice.h>
  22#include <linux/etherdevice.h>
  23#include <linux/interrupt.h>
  24#include <linux/ip.h>
  25#include <linux/ipv6.h>
  26#include <linux/mm.h>
  27#include <linux/overflow.h>
  28#include <linux/page_ref.h>
  29#include <linux/pci.h>
  30#include <linux/pci_regs.h>
  31#include <linux/msi.h>
  32#include <linux/ethtool.h>
  33#include <linux/log2.h>
  34#include <linux/if_vlan.h>
  35#include <linux/random.h>
  36#include <linux/vmalloc.h>
  37#include <linux/ktime.h>
  38
  39#include <net/tls.h>
  40#include <net/vxlan.h>
  41
  42#include "nfpcore/nfp_nsp.h"
  43#include "ccm.h"
  44#include "nfp_app.h"
  45#include "nfp_net_ctrl.h"
  46#include "nfp_net.h"
  47#include "nfp_net_sriov.h"
  48#include "nfp_port.h"
  49#include "crypto/crypto.h"
  50
  51/**
  52 * nfp_net_get_fw_version() - Read and parse the FW version
  53 * @fw_ver:     Output fw_version structure to read to
  54 * @ctrl_bar:   Mapped address of the control BAR
  55 */
  56void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
  57                            void __iomem *ctrl_bar)
  58{
  59        u32 reg;
  60
  61        reg = readl(ctrl_bar + NFP_NET_CFG_VERSION);
  62        put_unaligned_le32(reg, fw_ver);
  63}
  64
  65static dma_addr_t nfp_net_dma_map_rx(struct nfp_net_dp *dp, void *frag)
  66{
  67        return dma_map_single_attrs(dp->dev, frag + NFP_NET_RX_BUF_HEADROOM,
  68                                    dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
  69                                    dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
  70}
  71
  72static void
  73nfp_net_dma_sync_dev_rx(const struct nfp_net_dp *dp, dma_addr_t dma_addr)
  74{
  75        dma_sync_single_for_device(dp->dev, dma_addr,
  76                                   dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
  77                                   dp->rx_dma_dir);
  78}
  79
  80static void nfp_net_dma_unmap_rx(struct nfp_net_dp *dp, dma_addr_t dma_addr)
  81{
  82        dma_unmap_single_attrs(dp->dev, dma_addr,
  83                               dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
  84                               dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
  85}
  86
  87static void nfp_net_dma_sync_cpu_rx(struct nfp_net_dp *dp, dma_addr_t dma_addr,
  88                                    unsigned int len)
  89{
  90        dma_sync_single_for_cpu(dp->dev, dma_addr - NFP_NET_RX_BUF_HEADROOM,
  91                                len, dp->rx_dma_dir);
  92}
  93
  94/* Firmware reconfig
  95 *
  96 * Firmware reconfig may take a while so we have two versions of it -
  97 * synchronous and asynchronous (posted).  All synchronous callers are holding
  98 * RTNL so we don't have to worry about serializing them.
  99 */
  /* Issue a reconfig request: write @update into the NFP_NET_CFG_UPDATE word,
   * flush the write, then add one to the config queue write pointer.
   * Does not wait for the firmware to respond.
   */
  100static void nfp_net_reconfig_start(struct nfp_net *nn, u32 update)
  101{
  102        nn_writel(nn, NFP_NET_CFG_UPDATE, update);
  103        /* ensure update is written before pinging HW */
  104        nn_pci_flush(nn);
  105        nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1);
             /* kept so error/timeout messages can report what was requested */
  106        nn->reconfig_in_progress_update = update;
  107}
 108
 109/* Pass 0 as update to run posted reconfigs. */
 110static void nfp_net_reconfig_start_async(struct nfp_net *nn, u32 update)
 111{
 112        update |= nn->reconfig_posted;
 113        nn->reconfig_posted = 0;
 114
 115        nfp_net_reconfig_start(nn, update);
 116
 117        nn->reconfig_timer_active = true;
 118        mod_timer(&nn->reconfig_timer, jiffies + NFP_NET_POLL_TIMEOUT * HZ);
 119}
 120
 121static bool nfp_net_reconfig_check_done(struct nfp_net *nn, bool last_check)
 122{
 123        u32 reg;
 124
 125        reg = nn_readl(nn, NFP_NET_CFG_UPDATE);
 126        if (reg == 0)
 127                return true;
 128        if (reg & NFP_NET_CFG_UPDATE_ERR) {
 129                nn_err(nn, "Reconfig error (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n",
 130                       reg, nn->reconfig_in_progress_update,
 131                       nn_readl(nn, NFP_NET_CFG_CTRL));
 132                return true;
 133        } else if (last_check) {
 134                nn_err(nn, "Reconfig timeout (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n",
 135                       reg, nn->reconfig_in_progress_update,
 136                       nn_readl(nn, NFP_NET_CFG_CTRL));
 137                return true;
 138        }
 139
 140        return false;
 141}
 142
 143static bool __nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
 144{
 145        bool timed_out = false;
 146        int i;
 147
 148        /* Poll update field, waiting for NFP to ack the config.
 149         * Do an opportunistic wait-busy loop, afterward sleep.
 150         */
 151        for (i = 0; i < 50; i++) {
 152                if (nfp_net_reconfig_check_done(nn, false))
 153                        return false;
 154                udelay(4);
 155        }
 156
 157        while (!nfp_net_reconfig_check_done(nn, timed_out)) {
 158                usleep_range(250, 500);
 159                timed_out = time_is_before_eq_jiffies(deadline);
 160        }
 161
 162        return timed_out;
 163}
 164
 165static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
 166{
 167        if (__nfp_net_reconfig_wait(nn, deadline))
 168                return -EIO;
 169
 170        if (nn_readl(nn, NFP_NET_CFG_UPDATE) & NFP_NET_CFG_UPDATE_ERR)
 171                return -EIO;
 172
 173        return 0;
 174}
 175
 176static void nfp_net_reconfig_timer(struct timer_list *t)
 177{
 178        struct nfp_net *nn = from_timer(nn, t, reconfig_timer);
 179
 180        spin_lock_bh(&nn->reconfig_lock);
 181
 182        nn->reconfig_timer_active = false;
 183
 184        /* If sync caller is present it will take over from us */
 185        if (nn->reconfig_sync_present)
 186                goto done;
 187
 188        /* Read reconfig status and report errors */
 189        nfp_net_reconfig_check_done(nn, true);
 190
 191        if (nn->reconfig_posted)
 192                nfp_net_reconfig_start_async(nn, 0);
 193done:
 194        spin_unlock_bh(&nn->reconfig_lock);
 195}
 196
 197/**
 198 * nfp_net_reconfig_post() - Post async reconfig request
 199 * @nn:      NFP Net device to reconfigure
 200 * @update:  The value for the update field in the BAR config
 201 *
 202 * Record FW reconfiguration request.  Reconfiguration will be kicked off
 203 * whenever reconfiguration machinery is idle.  Multiple requests can be
 204 * merged together!
 205 */
 206static void nfp_net_reconfig_post(struct nfp_net *nn, u32 update)
 207{
 208        spin_lock_bh(&nn->reconfig_lock);
 209
 210        /* Sync caller will kick off async reconf when it's done, just post */
 211        if (nn->reconfig_sync_present) {
 212                nn->reconfig_posted |= update;
 213                goto done;
 214        }
 215
 216        /* Opportunistically check if the previous command is done */
 217        if (!nn->reconfig_timer_active ||
 218            nfp_net_reconfig_check_done(nn, false))
 219                nfp_net_reconfig_start_async(nn, update);
 220        else
 221                nn->reconfig_posted |= update;
 222done:
 223        spin_unlock_bh(&nn->reconfig_lock);
 224}
 225
 226static void nfp_net_reconfig_sync_enter(struct nfp_net *nn)
 227{
 228        bool cancelled_timer = false;
 229        u32 pre_posted_requests;
 230
 231        spin_lock_bh(&nn->reconfig_lock);
 232
 233        WARN_ON(nn->reconfig_sync_present);
 234        nn->reconfig_sync_present = true;
 235
 236        if (nn->reconfig_timer_active) {
 237                nn->reconfig_timer_active = false;
 238                cancelled_timer = true;
 239        }
 240        pre_posted_requests = nn->reconfig_posted;
 241        nn->reconfig_posted = 0;
 242
 243        spin_unlock_bh(&nn->reconfig_lock);
 244
 245        if (cancelled_timer) {
 246                del_timer_sync(&nn->reconfig_timer);
 247                nfp_net_reconfig_wait(nn, nn->reconfig_timer.expires);
 248        }
 249
 250        /* Run the posted reconfigs which were issued before we started */
 251        if (pre_posted_requests) {
 252                nfp_net_reconfig_start(nn, pre_posted_requests);
 253                nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
 254        }
 255}
 256
  /* Wait for outstanding async work: entering the sync section completes any
   * request in flight and runs the previously posted ones; the sync-present
   * flag is then dropped again without issuing a new request.
   */
  257static void nfp_net_reconfig_wait_posted(struct nfp_net *nn)
  258{
  259        nfp_net_reconfig_sync_enter(nn);
  260
             /* leave the sync section; the flag is only changed under the lock */
  261        spin_lock_bh(&nn->reconfig_lock);
  262        nn->reconfig_sync_present = false;
  263        spin_unlock_bh(&nn->reconfig_lock);
  264}
 265
 266/**
 267 * __nfp_net_reconfig() - Reconfigure the firmware
 268 * @nn:      NFP Net device to reconfigure
 269 * @update:  The value for the update field in the BAR config
 270 *
 271 * Write the update word to the BAR and ping the reconfig queue.  The
 272 * poll until the firmware has acknowledged the update by zeroing the
 273 * update word.
 274 *
 275 * Return: Negative errno on error, 0 on success
 276 */
 277int __nfp_net_reconfig(struct nfp_net *nn, u32 update)
 278{
 279        int ret;
 280
 281        nfp_net_reconfig_sync_enter(nn);
 282
 283        nfp_net_reconfig_start(nn, update);
 284        ret = nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
 285
 286        spin_lock_bh(&nn->reconfig_lock);
 287
 288        if (nn->reconfig_posted)
 289                nfp_net_reconfig_start_async(nn, 0);
 290
 291        nn->reconfig_sync_present = false;
 292
 293        spin_unlock_bh(&nn->reconfig_lock);
 294
 295        return ret;
 296}
 297
 298int nfp_net_reconfig(struct nfp_net *nn, u32 update)
 299{
 300        int ret;
 301
 302        nn_ctrl_bar_lock(nn);
 303        ret = __nfp_net_reconfig(nn, update);
 304        nn_ctrl_bar_unlock(nn);
 305
 306        return ret;
 307}
 308
 309int nfp_net_mbox_lock(struct nfp_net *nn, unsigned int data_size)
 310{
 311        if (nn->tlv_caps.mbox_len < NFP_NET_CFG_MBOX_SIMPLE_VAL + data_size) {
 312                nn_err(nn, "mailbox too small for %u of data (%u)\n",
 313                       data_size, nn->tlv_caps.mbox_len);
 314                return -EIO;
 315        }
 316
 317        nn_ctrl_bar_lock(nn);
 318        return 0;
 319}
 320
 321/**
 322 * nfp_net_mbox_reconfig() - Reconfigure the firmware via the mailbox
 323 * @nn:        NFP Net device to reconfigure
 324 * @mbox_cmd:  The value for the mailbox command
 325 *
 326 * Helper function for mailbox updates
 327 *
 328 * Return: Negative errno on error, 0 on success
 329 */
 330int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd)
 331{
 332        u32 mbox = nn->tlv_caps.mbox_off;
 333        int ret;
 334
 335        nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd);
 336
 337        ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX);
 338        if (ret) {
 339                nn_err(nn, "Mailbox update error\n");
 340                return ret;
 341        }
 342
 343        return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET);
 344}
 345
 346void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 mbox_cmd)
 347{
 348        u32 mbox = nn->tlv_caps.mbox_off;
 349
 350        nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd);
 351
 352        nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_MBOX);
 353}
 354
 355int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn)
 356{
 357        u32 mbox = nn->tlv_caps.mbox_off;
 358
 359        nfp_net_reconfig_wait_posted(nn);
 360
 361        return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET);
 362}
 363
 364int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd)
 365{
 366        int ret;
 367
 368        ret = nfp_net_mbox_reconfig(nn, mbox_cmd);
 369        nn_ctrl_bar_unlock(nn);
 370        return ret;
 371}
 372
 373/* Interrupt configuration and handling
 374 */
 375
 376/**
 377 * nfp_net_irq_unmask() - Unmask automasked interrupt
 378 * @nn:       NFP Network structure
 379 * @entry_nr: MSI-X table entry
 380 *
 381 * Clear the ICR for the IRQ entry.
 382 */
  383static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr)
  384{
             /* mark the ICR for this MSI-X entry as unmasked ... */
  385        nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED);
             /* ... and flush the write before returning */
  386        nn_pci_flush(nn);
  387}
 388
 389/**
 390 * nfp_net_irqs_alloc() - allocates MSI-X irqs
 391 * @pdev:        PCI device structure
 392 * @irq_entries: Array to be initialized and used to hold the irq entries
 393 * @min_irqs:    Minimal acceptable number of interrupts
 394 * @wanted_irqs: Target number of interrupts to allocate
 395 *
 396 * Return: Number of irqs obtained or 0 on error.
 397 */
 398unsigned int
 399nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries,
 400                   unsigned int min_irqs, unsigned int wanted_irqs)
 401{
 402        unsigned int i;
 403        int got_irqs;
 404
 405        for (i = 0; i < wanted_irqs; i++)
 406                irq_entries[i].entry = i;
 407
 408        got_irqs = pci_enable_msix_range(pdev, irq_entries,
 409                                         min_irqs, wanted_irqs);
 410        if (got_irqs < 0) {
 411                dev_err(&pdev->dev, "Failed to enable %d-%d MSI-X (err=%d)\n",
 412                        min_irqs, wanted_irqs, got_irqs);
 413                return 0;
 414        }
 415
 416        if (got_irqs < wanted_irqs)
 417                dev_warn(&pdev->dev, "Unable to allocate %d IRQs got only %d\n",
 418                         wanted_irqs, got_irqs);
 419
 420        return got_irqs;
 421}
 422
 423/**
 424 * nfp_net_irqs_assign() - Assign interrupts allocated externally to netdev
 425 * @nn:          NFP Network structure
 426 * @irq_entries: Table of allocated interrupts
 427 * @n:           Size of @irq_entries (number of entries to grab)
 428 *
 429 * After interrupts are allocated with nfp_net_irqs_alloc() this function
 430 * should be called to assign them to a specific netdev (port).
 431 */
 432void
 433nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries,
 434                    unsigned int n)
 435{
 436        struct nfp_net_dp *dp = &nn->dp;
 437
 438        nn->max_r_vecs = n - NFP_NET_NON_Q_VECTORS;
 439        dp->num_r_vecs = nn->max_r_vecs;
 440
 441        memcpy(nn->irq_entries, irq_entries, sizeof(*irq_entries) * n);
 442
 443        if (dp->num_rx_rings > dp->num_r_vecs ||
 444            dp->num_tx_rings > dp->num_r_vecs)
 445                dev_warn(nn->dp.dev, "More rings (%d,%d) than vectors (%d).\n",
 446                         dp->num_rx_rings, dp->num_tx_rings,
 447                         dp->num_r_vecs);
 448
 449        dp->num_rx_rings = min(dp->num_r_vecs, dp->num_rx_rings);
 450        dp->num_tx_rings = min(dp->num_r_vecs, dp->num_tx_rings);
 451        dp->num_stack_tx_rings = dp->num_tx_rings;
 452}
 453
 454/**
 455 * nfp_net_irqs_disable() - Disable interrupts
 456 * @pdev:        PCI device structure
 457 *
 458 * Undoes what @nfp_net_irqs_alloc() does.
 459 */
  460void nfp_net_irqs_disable(struct pci_dev *pdev)
  461{
             /* counterpart of the pci_enable_msix_range() call made at alloc time */
  462        pci_disable_msix(pdev);
  463}
 464
 465/**
 466 * nfp_net_irq_rxtx() - Interrupt service routine for RX/TX rings.
 467 * @irq:      Interrupt
 468 * @data:     Opaque data structure
 469 *
 470 * Return: Indicate if the interrupt has been handled.
 471 */
 472static irqreturn_t nfp_net_irq_rxtx(int irq, void *data)
 473{
 474        struct nfp_net_r_vector *r_vec = data;
 475
 476        napi_schedule_irqoff(&r_vec->napi);
 477
 478        /* The FW auto-masks any interrupt, either via the MASK bit in
 479         * the MSI-X table or via the per entry ICR field.  So there
 480         * is no need to disable interrupts here.
 481         */
 482        return IRQ_HANDLED;
 483}
 484
 485static irqreturn_t nfp_ctrl_irq_rxtx(int irq, void *data)
 486{
 487        struct nfp_net_r_vector *r_vec = data;
 488
 489        tasklet_schedule(&r_vec->tasklet);
 490
 491        return IRQ_HANDLED;
 492}
 493
 494/**
 495 * nfp_net_read_link_status() - Reread link status from control BAR
 496 * @nn:       NFP Network structure
 497 */
 498static void nfp_net_read_link_status(struct nfp_net *nn)
 499{
 500        unsigned long flags;
 501        bool link_up;
 502        u32 sts;
 503
 504        spin_lock_irqsave(&nn->link_status_lock, flags);
 505
 506        sts = nn_readl(nn, NFP_NET_CFG_STS);
 507        link_up = !!(sts & NFP_NET_CFG_STS_LINK);
 508
 509        if (nn->link_up == link_up)
 510                goto out;
 511
 512        nn->link_up = link_up;
 513        if (nn->port)
 514                set_bit(NFP_PORT_CHANGED, &nn->port->flags);
 515
 516        if (nn->link_up) {
 517                netif_carrier_on(nn->dp.netdev);
 518                netdev_info(nn->dp.netdev, "NIC Link is Up\n");
 519        } else {
 520                netif_carrier_off(nn->dp.netdev);
 521                netdev_info(nn->dp.netdev, "NIC Link is Down\n");
 522        }
 523out:
 524        spin_unlock_irqrestore(&nn->link_status_lock, flags);
 525}
 526
 527/**
 528 * nfp_net_irq_lsc() - Interrupt service routine for link state changes
 529 * @irq:      Interrupt
 530 * @data:     Opaque data structure
 531 *
 532 * Return: Indicate if the interrupt has been handled.
 533 */
 534static irqreturn_t nfp_net_irq_lsc(int irq, void *data)
 535{
 536        struct nfp_net *nn = data;
 537        struct msix_entry *entry;
 538
 539        entry = &nn->irq_entries[NFP_NET_IRQ_LSC_IDX];
 540
 541        nfp_net_read_link_status(nn);
 542
 543        nfp_net_irq_unmask(nn, entry->entry);
 544
 545        return IRQ_HANDLED;
 546}
 547
 548/**
 549 * nfp_net_irq_exn() - Interrupt service routine for exceptions
 550 * @irq:      Interrupt
 551 * @data:     Opaque data structure
 552 *
 553 * Return: Indicate if the interrupt has been handled.
 554 */
 555static irqreturn_t nfp_net_irq_exn(int irq, void *data)
 556{
 557        struct nfp_net *nn = data;
 558
 559        nn_err(nn, "%s: UNIMPLEMENTED.\n", __func__);
 560        /* XXX TO BE IMPLEMENTED */
 561        return IRQ_HANDLED;
 562}
 563
 564/**
 565 * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring
 566 * @tx_ring:  TX ring structure
 567 * @r_vec:    IRQ vector servicing this ring
 568 * @idx:      Ring index
 569 * @is_xdp:   Is this an XDP TX ring?
 570 */
 571static void
 572nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring,
 573                     struct nfp_net_r_vector *r_vec, unsigned int idx,
 574                     bool is_xdp)
 575{
 576        struct nfp_net *nn = r_vec->nfp_net;
 577
 578        tx_ring->idx = idx;
 579        tx_ring->r_vec = r_vec;
 580        tx_ring->is_xdp = is_xdp;
 581        u64_stats_init(&tx_ring->r_vec->tx_sync);
 582
 583        tx_ring->qcidx = tx_ring->idx * nn->stride_tx;
 584        tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx);
 585}
 586
 587/**
 588 * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring
 589 * @rx_ring:  RX ring structure
 590 * @r_vec:    IRQ vector servicing this ring
 591 * @idx:      Ring index
 592 */
 593static void
 594nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring,
 595                     struct nfp_net_r_vector *r_vec, unsigned int idx)
 596{
 597        struct nfp_net *nn = r_vec->nfp_net;
 598
 599        rx_ring->idx = idx;
 600        rx_ring->r_vec = r_vec;
 601        u64_stats_init(&rx_ring->r_vec->rx_sync);
 602
 603        rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx;
 604        rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx);
 605}
 606
 607/**
 608 * nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN)
 609 * @nn:         NFP Network structure
 610 * @ctrl_offset: Control BAR offset where IRQ configuration should be written
 611 * @format:     printf-style format to construct the interrupt name
 612 * @name:       Pointer to allocated space for interrupt name
 613 * @name_sz:    Size of space for interrupt name
 614 * @vector_idx: Index of MSI-X vector used for this interrupt
 615 * @handler:    IRQ handler to register for this interrupt
 616 */
 617static int
 618nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset,
 619                        const char *format, char *name, size_t name_sz,
 620                        unsigned int vector_idx, irq_handler_t handler)
 621{
 622        struct msix_entry *entry;
 623        int err;
 624
 625        entry = &nn->irq_entries[vector_idx];
 626
 627        snprintf(name, name_sz, format, nfp_net_name(nn));
 628        err = request_irq(entry->vector, handler, 0, name, nn);
 629        if (err) {
 630                nn_err(nn, "Failed to request IRQ %d (err=%d).\n",
 631                       entry->vector, err);
 632                return err;
 633        }
 634        nn_writeb(nn, ctrl_offset, entry->entry);
 635        nfp_net_irq_unmask(nn, entry->entry);
 636
 637        return 0;
 638}
 639
 640/**
 641 * nfp_net_aux_irq_free() - Free an auxiliary interrupt (LSC or EXN)
 642 * @nn:         NFP Network structure
 643 * @ctrl_offset: Control BAR offset where IRQ configuration should be written
 644 * @vector_idx: Index of MSI-X vector used for this interrupt
 645 */
 646static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset,
 647                                 unsigned int vector_idx)
 648{
 649        nn_writeb(nn, ctrl_offset, 0xff);
 650        nn_pci_flush(nn);
 651        free_irq(nn->irq_entries[vector_idx].vector, nn);
 652}
 653
 654/* Transmit
 655 *
 656 * One queue controller peripheral queue is used for transmit.  The
 657 * driver en-queues packets for transmit by advancing the write
 658 * pointer.  The device indicates that packets have transmitted by
 659 * advancing the read pointer.  The driver maintains a local copy of
 660 * the read and write pointer in @struct nfp_net_tx_ring.  The driver
 661 * keeps @wr_p in sync with the queue controller write pointer and can
 662 * determine how many packets have been transmitted by comparing its
 663 * copy of the read pointer @rd_p with the read pointer maintained by
 664 * the queue controller peripheral.
 665 */
 666
 667/**
 668 * nfp_net_tx_full() - Check if the TX ring is full
 669 * @tx_ring: TX ring to check
 670 * @dcnt:    Number of descriptors that need to be enqueued (must be >= 1)
 671 *
 672 * This function checks, based on the *host copy* of read/write
 673 * pointer if a given TX ring is full.  The real TX queue may have
 674 * some newly made available slots.
 675 *
 676 * Return: True if the ring is full.
 677 */
 678static int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt)
 679{
 680        return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt);
 681}
 682
 683/* Wrappers for deciding when to stop and restart TX queues */
 684static int nfp_net_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
 685{
 686        return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4);
 687}
 688
 689static int nfp_net_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
 690{
 691        return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1);
 692}
 693
 694/**
 695 * nfp_net_tx_ring_stop() - stop tx ring
 696 * @nd_q:    netdev queue
 697 * @tx_ring: driver tx queue structure
 698 *
 699 * Safely stop TX ring.  Remember that while we are running .start_xmit()
 700 * someone else may be cleaning the TX ring completions so we need to be
 701 * extra careful here.
 702 */
  703static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q,
  704                                 struct nfp_net_tx_ring *tx_ring)
  705{
  706        netif_tx_stop_queue(nd_q);
  707
  708        /* We can race with the TX completion out of NAPI so recheck */
             /* full barrier between stopping the queue and re-reading the ring
              * state; if room has appeared in the meantime, restart the queue
              */
  709        smp_mb();
  710        if (unlikely(nfp_net_tx_ring_should_wake(tx_ring)))
  711                netif_tx_start_queue(nd_q);
  712}
 713
 714/**
 715 * nfp_net_tx_tso() - Set up Tx descriptor for LSO
 716 * @r_vec: per-ring structure
 717 * @txbuf: Pointer to driver soft TX descriptor
 718 * @txd: Pointer to HW TX descriptor
 719 * @skb: Pointer to SKB
 720 * @md_bytes: Prepend length
 721 *
 722 * Set up Tx descriptor for LSO, do nothing for non-LSO skbs.
 723 * Return error on packet header greater than maximum supported LSO header size.
 724 */
 725static void nfp_net_tx_tso(struct nfp_net_r_vector *r_vec,
 726                           struct nfp_net_tx_buf *txbuf,
 727                           struct nfp_net_tx_desc *txd, struct sk_buff *skb,
 728                           u32 md_bytes)
 729{
 730        u32 l3_offset, l4_offset, hdrlen;
 731        u16 mss;
 732
 733        if (!skb_is_gso(skb))
 734                return;
 735
 736        if (!skb->encapsulation) {
 737                l3_offset = skb_network_offset(skb);
 738                l4_offset = skb_transport_offset(skb);
 739                hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
 740        } else {
 741                l3_offset = skb_inner_network_offset(skb);
 742                l4_offset = skb_inner_transport_offset(skb);
 743                hdrlen = skb_inner_transport_header(skb) - skb->data +
 744                        inner_tcp_hdrlen(skb);
 745        }
 746
 747        txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs;
 748        txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1);
 749
 750        mss = skb_shinfo(skb)->gso_size & PCIE_DESC_TX_MSS_MASK;
 751        txd->l3_offset = l3_offset - md_bytes;
 752        txd->l4_offset = l4_offset - md_bytes;
 753        txd->lso_hdrlen = hdrlen - md_bytes;
 754        txd->mss = cpu_to_le16(mss);
 755        txd->flags |= PCIE_DESC_TX_LSO;
 756
 757        u64_stats_update_begin(&r_vec->tx_sync);
 758        r_vec->tx_lso++;
 759        u64_stats_update_end(&r_vec->tx_sync);
 760}
 761
 762/**
 763 * nfp_net_tx_csum() - Set TX CSUM offload flags in TX descriptor
 764 * @dp:  NFP Net data path struct
 765 * @r_vec: per-ring structure
 766 * @txbuf: Pointer to driver soft TX descriptor
 767 * @txd: Pointer to TX descriptor
 768 * @skb: Pointer to SKB
 769 *
 770 * This function sets the TX checksum flags in the TX descriptor based
 771 * on the configuration and the protocol of the packet to be transmitted.
 772 */
 773static void nfp_net_tx_csum(struct nfp_net_dp *dp,
 774                            struct nfp_net_r_vector *r_vec,
 775                            struct nfp_net_tx_buf *txbuf,
 776                            struct nfp_net_tx_desc *txd, struct sk_buff *skb)
 777{
 778        struct ipv6hdr *ipv6h;
 779        struct iphdr *iph;
 780        u8 l4_hdr;
 781
 782        if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
 783                return;
 784
 785        if (skb->ip_summed != CHECKSUM_PARTIAL)
 786                return;
 787
 788        txd->flags |= PCIE_DESC_TX_CSUM;
 789        if (skb->encapsulation)
 790                txd->flags |= PCIE_DESC_TX_ENCAP;
 791
 792        iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
 793        ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
 794
 795        if (iph->version == 4) {
 796                txd->flags |= PCIE_DESC_TX_IP4_CSUM;
 797                l4_hdr = iph->protocol;
 798        } else if (ipv6h->version == 6) {
 799                l4_hdr = ipv6h->nexthdr;
 800        } else {
 801                nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
 802                return;
 803        }
 804
 805        switch (l4_hdr) {
 806        case IPPROTO_TCP:
 807                txd->flags |= PCIE_DESC_TX_TCP_CSUM;
 808                break;
 809        case IPPROTO_UDP:
 810                txd->flags |= PCIE_DESC_TX_UDP_CSUM;
 811                break;
 812        default:
 813                nn_dp_warn(dp, "partial checksum but l4 proto=%x!\n", l4_hdr);
 814                return;
 815        }
 816
 817        u64_stats_update_begin(&r_vec->tx_sync);
 818        if (skb->encapsulation)
 819                r_vec->hw_csum_tx_inner += txbuf->pkt_cnt;
 820        else
 821                r_vec->hw_csum_tx += txbuf->pkt_cnt;
 822        u64_stats_update_end(&r_vec->tx_sync);
 823}
 824
  /* TLS device-offload hook on the TX path.
   *
   * Returns @skb unchanged when offload does not apply (kTLS TX disabled on
   * @dp, no socket, socket not offloaded, or CONFIG_TLS_DEVICE not set).
   * When the stream is in sync with the expected sequence number, copies the
   * connection handle to @tls_handle, advances the expected sequence number
   * by the payload length and returns @skb.  When it is out of sync (or a
   * resync is pending) a data-carrying skb goes through the software
   * fallback: the return value is then the skb produced by
   * tls_encrypt_skb() with *@nr_frags set to 0, or NULL if the fallback
   * failed.
   */
  825static struct sk_buff *
  826nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
  827               struct sk_buff *skb, u64 *tls_handle, int *nr_frags)
  828{
  829#ifdef CONFIG_TLS_DEVICE
  830        struct nfp_net_tls_offload_ctx *ntls;
  831        struct sk_buff *nskb;
  832        bool resync_pending;
  833        u32 datalen, seq;
  834
  835        if (likely(!dp->ktls_tx))
  836                return skb;
  837        if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
  838                return skb;
  839
             /* datalen: bytes following the TCP header; seq: TCP sequence
              * number of this skb in host byte order
              */
  840        datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
  841        seq = ntohl(tcp_hdr(skb)->seq);
  842        ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
  843        resync_pending = tls_offload_tx_resync_pending(skb->sk);
             /* out-of-sync path: the skb is not the next one expected */
  844        if (unlikely(resync_pending || ntls->next_seq != seq)) {
  845                /* Pure ACK out of order already */
  846                if (!datalen)
  847                        return skb;
  848
  849                u64_stats_update_begin(&r_vec->tx_sync);
  850                r_vec->tls_tx_fallback++;
  851                u64_stats_update_end(&r_vec->tx_sync);
  852
  853                nskb = tls_encrypt_skb(skb);
  854                if (!nskb) {
  855                        u64_stats_update_begin(&r_vec->tx_sync);
  856                        r_vec->tls_tx_no_fallback++;
  857                        u64_stats_update_end(&r_vec->tx_sync);
  858                        return NULL;
  859                }
  860                /* encryption wasn't necessary */
  861                if (nskb == skb)
  862                        return skb;
  863                /* we don't re-check ring space */
  864                if (unlikely(skb_is_nonlinear(nskb))) {
  865                        nn_dp_warn(dp, "tls_encrypt_skb() produced fragmented frame\n");
  866                        u64_stats_update_begin(&r_vec->tx_sync);
  867                        r_vec->tx_errors++;
  868                        u64_stats_update_end(&r_vec->tx_sync);
  869                        dev_kfree_skb_any(nskb);
  870                        return NULL;
  871                }
  872
  873                /* jump forward, a TX may have gotten lost, need to sync TX */
  874                if (!resync_pending && seq - ntls->next_seq < U32_MAX / 4)
  875                        tls_offload_tx_resync_request(nskb->sk);
  876
                     /* the replacement skb was verified to be linear above */
  877                *nr_frags = 0;
  878                return nskb;
  879        }
  880
             /* in-sync path: count offloaded packets (one per GSO segment) */
  881        if (datalen) {
  882                u64_stats_update_begin(&r_vec->tx_sync);
  883                if (!skb_is_gso(skb))
  884                        r_vec->hw_tls_tx++;
  885                else
  886                        r_vec->hw_tls_tx += skb_shinfo(skb)->gso_segs;
  887                u64_stats_update_end(&r_vec->tx_sync);
  888        }
  889
             /* pass the connection handle to the caller and move the expected
              * sequence number past this skb's payload
              */
  890        memcpy(tls_handle, ntls->fw_handle, sizeof(ntls->fw_handle));
  891        ntls->next_seq += datalen;
  892#endif
  893        return skb;
  894}
 895
 896static void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle)
 897{
 898#ifdef CONFIG_TLS_DEVICE
 899        struct nfp_net_tls_offload_ctx *ntls;
 900        u32 datalen, seq;
 901
 902        if (!tls_handle)
 903                return;
 904        if (WARN_ON_ONCE(!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)))
 905                return;
 906
 907        datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
 908        seq = ntohl(tcp_hdr(skb)->seq);
 909
 910        ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
 911        if (ntls->next_seq == seq + datalen)
 912                ntls->next_seq = seq;
 913        else
 914                WARN_ON_ONCE(1);
 915#endif
 916}
 917
/* Add the accumulated wr_ptr_add count to the ring's queue write pointer
 * and clear the accumulator.
 */
static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring)
{
        /* Order all earlier stores ahead of the write pointer update */
        wmb();
        nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add);
        tx_ring->wr_ptr_add = 0;
}
 924
 925static int nfp_net_prep_tx_meta(struct sk_buff *skb, u64 tls_handle)
 926{
 927        struct metadata_dst *md_dst = skb_metadata_dst(skb);
 928        unsigned char *data;
 929        u32 meta_id = 0;
 930        int md_bytes;
 931
 932        if (likely(!md_dst && !tls_handle))
 933                return 0;
 934        if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) {
 935                if (!tls_handle)
 936                        return 0;
 937                md_dst = NULL;
 938        }
 939
 940        md_bytes = 4 + !!md_dst * 4 + !!tls_handle * 8;
 941
 942        if (unlikely(skb_cow_head(skb, md_bytes)))
 943                return -ENOMEM;
 944
 945        meta_id = 0;
 946        data = skb_push(skb, md_bytes) + md_bytes;
 947        if (md_dst) {
 948                data -= 4;
 949                put_unaligned_be32(md_dst->u.port_info.port_id, data);
 950                meta_id = NFP_NET_META_PORTID;
 951        }
 952        if (tls_handle) {
 953                /* conn handle is opaque, we just use u64 to be able to quickly
 954                 * compare it to zero
 955                 */
 956                data -= 8;
 957                memcpy(data, &tls_handle, sizeof(tls_handle));
 958                meta_id <<= NFP_NET_META_FIELD_SIZE;
 959                meta_id |= NFP_NET_META_CONN_HANDLE;
 960        }
 961
 962        data -= 4;
 963        put_unaligned_be32(meta_id, data);
 964
 965        return md_bytes;
 966}
 967
/**
 * nfp_net_tx() - Main transmit entry point
 * @skb:    SKB to transmit
 * @netdev: netdev structure
 *
 * Maps the head and every page fragment of @skb for DMA and fills one TX
 * descriptor per mapping.  If preparing the metadata or mapping for DMA
 * fails, the frame is freed and counted in tx_errors.
 *
 * Return: NETDEV_TX_BUSY if the ring has no room for the frame (the queue
 * is stopped), NETDEV_TX_OK otherwise - including when the frame was dropped.
 */
static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
{
        struct nfp_net *nn = netdev_priv(netdev);
        const skb_frag_t *frag;
        int f, nr_frags, wr_idx, md_bytes;
        struct nfp_net_tx_ring *tx_ring;
        struct nfp_net_r_vector *r_vec;
        struct nfp_net_tx_buf *txbuf;
        struct nfp_net_tx_desc *txd;
        struct netdev_queue *nd_q;
        struct nfp_net_dp *dp;
        dma_addr_t dma_addr;
        unsigned int fsize;
        u64 tls_handle = 0;
        u16 qidx;

        dp = &nn->dp;
        qidx = skb_get_queue_mapping(skb);
        tx_ring = &dp->tx_rings[qidx];
        r_vec = tx_ring->r_vec;

        nr_frags = skb_shinfo(skb)->nr_frags;

        /* One descriptor is needed for the head plus one per fragment */
        if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) {
                nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
                           qidx, tx_ring->wr_p, tx_ring->rd_p);
                nd_q = netdev_get_tx_queue(dp->netdev, qidx);
                netif_tx_stop_queue(nd_q);
                nfp_net_tx_xmit_more_flush(tx_ring);
                u64_stats_update_begin(&r_vec->tx_sync);
                r_vec->tx_busy++;
                u64_stats_update_end(&r_vec->tx_sync);
                return NETDEV_TX_BUSY;
        }

        /* May hand back a different skb and adjust nr_frags; on NULL the
         * skb is not touched again in this function.
         */
        skb = nfp_net_tls_tx(dp, r_vec, skb, &tls_handle, &nr_frags);
        if (unlikely(!skb)) {
                nfp_net_tx_xmit_more_flush(tx_ring);
                return NETDEV_TX_OK;
        }

        md_bytes = nfp_net_prep_tx_meta(skb, tls_handle);
        if (unlikely(md_bytes < 0))
                goto err_flush;

        /* Start with the head skbuf */
        dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
                                  DMA_TO_DEVICE);
        if (dma_mapping_error(dp->dev, dma_addr))
                goto err_dma_err;

        wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

        /* Stash the soft descriptor of the head then initialize it */
        txbuf = &tx_ring->txbufs[wr_idx];
        txbuf->skb = skb;
        txbuf->dma_addr = dma_addr;
        txbuf->fidx = -1;
        txbuf->pkt_cnt = 1;
        txbuf->real_len = skb->len;

        /* Build TX descriptor */
        txd = &tx_ring->txds[wr_idx];
        txd->offset_eop = (nr_frags ? 0 : PCIE_DESC_TX_EOP) | md_bytes;
        txd->dma_len = cpu_to_le16(skb_headlen(skb));
        nfp_desc_set_dma_addr(txd, dma_addr);
        txd->data_len = cpu_to_le16(skb->len);

        txd->flags = 0;
        txd->mss = 0;
        txd->lso_hdrlen = 0;

        /* Do not reorder - tso may adjust pkt cnt, vlan may override fields */
        nfp_net_tx_tso(r_vec, txbuf, txd, skb, md_bytes);
        nfp_net_tx_csum(dp, r_vec, txbuf, txd, skb);
        if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
                txd->flags |= PCIE_DESC_TX_VLAN;
                txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
        }

        /* Gather DMA */
        if (nr_frags > 0) {
                __le64 second_half;

                /* all descs must match except for in addr, length and eop */
                second_half = txd->vals8[1];

                for (f = 0; f < nr_frags; f++) {
                        frag = &skb_shinfo(skb)->frags[f];
                        fsize = skb_frag_size(frag);

                        dma_addr = skb_frag_dma_map(dp->dev, frag, 0,
                                                    fsize, DMA_TO_DEVICE);
                        if (dma_mapping_error(dp->dev, dma_addr))
                                goto err_unmap;

                        /* wr_idx is advanced only once the mapping succeeded,
                         * so on failure it still names the last filled slot
                         */
                        wr_idx = D_IDX(tx_ring, wr_idx + 1);
                        tx_ring->txbufs[wr_idx].skb = skb;
                        tx_ring->txbufs[wr_idx].dma_addr = dma_addr;
                        tx_ring->txbufs[wr_idx].fidx = f;

                        txd = &tx_ring->txds[wr_idx];
                        txd->dma_len = cpu_to_le16(fsize);
                        nfp_desc_set_dma_addr(txd, dma_addr);
                        txd->offset_eop = md_bytes |
                                ((f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0);
                        txd->vals8[1] = second_half;
                }

                u64_stats_update_begin(&r_vec->tx_sync);
                r_vec->tx_gather++;
                u64_stats_update_end(&r_vec->tx_sync);
        }

        skb_tx_timestamp(skb);

        nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);

        tx_ring->wr_p += nr_frags + 1;
        if (nfp_net_tx_ring_should_stop(tx_ring))
                nfp_net_tx_ring_stop(nd_q, tx_ring);

        /* The queue write pointer is only bumped when
         * __netdev_tx_sent_queue() returns true; until then the descriptor
         * count accumulates in wr_ptr_add.
         */
        tx_ring->wr_ptr_add += nr_frags + 1;
        if (__netdev_tx_sent_queue(nd_q, txbuf->real_len, netdev_xmit_more()))
                nfp_net_tx_xmit_more_flush(tx_ring);

        return NETDEV_TX_OK;

err_unmap:
        /* Walk back over the fragments mapped so far, newest first; wr_idx
         * steps backwards with wrap-around until it is at the head slot.
         */
        while (--f >= 0) {
                frag = &skb_shinfo(skb)->frags[f];
                dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
                               skb_frag_size(frag), DMA_TO_DEVICE);
                tx_ring->txbufs[wr_idx].skb = NULL;
                tx_ring->txbufs[wr_idx].dma_addr = 0;
                tx_ring->txbufs[wr_idx].fidx = -2;
                wr_idx = wr_idx - 1;
                if (wr_idx < 0)
                        wr_idx += tx_ring->cnt;
        }
        /* Finally release the head mapping */
        dma_unmap_single(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
                         skb_headlen(skb), DMA_TO_DEVICE);
        tx_ring->txbufs[wr_idx].skb = NULL;
        tx_ring->txbufs[wr_idx].dma_addr = 0;
        tx_ring->txbufs[wr_idx].fidx = -2;
err_dma_err:
        nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_flush:
        nfp_net_tx_xmit_more_flush(tx_ring);
        u64_stats_update_begin(&r_vec->tx_sync);
        r_vec->tx_errors++;
        u64_stats_update_end(&r_vec->tx_sync);
        /* Rewind the TLS sequence tracking before the skb is freed */
        nfp_net_tls_tx_undo(skb, tls_handle);
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
}
1131
1132/**
1133 * nfp_net_tx_complete() - Handled completed TX packets
1134 * @tx_ring:    TX ring structure
1135 * @budget:     NAPI budget (only used as bool to determine if in NAPI context)
1136 */
1137static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
1138{
1139        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
1140        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
1141        struct netdev_queue *nd_q;
1142        u32 done_pkts = 0, done_bytes = 0;
1143        u32 qcp_rd_p;
1144        int todo;
1145
1146        if (tx_ring->wr_p == tx_ring->rd_p)
1147                return;
1148
1149        /* Work out how many descriptors have been transmitted */
1150        qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
1151
1152        if (qcp_rd_p == tx_ring->qcp_rd_p)
1153                return;
1154
1155        todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
1156
1157        while (todo--) {
1158                const skb_frag_t *frag;
1159                struct nfp_net_tx_buf *tx_buf;
1160                struct sk_buff *skb;
1161                int fidx, nr_frags;
1162                int idx;
1163
1164                idx = D_IDX(tx_ring, tx_ring->rd_p++);
1165                tx_buf = &tx_ring->txbufs[idx];
1166
1167                skb = tx_buf->skb;
1168                if (!skb)
1169                        continue;
1170
1171                nr_frags = skb_shinfo(skb)->nr_frags;
1172                fidx = tx_buf->fidx;
1173
1174                if (fidx == -1) {
1175                        /* unmap head */
1176                        dma_unmap_single(dp->dev, tx_buf->dma_addr,
1177                                         skb_headlen(skb), DMA_TO_DEVICE);
1178
1179                        done_pkts += tx_buf->pkt_cnt;
1180                        done_bytes += tx_buf->real_len;
1181                } else {
1182                        /* unmap fragment */
1183                        frag = &skb_shinfo(skb)->frags[fidx];
1184                        dma_unmap_page(dp->dev, tx_buf->dma_addr,
1185                                       skb_frag_size(frag), DMA_TO_DEVICE);
1186                }
1187
1188                /* check for last gather fragment */
1189                if (fidx == nr_frags - 1)
1190                        napi_consume_skb(skb, budget);
1191
1192                tx_buf->dma_addr = 0;
1193                tx_buf->skb = NULL;
1194                tx_buf->fidx = -2;
1195        }
1196
1197        tx_ring->qcp_rd_p = qcp_rd_p;
1198
1199        u64_stats_update_begin(&r_vec->tx_sync);
1200        r_vec->tx_bytes += done_bytes;
1201        r_vec->tx_pkts += done_pkts;
1202        u64_stats_update_end(&r_vec->tx_sync);
1203
1204        if (!dp->netdev)
1205                return;
1206
1207        nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
1208        netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
1209        if (nfp_net_tx_ring_should_wake(tx_ring)) {
1210                /* Make sure TX thread will see updated tx_ring->rd_p */
1211                smp_mb();
1212
1213                if (unlikely(netif_tx_queue_stopped(nd_q)))
1214                        netif_tx_wake_queue(nd_q);
1215        }
1216
1217        WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
1218                  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
1219                  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
1220}
1221
1222static bool nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring)
1223{
1224        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
1225        u32 done_pkts = 0, done_bytes = 0;
1226        bool done_all;
1227        int idx, todo;
1228        u32 qcp_rd_p;
1229
1230        /* Work out how many descriptors have been transmitted */
1231        qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
1232
1233        if (qcp_rd_p == tx_ring->qcp_rd_p)
1234                return true;
1235
1236        todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
1237
1238        done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
1239        todo = min(todo, NFP_NET_XDP_MAX_COMPLETE);
1240
1241        tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo);
1242
1243        done_pkts = todo;
1244        while (todo--) {
1245                idx = D_IDX(tx_ring, tx_ring->rd_p);
1246                tx_ring->rd_p++;
1247
1248                done_bytes += tx_ring->txbufs[idx].real_len;
1249        }
1250
1251        u64_stats_update_begin(&r_vec->tx_sync);
1252        r_vec->tx_bytes += done_bytes;
1253        r_vec->tx_pkts += done_pkts;
1254        u64_stats_update_end(&r_vec->tx_sync);
1255
1256        WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
1257                  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
1258                  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
1259
1260        return done_all;
1261}
1262
1263/**
1264 * nfp_net_tx_ring_reset() - Free any untransmitted buffers and reset pointers
1265 * @dp:         NFP Net data path struct
1266 * @tx_ring:    TX ring structure
1267 *
1268 * Assumes that the device is stopped, must be idempotent.
1269 */
1270static void
1271nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
1272{
1273        const skb_frag_t *frag;
1274        struct netdev_queue *nd_q;
1275
1276        while (!tx_ring->is_xdp && tx_ring->rd_p != tx_ring->wr_p) {
1277                struct nfp_net_tx_buf *tx_buf;
1278                struct sk_buff *skb;
1279                int idx, nr_frags;
1280
1281                idx = D_IDX(tx_ring, tx_ring->rd_p);
1282                tx_buf = &tx_ring->txbufs[idx];
1283
1284                skb = tx_ring->txbufs[idx].skb;
1285                nr_frags = skb_shinfo(skb)->nr_frags;
1286
1287                if (tx_buf->fidx == -1) {
1288                        /* unmap head */
1289                        dma_unmap_single(dp->dev, tx_buf->dma_addr,
1290                                         skb_headlen(skb), DMA_TO_DEVICE);
1291                } else {
1292                        /* unmap fragment */
1293                        frag = &skb_shinfo(skb)->frags[tx_buf->fidx];
1294                        dma_unmap_page(dp->dev, tx_buf->dma_addr,
1295                                       skb_frag_size(frag), DMA_TO_DEVICE);
1296                }
1297
1298                /* check for last gather fragment */
1299                if (tx_buf->fidx == nr_frags - 1)
1300                        dev_kfree_skb_any(skb);
1301
1302                tx_buf->dma_addr = 0;
1303                tx_buf->skb = NULL;
1304                tx_buf->fidx = -2;
1305
1306                tx_ring->qcp_rd_p++;
1307                tx_ring->rd_p++;
1308        }
1309
1310        memset(tx_ring->txds, 0, tx_ring->size);
1311        tx_ring->wr_p = 0;
1312        tx_ring->rd_p = 0;
1313        tx_ring->qcp_rd_p = 0;
1314        tx_ring->wr_ptr_add = 0;
1315
1316        if (tx_ring->is_xdp || !dp->netdev)
1317                return;
1318
1319        nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
1320        netdev_tx_reset_queue(nd_q);
1321}
1322
1323static void nfp_net_tx_timeout(struct net_device *netdev)
1324{
1325        struct nfp_net *nn = netdev_priv(netdev);
1326        int i;
1327
1328        for (i = 0; i < nn->dp.netdev->real_num_tx_queues; i++) {
1329                if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i)))
1330                        continue;
1331                nn_warn(nn, "TX timeout on ring: %d\n", i);
1332        }
1333        nn_warn(nn, "TX watchdog timeout\n");
1334}
1335
1336/* Receive processing
1337 */
1338static unsigned int
1339nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp)
1340{
1341        unsigned int fl_bufsz;
1342
1343        fl_bufsz = NFP_NET_RX_BUF_HEADROOM;
1344        fl_bufsz += dp->rx_dma_off;
1345        if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
1346                fl_bufsz += NFP_NET_MAX_PREPEND;
1347        else
1348                fl_bufsz += dp->rx_offset;
1349        fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + dp->mtu;
1350
1351        fl_bufsz = SKB_DATA_ALIGN(fl_bufsz);
1352        fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1353
1354        return fl_bufsz;
1355}
1356
1357static void
1358nfp_net_free_frag(void *frag, bool xdp)
1359{
1360        if (!xdp)
1361                skb_free_frag(frag);
1362        else
1363                __free_page(virt_to_page(frag));
1364}
1365
1366/**
1367 * nfp_net_rx_alloc_one() - Allocate and map page frag for RX
1368 * @dp:         NFP Net data path struct
1369 * @dma_addr:   Pointer to storage for DMA address (output param)
1370 *
1371 * This function will allcate a new page frag, map it for DMA.
1372 *
1373 * Return: allocated page frag or NULL on failure.
1374 */
1375static void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
1376{
1377        void *frag;
1378
1379        if (!dp->xdp_prog) {
1380                frag = netdev_alloc_frag(dp->fl_bufsz);
1381        } else {
1382                struct page *page;
1383
1384                page = alloc_page(GFP_KERNEL);
1385                frag = page ? page_address(page) : NULL;
1386        }
1387        if (!frag) {
1388                nn_dp_warn(dp, "Failed to alloc receive page frag\n");
1389                return NULL;
1390        }
1391
1392        *dma_addr = nfp_net_dma_map_rx(dp, frag);
1393        if (dma_mapping_error(dp->dev, *dma_addr)) {
1394                nfp_net_free_frag(frag, dp->xdp_prog);
1395                nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
1396                return NULL;
1397        }
1398
1399        return frag;
1400}
1401
1402static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
1403{
1404        void *frag;
1405
1406        if (!dp->xdp_prog) {
1407                frag = napi_alloc_frag(dp->fl_bufsz);
1408                if (unlikely(!frag))
1409                        return NULL;
1410        } else {
1411                struct page *page;
1412
1413                page = dev_alloc_page();
1414                if (unlikely(!page))
1415                        return NULL;
1416                frag = page_address(page);
1417        }
1418
1419        *dma_addr = nfp_net_dma_map_rx(dp, frag);
1420        if (dma_mapping_error(dp->dev, *dma_addr)) {
1421                nfp_net_free_frag(frag, dp->xdp_prog);
1422                nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
1423                return NULL;
1424        }
1425
1426        return frag;
1427}
1428
1429/**
1430 * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings
1431 * @dp:         NFP Net data path struct
1432 * @rx_ring:    RX ring structure
1433 * @frag:       page fragment buffer
1434 * @dma_addr:   DMA address of skb mapping
1435 */
1436static void nfp_net_rx_give_one(const struct nfp_net_dp *dp,
1437                                struct nfp_net_rx_ring *rx_ring,
1438                                void *frag, dma_addr_t dma_addr)
1439{
1440        unsigned int wr_idx;
1441
1442        wr_idx = D_IDX(rx_ring, rx_ring->wr_p);
1443
1444        nfp_net_dma_sync_dev_rx(dp, dma_addr);
1445
1446        /* Stash SKB and DMA address away */
1447        rx_ring->rxbufs[wr_idx].frag = frag;
1448        rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;
1449
1450        /* Fill freelist descriptor */
1451        rx_ring->rxds[wr_idx].fld.reserved = 0;
1452        rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
1453        nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
1454                              dma_addr + dp->rx_dma_off);
1455
1456        rx_ring->wr_p++;
1457        if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
1458                /* Update write pointer of the freelist queue. Make
1459                 * sure all writes are flushed before telling the hardware.
1460                 */
1461                wmb();
1462                nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
1463        }
1464}
1465
1466/**
1467 * nfp_net_rx_ring_reset() - Reflect in SW state of freelist after disable
1468 * @rx_ring:    RX ring structure
1469 *
1470 * Assumes that the device is stopped, must be idempotent.
1471 */
1472static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
1473{
1474        unsigned int wr_idx, last_idx;
1475
1476        /* wr_p == rd_p means ring was never fed FL bufs.  RX rings are always
1477         * kept at cnt - 1 FL bufs.
1478         */
1479        if (rx_ring->wr_p == 0 && rx_ring->rd_p == 0)
1480                return;
1481
1482        /* Move the empty entry to the end of the list */
1483        wr_idx = D_IDX(rx_ring, rx_ring->wr_p);
1484        last_idx = rx_ring->cnt - 1;
1485        rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr;
1486        rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag;
1487        rx_ring->rxbufs[last_idx].dma_addr = 0;
1488        rx_ring->rxbufs[last_idx].frag = NULL;
1489
1490        memset(rx_ring->rxds, 0, rx_ring->size);
1491        rx_ring->wr_p = 0;
1492        rx_ring->rd_p = 0;
1493}
1494
1495/**
1496 * nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring
1497 * @dp:         NFP Net data path struct
1498 * @rx_ring:    RX ring to remove buffers from
1499 *
1500 * Assumes that the device is stopped and buffers are in [0, ring->cnt - 1)
1501 * entries.  After device is disabled nfp_net_rx_ring_reset() must be called
1502 * to restore required ring geometry.
1503 */
1504static void
1505nfp_net_rx_ring_bufs_free(struct nfp_net_dp *dp,
1506                          struct nfp_net_rx_ring *rx_ring)
1507{
1508        unsigned int i;
1509
1510        for (i = 0; i < rx_ring->cnt - 1; i++) {
1511                /* NULL skb can only happen when initial filling of the ring
1512                 * fails to allocate enough buffers and calls here to free
1513                 * already allocated ones.
1514                 */
1515                if (!rx_ring->rxbufs[i].frag)
1516                        continue;
1517
1518                nfp_net_dma_unmap_rx(dp, rx_ring->rxbufs[i].dma_addr);
1519                nfp_net_free_frag(rx_ring->rxbufs[i].frag, dp->xdp_prog);
1520                rx_ring->rxbufs[i].dma_addr = 0;
1521                rx_ring->rxbufs[i].frag = NULL;
1522        }
1523}
1524
1525/**
1526 * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW)
1527 * @dp:         NFP Net data path struct
1528 * @rx_ring:    RX ring to remove buffers from
1529 */
1530static int
1531nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp,
1532                           struct nfp_net_rx_ring *rx_ring)
1533{
1534        struct nfp_net_rx_buf *rxbufs;
1535        unsigned int i;
1536
1537        rxbufs = rx_ring->rxbufs;
1538
1539        for (i = 0; i < rx_ring->cnt - 1; i++) {
1540                rxbufs[i].frag = nfp_net_rx_alloc_one(dp, &rxbufs[i].dma_addr);
1541                if (!rxbufs[i].frag) {
1542                        nfp_net_rx_ring_bufs_free(dp, rx_ring);
1543                        return -ENOMEM;
1544                }
1545        }
1546
1547        return 0;
1548}
1549
1550/**
1551 * nfp_net_rx_ring_fill_freelist() - Give buffers from the ring to FW
1552 * @dp:      NFP Net data path struct
1553 * @rx_ring: RX ring to fill
1554 */
1555static void
1556nfp_net_rx_ring_fill_freelist(struct nfp_net_dp *dp,
1557                              struct nfp_net_rx_ring *rx_ring)
1558{
1559        unsigned int i;
1560
1561        for (i = 0; i < rx_ring->cnt - 1; i++)
1562                nfp_net_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
1563                                    rx_ring->rxbufs[i].dma_addr);
1564}
1565
1566/**
1567 * nfp_net_rx_csum_has_errors() - group check if rxd has any csum errors
1568 * @flags: RX descriptor flags field in CPU byte order
1569 */
1570static int nfp_net_rx_csum_has_errors(u16 flags)
1571{
1572        u16 csum_all_checked, csum_all_ok;
1573
1574        csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL;
1575        csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK;
1576
1577        return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT);
1578}
1579
1580/**
1581 * nfp_net_rx_csum() - set SKB checksum field based on RX descriptor flags
1582 * @dp:  NFP Net data path struct
1583 * @r_vec: per-ring structure
1584 * @rxd: Pointer to RX descriptor
1585 * @meta: Parsed metadata prepend
1586 * @skb: Pointer to SKB
1587 */
1588static void nfp_net_rx_csum(struct nfp_net_dp *dp,
1589                            struct nfp_net_r_vector *r_vec,
1590                            struct nfp_net_rx_desc *rxd,
1591                            struct nfp_meta_parsed *meta, struct sk_buff *skb)
1592{
1593        skb_checksum_none_assert(skb);
1594
1595        if (!(dp->netdev->features & NETIF_F_RXCSUM))
1596                return;
1597
1598        if (meta->csum_type) {
1599                skb->ip_summed = meta->csum_type;
1600                skb->csum = meta->csum;
1601                u64_stats_update_begin(&r_vec->rx_sync);
1602                r_vec->hw_csum_rx_complete++;
1603                u64_stats_update_end(&r_vec->rx_sync);
1604                return;
1605        }
1606
1607        if (nfp_net_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
1608                u64_stats_update_begin(&r_vec->rx_sync);
1609                r_vec->hw_csum_rx_error++;
1610                u64_stats_update_end(&r_vec->rx_sync);
1611                return;
1612        }
1613
1614        /* Assume that the firmware will never report inner CSUM_OK unless outer
1615         * L4 headers were successfully parsed. FW will always report zero UDP
1616         * checksum as CSUM_OK.
1617         */
1618        if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK ||
1619            rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) {
1620                __skb_incr_checksum_unnecessary(skb);
1621                u64_stats_update_begin(&r_vec->rx_sync);
1622                r_vec->hw_csum_rx_ok++;
1623                u64_stats_update_end(&r_vec->rx_sync);
1624        }
1625
1626        if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK ||
1627            rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) {
1628                __skb_incr_checksum_unnecessary(skb);
1629                u64_stats_update_begin(&r_vec->rx_sync);
1630                r_vec->hw_csum_rx_inner_ok++;
1631                u64_stats_update_end(&r_vec->rx_sync);
1632        }
1633}
1634
1635static void
1636nfp_net_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
1637                 unsigned int type, __be32 *hash)
1638{
1639        if (!(netdev->features & NETIF_F_RXHASH))
1640                return;
1641
1642        switch (type) {
1643        case NFP_NET_RSS_IPV4:
1644        case NFP_NET_RSS_IPV6:
1645        case NFP_NET_RSS_IPV6_EX:
1646                meta->hash_type = PKT_HASH_TYPE_L3;
1647                break;
1648        default:
1649                meta->hash_type = PKT_HASH_TYPE_L4;
1650                break;
1651        }
1652
1653        meta->hash = get_unaligned_be32(hash);
1654}
1655
1656static void
1657nfp_net_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta,
1658                      void *data, struct nfp_net_rx_desc *rxd)
1659{
1660        struct nfp_net_rx_hash *rx_hash = data;
1661
1662        if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
1663                return;
1664
1665        nfp_net_set_hash(netdev, meta, get_unaligned_be32(&rx_hash->hash_type),
1666                         &rx_hash->hash);
1667}
1668
1669static void *
1670nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
1671                   void *data, int meta_len)
1672{
1673        u32 meta_info;
1674
1675        meta_info = get_unaligned_be32(data);
1676        data += 4;
1677
1678        while (meta_info) {
1679                switch (meta_info & NFP_NET_META_FIELD_MASK) {
1680                case NFP_NET_META_HASH:
1681                        meta_info >>= NFP_NET_META_FIELD_SIZE;
1682                        nfp_net_set_hash(netdev, meta,
1683                                         meta_info & NFP_NET_META_FIELD_MASK,
1684                                         (__be32 *)data);
1685                        data += 4;
1686                        break;
1687                case NFP_NET_META_MARK:
1688                        meta->mark = get_unaligned_be32(data);
1689                        data += 4;
1690                        break;
1691                case NFP_NET_META_PORTID:
1692                        meta->portid = get_unaligned_be32(data);
1693                        data += 4;
1694                        break;
1695                case NFP_NET_META_CSUM:
1696                        meta->csum_type = CHECKSUM_COMPLETE;
1697                        meta->csum =
1698                                (__force __wsum)__get_unaligned_cpu32(data);
1699                        data += 4;
1700                        break;
1701                default:
1702                        return NULL;
1703                }
1704
1705                meta_info >>= NFP_NET_META_FIELD_SIZE;
1706        }
1707
1708        return data;
1709}
1710
1711static void
1712nfp_net_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
1713                struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
1714                struct sk_buff *skb)
1715{
1716        u64_stats_update_begin(&r_vec->rx_sync);
1717        r_vec->rx_drops++;
1718        /* If we have both skb and rxbuf the replacement buffer allocation
1719         * must have failed, count this as an alloc failure.
1720         */
1721        if (skb && rxbuf)
1722                r_vec->rx_replace_buf_alloc_fail++;
1723        u64_stats_update_end(&r_vec->rx_sync);
1724
1725        /* skb is build based on the frag, free_skb() would free the frag
1726         * so to be able to reuse it we need an extra ref.
1727         */
1728        if (skb && rxbuf && skb->head == rxbuf->frag)
1729                page_ref_inc(virt_to_head_page(rxbuf->frag));
1730        if (rxbuf)
1731                nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
1732        if (skb)
1733                dev_kfree_skb_any(skb);
1734}
1735
1736static bool
1737nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
1738                   struct nfp_net_tx_ring *tx_ring,
1739                   struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
1740                   unsigned int pkt_len, bool *completed)
1741{
1742        struct nfp_net_tx_buf *txbuf;
1743        struct nfp_net_tx_desc *txd;
1744        int wr_idx;
1745
1746        if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
1747                if (!*completed) {
1748                        nfp_net_xdp_complete(tx_ring);
1749                        *completed = true;
1750                }
1751
1752                if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
1753                        nfp_net_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
1754                                        NULL);
1755                        return false;
1756                }
1757        }
1758
1759        wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
1760
1761        /* Stash the soft descriptor of the head then initialize it */
1762        txbuf = &tx_ring->txbufs[wr_idx];
1763
1764        nfp_net_rx_give_one(dp, rx_ring, txbuf->frag, txbuf->dma_addr);
1765
1766        txbuf->frag = rxbuf->frag;
1767        txbuf->dma_addr = rxbuf->dma_addr;
1768        txbuf->fidx = -1;
1769        txbuf->pkt_cnt = 1;
1770        txbuf->real_len = pkt_len;
1771
1772        dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
1773                                   pkt_len, DMA_BIDIRECTIONAL);
1774
1775        /* Build TX descriptor */
1776        txd = &tx_ring->txds[wr_idx];
1777        txd->offset_eop = PCIE_DESC_TX_EOP;
1778        txd->dma_len = cpu_to_le16(pkt_len);
1779        nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + dma_off);
1780        txd->data_len = cpu_to_le16(pkt_len);
1781
1782        txd->flags = 0;
1783        txd->mss = 0;
1784        txd->lso_hdrlen = 0;
1785
1786        tx_ring->wr_p++;
1787        tx_ring->wr_ptr_add++;
1788        return true;
1789}
1790
1791/**
1792 * nfp_net_rx() - receive up to @budget packets on @rx_ring
1793 * @rx_ring:   RX ring to receive from
1794 * @budget:    NAPI budget
1795 *
1796 * Note, this function is separated out from the napi poll function to
1797 * more cleanly separate packet receive code from other bookkeeping
1798 * functions performed in the napi poll function.
1799 *
1800 * Return: Number of packets received.
1801 */
1802static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
1803{
1804        struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
1805        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
1806        struct nfp_net_tx_ring *tx_ring;
1807        struct bpf_prog *xdp_prog;
1808        bool xdp_tx_cmpl = false;
1809        unsigned int true_bufsz;
1810        struct sk_buff *skb;
1811        int pkts_polled = 0;
1812        struct xdp_buff xdp;
1813        int idx;
1814
1815        rcu_read_lock();
1816        xdp_prog = READ_ONCE(dp->xdp_prog);
1817        true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
1818        xdp.rxq = &rx_ring->xdp_rxq;
1819        tx_ring = r_vec->xdp_ring;
1820
1821        while (pkts_polled < budget) {
1822                unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
1823                struct nfp_net_rx_buf *rxbuf;
1824                struct nfp_net_rx_desc *rxd;
1825                struct nfp_meta_parsed meta;
1826                bool redir_egress = false;
1827                struct net_device *netdev;
1828                dma_addr_t new_dma_addr;
1829                u32 meta_len_xdp = 0;
1830                void *new_frag;
1831
1832                idx = D_IDX(rx_ring, rx_ring->rd_p);
1833
1834                rxd = &rx_ring->rxds[idx];
1835                if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
1836                        break;
1837
1838                /* Memory barrier to ensure that we won't do other reads
1839                 * before the DD bit.
1840                 */
1841                dma_rmb();
1842
1843                memset(&meta, 0, sizeof(meta));
1844
1845                rx_ring->rd_p++;
1846                pkts_polled++;
1847
1848                rxbuf = &rx_ring->rxbufs[idx];
1849                /*         < meta_len >
1850                 *  <-- [rx_offset] -->
1851                 *  ---------------------------------------------------------
1852                 * | [XX] |  metadata  |             packet           | XXXX |
1853                 *  ---------------------------------------------------------
1854                 *         <---------------- data_len --------------->
1855                 *
1856                 * The rx_offset is fixed for all packets, the meta_len can vary
1857                 * on a packet by packet basis. If rx_offset is set to zero
1858                 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
1859                 * buffer and is immediately followed by the packet (no [XX]).
1860                 */
1861                meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
1862                data_len = le16_to_cpu(rxd->rxd.data_len);
1863                pkt_len = data_len - meta_len;
1864
1865                pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
1866                if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
1867                        pkt_off += meta_len;
1868                else
1869                        pkt_off += dp->rx_offset;
1870                meta_off = pkt_off - meta_len;
1871
1872                /* Stats update */
1873                u64_stats_update_begin(&r_vec->rx_sync);
1874                r_vec->rx_pkts++;
1875                r_vec->rx_bytes += pkt_len;
1876                u64_stats_update_end(&r_vec->rx_sync);
1877
1878                if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
1879                             (dp->rx_offset && meta_len > dp->rx_offset))) {
1880                        nn_dp_warn(dp, "oversized RX packet metadata %u\n",
1881                                   meta_len);
1882                        nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
1883                        continue;
1884                }
1885
1886                nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
1887                                        data_len);
1888
1889                if (!dp->chained_metadata_format) {
1890                        nfp_net_set_hash_desc(dp->netdev, &meta,
1891                                              rxbuf->frag + meta_off, rxd);
1892                } else if (meta_len) {
1893                        void *end;
1894
1895                        end = nfp_net_parse_meta(dp->netdev, &meta,
1896                                                 rxbuf->frag + meta_off,
1897                                                 meta_len);
1898                        if (unlikely(end != rxbuf->frag + pkt_off)) {
1899                                nn_dp_warn(dp, "invalid RX packet metadata\n");
1900                                nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
1901                                                NULL);
1902                                continue;
1903                        }
1904                }
1905
1906                if (xdp_prog && !meta.portid) {
1907                        void *orig_data = rxbuf->frag + pkt_off;
1908                        unsigned int dma_off;
1909                        int act;
1910
1911                        xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
1912                        xdp.data = orig_data;
1913                        xdp.data_meta = orig_data;
1914                        xdp.data_end = orig_data + pkt_len;
1915
1916                        act = bpf_prog_run_xdp(xdp_prog, &xdp);
1917
1918                        pkt_len = xdp.data_end - xdp.data;
1919                        pkt_off += xdp.data - orig_data;
1920
1921                        switch (act) {
1922                        case XDP_PASS:
1923                                meta_len_xdp = xdp.data - xdp.data_meta;
1924                                break;
1925                        case XDP_TX:
1926                                dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
1927                                if (unlikely(!nfp_net_tx_xdp_buf(dp, rx_ring,
1928                                                                 tx_ring, rxbuf,
1929                                                                 dma_off,
1930                                                                 pkt_len,
1931                                                                 &xdp_tx_cmpl)))
1932                                        trace_xdp_exception(dp->netdev,
1933                                                            xdp_prog, act);
1934                                continue;
1935                        default:
1936                                bpf_warn_invalid_xdp_action(act);
1937                                /* fall through */
1938                        case XDP_ABORTED:
1939                                trace_xdp_exception(dp->netdev, xdp_prog, act);
1940                                /* fall through */
1941                        case XDP_DROP:
1942                                nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
1943                                                    rxbuf->dma_addr);
1944                                continue;
1945                        }
1946                }
1947
1948                if (likely(!meta.portid)) {
1949                        netdev = dp->netdev;
1950                } else if (meta.portid == NFP_META_PORT_ID_CTRL) {
1951                        struct nfp_net *nn = netdev_priv(dp->netdev);
1952
1953                        nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
1954                                            pkt_len);
1955                        nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
1956                                            rxbuf->dma_addr);
1957                        continue;
1958                } else {
1959                        struct nfp_net *nn;
1960
1961                        nn = netdev_priv(dp->netdev);
1962                        netdev = nfp_app_dev_get(nn->app, meta.portid,
1963                                                 &redir_egress);
1964                        if (unlikely(!netdev)) {
1965                                nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
1966                                                NULL);
1967                                continue;
1968                        }
1969
1970                        if (nfp_netdev_is_nfp_repr(netdev))
1971                                nfp_repr_inc_rx_stats(netdev, pkt_len);
1972                }
1973
1974                skb = build_skb(rxbuf->frag, true_bufsz);
1975                if (unlikely(!skb)) {
1976                        nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
1977                        continue;
1978                }
1979                new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr);
1980                if (unlikely(!new_frag)) {
1981                        nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
1982                        continue;
1983                }
1984
1985                nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
1986
1987                nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
1988
1989                skb_reserve(skb, pkt_off);
1990                skb_put(skb, pkt_len);
1991
1992                skb->mark = meta.mark;
1993                skb_set_hash(skb, meta.hash, meta.hash_type);
1994
1995                skb_record_rx_queue(skb, rx_ring->idx);
1996                skb->protocol = eth_type_trans(skb, netdev);
1997
1998                nfp_net_rx_csum(dp, r_vec, rxd, &meta, skb);
1999
2000#ifdef CONFIG_TLS_DEVICE
2001                if (rxd->rxd.flags & PCIE_DESC_RX_DECRYPTED) {
2002                        skb->decrypted = true;
2003                        u64_stats_update_begin(&r_vec->rx_sync);
2004                        r_vec->hw_tls_rx++;
2005                        u64_stats_update_end(&r_vec->rx_sync);
2006                }
2007#endif
2008
2009                if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
2010                        __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
2011                                               le16_to_cpu(rxd->rxd.vlan));
2012                if (meta_len_xdp)
2013                        skb_metadata_set(skb, meta_len_xdp);
2014
2015                if (likely(!redir_egress)) {
2016                        napi_gro_receive(&rx_ring->r_vec->napi, skb);
2017                } else {
2018                        skb->dev = netdev;
2019                        skb_reset_network_header(skb);
2020                        __skb_push(skb, ETH_HLEN);
2021                        dev_queue_xmit(skb);
2022                }
2023        }
2024
2025        if (xdp_prog) {
2026                if (tx_ring->wr_ptr_add)
2027                        nfp_net_tx_xmit_more_flush(tx_ring);
2028                else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) &&
2029                         !xdp_tx_cmpl)
2030                        if (!nfp_net_xdp_complete(tx_ring))
2031                                pkts_polled = budget;
2032        }
2033        rcu_read_unlock();
2034
2035        return pkts_polled;
2036}
2037
2038/**
2039 * nfp_net_poll() - napi poll function
2040 * @napi:    NAPI structure
2041 * @budget:  NAPI budget
2042 *
2043 * Return: number of packets polled.
2044 */
2045static int nfp_net_poll(struct napi_struct *napi, int budget)
2046{
2047        struct nfp_net_r_vector *r_vec =
2048                container_of(napi, struct nfp_net_r_vector, napi);
2049        unsigned int pkts_polled = 0;
2050
2051        if (r_vec->tx_ring)
2052                nfp_net_tx_complete(r_vec->tx_ring, budget);
2053        if (r_vec->rx_ring)
2054                pkts_polled = nfp_net_rx(r_vec->rx_ring, budget);
2055
2056        if (pkts_polled < budget)
2057                if (napi_complete_done(napi, pkts_polled))
2058                        nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
2059
2060        return pkts_polled;
2061}
2062
2063/* Control device data path
2064 */
2065
2066static bool
2067nfp_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
2068                struct sk_buff *skb, bool old)
2069{
2070        unsigned int real_len = skb->len, meta_len = 0;
2071        struct nfp_net_tx_ring *tx_ring;
2072        struct nfp_net_tx_buf *txbuf;
2073        struct nfp_net_tx_desc *txd;
2074        struct nfp_net_dp *dp;
2075        dma_addr_t dma_addr;
2076        int wr_idx;
2077
2078        dp = &r_vec->nfp_net->dp;
2079        tx_ring = r_vec->tx_ring;
2080
2081        if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) {
2082                nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n");
2083                goto err_free;
2084        }
2085
2086        if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
2087                u64_stats_update_begin(&r_vec->tx_sync);
2088                r_vec->tx_busy++;
2089                u64_stats_update_end(&r_vec->tx_sync);
2090                if (!old)
2091                        __skb_queue_tail(&r_vec->queue, skb);
2092                else
2093                        __skb_queue_head(&r_vec->queue, skb);
2094                return true;
2095        }
2096
2097        if (nfp_app_ctrl_has_meta(nn->app)) {
2098                if (unlikely(skb_headroom(skb) < 8)) {
2099                        nn_dp_warn(dp, "CTRL TX on skb without headroom\n");
2100                        goto err_free;
2101                }
2102                meta_len = 8;
2103                put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4));
2104                put_unaligned_be32(NFP_NET_META_PORTID, skb_push(skb, 4));
2105        }
2106
2107        /* Start with the head skbuf */
2108        dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
2109                                  DMA_TO_DEVICE);
2110        if (dma_mapping_error(dp->dev, dma_addr))
2111                goto err_dma_warn;
2112
2113        wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
2114
2115        /* Stash the soft descriptor of the head then initialize it */
2116        txbuf = &tx_ring->txbufs[wr_idx];
2117        txbuf->skb = skb;
2118        txbuf->dma_addr = dma_addr;
2119        txbuf->fidx = -1;
2120        txbuf->pkt_cnt = 1;
2121        txbuf->real_len = real_len;
2122
2123        /* Build TX descriptor */
2124        txd = &tx_ring->txds[wr_idx];
2125        txd->offset_eop = meta_len | PCIE_DESC_TX_EOP;
2126        txd->dma_len = cpu_to_le16(skb_headlen(skb));
2127        nfp_desc_set_dma_addr(txd, dma_addr);
2128        txd->data_len = cpu_to_le16(skb->len);
2129
2130        txd->flags = 0;
2131        txd->mss = 0;
2132        txd->lso_hdrlen = 0;
2133
2134        tx_ring->wr_p++;
2135        tx_ring->wr_ptr_add++;
2136        nfp_net_tx_xmit_more_flush(tx_ring);
2137
2138        return false;
2139
2140err_dma_warn:
2141        nn_dp_warn(dp, "Failed to DMA map TX CTRL buffer\n");
2142err_free:
2143        u64_stats_update_begin(&r_vec->tx_sync);
2144        r_vec->tx_errors++;
2145        u64_stats_update_end(&r_vec->tx_sync);
2146        dev_kfree_skb_any(skb);
2147        return false;
2148}
2149
2150bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb)
2151{
2152        struct nfp_net_r_vector *r_vec = &nn->r_vecs[0];
2153
2154        return nfp_ctrl_tx_one(nn, r_vec, skb, false);
2155}
2156
2157bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb)
2158{
2159        struct nfp_net_r_vector *r_vec = &nn->r_vecs[0];
2160        bool ret;
2161
2162        spin_lock_bh(&r_vec->lock);
2163        ret = nfp_ctrl_tx_one(nn, r_vec, skb, false);
2164        spin_unlock_bh(&r_vec->lock);
2165
2166        return ret;
2167}
2168
2169static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec)
2170{
2171        struct sk_buff *skb;
2172
2173        while ((skb = __skb_dequeue(&r_vec->queue)))
2174                if (nfp_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true))
2175                        return;
2176}
2177
2178static bool
2179nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len)
2180{
2181        u32 meta_type, meta_tag;
2182
2183        if (!nfp_app_ctrl_has_meta(nn->app))
2184                return !meta_len;
2185
2186        if (meta_len != 8)
2187                return false;
2188
2189        meta_type = get_unaligned_be32(data);
2190        meta_tag = get_unaligned_be32(data + 4);
2191
2192        return (meta_type == NFP_NET_META_PORTID &&
2193                meta_tag == NFP_META_PORT_ID_CTRL);
2194}
2195
2196static bool
2197nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp,
2198                struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring)
2199{
2200        unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
2201        struct nfp_net_rx_buf *rxbuf;
2202        struct nfp_net_rx_desc *rxd;
2203        dma_addr_t new_dma_addr;
2204        struct sk_buff *skb;
2205        void *new_frag;
2206        int idx;
2207
2208        idx = D_IDX(rx_ring, rx_ring->rd_p);
2209
2210        rxd = &rx_ring->rxds[idx];
2211        if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
2212                return false;
2213
2214        /* Memory barrier to ensure that we won't do other reads
2215         * before the DD bit.
2216         */
2217        dma_rmb();
2218
2219        rx_ring->rd_p++;
2220
2221        rxbuf = &rx_ring->rxbufs[idx];
2222        meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
2223        data_len = le16_to_cpu(rxd->rxd.data_len);
2224        pkt_len = data_len - meta_len;
2225
2226        pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
2227        if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
2228                pkt_off += meta_len;
2229        else
2230                pkt_off += dp->rx_offset;
2231        meta_off = pkt_off - meta_len;
2232
2233        /* Stats update */
2234        u64_stats_update_begin(&r_vec->rx_sync);
2235        r_vec->rx_pkts++;
2236        r_vec->rx_bytes += pkt_len;
2237        u64_stats_update_end(&r_vec->rx_sync);
2238
2239        nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len);
2240
2241        if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) {
2242                nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n",
2243                           meta_len);
2244                nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
2245                return true;
2246        }
2247
2248        skb = build_skb(rxbuf->frag, dp->fl_bufsz);
2249        if (unlikely(!skb)) {
2250                nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
2251                return true;
2252        }
2253        new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr);
2254        if (unlikely(!new_frag)) {
2255                nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
2256                return true;
2257        }
2258
2259        nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
2260
2261        nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
2262
2263        skb_reserve(skb, pkt_off);
2264        skb_put(skb, pkt_len);
2265
2266        nfp_app_ctrl_rx(nn->app, skb);
2267
2268        return true;
2269}
2270
2271static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
2272{
2273        struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
2274        struct nfp_net *nn = r_vec->nfp_net;
2275        struct nfp_net_dp *dp = &nn->dp;
2276        unsigned int budget = 512;
2277
2278        while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--)
2279                continue;
2280
2281        return budget;
2282}
2283
2284static void nfp_ctrl_poll(unsigned long arg)
2285{
2286        struct nfp_net_r_vector *r_vec = (void *)arg;
2287
2288        spin_lock(&r_vec->lock);
2289        nfp_net_tx_complete(r_vec->tx_ring, 0);
2290        __nfp_ctrl_tx_queued(r_vec);
2291        spin_unlock(&r_vec->lock);
2292
2293        if (nfp_ctrl_rx(r_vec)) {
2294                nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
2295        } else {
2296                tasklet_schedule(&r_vec->tasklet);
2297                nn_dp_warn(&r_vec->nfp_net->dp,
2298                           "control message budget exceeded!\n");
2299        }
2300}
2301
2302/* Setup and Configuration
2303 */
2304
2305/**
2306 * nfp_net_vecs_init() - Assign IRQs and setup rvecs.
2307 * @nn:         NFP Network structure
2308 */
2309static void nfp_net_vecs_init(struct nfp_net *nn)
2310{
2311        struct nfp_net_r_vector *r_vec;
2312        int r;
2313
2314        nn->lsc_handler = nfp_net_irq_lsc;
2315        nn->exn_handler = nfp_net_irq_exn;
2316
2317        for (r = 0; r < nn->max_r_vecs; r++) {
2318                struct msix_entry *entry;
2319
2320                entry = &nn->irq_entries[NFP_NET_NON_Q_VECTORS + r];
2321
2322                r_vec = &nn->r_vecs[r];
2323                r_vec->nfp_net = nn;
2324                r_vec->irq_entry = entry->entry;
2325                r_vec->irq_vector = entry->vector;
2326
2327                if (nn->dp.netdev) {
2328                        r_vec->handler = nfp_net_irq_rxtx;
2329                } else {
2330                        r_vec->handler = nfp_ctrl_irq_rxtx;
2331
2332                        __skb_queue_head_init(&r_vec->queue);
2333                        spin_lock_init(&r_vec->lock);
2334                        tasklet_init(&r_vec->tasklet, nfp_ctrl_poll,
2335                                     (unsigned long)r_vec);
2336                        tasklet_disable(&r_vec->tasklet);
2337                }
2338
2339                cpumask_set_cpu(r, &r_vec->affinity_mask);
2340        }
2341}
2342
2343/**
2344 * nfp_net_tx_ring_free() - Free resources allocated to a TX ring
2345 * @tx_ring:   TX ring to free
2346 */
2347static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
2348{
2349        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
2350        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
2351
2352        kvfree(tx_ring->txbufs);
2353
2354        if (tx_ring->txds)
2355                dma_free_coherent(dp->dev, tx_ring->size,
2356                                  tx_ring->txds, tx_ring->dma);
2357
2358        tx_ring->cnt = 0;
2359        tx_ring->txbufs = NULL;
2360        tx_ring->txds = NULL;
2361        tx_ring->dma = 0;
2362        tx_ring->size = 0;
2363}
2364
2365/**
2366 * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring
2367 * @dp:        NFP Net data path struct
2368 * @tx_ring:   TX Ring structure to allocate
2369 *
2370 * Return: 0 on success, negative errno otherwise.
2371 */
2372static int
2373nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
2374{
2375        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
2376
2377        tx_ring->cnt = dp->txd_cnt;
2378
2379        tx_ring->size = array_size(tx_ring->cnt, sizeof(*tx_ring->txds));
2380        tx_ring->txds = dma_alloc_coherent(dp->dev, tx_ring->size,
2381                                           &tx_ring->dma,
2382                                           GFP_KERNEL | __GFP_NOWARN);
2383        if (!tx_ring->txds) {
2384                netdev_warn(dp->netdev, "failed to allocate TX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n",
2385                            tx_ring->cnt);
2386                goto err_alloc;
2387        }
2388
2389        tx_ring->txbufs = kvcalloc(tx_ring->cnt, sizeof(*tx_ring->txbufs),
2390                                   GFP_KERNEL);
2391        if (!tx_ring->txbufs)
2392                goto err_alloc;
2393
2394        if (!tx_ring->is_xdp && dp->netdev)
2395                netif_set_xps_queue(dp->netdev, &r_vec->affinity_mask,
2396                                    tx_ring->idx);
2397
2398        return 0;
2399
2400err_alloc:
2401        nfp_net_tx_ring_free(tx_ring);
2402        return -ENOMEM;
2403}
2404
2405static void
2406nfp_net_tx_ring_bufs_free(struct nfp_net_dp *dp,
2407                          struct nfp_net_tx_ring *tx_ring)
2408{
2409        unsigned int i;
2410
2411        if (!tx_ring->is_xdp)
2412                return;
2413
2414        for (i = 0; i < tx_ring->cnt; i++) {
2415                if (!tx_ring->txbufs[i].frag)
2416                        return;
2417
2418                nfp_net_dma_unmap_rx(dp, tx_ring->txbufs[i].dma_addr);
2419                __free_page(virt_to_page(tx_ring->txbufs[i].frag));
2420        }
2421}
2422
2423static int
2424nfp_net_tx_ring_bufs_alloc(struct nfp_net_dp *dp,
2425                           struct nfp_net_tx_ring *tx_ring)
2426{
2427        struct nfp_net_tx_buf *txbufs = tx_ring->txbufs;
2428        unsigned int i;
2429
2430        if (!tx_ring->is_xdp)
2431                return 0;
2432
2433        for (i = 0; i < tx_ring->cnt; i++) {
2434                txbufs[i].frag = nfp_net_rx_alloc_one(dp, &txbufs[i].dma_addr);
2435                if (!txbufs[i].frag) {
2436                        nfp_net_tx_ring_bufs_free(dp, tx_ring);
2437                        return -ENOMEM;
2438                }
2439        }
2440
2441        return 0;
2442}
2443
2444static int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
2445{
2446        unsigned int r;
2447
2448        dp->tx_rings = kcalloc(dp->num_tx_rings, sizeof(*dp->tx_rings),
2449                               GFP_KERNEL);
2450        if (!dp->tx_rings)
2451                return -ENOMEM;
2452
2453        for (r = 0; r < dp->num_tx_rings; r++) {
2454                int bias = 0;
2455
2456                if (r >= dp->num_stack_tx_rings)
2457                        bias = dp->num_stack_tx_rings;
2458
2459                nfp_net_tx_ring_init(&dp->tx_rings[r], &nn->r_vecs[r - bias],
2460                                     r, bias);
2461
2462                if (nfp_net_tx_ring_alloc(dp, &dp->tx_rings[r]))
2463                        goto err_free_prev;
2464
2465                if (nfp_net_tx_ring_bufs_alloc(dp, &dp->tx_rings[r]))
2466                        goto err_free_ring;
2467        }
2468
2469        return 0;
2470
2471err_free_prev:
2472        while (r--) {
2473                nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]);
2474err_free_ring:
2475                nfp_net_tx_ring_free(&dp->tx_rings[r]);
2476        }
2477        kfree(dp->tx_rings);
2478        return -ENOMEM;
2479}
2480
2481static void nfp_net_tx_rings_free(struct nfp_net_dp *dp)
2482{
2483        unsigned int r;
2484
2485        for (r = 0; r < dp->num_tx_rings; r++) {
2486                nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]);
2487                nfp_net_tx_ring_free(&dp->tx_rings[r]);
2488        }
2489
2490        kfree(dp->tx_rings);
2491}
2492
2493/**
2494 * nfp_net_rx_ring_free() - Free resources allocated to a RX ring
2495 * @rx_ring:  RX ring to free
2496 */
2497static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
2498{
2499        struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
2500        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
2501
2502        if (dp->netdev)
2503                xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
2504        kvfree(rx_ring->rxbufs);
2505
2506        if (rx_ring->rxds)
2507                dma_free_coherent(dp->dev, rx_ring->size,
2508                                  rx_ring->rxds, rx_ring->dma);
2509
2510        rx_ring->cnt = 0;
2511        rx_ring->rxbufs = NULL;
2512        rx_ring->rxds = NULL;
2513        rx_ring->dma = 0;
2514        rx_ring->size = 0;
2515}
2516
2517/**
2518 * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring
2519 * @dp:       NFP Net data path struct
2520 * @rx_ring:  RX ring to allocate
2521 *
2522 * Return: 0 on success, negative errno otherwise.
2523 */
2524static int
2525nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
2526{
2527        int err;
2528
2529        if (dp->netdev) {
2530                err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev,
2531                                       rx_ring->idx);
2532                if (err < 0)
2533                        return err;
2534        }
2535
2536        rx_ring->cnt = dp->rxd_cnt;
2537        rx_ring->size = array_size(rx_ring->cnt, sizeof(*rx_ring->rxds));
2538        rx_ring->rxds = dma_alloc_coherent(dp->dev, rx_ring->size,
2539                                           &rx_ring->dma,
2540                                           GFP_KERNEL | __GFP_NOWARN);
2541        if (!rx_ring->rxds) {
2542                netdev_warn(dp->netdev, "failed to allocate RX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n",
2543                            rx_ring->cnt);
2544                goto err_alloc;
2545        }
2546
2547        rx_ring->rxbufs = kvcalloc(rx_ring->cnt, sizeof(*rx_ring->rxbufs),
2548                                   GFP_KERNEL);
2549        if (!rx_ring->rxbufs)
2550                goto err_alloc;
2551
2552        return 0;
2553
2554err_alloc:
2555        nfp_net_rx_ring_free(rx_ring);
2556        return -ENOMEM;
2557}
2558
2559static int nfp_net_rx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
2560{
2561        unsigned int r;
2562
2563        dp->rx_rings = kcalloc(dp->num_rx_rings, sizeof(*dp->rx_rings),
2564                               GFP_KERNEL);
2565        if (!dp->rx_rings)
2566                return -ENOMEM;
2567
2568        for (r = 0; r < dp->num_rx_rings; r++) {
2569                nfp_net_rx_ring_init(&dp->rx_rings[r], &nn->r_vecs[r], r);
2570
2571                if (nfp_net_rx_ring_alloc(dp, &dp->rx_rings[r]))
2572                        goto err_free_prev;
2573
2574                if (nfp_net_rx_ring_bufs_alloc(dp, &dp->rx_rings[r]))
2575                        goto err_free_ring;
2576        }
2577
2578        return 0;
2579
2580err_free_prev:
2581        while (r--) {
2582                nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]);
2583err_free_ring:
2584                nfp_net_rx_ring_free(&dp->rx_rings[r]);
2585        }
2586        kfree(dp->rx_rings);
2587        return -ENOMEM;
2588}
2589
2590static void nfp_net_rx_rings_free(struct nfp_net_dp *dp)
2591{
2592        unsigned int r;
2593
2594        for (r = 0; r < dp->num_rx_rings; r++) {
2595                nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]);
2596                nfp_net_rx_ring_free(&dp->rx_rings[r]);
2597        }
2598
2599        kfree(dp->rx_rings);
2600}
2601
2602static void
2603nfp_net_vector_assign_rings(struct nfp_net_dp *dp,
2604                            struct nfp_net_r_vector *r_vec, int idx)
2605{
2606        r_vec->rx_ring = idx < dp->num_rx_rings ? &dp->rx_rings[idx] : NULL;
2607        r_vec->tx_ring =
2608                idx < dp->num_stack_tx_rings ? &dp->tx_rings[idx] : NULL;
2609
2610        r_vec->xdp_ring = idx < dp->num_tx_rings - dp->num_stack_tx_rings ?
2611                &dp->tx_rings[dp->num_stack_tx_rings + idx] : NULL;
2612}
2613
2614static int
2615nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
2616                       int idx)
2617{
2618        int err;
2619
2620        /* Setup NAPI */
2621        if (nn->dp.netdev)
2622                netif_napi_add(nn->dp.netdev, &r_vec->napi,
2623                               nfp_net_poll, NAPI_POLL_WEIGHT);
2624        else
2625                tasklet_enable(&r_vec->tasklet);
2626
2627        snprintf(r_vec->name, sizeof(r_vec->name),
2628                 "%s-rxtx-%d", nfp_net_name(nn), idx);
2629        err = request_irq(r_vec->irq_vector, r_vec->handler, 0, r_vec->name,
2630                          r_vec);
2631        if (err) {
2632                if (nn->dp.netdev)
2633                        netif_napi_del(&r_vec->napi);
2634                else
2635                        tasklet_disable(&r_vec->tasklet);
2636
2637                nn_err(nn, "Error requesting IRQ %d\n", r_vec->irq_vector);
2638                return err;
2639        }
2640        disable_irq(r_vec->irq_vector);
2641
2642        irq_set_affinity_hint(r_vec->irq_vector, &r_vec->affinity_mask);
2643
2644        nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", idx, r_vec->irq_vector,
2645               r_vec->irq_entry);
2646
2647        return 0;
2648}
2649
2650static void
2651nfp_net_cleanup_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
2652{
2653        irq_set_affinity_hint(r_vec->irq_vector, NULL);
2654        if (nn->dp.netdev)
2655                netif_napi_del(&r_vec->napi);
2656        else
2657                tasklet_disable(&r_vec->tasklet);
2658
2659        free_irq(r_vec->irq_vector, r_vec);
2660}
2661
2662/**
2663 * nfp_net_rss_write_itbl() - Write RSS indirection table to device
2664 * @nn:      NFP Net device to reconfigure
2665 */
2666void nfp_net_rss_write_itbl(struct nfp_net *nn)
2667{
2668        int i;
2669
2670        for (i = 0; i < NFP_NET_CFG_RSS_ITBL_SZ; i += 4)
2671                nn_writel(nn, NFP_NET_CFG_RSS_ITBL + i,
2672                          get_unaligned_le32(nn->rss_itbl + i));
2673}
2674
2675/**
2676 * nfp_net_rss_write_key() - Write RSS hash key to device
2677 * @nn:      NFP Net device to reconfigure
2678 */
2679void nfp_net_rss_write_key(struct nfp_net *nn)
2680{
2681        int i;
2682
2683        for (i = 0; i < nfp_net_rss_key_sz(nn); i += 4)
2684                nn_writel(nn, NFP_NET_CFG_RSS_KEY + i,
2685                          get_unaligned_le32(nn->rss_key + i));
2686}
2687
2688/**
2689 * nfp_net_coalesce_write_cfg() - Write irq coalescence configuration to HW
2690 * @nn:      NFP Net device to reconfigure
2691 */
2692void nfp_net_coalesce_write_cfg(struct nfp_net *nn)
2693{
2694        u8 i;
2695        u32 factor;
2696        u32 value;
2697
2698        /* Compute factor used to convert coalesce '_usecs' parameters to
2699         * ME timestamp ticks.  There are 16 ME clock cycles for each timestamp
2700         * count.
2701         */
2702        factor = nn->tlv_caps.me_freq_mhz / 16;
2703
2704        /* copy RX interrupt coalesce parameters */
2705        value = (nn->rx_coalesce_max_frames << 16) |
2706                (factor * nn->rx_coalesce_usecs);
2707        for (i = 0; i < nn->dp.num_rx_rings; i++)
2708                nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value);
2709
2710        /* copy TX interrupt coalesce parameters */
2711        value = (nn->tx_coalesce_max_frames << 16) |
2712                (factor * nn->tx_coalesce_usecs);
2713        for (i = 0; i < nn->dp.num_tx_rings; i++)
2714                nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value);
2715}
2716
2717/**
2718 * nfp_net_write_mac_addr() - Write mac address to the device control BAR
2719 * @nn:      NFP Net device to reconfigure
2720 * @addr:    MAC address to write
2721 *
2722 * Writes the MAC address from the netdev to the device control BAR.  Does not
2723 * perform the required reconfig.  We do a bit of byte swapping dance because
2724 * firmware is LE.
2725 */
2726static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *addr)
2727{
2728        nn_writel(nn, NFP_NET_CFG_MACADDR + 0, get_unaligned_be32(addr));
2729        nn_writew(nn, NFP_NET_CFG_MACADDR + 6, get_unaligned_be16(addr + 4));
2730}
2731
/* Zero the address, size and vector registers of RX ring @idx and then of
 * TX ring @idx in the control BAR.
 */
static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
{
	/* RX ring registers */
	nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), 0);
	nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), 0);
	nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), 0);

	/* TX ring registers */
	nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), 0);
	nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), 0);
	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 0);
}
2742
2743/**
2744 * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP
2745 * @nn:      NFP Net device to reconfigure
2746 *
2747 * Warning: must be fully idempotent.
2748 */
2749static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
2750{
2751        u32 new_ctrl, update;
2752        unsigned int r;
2753        int err;
2754
2755        new_ctrl = nn->dp.ctrl;
2756        new_ctrl &= ~NFP_NET_CFG_CTRL_ENABLE;
2757        update = NFP_NET_CFG_UPDATE_GEN;
2758        update |= NFP_NET_CFG_UPDATE_MSIX;
2759        update |= NFP_NET_CFG_UPDATE_RING;
2760
2761        if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
2762                new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG;
2763
2764        nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
2765        nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
2766
2767        nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
2768        err = nfp_net_reconfig(nn, update);
2769        if (err)
2770                nn_err(nn, "Could not disable device: %d\n", err);
2771
2772        for (r = 0; r < nn->dp.num_rx_rings; r++)
2773                nfp_net_rx_ring_reset(&nn->dp.rx_rings[r]);
2774        for (r = 0; r < nn->dp.num_tx_rings; r++)
2775                nfp_net_tx_ring_reset(&nn->dp, &nn->dp.tx_rings[r]);
2776        for (r = 0; r < nn->dp.num_r_vecs; r++)
2777                nfp_net_vec_clear_ring_data(nn, r);
2778
2779        nn->dp.ctrl = new_ctrl;
2780}
2781
2782static void
2783nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn,
2784                             struct nfp_net_rx_ring *rx_ring, unsigned int idx)
2785{
2786        /* Write the DMA address, size and MSI-X info to the device */
2787        nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), rx_ring->dma);
2788        nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(rx_ring->cnt));
2789        nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), rx_ring->r_vec->irq_entry);
2790}
2791
2792static void
2793nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn,
2794                             struct nfp_net_tx_ring *tx_ring, unsigned int idx)
2795{
2796        nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma);
2797        nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt));
2798        nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_entry);
2799}
2800
2801/**
2802 * nfp_net_set_config_and_enable() - Write control BAR and enable NFP
2803 * @nn:      NFP Net device to reconfigure
2804 */
2805static int nfp_net_set_config_and_enable(struct nfp_net *nn)
2806{
2807        u32 bufsz, new_ctrl, update = 0;
2808        unsigned int r;
2809        int err;
2810
2811        new_ctrl = nn->dp.ctrl;
2812
2813        if (nn->dp.ctrl & NFP_NET_CFG_CTRL_RSS_ANY) {
2814                nfp_net_rss_write_key(nn);
2815                nfp_net_rss_write_itbl(nn);
2816                nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg);
2817                update |= NFP_NET_CFG_UPDATE_RSS;
2818        }
2819
2820        if (nn->dp.ctrl & NFP_NET_CFG_CTRL_IRQMOD) {
2821                nfp_net_coalesce_write_cfg(nn);
2822                update |= NFP_NET_CFG_UPDATE_IRQMOD;
2823        }
2824
2825        for (r = 0; r < nn->dp.num_tx_rings; r++)
2826                nfp_net_tx_ring_hw_cfg_write(nn, &nn->dp.tx_rings[r], r);
2827        for (r = 0; r < nn->dp.num_rx_rings; r++)
2828                nfp_net_rx_ring_hw_cfg_write(nn, &nn->dp.rx_rings[r], r);
2829
2830        nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->dp.num_tx_rings == 64 ?
2831                  0xffffffffffffffffULL : ((u64)1 << nn->dp.num_tx_rings) - 1);
2832
2833        nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->dp.num_rx_rings == 64 ?
2834                  0xffffffffffffffffULL : ((u64)1 << nn->dp.num_rx_rings) - 1);
2835
2836        if (nn->dp.netdev)
2837                nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr);
2838
2839        nn_writel(nn, NFP_NET_CFG_MTU, nn->dp.mtu);
2840
2841        bufsz = nn->dp.fl_bufsz - nn->dp.rx_dma_off - NFP_NET_RX_BUF_NON_DATA;
2842        nn_writel(nn, NFP_NET_CFG_FLBUFSZ, bufsz);
2843
2844        /* Enable device */
2845        new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
2846        update |= NFP_NET_CFG_UPDATE_GEN;
2847        update |= NFP_NET_CFG_UPDATE_MSIX;
2848        update |= NFP_NET_CFG_UPDATE_RING;
2849        if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
2850                new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
2851
2852        nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
2853        err = nfp_net_reconfig(nn, update);
2854        if (err) {
2855                nfp_net_clear_config_and_disable(nn);
2856                return err;
2857        }
2858
2859        nn->dp.ctrl = new_ctrl;
2860
2861        for (r = 0; r < nn->dp.num_rx_rings; r++)
2862                nfp_net_rx_ring_fill_freelist(&nn->dp, &nn->dp.rx_rings[r]);
2863
2864        /* Since reconfiguration requests while NFP is down are ignored we
2865         * have to wipe the entire VXLAN configuration and reinitialize it.
2866         */
2867        if (nn->dp.ctrl & NFP_NET_CFG_CTRL_VXLAN) {
2868                memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports));
2869                memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt));
2870                udp_tunnel_get_rx_info(nn->dp.netdev);
2871        }
2872
2873        return 0;
2874}
2875
2876/**
2877 * nfp_net_close_stack() - Quiesce the stack (part of close)
2878 * @nn:      NFP Net device to reconfigure
2879 */
2880static void nfp_net_close_stack(struct nfp_net *nn)
2881{
2882        unsigned int r;
2883
2884        disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
2885        netif_carrier_off(nn->dp.netdev);
2886        nn->link_up = false;
2887
2888        for (r = 0; r < nn->dp.num_r_vecs; r++) {
2889                disable_irq(nn->r_vecs[r].irq_vector);
2890                napi_disable(&nn->r_vecs[r].napi);
2891        }
2892
2893        netif_tx_disable(nn->dp.netdev);
2894}
2895
2896/**
2897 * nfp_net_close_free_all() - Free all runtime resources
2898 * @nn:      NFP Net device to reconfigure
2899 */
2900static void nfp_net_close_free_all(struct nfp_net *nn)
2901{
2902        unsigned int r;
2903
2904        nfp_net_tx_rings_free(&nn->dp);
2905        nfp_net_rx_rings_free(&nn->dp);
2906
2907        for (r = 0; r < nn->dp.num_r_vecs; r++)
2908                nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
2909
2910        nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
2911        nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
2912}
2913
2914/**
2915 * nfp_net_netdev_close() - Called when the device is downed
2916 * @netdev:      netdev structure
2917 */
2918static int nfp_net_netdev_close(struct net_device *netdev)
2919{
2920        struct nfp_net *nn = netdev_priv(netdev);
2921
2922        /* Step 1: Disable RX and TX rings from the Linux kernel perspective
2923         */
2924        nfp_net_close_stack(nn);
2925
2926        /* Step 2: Tell NFP
2927         */
2928        nfp_net_clear_config_and_disable(nn);
2929        nfp_port_configure(netdev, false);
2930
2931        /* Step 3: Free resources
2932         */
2933        nfp_net_close_free_all(nn);
2934
2935        nn_dbg(nn, "%s down", netdev->name);
2936        return 0;
2937}
2938
2939void nfp_ctrl_close(struct nfp_net *nn)
2940{
2941        int r;
2942
2943        rtnl_lock();
2944
2945        for (r = 0; r < nn->dp.num_r_vecs; r++) {
2946                disable_irq(nn->r_vecs[r].irq_vector);
2947                tasklet_disable(&nn->r_vecs[r].tasklet);
2948        }
2949
2950        nfp_net_clear_config_and_disable(nn);
2951
2952        nfp_net_close_free_all(nn);
2953
2954        rtnl_unlock();
2955}
2956
2957/**
2958 * nfp_net_open_stack() - Start the device from stack's perspective
2959 * @nn:      NFP Net device to reconfigure
2960 */
2961static void nfp_net_open_stack(struct nfp_net *nn)
2962{
2963        unsigned int r;
2964
2965        for (r = 0; r < nn->dp.num_r_vecs; r++) {
2966                napi_enable(&nn->r_vecs[r].napi);
2967                enable_irq(nn->r_vecs[r].irq_vector);
2968        }
2969
2970        netif_tx_wake_all_queues(nn->dp.netdev);
2971
2972        enable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
2973        nfp_net_read_link_status(nn);
2974}
2975
2976static int nfp_net_open_alloc_all(struct nfp_net *nn)
2977{
2978        int err, r;
2979
2980        err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_EXN, "%s-exn",
2981                                      nn->exn_name, sizeof(nn->exn_name),
2982                                      NFP_NET_IRQ_EXN_IDX, nn->exn_handler);
2983        if (err)
2984                return err;
2985        err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_LSC, "%s-lsc",
2986                                      nn->lsc_name, sizeof(nn->lsc_name),
2987                                      NFP_NET_IRQ_LSC_IDX, nn->lsc_handler);
2988        if (err)
2989                goto err_free_exn;
2990        disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
2991
2992        for (r = 0; r < nn->dp.num_r_vecs; r++) {
2993                err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
2994                if (err)
2995                        goto err_cleanup_vec_p;
2996        }
2997
2998        err = nfp_net_rx_rings_prepare(nn, &nn->dp);
2999        if (err)
3000                goto err_cleanup_vec;
3001
3002        err = nfp_net_tx_rings_prepare(nn, &nn->dp);
3003        if (err)
3004                goto err_free_rx_rings;
3005
3006        for (r = 0; r < nn->max_r_vecs; r++)
3007                nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
3008
3009        return 0;
3010
3011err_free_rx_rings:
3012        nfp_net_rx_rings_free(&nn->dp);
3013err_cleanup_vec:
3014        r = nn->dp.num_r_vecs;
3015err_cleanup_vec_p:
3016        while (r--)
3017                nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
3018        nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
3019err_free_exn:
3020        nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
3021        return err;
3022}
3023
3024static int nfp_net_netdev_open(struct net_device *netdev)
3025{
3026        struct nfp_net *nn = netdev_priv(netdev);
3027        int err;
3028
3029        /* Step 1: Allocate resources for rings and the like
3030         * - Request interrupts
3031         * - Allocate RX and TX ring resources
3032         * - Setup initial RSS table
3033         */
3034        err = nfp_net_open_alloc_all(nn);
3035        if (err)
3036                return err;
3037
3038        err = netif_set_real_num_tx_queues(netdev, nn->dp.num_stack_tx_rings);
3039        if (err)
3040                goto err_free_all;
3041
3042        err = netif_set_real_num_rx_queues(netdev, nn->dp.num_rx_rings);
3043        if (err)
3044                goto err_free_all;
3045
3046        /* Step 2: Configure the NFP
3047         * - Ifup the physical interface if it exists
3048         * - Enable rings from 0 to tx_rings/rx_rings - 1.
3049         * - Write MAC address (in case it changed)
3050         * - Set the MTU
3051         * - Set the Freelist buffer size
3052         * - Enable the FW
3053         */
3054        err = nfp_port_configure(netdev, true);
3055        if (err)
3056                goto err_free_all;
3057
3058        err = nfp_net_set_config_and_enable(nn);
3059        if (err)
3060                goto err_port_disable;
3061
3062        /* Step 3: Enable for kernel
3063         * - put some freelist descriptors on each RX ring
3064         * - enable NAPI on each ring
3065         * - enable all TX queues
3066         * - set link state
3067         */
3068        nfp_net_open_stack(nn);
3069
3070        return 0;
3071
3072err_port_disable:
3073        nfp_port_configure(netdev, false);
3074err_free_all:
3075        nfp_net_close_free_all(nn);
3076        return err;
3077}
3078
3079int nfp_ctrl_open(struct nfp_net *nn)
3080{
3081        int err, r;
3082
3083        /* ring dumping depends on vNICs being opened/closed under rtnl */
3084        rtnl_lock();
3085
3086        err = nfp_net_open_alloc_all(nn);
3087        if (err)
3088                goto err_unlock;
3089
3090        err = nfp_net_set_config_and_enable(nn);
3091        if (err)
3092                goto err_free_all;
3093
3094        for (r = 0; r < nn->dp.num_r_vecs; r++)
3095                enable_irq(nn->r_vecs[r].irq_vector);
3096
3097        rtnl_unlock();
3098
3099        return 0;
3100
3101err_free_all:
3102        nfp_net_close_free_all(nn);
3103err_unlock:
3104        rtnl_unlock();
3105        return err;
3106}
3107
3108static void nfp_net_set_rx_mode(struct net_device *netdev)
3109{
3110        struct nfp_net *nn = netdev_priv(netdev);
3111        u32 new_ctrl;
3112
3113        new_ctrl = nn->dp.ctrl;
3114
3115        if (!netdev_mc_empty(netdev) || netdev->flags & IFF_ALLMULTI)
3116                new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_L2MC;
3117        else
3118                new_ctrl &= ~NFP_NET_CFG_CTRL_L2MC;
3119
3120        if (netdev->flags & IFF_PROMISC) {
3121                if (nn->cap & NFP_NET_CFG_CTRL_PROMISC)
3122                        new_ctrl |= NFP_NET_CFG_CTRL_PROMISC;
3123                else
3124                        nn_warn(nn, "FW does not support promiscuous mode\n");
3125        } else {
3126                new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC;
3127        }
3128
3129        if (new_ctrl == nn->dp.ctrl)
3130                return;
3131
3132        nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
3133        nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN);
3134
3135        nn->dp.ctrl = new_ctrl;
3136}
3137
3138static void nfp_net_rss_init_itbl(struct nfp_net *nn)
3139{
3140        int i;
3141
3142        for (i = 0; i < sizeof(nn->rss_itbl); i++)
3143                nn->rss_itbl[i] =
3144                        ethtool_rxfh_indir_default(i, nn->dp.num_rx_rings);
3145}
3146
3147static void nfp_net_dp_swap(struct nfp_net *nn, struct nfp_net_dp *dp)
3148{
3149        struct nfp_net_dp new_dp = *dp;
3150
3151        *dp = nn->dp;
3152        nn->dp = new_dp;
3153
3154        nn->dp.netdev->mtu = new_dp.mtu;
3155
3156        if (!netif_is_rxfh_configured(nn->dp.netdev))
3157                nfp_net_rss_init_itbl(nn);
3158}
3159
3160static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp)
3161{
3162        unsigned int r;
3163        int err;
3164
3165        nfp_net_dp_swap(nn, dp);
3166
3167        for (r = 0; r < nn->max_r_vecs; r++)
3168                nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
3169
3170        err = netif_set_real_num_rx_queues(nn->dp.netdev, nn->dp.num_rx_rings);
3171        if (err)
3172                return err;
3173
3174        if (nn->dp.netdev->real_num_tx_queues != nn->dp.num_stack_tx_rings) {
3175                err = netif_set_real_num_tx_queues(nn->dp.netdev,
3176                                                   nn->dp.num_stack_tx_rings);
3177                if (err)
3178                        return err;
3179        }
3180
3181        return nfp_net_set_config_and_enable(nn);
3182}
3183
3184struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn)
3185{
3186        struct nfp_net_dp *new;
3187
3188        new = kmalloc(sizeof(*new), GFP_KERNEL);
3189        if (!new)
3190                return NULL;
3191
3192        *new = nn->dp;
3193
3194        /* Clear things which need to be recomputed */
3195        new->fl_bufsz = 0;
3196        new->tx_rings = NULL;
3197        new->rx_rings = NULL;
3198        new->num_r_vecs = 0;
3199        new->num_stack_tx_rings = 0;
3200
3201        return new;
3202}
3203
3204static int
3205nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp,
3206                     struct netlink_ext_ack *extack)
3207{
3208        /* XDP-enabled tests */
3209        if (!dp->xdp_prog)
3210                return 0;
3211        if (dp->fl_bufsz > PAGE_SIZE) {
3212                NL_SET_ERR_MSG_MOD(extack, "MTU too large w/ XDP enabled");
3213                return -EINVAL;
3214        }
3215        if (dp->num_tx_rings > nn->max_tx_rings) {
3216                NL_SET_ERR_MSG_MOD(extack, "Insufficient number of TX rings w/ XDP enabled");
3217                return -EINVAL;
3218        }
3219
3220        return 0;
3221}
3222
3223int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *dp,
3224                          struct netlink_ext_ack *extack)
3225{
3226        int r, err;
3227
3228        dp->fl_bufsz = nfp_net_calc_fl_bufsz(dp);
3229
3230        dp->num_stack_tx_rings = dp->num_tx_rings;
3231        if (dp->xdp_prog)
3232                dp->num_stack_tx_rings -= dp->num_rx_rings;
3233
3234        dp->num_r_vecs = max(dp->num_rx_rings, dp->num_stack_tx_rings);
3235
3236        err = nfp_net_check_config(nn, dp, extack);
3237        if (err)
3238                goto exit_free_dp;
3239
3240        if (!netif_running(dp->netdev)) {
3241                nfp_net_dp_swap(nn, dp);
3242                err = 0;
3243                goto exit_free_dp;
3244        }
3245
3246        /* Prepare new rings */
3247        for (r = nn->dp.num_r_vecs; r < dp->num_r_vecs; r++) {
3248                err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
3249                if (err) {
3250                        dp->num_r_vecs = r;
3251                        goto err_cleanup_vecs;
3252                }
3253        }
3254
3255        err = nfp_net_rx_rings_prepare(nn, dp);
3256        if (err)
3257                goto err_cleanup_vecs;
3258
3259        err = nfp_net_tx_rings_prepare(nn, dp);
3260        if (err)
3261                goto err_free_rx;
3262
3263        /* Stop device, swap in new rings, try to start the firmware */
3264        nfp_net_close_stack(nn);
3265        nfp_net_clear_config_and_disable(nn);
3266
3267        err = nfp_net_dp_swap_enable(nn, dp);
3268        if (err) {
3269                int err2;
3270
3271                nfp_net_clear_config_and_disable(nn);
3272
3273                /* Try with old configuration and old rings */
3274                err2 = nfp_net_dp_swap_enable(nn, dp);
3275                if (err2)
3276                        nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n",
3277                               err, err2);
3278        }
3279        for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--)
3280                nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
3281
3282        nfp_net_rx_rings_free(dp);
3283        nfp_net_tx_rings_free(dp);
3284
3285        nfp_net_open_stack(nn);
3286exit_free_dp:
3287        kfree(dp);
3288
3289        return err;
3290
3291err_free_rx:
3292        nfp_net_rx_rings_free(dp);
3293err_cleanup_vecs:
3294        for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--)
3295                nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
3296        kfree(dp);
3297        return err;
3298}
3299
3300static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
3301{
3302        struct nfp_net *nn = netdev_priv(netdev);
3303        struct nfp_net_dp *dp;
3304        int err;
3305
3306        err = nfp_app_check_mtu(nn->app, netdev, new_mtu);
3307        if (err)
3308                return err;
3309
3310        dp = nfp_net_clone_dp(nn);
3311        if (!dp)
3312                return -ENOMEM;
3313
3314        dp->mtu = new_mtu;
3315
3316        return nfp_net_ring_reconfig(nn, dp, NULL);
3317}
3318
3319static int
3320nfp_net_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
3321{
3322        const u32 cmd = NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD;
3323        struct nfp_net *nn = netdev_priv(netdev);
3324        int err;
3325
3326        /* Priority tagged packets with vlan id 0 are processed by the
3327         * NFP as untagged packets
3328         */
3329        if (!vid)
3330                return 0;
3331
3332        err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ);
3333        if (err)
3334                return err;
3335
3336        nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
3337        nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
3338                  ETH_P_8021Q);
3339
3340        return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
3341}
3342
3343static int
3344nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
3345{
3346        const u32 cmd = NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL;
3347        struct nfp_net *nn = netdev_priv(netdev);
3348        int err;
3349
3350        /* Priority tagged packets with vlan id 0 are processed by the
3351         * NFP as untagged packets
3352         */
3353        if (!vid)
3354                return 0;
3355
3356        err = nfp_net_mbox_lock(nn, NFP_NET_CFG_VLAN_FILTER_SZ);
3357        if (err)
3358                return err;
3359
3360        nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
3361        nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
3362                  ETH_P_8021Q);
3363
3364        return nfp_net_mbox_reconfig_and_unlock(nn, cmd);
3365}
3366
3367static void nfp_net_stat64(struct net_device *netdev,
3368                           struct rtnl_link_stats64 *stats)
3369{
3370        struct nfp_net *nn = netdev_priv(netdev);
3371        int r;
3372
3373        /* Collect software stats */
3374        for (r = 0; r < nn->max_r_vecs; r++) {
3375                struct nfp_net_r_vector *r_vec = &nn->r_vecs[r];
3376                u64 data[3];
3377                unsigned int start;
3378
3379                do {
3380                        start = u64_stats_fetch_begin(&r_vec->rx_sync);
3381                        data[0] = r_vec->rx_pkts;
3382                        data[1] = r_vec->rx_bytes;
3383                        data[2] = r_vec->rx_drops;
3384                } while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
3385                stats->rx_packets += data[0];
3386                stats->rx_bytes += data[1];
3387                stats->rx_dropped += data[2];
3388
3389                do {
3390                        start = u64_stats_fetch_begin(&r_vec->tx_sync);
3391                        data[0] = r_vec->tx_pkts;
3392                        data[1] = r_vec->tx_bytes;
3393                        data[2] = r_vec->tx_errors;
3394                } while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
3395                stats->tx_packets += data[0];
3396                stats->tx_bytes += data[1];
3397                stats->tx_errors += data[2];
3398        }
3399
3400        /* Add in device stats */
3401        stats->multicast += nn_readq(nn, NFP_NET_CFG_STATS_RX_MC_FRAMES);
3402        stats->rx_dropped += nn_readq(nn, NFP_NET_CFG_STATS_RX_DISCARDS);
3403        stats->rx_errors += nn_readq(nn, NFP_NET_CFG_STATS_RX_ERRORS);
3404
3405        stats->tx_dropped += nn_readq(nn, NFP_NET_CFG_STATS_TX_DISCARDS);
3406        stats->tx_errors += nn_readq(nn, NFP_NET_CFG_STATS_TX_ERRORS);
3407}
3408
3409static int nfp_net_set_features(struct net_device *netdev,
3410                                netdev_features_t features)
3411{
3412        netdev_features_t changed = netdev->features ^ features;
3413        struct nfp_net *nn = netdev_priv(netdev);
3414        u32 new_ctrl;
3415        int err;
3416
3417        /* Assume this is not called with features we have not advertised */
3418
3419        new_ctrl = nn->dp.ctrl;
3420
3421        if (changed & NETIF_F_RXCSUM) {
3422                if (features & NETIF_F_RXCSUM)
3423                        new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY;
3424                else
3425                        new_ctrl &= ~NFP_NET_CFG_CTRL_RXCSUM_ANY;
3426        }
3427
3428        if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
3429                if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))
3430                        new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
3431                else
3432                        new_ctrl &= ~NFP_NET_CFG_CTRL_TXCSUM;
3433        }
3434
3435        if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) {
3436                if (features & (NETIF_F_TSO | NETIF_F_TSO6))
3437                        new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_LSO2 ?:
3438                                              NFP_NET_CFG_CTRL_LSO;
3439                else
3440                        new_ctrl &= ~NFP_NET_CFG_CTRL_LSO_ANY;
3441        }
3442
3443        if (changed & NETIF_F_HW_VLAN_CTAG_RX) {
3444                if (features & NETIF_F_HW_VLAN_CTAG_RX)
3445                        new_ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
3446                else
3447                        new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN;
3448        }
3449
3450        if (changed & NETIF_F_HW_VLAN_CTAG_TX) {
3451                if (features & NETIF_F_HW_VLAN_CTAG_TX)
3452                        new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
3453                else
3454                        new_ctrl &= ~NFP_NET_CFG_CTRL_TXVLAN;
3455        }
3456
3457        if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
3458                if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
3459                        new_ctrl |= NFP_NET_CFG_CTRL_CTAG_FILTER;
3460                else
3461                        new_ctrl &= ~NFP_NET_CFG_CTRL_CTAG_FILTER;
3462        }
3463
3464        if (changed & NETIF_F_SG) {
3465                if (features & NETIF_F_SG)
3466                        new_ctrl |= NFP_NET_CFG_CTRL_GATHER;
3467                else
3468                        new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
3469        }
3470
3471        err = nfp_port_set_features(netdev, features);
3472        if (err)
3473                return err;
3474
3475        nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
3476               netdev->features, features, changed);
3477
3478        if (new_ctrl == nn->dp.ctrl)
3479                return 0;
3480
3481        nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->dp.ctrl, new_ctrl);
3482        nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
3483        err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
3484        if (err)
3485                return err;
3486
3487        nn->dp.ctrl = new_ctrl;
3488
3489        return 0;
3490}
3491
3492static netdev_features_t
3493nfp_net_features_check(struct sk_buff *skb, struct net_device *dev,
3494                       netdev_features_t features)
3495{
3496        u8 l4_hdr;
3497
3498        /* We can't do TSO over double tagged packets (802.1AD) */
3499        features &= vlan_features_check(skb, features);
3500
3501        if (!skb->encapsulation)
3502                return features;
3503
3504        /* Ensure that inner L4 header offset fits into TX descriptor field */
3505        if (skb_is_gso(skb)) {
3506                u32 hdrlen;
3507
3508                hdrlen = skb_inner_transport_header(skb) - skb->data +
3509                        inner_tcp_hdrlen(skb);
3510
3511                /* Assume worst case scenario of having longest possible
3512                 * metadata prepend - 8B
3513                 */
3514                if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ - 8))
3515                        features &= ~NETIF_F_GSO_MASK;
3516        }
3517
3518        /* VXLAN/GRE check */
3519        switch (vlan_get_protocol(skb)) {
3520        case htons(ETH_P_IP):
3521                l4_hdr = ip_hdr(skb)->protocol;
3522                break;
3523        case htons(ETH_P_IPV6):
3524                l4_hdr = ipv6_hdr(skb)->nexthdr;
3525                break;
3526        default:
3527                return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
3528        }
3529
3530        if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
3531            skb->inner_protocol != htons(ETH_P_TEB) ||
3532            (l4_hdr != IPPROTO_UDP && l4_hdr != IPPROTO_GRE) ||
3533            (l4_hdr == IPPROTO_UDP &&
3534             (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
3535              sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
3536                return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
3537
3538        return features;
3539}
3540
3541static int
3542nfp_net_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
3543{
3544        struct nfp_net *nn = netdev_priv(netdev);
3545        int n;
3546
3547        /* If port is defined, devlink_port is registered and devlink core
3548         * is taking care of name formatting.
3549         */
3550        if (nn->port)
3551                return -EOPNOTSUPP;
3552
3553        if (nn->dp.is_vf || nn->vnic_no_name)
3554                return -EOPNOTSUPP;
3555
3556        n = snprintf(name, len, "n%d", nn->id);
3557        if (n >= len)
3558                return -EINVAL;
3559
3560        return 0;
3561}
3562
3563/**
3564 * nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW
3565 * @nn:   NFP Net device to reconfigure
3566 * @idx:  Index into the port table where new port should be written
3567 * @port: UDP port to configure (pass zero to remove VXLAN port)
3568 */
3569static void nfp_net_set_vxlan_port(struct nfp_net *nn, int idx, __be16 port)
3570{
3571        int i;
3572
3573        nn->vxlan_ports[idx] = port;
3574
3575        if (!(nn->dp.ctrl & NFP_NET_CFG_CTRL_VXLAN))
3576                return;
3577
3578        BUILD_BUG_ON(NFP_NET_N_VXLAN_PORTS & 1);
3579        for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i += 2)
3580                nn_writel(nn, NFP_NET_CFG_VXLAN_PORT + i * sizeof(port),
3581                          be16_to_cpu(nn->vxlan_ports[i + 1]) << 16 |
3582                          be16_to_cpu(nn->vxlan_ports[i]));
3583
3584        nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_VXLAN);
3585}
3586
3587/**
3588 * nfp_net_find_vxlan_idx() - find table entry of the port or a free one
3589 * @nn:   NFP Network structure
3590 * @port: UDP port to look for
3591 *
3592 * Return: if the port is already in the table -- it's position;
3593 *         if the port is not in the table -- free position to use;
3594 *         if the table is full -- -ENOSPC.
3595 */
3596static int nfp_net_find_vxlan_idx(struct nfp_net *nn, __be16 port)
3597{
3598        int i, free_idx = -ENOSPC;
3599
3600        for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i++) {
3601                if (nn->vxlan_ports[i] == port)
3602                        return i;
3603                if (!nn->vxlan_usecnt[i])
3604                        free_idx = i;
3605        }
3606
3607        return free_idx;
3608}
3609
3610static void nfp_net_add_vxlan_port(struct net_device *netdev,
3611                                   struct udp_tunnel_info *ti)
3612{
3613        struct nfp_net *nn = netdev_priv(netdev);
3614        int idx;
3615
3616        if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
3617                return;
3618
3619        idx = nfp_net_find_vxlan_idx(nn, ti->port);
3620        if (idx == -ENOSPC)
3621                return;
3622
3623        if (!nn->vxlan_usecnt[idx]++)
3624                nfp_net_set_vxlan_port(nn, idx, ti->port);
3625}
3626
3627static void nfp_net_del_vxlan_port(struct net_device *netdev,
3628                                   struct udp_tunnel_info *ti)
3629{
3630        struct nfp_net *nn = netdev_priv(netdev);
3631        int idx;
3632
3633        if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
3634                return;
3635
3636        idx = nfp_net_find_vxlan_idx(nn, ti->port);
3637        if (idx == -ENOSPC || !nn->vxlan_usecnt[idx])
3638                return;
3639
3640        if (!--nn->vxlan_usecnt[idx])
3641                nfp_net_set_vxlan_port(nn, idx, 0);
3642}
3643
3644static int nfp_net_xdp_setup_drv(struct nfp_net *nn, struct netdev_bpf *bpf)
3645{
3646        struct bpf_prog *prog = bpf->prog;
3647        struct nfp_net_dp *dp;
3648        int err;
3649
3650        if (!xdp_attachment_flags_ok(&nn->xdp, bpf))
3651                return -EBUSY;
3652
3653        if (!prog == !nn->dp.xdp_prog) {
3654                WRITE_ONCE(nn->dp.xdp_prog, prog);
3655                xdp_attachment_setup(&nn->xdp, bpf);
3656                return 0;
3657        }
3658
3659        dp = nfp_net_clone_dp(nn);
3660        if (!dp)
3661                return -ENOMEM;
3662
3663        dp->xdp_prog = prog;
3664        dp->num_tx_rings += prog ? nn->dp.num_rx_rings : -nn->dp.num_rx_rings;
3665        dp->rx_dma_dir = prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
3666        dp->rx_dma_off = prog ? XDP_PACKET_HEADROOM - nn->dp.rx_offset : 0;
3667
3668        /* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */
3669        err = nfp_net_ring_reconfig(nn, dp, bpf->extack);
3670        if (err)
3671                return err;
3672
3673        xdp_attachment_setup(&nn->xdp, bpf);
3674        return 0;
3675}
3676
3677static int nfp_net_xdp_setup_hw(struct nfp_net *nn, struct netdev_bpf *bpf)
3678{
3679        int err;
3680
3681        if (!xdp_attachment_flags_ok(&nn->xdp_hw, bpf))
3682                return -EBUSY;
3683
3684        err = nfp_app_xdp_offload(nn->app, nn, bpf->prog, bpf->extack);
3685        if (err)
3686                return err;
3687
3688        xdp_attachment_setup(&nn->xdp_hw, bpf);
3689        return 0;
3690}
3691
3692static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
3693{
3694        struct nfp_net *nn = netdev_priv(netdev);
3695
3696        switch (xdp->command) {
3697        case XDP_SETUP_PROG:
3698                return nfp_net_xdp_setup_drv(nn, xdp);
3699        case XDP_SETUP_PROG_HW:
3700                return nfp_net_xdp_setup_hw(nn, xdp);
3701        case XDP_QUERY_PROG:
3702                return xdp_attachment_query(&nn->xdp, xdp);
3703        case XDP_QUERY_PROG_HW:
3704                return xdp_attachment_query(&nn->xdp_hw, xdp);
3705        default:
3706                return nfp_app_bpf(nn->app, nn, xdp);
3707        }
3708}
3709
3710static int nfp_net_set_mac_address(struct net_device *netdev, void *addr)
3711{
3712        struct nfp_net *nn = netdev_priv(netdev);
3713        struct sockaddr *saddr = addr;
3714        int err;
3715
3716        err = eth_prepare_mac_addr_change(netdev, addr);
3717        if (err)
3718                return err;
3719
3720        nfp_net_write_mac_addr(nn, saddr->sa_data);
3721
3722        err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MACADDR);
3723        if (err)
3724                return err;
3725
3726        eth_commit_mac_addr_change(netdev, addr);
3727
3728        return 0;
3729}
3730
3731const struct net_device_ops nfp_net_netdev_ops = {
3732        .ndo_init               = nfp_app_ndo_init,
3733        .ndo_uninit             = nfp_app_ndo_uninit,
3734        .ndo_open               = nfp_net_netdev_open,
3735        .ndo_stop               = nfp_net_netdev_close,
3736        .ndo_start_xmit         = nfp_net_tx,
3737        .ndo_get_stats64        = nfp_net_stat64,
3738        .ndo_vlan_rx_add_vid    = nfp_net_vlan_rx_add_vid,
3739        .ndo_vlan_rx_kill_vid   = nfp_net_vlan_rx_kill_vid,
3740        .ndo_set_vf_mac         = nfp_app_set_vf_mac,
3741        .ndo_set_vf_vlan        = nfp_app_set_vf_vlan,
3742        .ndo_set_vf_spoofchk    = nfp_app_set_vf_spoofchk,
3743        .ndo_set_vf_trust       = nfp_app_set_vf_trust,
3744        .ndo_get_vf_config      = nfp_app_get_vf_config,
3745        .ndo_set_vf_link_state  = nfp_app_set_vf_link_state,
3746        .ndo_setup_tc           = nfp_port_setup_tc,
3747        .ndo_tx_timeout         = nfp_net_tx_timeout,
3748        .ndo_set_rx_mode        = nfp_net_set_rx_mode,
3749        .ndo_change_mtu         = nfp_net_change_mtu,
3750        .ndo_set_mac_address    = nfp_net_set_mac_address,
3751        .ndo_set_features       = nfp_net_set_features,
3752        .ndo_features_check     = nfp_net_features_check,
3753        .ndo_get_phys_port_name = nfp_net_get_phys_port_name,
3754        .ndo_udp_tunnel_add     = nfp_net_add_vxlan_port,
3755        .ndo_udp_tunnel_del     = nfp_net_del_vxlan_port,
3756        .ndo_bpf                = nfp_net_xdp,
3757        .ndo_get_devlink_port   = nfp_devlink_get_devlink_port,
3758};
3759
3760/**
3761 * nfp_net_info() - Print general info about the NIC
3762 * @nn:      NFP Net device to reconfigure
3763 */
3764void nfp_net_info(struct nfp_net *nn)
3765{
3766        nn_info(nn, "Netronome NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n",
3767                nn->dp.is_vf ? "VF " : "",
3768                nn->dp.num_tx_rings, nn->max_tx_rings,
3769                nn->dp.num_rx_rings, nn->max_rx_rings);
3770        nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n",
3771                nn->fw_ver.resv, nn->fw_ver.class,
3772                nn->fw_ver.major, nn->fw_ver.minor,
3773                nn->max_mtu);
3774        nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3775                nn->cap,
3776                nn->cap & NFP_NET_CFG_CTRL_PROMISC  ? "PROMISC "  : "",
3777                nn->cap & NFP_NET_CFG_CTRL_L2BC     ? "L2BCFILT " : "",
3778                nn->cap & NFP_NET_CFG_CTRL_L2MC     ? "L2MCFILT " : "",
3779                nn->cap & NFP_NET_CFG_CTRL_RXCSUM   ? "RXCSUM "   : "",
3780                nn->cap & NFP_NET_CFG_CTRL_TXCSUM   ? "TXCSUM "   : "",
3781                nn->cap & NFP_NET_CFG_CTRL_RXVLAN   ? "RXVLAN "   : "",
3782                nn->cap & NFP_NET_CFG_CTRL_TXVLAN   ? "TXVLAN "   : "",
3783                nn->cap & NFP_NET_CFG_CTRL_SCATTER  ? "SCATTER "  : "",
3784                nn->cap & NFP_NET_CFG_CTRL_GATHER   ? "GATHER "   : "",
3785                nn->cap & NFP_NET_CFG_CTRL_LSO      ? "TSO1 "     : "",
3786                nn->cap & NFP_NET_CFG_CTRL_LSO2     ? "TSO2 "     : "",
3787                nn->cap & NFP_NET_CFG_CTRL_RSS      ? "RSS1 "     : "",
3788                nn->cap & NFP_NET_CFG_CTRL_RSS2     ? "RSS2 "     : "",
3789                nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER ? "CTAG_FILTER " : "",
3790                nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
3791                nn->cap & NFP_NET_CFG_CTRL_IRQMOD   ? "IRQMOD "   : "",
3792                nn->cap & NFP_NET_CFG_CTRL_VXLAN    ? "VXLAN "    : "",
3793                nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "    : "",
3794                nn->cap & NFP_NET_CFG_CTRL_CSUM_COMPLETE ?
3795                                                      "RXCSUM_COMPLETE " : "",
3796                nn->cap & NFP_NET_CFG_CTRL_LIVE_ADDR ? "LIVE_ADDR " : "",
3797                nfp_app_extra_cap(nn->app, nn));
3798}
3799
3800/**
3801 * nfp_net_alloc() - Allocate netdev and related structure
3802 * @pdev:         PCI device
3803 * @ctrl_bar:     PCI IOMEM with vNIC config memory
3804 * @needs_netdev: Whether to allocate a netdev for this vNIC
3805 * @max_tx_rings: Maximum number of TX rings supported by device
3806 * @max_rx_rings: Maximum number of RX rings supported by device
3807 *
3808 * This function allocates a netdev device and fills in the initial
3809 * part of the @struct nfp_net structure.  In case of control device
3810 * nfp_net structure is allocated without the netdev.
3811 *
3812 * Return: NFP Net device structure, or ERR_PTR on error.
3813 */
3814struct nfp_net *
3815nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev,
3816              unsigned int max_tx_rings, unsigned int max_rx_rings)
3817{
3818        struct nfp_net *nn;
3819        int err;
3820
3821        if (needs_netdev) {
3822                struct net_device *netdev;
3823
3824                netdev = alloc_etherdev_mqs(sizeof(struct nfp_net),
3825                                            max_tx_rings, max_rx_rings);
3826                if (!netdev)
3827                        return ERR_PTR(-ENOMEM);
3828
3829                SET_NETDEV_DEV(netdev, &pdev->dev);
3830                nn = netdev_priv(netdev);
3831                nn->dp.netdev = netdev;
3832        } else {
3833                nn = vzalloc(sizeof(*nn));
3834                if (!nn)
3835                        return ERR_PTR(-ENOMEM);
3836        }
3837
3838        nn->dp.dev = &pdev->dev;
3839        nn->dp.ctrl_bar = ctrl_bar;
3840        nn->pdev = pdev;
3841
3842        nn->max_tx_rings = max_tx_rings;
3843        nn->max_rx_rings = max_rx_rings;
3844
3845        nn->dp.num_tx_rings = min_t(unsigned int,
3846                                    max_tx_rings, num_online_cpus());
3847        nn->dp.num_rx_rings = min_t(unsigned int, max_rx_rings,
3848                                 netif_get_num_default_rss_queues());
3849
3850        nn->dp.num_r_vecs = max(nn->dp.num_tx_rings, nn->dp.num_rx_rings);
3851        nn->dp.num_r_vecs = min_t(unsigned int,
3852                                  nn->dp.num_r_vecs, num_online_cpus());
3853
3854        nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
3855        nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
3856
3857        sema_init(&nn->bar_lock, 1);
3858
3859        spin_lock_init(&nn->reconfig_lock);
3860        spin_lock_init(&nn->link_status_lock);
3861
3862        timer_setup(&nn->reconfig_timer, nfp_net_reconfig_timer, 0);
3863
3864        err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar,
3865                                     &nn->tlv_caps);
3866        if (err)
3867                goto err_free_nn;
3868
3869        err = nfp_ccm_mbox_alloc(nn);
3870        if (err)
3871                goto err_free_nn;
3872
3873        return nn;
3874
3875err_free_nn:
3876        if (nn->dp.netdev)
3877                free_netdev(nn->dp.netdev);
3878        else
3879                vfree(nn);
3880        return ERR_PTR(err);
3881}
3882
3883/**
3884 * nfp_net_free() - Undo what @nfp_net_alloc() did
3885 * @nn:      NFP Net device to reconfigure
3886 */
3887void nfp_net_free(struct nfp_net *nn)
3888{
3889        WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted);
3890        nfp_ccm_mbox_free(nn);
3891
3892        if (nn->dp.netdev)
3893                free_netdev(nn->dp.netdev);
3894        else
3895                vfree(nn);
3896}
3897
3898/**
3899 * nfp_net_rss_key_sz() - Get current size of the RSS key
3900 * @nn:         NFP Net device instance
3901 *
3902 * Return: size of the RSS key for currently selected hash function.
3903 */
3904unsigned int nfp_net_rss_key_sz(struct nfp_net *nn)
3905{
3906        switch (nn->rss_hfunc) {
3907        case ETH_RSS_HASH_TOP:
3908                return NFP_NET_CFG_RSS_KEY_SZ;
3909        case ETH_RSS_HASH_XOR:
3910                return 0;
3911        case ETH_RSS_HASH_CRC32:
3912                return 4;
3913        }
3914
3915        nn_warn(nn, "Unknown hash function: %u\n", nn->rss_hfunc);
3916        return 0;
3917}
3918
3919/**
3920 * nfp_net_rss_init() - Set the initial RSS parameters
3921 * @nn:      NFP Net device to reconfigure
3922 */
3923static void nfp_net_rss_init(struct nfp_net *nn)
3924{
3925        unsigned long func_bit, rss_cap_hfunc;
3926        u32 reg;
3927
3928        /* Read the RSS function capability and select first supported func */
3929        reg = nn_readl(nn, NFP_NET_CFG_RSS_CAP);
3930        rss_cap_hfunc = FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC, reg);
3931        if (!rss_cap_hfunc)
3932                rss_cap_hfunc = FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC,
3933                                          NFP_NET_CFG_RSS_TOEPLITZ);
3934
3935        func_bit = find_first_bit(&rss_cap_hfunc, NFP_NET_CFG_RSS_HFUNCS);
3936        if (func_bit == NFP_NET_CFG_RSS_HFUNCS) {
3937                dev_warn(nn->dp.dev,
3938                         "Bad RSS config, defaulting to Toeplitz hash\n");
3939                func_bit = ETH_RSS_HASH_TOP_BIT;
3940        }
3941        nn->rss_hfunc = 1 << func_bit;
3942
3943        netdev_rss_key_fill(nn->rss_key, nfp_net_rss_key_sz(nn));
3944
3945        nfp_net_rss_init_itbl(nn);
3946
3947        /* Enable IPv4/IPv6 TCP by default */
3948        nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP |
3949                      NFP_NET_CFG_RSS_IPV6_TCP |
3950                      FIELD_PREP(NFP_NET_CFG_RSS_HFUNC, nn->rss_hfunc) |
3951                      NFP_NET_CFG_RSS_MASK;
3952}
3953
3954/**
3955 * nfp_net_irqmod_init() - Set the initial IRQ moderation parameters
3956 * @nn:      NFP Net device to reconfigure
3957 */
3958static void nfp_net_irqmod_init(struct nfp_net *nn)
3959{
3960        nn->rx_coalesce_usecs      = 50;
3961        nn->rx_coalesce_max_frames = 64;
3962        nn->tx_coalesce_usecs      = 50;
3963        nn->tx_coalesce_max_frames = 64;
3964}
3965
3966static void nfp_net_netdev_init(struct nfp_net *nn)
3967{
3968        struct net_device *netdev = nn->dp.netdev;
3969
3970        nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr);
3971
3972        netdev->mtu = nn->dp.mtu;
3973
3974        /* Advertise/enable offloads based on capabilities
3975         *
3976         * Note: netdev->features show the currently enabled features
3977         * and netdev->hw_features advertises which features are
3978         * supported.  By default we enable most features.
3979         */
3980        if (nn->cap & NFP_NET_CFG_CTRL_LIVE_ADDR)
3981                netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
3982
3983        netdev->hw_features = NETIF_F_HIGHDMA;
3984        if (nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY) {
3985                netdev->hw_features |= NETIF_F_RXCSUM;
3986                nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY;
3987        }
3988        if (nn->cap & NFP_NET_CFG_CTRL_TXCSUM) {
3989                netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
3990                nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
3991        }
3992        if (nn->cap & NFP_NET_CFG_CTRL_GATHER) {
3993                netdev->hw_features |= NETIF_F_SG;
3994                nn->dp.ctrl |= NFP_NET_CFG_CTRL_GATHER;
3995        }
3996        if ((nn->cap & NFP_NET_CFG_CTRL_LSO && nn->fw_ver.major > 2) ||
3997            nn->cap & NFP_NET_CFG_CTRL_LSO2) {
3998                netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
3999                nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_LSO2 ?:
4000                                         NFP_NET_CFG_CTRL_LSO;
4001        }
4002        if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY)
4003                netdev->hw_features |= NETIF_F_RXHASH;
4004        if (nn->cap & NFP_NET_CFG_CTRL_VXLAN) {
4005                if (nn->cap & NFP_NET_CFG_CTRL_LSO)
4006                        netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
4007                nn->dp.ctrl |= NFP_NET_CFG_CTRL_VXLAN;
4008        }
4009        if (nn->cap & NFP_NET_CFG_CTRL_NVGRE) {
4010                if (nn->cap & NFP_NET_CFG_CTRL_LSO)
4011                        netdev->hw_features |= NETIF_F_GSO_GRE;
4012                nn->dp.ctrl |= NFP_NET_CFG_CTRL_NVGRE;
4013        }
4014        if (nn->cap & (NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE))
4015                netdev->hw_enc_features = netdev->hw_features;
4016
4017        netdev->vlan_features = netdev->hw_features;
4018
4019        if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN) {
4020                netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
4021                nn->dp.ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
4022        }
4023        if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN) {
4024                if (nn->cap & NFP_NET_CFG_CTRL_LSO2) {
4025                        nn_warn(nn, "Device advertises both TSO2 and TXVLAN. Refusing to enable TXVLAN.\n");
4026                } else {
4027                        netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
4028                        nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
4029                }
4030        }
4031        if (nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER) {
4032                netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
4033                nn->dp.ctrl |= NFP_NET_CFG_CTRL_CTAG_FILTER;
4034        }
4035
4036        netdev->features = netdev->hw_features;
4037
4038        if (nfp_app_has_tc(nn->app) && nn->port)
4039                netdev->hw_features |= NETIF_F_HW_TC;
4040
4041        /* Advertise but disable TSO by default. */
4042        netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
4043        nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_LSO_ANY;
4044
4045        /* Finalise the netdev setup */
4046        netdev->netdev_ops = &nfp_net_netdev_ops;
4047        netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
4048
4049        /* MTU range: 68 - hw-specific max */
4050        netdev->min_mtu = ETH_MIN_MTU;
4051        netdev->max_mtu = nn->max_mtu;
4052
4053        netdev->gso_max_segs = NFP_NET_LSO_MAX_SEGS;
4054
4055        netif_carrier_off(netdev);
4056
4057        nfp_net_set_ethtool_ops(netdev);
4058}
4059
4060static int nfp_net_read_caps(struct nfp_net *nn)
4061{
4062        /* Get some of the read-only fields from the BAR */
4063        nn->cap = nn_readl(nn, NFP_NET_CFG_CAP);
4064        nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU);
4065
4066        /* ABI 4.x and ctrl vNIC always use chained metadata, in other cases
4067         * we allow use of non-chained metadata if RSS(v1) is the only
4068         * advertised capability requiring metadata.
4069         */
4070        nn->dp.chained_metadata_format = nn->fw_ver.major == 4 ||
4071                                         !nn->dp.netdev ||
4072                                         !(nn->cap & NFP_NET_CFG_CTRL_RSS) ||
4073                                         nn->cap & NFP_NET_CFG_CTRL_CHAIN_META;
4074        /* RSS(v1) uses non-chained metadata format, except in ABI 4.x where
4075         * it has the same meaning as RSSv2.
4076         */
4077        if (nn->dp.chained_metadata_format && nn->fw_ver.major != 4)
4078                nn->cap &= ~NFP_NET_CFG_CTRL_RSS;
4079
4080        /* Determine RX packet/metadata boundary offset */
4081        if (nn->fw_ver.major >= 2) {
4082                u32 reg;
4083
4084                reg = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
4085                if (reg > NFP_NET_MAX_PREPEND) {
4086                        nn_err(nn, "Invalid rx offset: %d\n", reg);
4087                        return -EINVAL;
4088                }
4089                nn->dp.rx_offset = reg;
4090        } else {
4091                nn->dp.rx_offset = NFP_NET_RX_OFFSET;
4092        }
4093
4094        /* For control vNICs mask out the capabilities app doesn't want. */
4095        if (!nn->dp.netdev)
4096                nn->cap &= nn->app->type->ctrl_cap_mask;
4097
4098        return 0;
4099}
4100
4101/**
4102 * nfp_net_init() - Initialise/finalise the nfp_net structure
4103 * @nn:         NFP Net device structure
4104 *
4105 * Return: 0 on success or negative errno on error.
4106 */
4107int nfp_net_init(struct nfp_net *nn)
4108{
4109        int err;
4110
4111        nn->dp.rx_dma_dir = DMA_FROM_DEVICE;
4112
4113        err = nfp_net_read_caps(nn);
4114        if (err)
4115                return err;
4116
4117        /* Set default MTU and Freelist buffer size */
4118        if (!nfp_net_is_data_vnic(nn) && nn->app->ctrl_mtu) {
4119                nn->dp.mtu = min(nn->app->ctrl_mtu, nn->max_mtu);
4120        } else if (nn->max_mtu < NFP_NET_DEFAULT_MTU) {
4121                nn->dp.mtu = nn->max_mtu;
4122        } else {
4123                nn->dp.mtu = NFP_NET_DEFAULT_MTU;
4124        }
4125        nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp);
4126
4127        if (nfp_app_ctrl_uses_data_vnics(nn->app))
4128                nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_CMSG_DATA;
4129
4130        if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) {
4131                nfp_net_rss_init(nn);
4132                nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RSS2 ?:
4133                                         NFP_NET_CFG_CTRL_RSS;
4134        }
4135
4136        /* Allow L2 Broadcast and Multicast through by default, if supported */
4137        if (nn->cap & NFP_NET_CFG_CTRL_L2BC)
4138                nn->dp.ctrl |= NFP_NET_CFG_CTRL_L2BC;
4139
4140        /* Allow IRQ moderation, if supported */
4141        if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
4142                nfp_net_irqmod_init(nn);
4143                nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
4144        }
4145
4146        /* Stash the re-configuration queue away.  First odd queue in TX Bar */
4147        nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
4148
4149        /* Make sure the FW knows the netdev is supposed to be disabled here */
4150        nn_writel(nn, NFP_NET_CFG_CTRL, 0);
4151        nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
4152        nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
4153        err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RING |
4154                                   NFP_NET_CFG_UPDATE_GEN);
4155        if (err)
4156                return err;
4157
4158        if (nn->dp.netdev) {
4159                nfp_net_netdev_init(nn);
4160
4161                err = nfp_ccm_mbox_init(nn);
4162                if (err)
4163                        return err;
4164
4165                err = nfp_net_tls_init(nn);
4166                if (err)
4167                        goto err_clean_mbox;
4168        }
4169
4170        nfp_net_vecs_init(nn);
4171
4172        if (!nn->dp.netdev)
4173                return 0;
4174        return register_netdev(nn->dp.netdev);
4175
4176err_clean_mbox:
4177        nfp_ccm_mbox_clean(nn);
4178        return err;
4179}
4180
4181/**
4182 * nfp_net_clean() - Undo what nfp_net_init() did.
4183 * @nn:         NFP Net device structure
4184 */
4185void nfp_net_clean(struct nfp_net *nn)
4186{
4187        if (!nn->dp.netdev)
4188                return;
4189
4190        unregister_netdev(nn->dp.netdev);
4191        nfp_ccm_mbox_clean(nn);
4192        nfp_net_reconfig_wait_posted(nn);
4193}
4194