linux/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
<<
>>
Prefs
   1// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
   2/* Copyright (C) 2015-2018 Netronome Systems, Inc. */
   3
   4/*
   5 * nfp_net_common.c
   6 * Netronome network device driver: Common functions between PF and VF
   7 * Authors: Jakub Kicinski <jakub.kicinski@netronome.com>
   8 *          Jason McMullan <jason.mcmullan@netronome.com>
   9 *          Rolf Neugebauer <rolf.neugebauer@netronome.com>
  10 *          Brad Petrus <brad.petrus@netronome.com>
  11 *          Chris Telfer <chris.telfer@netronome.com>
  12 */
  13
  14#undef CONFIG_BPF_SYSCALL
  15
  16#include <linux/bitfield.h>
  17#include <linux/bpf.h>
  18#include <linux/bpf_trace.h>
  19#include <linux/module.h>
  20#include <linux/kernel.h>
  21#include <linux/init.h>
  22#include <linux/fs.h>
  23#include <linux/netdevice.h>
  24#include <linux/etherdevice.h>
  25#include <linux/interrupt.h>
  26#include <linux/ip.h>
  27#include <linux/ipv6.h>
  28#include <linux/overflow.h>
  29#include <linux/mm.h>
  30#include <linux/page_ref.h>
  31#include <linux/pci.h>
  32#include <linux/pci_regs.h>
  33#include <linux/msi.h>
  34#include <linux/ethtool.h>
  35#include <linux/log2.h>
  36#include <linux/if_vlan.h>
  37#include <linux/random.h>
  38#include <linux/vmalloc.h>
  39#include <linux/ktime.h>
  40
  41#include <net/switchdev.h>
  42#include <net/vxlan.h>
  43
  44#include "nfpcore/nfp_nsp.h"
  45#include "nfp_app.h"
  46#include "nfp_net_ctrl.h"
  47#include "nfp_net.h"
  48#include "nfp_net_sriov.h"
  49#include "nfp_port.h"
  50
  51/**
  52 * nfp_net_get_fw_version() - Read and parse the FW version
  53 * @fw_ver:     Output fw_version structure to read to
  54 * @ctrl_bar:   Mapped address of the control BAR
  55 */
  56void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
  57                            void __iomem *ctrl_bar)
  58{
  59        u32 reg;
  60
  61        reg = readl(ctrl_bar + NFP_NET_CFG_VERSION);
  62        put_unaligned_le32(reg, fw_ver);
  63}
  64
  65static dma_addr_t nfp_net_dma_map_rx(struct nfp_net_dp *dp, void *frag)
  66{
  67        struct dma_attrs attrs;
  68
  69        init_dma_attrs(&attrs);
  70        dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs);
  71        return dma_map_single_attrs(dp->dev, frag + NFP_NET_RX_BUF_HEADROOM,
  72                                    dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
  73                                    dp->rx_dma_dir, &attrs);
  74}
  75
  76static void
  77nfp_net_dma_sync_dev_rx(const struct nfp_net_dp *dp, dma_addr_t dma_addr)
  78{
  79        dma_sync_single_for_device(dp->dev, dma_addr,
  80                                   dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
  81                                   dp->rx_dma_dir);
  82}
  83
  84static void nfp_net_dma_unmap_rx(struct nfp_net_dp *dp, dma_addr_t dma_addr)
  85{
  86        struct dma_attrs attrs;
  87
  88        init_dma_attrs(&attrs);
  89        dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs);
  90        dma_unmap_single_attrs(dp->dev, dma_addr,
  91                               dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
  92                               dp->rx_dma_dir, &attrs);
  93}
  94
  95static void nfp_net_dma_sync_cpu_rx(struct nfp_net_dp *dp, dma_addr_t dma_addr,
  96                                    unsigned int len)
  97{
  98        dma_sync_single_for_cpu(dp->dev, dma_addr - NFP_NET_RX_BUF_HEADROOM,
  99                                len, dp->rx_dma_dir);
 100}
 101
 102/* Firmware reconfig
 103 *
 104 * Firmware reconfig may take a while so we have two versions of it -
 105 * synchronous and asynchronous (posted).  All synchronous callers are holding
 106 * RTNL so we don't have to worry about serializing them.
 107 */
 108static void nfp_net_reconfig_start(struct nfp_net *nn, u32 update)
 109{
 110        nn_writel(nn, NFP_NET_CFG_UPDATE, update);
 111        /* ensure update is written before pinging HW */
 112        nn_pci_flush(nn);
 113        nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1);
 114        nn->reconfig_in_progress_update = update;
 115}
 116
 117/* Pass 0 as update to run posted reconfigs. */
 118static void nfp_net_reconfig_start_async(struct nfp_net *nn, u32 update)
 119{
 120        update |= nn->reconfig_posted;
 121        nn->reconfig_posted = 0;
 122
 123        nfp_net_reconfig_start(nn, update);
 124
 125        nn->reconfig_timer_active = true;
 126        mod_timer(&nn->reconfig_timer, jiffies + NFP_NET_POLL_TIMEOUT * HZ);
 127}
 128
 129static bool nfp_net_reconfig_check_done(struct nfp_net *nn, bool last_check)
 130{
 131        u32 reg;
 132
 133        reg = nn_readl(nn, NFP_NET_CFG_UPDATE);
 134        if (reg == 0)
 135                return true;
 136        if (reg & NFP_NET_CFG_UPDATE_ERR) {
 137                nn_err(nn, "Reconfig error (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n",
 138                       reg, nn->reconfig_in_progress_update,
 139                       nn_readl(nn, NFP_NET_CFG_CTRL));
 140                return true;
 141        } else if (last_check) {
 142                nn_err(nn, "Reconfig timeout (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n",
 143                       reg, nn->reconfig_in_progress_update,
 144                       nn_readl(nn, NFP_NET_CFG_CTRL));
 145                return true;
 146        }
 147
 148        return false;
 149}
 150
 151static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
 152{
 153        bool timed_out = false;
 154
 155        /* Poll update field, waiting for NFP to ack the config */
 156        while (!nfp_net_reconfig_check_done(nn, timed_out)) {
 157                msleep(1);
 158                timed_out = time_is_before_eq_jiffies(deadline);
 159        }
 160
 161        if (nn_readl(nn, NFP_NET_CFG_UPDATE) & NFP_NET_CFG_UPDATE_ERR)
 162                return -EIO;
 163
 164        return timed_out ? -EIO : 0;
 165}
 166
 167static void nfp_net_reconfig_timer(struct timer_list *t)
 168{
 169        struct nfp_net *nn = from_timer(nn, t, reconfig_timer);
 170
 171        spin_lock_bh(&nn->reconfig_lock);
 172
 173        nn->reconfig_timer_active = false;
 174
 175        /* If sync caller is present it will take over from us */
 176        if (nn->reconfig_sync_present)
 177                goto done;
 178
 179        /* Read reconfig status and report errors */
 180        nfp_net_reconfig_check_done(nn, true);
 181
 182        if (nn->reconfig_posted)
 183                nfp_net_reconfig_start_async(nn, 0);
 184done:
 185        spin_unlock_bh(&nn->reconfig_lock);
 186}
 187
 188/**
 189 * nfp_net_reconfig_post() - Post async reconfig request
 190 * @nn:      NFP Net device to reconfigure
 191 * @update:  The value for the update field in the BAR config
 192 *
 193 * Record FW reconfiguration request.  Reconfiguration will be kicked off
 194 * whenever reconfiguration machinery is idle.  Multiple requests can be
 195 * merged together!
 196 */
 197static void nfp_net_reconfig_post(struct nfp_net *nn, u32 update)
 198{
 199        spin_lock_bh(&nn->reconfig_lock);
 200
 201        /* Sync caller will kick off async reconf when it's done, just post */
 202        if (nn->reconfig_sync_present) {
 203                nn->reconfig_posted |= update;
 204                goto done;
 205        }
 206
 207        /* Opportunistically check if the previous command is done */
 208        if (!nn->reconfig_timer_active ||
 209            nfp_net_reconfig_check_done(nn, false))
 210                nfp_net_reconfig_start_async(nn, update);
 211        else
 212                nn->reconfig_posted |= update;
 213done:
 214        spin_unlock_bh(&nn->reconfig_lock);
 215}
 216
 217static void nfp_net_reconfig_sync_enter(struct nfp_net *nn)
 218{
 219        bool cancelled_timer = false;
 220        u32 pre_posted_requests;
 221
 222        spin_lock_bh(&nn->reconfig_lock);
 223
 224        nn->reconfig_sync_present = true;
 225
 226        if (nn->reconfig_timer_active) {
 227                nn->reconfig_timer_active = false;
 228                cancelled_timer = true;
 229        }
 230        pre_posted_requests = nn->reconfig_posted;
 231        nn->reconfig_posted = 0;
 232
 233        spin_unlock_bh(&nn->reconfig_lock);
 234
 235        if (cancelled_timer) {
 236                del_timer_sync(&nn->reconfig_timer);
 237                nfp_net_reconfig_wait(nn, nn->reconfig_timer.expires);
 238        }
 239
 240        /* Run the posted reconfigs which were issued before we started */
 241        if (pre_posted_requests) {
 242                nfp_net_reconfig_start(nn, pre_posted_requests);
 243                nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
 244        }
 245}
 246
 247static void nfp_net_reconfig_wait_posted(struct nfp_net *nn)
 248{
 249        nfp_net_reconfig_sync_enter(nn);
 250
 251        spin_lock_bh(&nn->reconfig_lock);
 252        nn->reconfig_sync_present = false;
 253        spin_unlock_bh(&nn->reconfig_lock);
 254}
 255
 256/**
 257 * nfp_net_reconfig() - Reconfigure the firmware
 258 * @nn:      NFP Net device to reconfigure
 259 * @update:  The value for the update field in the BAR config
 260 *
 261 * Write the update word to the BAR and ping the reconfig queue.  The
 262 * poll until the firmware has acknowledged the update by zeroing the
 263 * update word.
 264 *
 265 * Return: Negative errno on error, 0 on success
 266 */
 267int nfp_net_reconfig(struct nfp_net *nn, u32 update)
 268{
 269        int ret;
 270
 271        nfp_net_reconfig_sync_enter(nn);
 272
 273        nfp_net_reconfig_start(nn, update);
 274        ret = nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
 275
 276        spin_lock_bh(&nn->reconfig_lock);
 277
 278        if (nn->reconfig_posted)
 279                nfp_net_reconfig_start_async(nn, 0);
 280
 281        nn->reconfig_sync_present = false;
 282
 283        spin_unlock_bh(&nn->reconfig_lock);
 284
 285        return ret;
 286}
 287
 288/**
 289 * nfp_net_reconfig_mbox() - Reconfigure the firmware via the mailbox
 290 * @nn:        NFP Net device to reconfigure
 291 * @mbox_cmd:  The value for the mailbox command
 292 *
 293 * Helper function for mailbox updates
 294 *
 295 * Return: Negative errno on error, 0 on success
 296 */
 297static int nfp_net_reconfig_mbox(struct nfp_net *nn, u32 mbox_cmd)
 298{
 299        u32 mbox = nn->tlv_caps.mbox_off;
 300        int ret;
 301
 302        if (!nfp_net_has_mbox(&nn->tlv_caps)) {
 303                nn_err(nn, "no mailbox present, command: %u\n", mbox_cmd);
 304                return -EIO;
 305        }
 306
 307        nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd);
 308
 309        ret = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX);
 310        if (ret) {
 311                nn_err(nn, "Mailbox update error\n");
 312                return ret;
 313        }
 314
 315        return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET);
 316}
 317
 318/* Interrupt configuration and handling
 319 */
 320
 321/**
 322 * nfp_net_irq_unmask() - Unmask automasked interrupt
 323 * @nn:       NFP Network structure
 324 * @entry_nr: MSI-X table entry
 325 *
 326 * If MSI-X auto-masking is enabled clear the mask bit, otherwise
 327 * clear the ICR for the entry.
 328 */
 329static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr)
 330{
 331        nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED);
 332        nn_pci_flush(nn);
 333}
 334
 335/**
 336 * nfp_net_irqs_alloc() - allocates MSI-X irqs
 337 * @pdev:        PCI device structure
 338 * @irq_entries: Array to be initialized and used to hold the irq entries
 339 * @min_irqs:    Minimal acceptable number of interrupts
 340 * @wanted_irqs: Target number of interrupts to allocate
 341 *
 342 * Return: Number of irqs obtained or 0 on error.
 343 */
 344unsigned int
 345nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries,
 346                   unsigned int min_irqs, unsigned int wanted_irqs)
 347{
 348        unsigned int i;
 349        int got_irqs;
 350
 351        for (i = 0; i < wanted_irqs; i++)
 352                irq_entries[i].entry = i;
 353
 354        got_irqs = pci_enable_msix_range(pdev, irq_entries,
 355                                         min_irqs, wanted_irqs);
 356        if (got_irqs < 0) {
 357                dev_err(&pdev->dev, "Failed to enable %d-%d MSI-X (err=%d)\n",
 358                        min_irqs, wanted_irqs, got_irqs);
 359                return 0;
 360        }
 361
 362        if (got_irqs < wanted_irqs)
 363                dev_warn(&pdev->dev, "Unable to allocate %d IRQs got only %d\n",
 364                         wanted_irqs, got_irqs);
 365
 366        return got_irqs;
 367}
 368
 369/**
 370 * nfp_net_irqs_assign() - Assign interrupts allocated externally to netdev
 371 * @nn:          NFP Network structure
 372 * @irq_entries: Table of allocated interrupts
 373 * @n:           Size of @irq_entries (number of entries to grab)
 374 *
 375 * After interrupts are allocated with nfp_net_irqs_alloc() this function
 376 * should be called to assign them to a specific netdev (port).
 377 */
 378void
 379nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries,
 380                    unsigned int n)
 381{
 382        struct nfp_net_dp *dp = &nn->dp;
 383
 384        nn->max_r_vecs = n - NFP_NET_NON_Q_VECTORS;
 385        dp->num_r_vecs = nn->max_r_vecs;
 386
 387        memcpy(nn->irq_entries, irq_entries, sizeof(*irq_entries) * n);
 388
 389        if (dp->num_rx_rings > dp->num_r_vecs ||
 390            dp->num_tx_rings > dp->num_r_vecs)
 391                dev_warn(nn->dp.dev, "More rings (%d,%d) than vectors (%d).\n",
 392                         dp->num_rx_rings, dp->num_tx_rings,
 393                         dp->num_r_vecs);
 394
 395        dp->num_rx_rings = min(dp->num_r_vecs, dp->num_rx_rings);
 396        dp->num_tx_rings = min(dp->num_r_vecs, dp->num_tx_rings);
 397        dp->num_stack_tx_rings = dp->num_tx_rings;
 398}
 399
 400/**
 401 * nfp_net_irqs_disable() - Disable interrupts
 402 * @pdev:        PCI device structure
 403 *
 404 * Undoes what @nfp_net_irqs_alloc() does.
 405 */
 406void nfp_net_irqs_disable(struct pci_dev *pdev)
 407{
 408        pci_disable_msix(pdev);
 409}
 410
 411/**
 412 * nfp_net_irq_rxtx() - Interrupt service routine for RX/TX rings.
 413 * @irq:      Interrupt
 414 * @data:     Opaque data structure
 415 *
 416 * Return: Indicate if the interrupt has been handled.
 417 */
 418static irqreturn_t nfp_net_irq_rxtx(int irq, void *data)
 419{
 420        struct nfp_net_r_vector *r_vec = data;
 421
 422        napi_schedule_irqoff(&r_vec->napi);
 423
 424        /* The FW auto-masks any interrupt, either via the MASK bit in
 425         * the MSI-X table or via the per entry ICR field.  So there
 426         * is no need to disable interrupts here.
 427         */
 428        return IRQ_HANDLED;
 429}
 430
 431static irqreturn_t nfp_ctrl_irq_rxtx(int irq, void *data)
 432{
 433        struct nfp_net_r_vector *r_vec = data;
 434
 435        tasklet_schedule(&r_vec->tasklet);
 436
 437        return IRQ_HANDLED;
 438}
 439
 440/**
 441 * nfp_net_read_link_status() - Reread link status from control BAR
 442 * @nn:       NFP Network structure
 443 */
 444static void nfp_net_read_link_status(struct nfp_net *nn)
 445{
 446        unsigned long flags;
 447        bool link_up;
 448        u32 sts;
 449
 450        spin_lock_irqsave(&nn->link_status_lock, flags);
 451
 452        sts = nn_readl(nn, NFP_NET_CFG_STS);
 453        link_up = !!(sts & NFP_NET_CFG_STS_LINK);
 454
 455        if (nn->link_up == link_up)
 456                goto out;
 457
 458        nn->link_up = link_up;
 459        if (nn->port)
 460                set_bit(NFP_PORT_CHANGED, &nn->port->flags);
 461
 462        if (nn->link_up) {
 463                netif_carrier_on(nn->dp.netdev);
 464                netdev_info(nn->dp.netdev, "NIC Link is Up\n");
 465        } else {
 466                netif_carrier_off(nn->dp.netdev);
 467                netdev_info(nn->dp.netdev, "NIC Link is Down\n");
 468        }
 469out:
 470        spin_unlock_irqrestore(&nn->link_status_lock, flags);
 471}
 472
 473/**
 474 * nfp_net_irq_lsc() - Interrupt service routine for link state changes
 475 * @irq:      Interrupt
 476 * @data:     Opaque data structure
 477 *
 478 * Return: Indicate if the interrupt has been handled.
 479 */
 480static irqreturn_t nfp_net_irq_lsc(int irq, void *data)
 481{
 482        struct nfp_net *nn = data;
 483        struct msix_entry *entry;
 484
 485        entry = &nn->irq_entries[NFP_NET_IRQ_LSC_IDX];
 486
 487        nfp_net_read_link_status(nn);
 488
 489        nfp_net_irq_unmask(nn, entry->entry);
 490
 491        return IRQ_HANDLED;
 492}
 493
 494/**
 495 * nfp_net_irq_exn() - Interrupt service routine for exceptions
 496 * @irq:      Interrupt
 497 * @data:     Opaque data structure
 498 *
 499 * Return: Indicate if the interrupt has been handled.
 500 */
 501static irqreturn_t nfp_net_irq_exn(int irq, void *data)
 502{
 503        struct nfp_net *nn = data;
 504
 505        nn_err(nn, "%s: UNIMPLEMENTED.\n", __func__);
 506        /* XXX TO BE IMPLEMENTED */
 507        return IRQ_HANDLED;
 508}
 509
 510/**
 511 * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring
 512 * @tx_ring:  TX ring structure
 513 * @r_vec:    IRQ vector servicing this ring
 514 * @idx:      Ring index
 515 * @is_xdp:   Is this an XDP TX ring?
 516 */
 517static void
 518nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring,
 519                     struct nfp_net_r_vector *r_vec, unsigned int idx,
 520                     bool is_xdp)
 521{
 522        struct nfp_net *nn = r_vec->nfp_net;
 523
 524        tx_ring->idx = idx;
 525        tx_ring->r_vec = r_vec;
 526        tx_ring->is_xdp = is_xdp;
 527        u64_stats_init(&tx_ring->r_vec->tx_sync);
 528
 529        tx_ring->qcidx = tx_ring->idx * nn->stride_tx;
 530        tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx);
 531}
 532
 533/**
 534 * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring
 535 * @rx_ring:  RX ring structure
 536 * @r_vec:    IRQ vector servicing this ring
 537 * @idx:      Ring index
 538 */
 539static void
 540nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring,
 541                     struct nfp_net_r_vector *r_vec, unsigned int idx)
 542{
 543        struct nfp_net *nn = r_vec->nfp_net;
 544
 545        rx_ring->idx = idx;
 546        rx_ring->r_vec = r_vec;
 547        u64_stats_init(&rx_ring->r_vec->rx_sync);
 548
 549        rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx;
 550        rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx);
 551}
 552
 553/**
 554 * nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN)
 555 * @nn:         NFP Network structure
 556 * @ctrl_offset: Control BAR offset where IRQ configuration should be written
 557 * @format:     printf-style format to construct the interrupt name
 558 * @name:       Pointer to allocated space for interrupt name
 559 * @name_sz:    Size of space for interrupt name
 560 * @vector_idx: Index of MSI-X vector used for this interrupt
 561 * @handler:    IRQ handler to register for this interrupt
 562 */
 563static int
 564nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset,
 565                        const char *format, char *name, size_t name_sz,
 566                        unsigned int vector_idx, irq_handler_t handler)
 567{
 568        struct msix_entry *entry;
 569        int err;
 570
 571        entry = &nn->irq_entries[vector_idx];
 572
 573        snprintf(name, name_sz, format, nfp_net_name(nn));
 574        err = request_irq(entry->vector, handler, 0, name, nn);
 575        if (err) {
 576                nn_err(nn, "Failed to request IRQ %d (err=%d).\n",
 577                       entry->vector, err);
 578                return err;
 579        }
 580        nn_writeb(nn, ctrl_offset, entry->entry);
 581        nfp_net_irq_unmask(nn, entry->entry);
 582
 583        return 0;
 584}
 585
 586/**
 587 * nfp_net_aux_irq_free() - Free an auxiliary interrupt (LSC or EXN)
 588 * @nn:         NFP Network structure
 589 * @ctrl_offset: Control BAR offset where IRQ configuration should be written
 590 * @vector_idx: Index of MSI-X vector used for this interrupt
 591 */
 592static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset,
 593                                 unsigned int vector_idx)
 594{
 595        nn_writeb(nn, ctrl_offset, 0xff);
 596        nn_pci_flush(nn);
 597        free_irq(nn->irq_entries[vector_idx].vector, nn);
 598}
 599
 600/* Transmit
 601 *
 602 * One queue controller peripheral queue is used for transmit.  The
 603 * driver en-queues packets for transmit by advancing the write
 604 * pointer.  The device indicates that packets have transmitted by
 605 * advancing the read pointer.  The driver maintains a local copy of
 606 * the read and write pointer in @struct nfp_net_tx_ring.  The driver
 607 * keeps @wr_p in sync with the queue controller write pointer and can
 608 * determine how many packets have been transmitted by comparing its
 609 * copy of the read pointer @rd_p with the read pointer maintained by
 610 * the queue controller peripheral.
 611 */
 612
 613/**
 614 * nfp_net_tx_full() - Check if the TX ring is full
 615 * @tx_ring: TX ring to check
 616 * @dcnt:    Number of descriptors that need to be enqueued (must be >= 1)
 617 *
 618 * This function checks, based on the *host copy* of read/write
 619 * pointer if a given TX ring is full.  The real TX queue may have
 620 * some newly made available slots.
 621 *
 622 * Return: True if the ring is full.
 623 */
 624static int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt)
 625{
 626        return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt);
 627}
 628
 629/* Wrappers for deciding when to stop and restart TX queues */
 630static int nfp_net_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
 631{
 632        return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4);
 633}
 634
 635static int nfp_net_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
 636{
 637        return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1);
 638}
 639
 640/**
 641 * nfp_net_tx_ring_stop() - stop tx ring
 642 * @nd_q:    netdev queue
 643 * @tx_ring: driver tx queue structure
 644 *
 645 * Safely stop TX ring.  Remember that while we are running .start_xmit()
 646 * someone else may be cleaning the TX ring completions so we need to be
 647 * extra careful here.
 648 */
 649static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q,
 650                                 struct nfp_net_tx_ring *tx_ring)
 651{
 652        netif_tx_stop_queue(nd_q);
 653
 654        /* We can race with the TX completion out of NAPI so recheck */
 655        smp_mb();
 656        if (unlikely(nfp_net_tx_ring_should_wake(tx_ring)))
 657                netif_tx_start_queue(nd_q);
 658}
 659
 660/**
 661 * nfp_net_tx_tso() - Set up Tx descriptor for LSO
 662 * @r_vec: per-ring structure
 663 * @txbuf: Pointer to driver soft TX descriptor
 664 * @txd: Pointer to HW TX descriptor
 665 * @skb: Pointer to SKB
 666 * @md_bytes: Prepend length
 667 *
 668 * Set up Tx descriptor for LSO, do nothing for non-LSO skbs.
 669 * Return error on packet header greater than maximum supported LSO header size.
 670 */
 671static void nfp_net_tx_tso(struct nfp_net_r_vector *r_vec,
 672                           struct nfp_net_tx_buf *txbuf,
 673                           struct nfp_net_tx_desc *txd, struct sk_buff *skb,
 674                           u32 md_bytes)
 675{
 676        u32 l3_offset, l4_offset, hdrlen;
 677        u16 mss;
 678
 679        if (!skb_is_gso(skb))
 680                return;
 681
 682        if (!skb->encapsulation) {
 683                l3_offset = skb_network_offset(skb);
 684                l4_offset = skb_transport_offset(skb);
 685                hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
 686        } else {
 687                l3_offset = skb_inner_network_offset(skb);
 688                l4_offset = skb_inner_transport_offset(skb);
 689                hdrlen = skb_inner_transport_header(skb) - skb->data +
 690                        inner_tcp_hdrlen(skb);
 691        }
 692
 693        txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs;
 694        txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1);
 695
 696        mss = skb_shinfo(skb)->gso_size & PCIE_DESC_TX_MSS_MASK;
 697        txd->l3_offset = l3_offset - md_bytes;
 698        txd->l4_offset = l4_offset - md_bytes;
 699        txd->lso_hdrlen = hdrlen - md_bytes;
 700        txd->mss = cpu_to_le16(mss);
 701        txd->flags |= PCIE_DESC_TX_LSO;
 702
 703        u64_stats_update_begin(&r_vec->tx_sync);
 704        r_vec->tx_lso++;
 705        u64_stats_update_end(&r_vec->tx_sync);
 706}
 707
 708/**
 709 * nfp_net_tx_csum() - Set TX CSUM offload flags in TX descriptor
 710 * @dp:  NFP Net data path struct
 711 * @r_vec: per-ring structure
 712 * @txbuf: Pointer to driver soft TX descriptor
 713 * @txd: Pointer to TX descriptor
 714 * @skb: Pointer to SKB
 715 *
 716 * This function sets the TX checksum flags in the TX descriptor based
 717 * on the configuration and the protocol of the packet to be transmitted.
 718 */
 719static void nfp_net_tx_csum(struct nfp_net_dp *dp,
 720                            struct nfp_net_r_vector *r_vec,
 721                            struct nfp_net_tx_buf *txbuf,
 722                            struct nfp_net_tx_desc *txd, struct sk_buff *skb)
 723{
 724        struct ipv6hdr *ipv6h;
 725        struct iphdr *iph;
 726        u8 l4_hdr;
 727
 728        if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
 729                return;
 730
 731        if (skb->ip_summed != CHECKSUM_PARTIAL)
 732                return;
 733
 734        txd->flags |= PCIE_DESC_TX_CSUM;
 735        if (skb->encapsulation)
 736                txd->flags |= PCIE_DESC_TX_ENCAP;
 737
 738        iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
 739        ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
 740
 741        if (iph->version == 4) {
 742                txd->flags |= PCIE_DESC_TX_IP4_CSUM;
 743                l4_hdr = iph->protocol;
 744        } else if (ipv6h->version == 6) {
 745                l4_hdr = ipv6h->nexthdr;
 746        } else {
 747                nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
 748                return;
 749        }
 750
 751        switch (l4_hdr) {
 752        case IPPROTO_TCP:
 753                txd->flags |= PCIE_DESC_TX_TCP_CSUM;
 754                break;
 755        case IPPROTO_UDP:
 756                txd->flags |= PCIE_DESC_TX_UDP_CSUM;
 757                break;
 758        default:
 759                nn_dp_warn(dp, "partial checksum but l4 proto=%x!\n", l4_hdr);
 760                return;
 761        }
 762
 763        u64_stats_update_begin(&r_vec->tx_sync);
 764        if (skb->encapsulation)
 765                r_vec->hw_csum_tx_inner += txbuf->pkt_cnt;
 766        else
 767                r_vec->hw_csum_tx += txbuf->pkt_cnt;
 768        u64_stats_update_end(&r_vec->tx_sync);
 769}
 770
 771static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring)
 772{
 773        wmb();
 774        nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add);
 775        tx_ring->wr_ptr_add = 0;
 776}
 777
 778static int nfp_net_prep_port_id(struct sk_buff *skb)
 779{
 780        struct metadata_dst *md_dst = skb_metadata_dst(skb);
 781        unsigned char *data;
 782
 783        if (likely(!md_dst))
 784                return 0;
 785        if (unlikely(md_dst->type != METADATA_HW_PORT_MUX))
 786                return 0;
 787
 788        if (unlikely(skb_cow_head(skb, 8)))
 789                return -ENOMEM;
 790
 791        data = skb_push(skb, 8);
 792        put_unaligned_be32(NFP_NET_META_PORTID, data);
 793        put_unaligned_be32(md_dst->u.port_info.port_id, data + 4);
 794
 795        return 8;
 796}
 797
 798/**
 799 * nfp_net_tx() - Main transmit entry point
 800 * @skb:    SKB to transmit
 801 * @netdev: netdev structure
 802 *
 803 * Return: NETDEV_TX_OK on success.
 804 */
 805static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
 806{
 807        struct nfp_net *nn = netdev_priv(netdev);
 808        const struct skb_frag_struct *frag;
 809        int f, nr_frags, wr_idx, md_bytes;
 810        struct nfp_net_tx_ring *tx_ring;
 811        struct nfp_net_r_vector *r_vec;
 812        struct nfp_net_tx_buf *txbuf;
 813        struct nfp_net_tx_desc *txd;
 814        struct netdev_queue *nd_q;
 815        struct nfp_net_dp *dp;
 816        dma_addr_t dma_addr;
 817        unsigned int fsize;
 818        u16 qidx;
 819
 820        dp = &nn->dp;
 821        qidx = skb_get_queue_mapping(skb);
 822        tx_ring = &dp->tx_rings[qidx];
 823        r_vec = tx_ring->r_vec;
 824
 825        nr_frags = skb_shinfo(skb)->nr_frags;
 826
 827        if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) {
 828                nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
 829                           qidx, tx_ring->wr_p, tx_ring->rd_p);
 830                nd_q = netdev_get_tx_queue(dp->netdev, qidx);
 831                netif_tx_stop_queue(nd_q);
 832                nfp_net_tx_xmit_more_flush(tx_ring);
 833                u64_stats_update_begin(&r_vec->tx_sync);
 834                r_vec->tx_busy++;
 835                u64_stats_update_end(&r_vec->tx_sync);
 836                return NETDEV_TX_BUSY;
 837        }
 838
 839        md_bytes = nfp_net_prep_port_id(skb);
 840        if (unlikely(md_bytes < 0)) {
 841                nfp_net_tx_xmit_more_flush(tx_ring);
 842                dev_kfree_skb_any(skb);
 843                return NETDEV_TX_OK;
 844        }
 845
 846        /* Start with the head skbuf */
 847        dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
 848                                  DMA_TO_DEVICE);
 849        if (dma_mapping_error(dp->dev, dma_addr))
 850                goto err_free;
 851
 852        wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
 853
 854        /* Stash the soft descriptor of the head then initialize it */
 855        txbuf = &tx_ring->txbufs[wr_idx];
 856        txbuf->skb = skb;
 857        txbuf->dma_addr = dma_addr;
 858        txbuf->fidx = -1;
 859        txbuf->pkt_cnt = 1;
 860        txbuf->real_len = skb->len;
 861
 862        /* Build TX descriptor */
 863        txd = &tx_ring->txds[wr_idx];
 864        txd->offset_eop = (nr_frags ? 0 : PCIE_DESC_TX_EOP) | md_bytes;
 865        txd->dma_len = cpu_to_le16(skb_headlen(skb));
 866        nfp_desc_set_dma_addr(txd, dma_addr);
 867        txd->data_len = cpu_to_le16(skb->len);
 868
 869        txd->flags = 0;
 870        txd->mss = 0;
 871        txd->lso_hdrlen = 0;
 872
 873        /* Do not reorder - tso may adjust pkt cnt, vlan may override fields */
 874        nfp_net_tx_tso(r_vec, txbuf, txd, skb, md_bytes);
 875        nfp_net_tx_csum(dp, r_vec, txbuf, txd, skb);
 876        if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
 877                txd->flags |= PCIE_DESC_TX_VLAN;
 878                txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
 879        }
 880
 881        /* Gather DMA */
 882        if (nr_frags > 0) {
 883                __le64 second_half;
 884
 885                /* all descs must match except for in addr, length and eop */
 886                second_half = txd->vals8[1];
 887
 888                for (f = 0; f < nr_frags; f++) {
 889                        frag = &skb_shinfo(skb)->frags[f];
 890                        fsize = skb_frag_size(frag);
 891
 892                        dma_addr = skb_frag_dma_map(dp->dev, frag, 0,
 893                                                    fsize, DMA_TO_DEVICE);
 894                        if (dma_mapping_error(dp->dev, dma_addr))
 895                                goto err_unmap;
 896
 897                        wr_idx = D_IDX(tx_ring, wr_idx + 1);
 898                        tx_ring->txbufs[wr_idx].skb = skb;
 899                        tx_ring->txbufs[wr_idx].dma_addr = dma_addr;
 900                        tx_ring->txbufs[wr_idx].fidx = f;
 901
 902                        txd = &tx_ring->txds[wr_idx];
 903                        txd->dma_len = cpu_to_le16(fsize);
 904                        nfp_desc_set_dma_addr(txd, dma_addr);
 905                        txd->offset_eop = md_bytes |
 906                                ((f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0);
 907                        txd->vals8[1] = second_half;
 908                }
 909
 910                u64_stats_update_begin(&r_vec->tx_sync);
 911                r_vec->tx_gather++;
 912                u64_stats_update_end(&r_vec->tx_sync);
 913        }
 914
 915        skb_tx_timestamp(skb);
 916
 917        nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
 918
 919        tx_ring->wr_p += nr_frags + 1;
 920        if (nfp_net_tx_ring_should_stop(tx_ring))
 921                nfp_net_tx_ring_stop(nd_q, tx_ring);
 922
 923        tx_ring->wr_ptr_add += nr_frags + 1;
 924        if (__netdev_tx_sent_queue(nd_q, txbuf->real_len, skb->xmit_more))
 925                nfp_net_tx_xmit_more_flush(tx_ring);
 926
 927        return NETDEV_TX_OK;
 928
 929err_unmap:
 930        while (--f >= 0) {
 931                frag = &skb_shinfo(skb)->frags[f];
 932                dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
 933                               skb_frag_size(frag), DMA_TO_DEVICE);
 934                tx_ring->txbufs[wr_idx].skb = NULL;
 935                tx_ring->txbufs[wr_idx].dma_addr = 0;
 936                tx_ring->txbufs[wr_idx].fidx = -2;
 937                wr_idx = wr_idx - 1;
 938                if (wr_idx < 0)
 939                        wr_idx += tx_ring->cnt;
 940        }
 941        dma_unmap_single(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
 942                         skb_headlen(skb), DMA_TO_DEVICE);
 943        tx_ring->txbufs[wr_idx].skb = NULL;
 944        tx_ring->txbufs[wr_idx].dma_addr = 0;
 945        tx_ring->txbufs[wr_idx].fidx = -2;
 946err_free:
 947        nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
 948        nfp_net_tx_xmit_more_flush(tx_ring);
 949        u64_stats_update_begin(&r_vec->tx_sync);
 950        r_vec->tx_errors++;
 951        u64_stats_update_end(&r_vec->tx_sync);
 952        dev_kfree_skb_any(skb);
 953        return NETDEV_TX_OK;
 954}
 955
 956/**
 957 * nfp_net_tx_complete() - Handled completed TX packets
 958 * @tx_ring:    TX ring structure
 959 * @budget:     NAPI budget (only used as bool to determine if in NAPI context)
 960 */
 961static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
 962{
 963        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
 964        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
 965        struct netdev_queue *nd_q;
 966        u32 done_pkts = 0, done_bytes = 0;
 967        u32 qcp_rd_p;
 968        int todo;
 969
 970        if (tx_ring->wr_p == tx_ring->rd_p)
 971                return;
 972
 973        /* Work out how many descriptors have been transmitted */
 974        qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
 975
 976        if (qcp_rd_p == tx_ring->qcp_rd_p)
 977                return;
 978
 979        todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
 980
 981        while (todo--) {
 982                const struct skb_frag_struct *frag;
 983                struct nfp_net_tx_buf *tx_buf;
 984                struct sk_buff *skb;
 985                int fidx, nr_frags;
 986                int idx;
 987
 988                idx = D_IDX(tx_ring, tx_ring->rd_p++);
 989                tx_buf = &tx_ring->txbufs[idx];
 990
 991                skb = tx_buf->skb;
 992                if (!skb)
 993                        continue;
 994
 995                nr_frags = skb_shinfo(skb)->nr_frags;
 996                fidx = tx_buf->fidx;
 997
 998                if (fidx == -1) {
 999                        /* unmap head */
1000                        dma_unmap_single(dp->dev, tx_buf->dma_addr,
1001                                         skb_headlen(skb), DMA_TO_DEVICE);
1002
1003                        done_pkts += tx_buf->pkt_cnt;
1004                        done_bytes += tx_buf->real_len;
1005                } else {
1006                        /* unmap fragment */
1007                        frag = &skb_shinfo(skb)->frags[fidx];
1008                        dma_unmap_page(dp->dev, tx_buf->dma_addr,
1009                                       skb_frag_size(frag), DMA_TO_DEVICE);
1010                }
1011
1012                /* check for last gather fragment */
1013                if (fidx == nr_frags - 1)
1014                        napi_consume_skb(skb, budget);
1015
1016                tx_buf->dma_addr = 0;
1017                tx_buf->skb = NULL;
1018                tx_buf->fidx = -2;
1019        }
1020
1021        tx_ring->qcp_rd_p = qcp_rd_p;
1022
1023        u64_stats_update_begin(&r_vec->tx_sync);
1024        r_vec->tx_bytes += done_bytes;
1025        r_vec->tx_pkts += done_pkts;
1026        u64_stats_update_end(&r_vec->tx_sync);
1027
1028        if (!dp->netdev)
1029                return;
1030
1031        nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
1032        netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
1033        if (nfp_net_tx_ring_should_wake(tx_ring)) {
1034                /* Make sure TX thread will see updated tx_ring->rd_p */
1035                smp_mb();
1036
1037                if (unlikely(netif_tx_queue_stopped(nd_q)))
1038                        netif_tx_wake_queue(nd_q);
1039        }
1040
1041        WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
1042                  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
1043                  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
1044}
1045
1046static bool nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring)
1047{
1048        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
1049        u32 done_pkts = 0, done_bytes = 0;
1050        bool done_all;
1051        int idx, todo;
1052        u32 qcp_rd_p;
1053
1054        /* Work out how many descriptors have been transmitted */
1055        qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
1056
1057        if (qcp_rd_p == tx_ring->qcp_rd_p)
1058                return true;
1059
1060        todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
1061
1062        done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
1063        todo = min(todo, NFP_NET_XDP_MAX_COMPLETE);
1064
1065        tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo);
1066
1067        done_pkts = todo;
1068        while (todo--) {
1069                idx = D_IDX(tx_ring, tx_ring->rd_p);
1070                tx_ring->rd_p++;
1071
1072                done_bytes += tx_ring->txbufs[idx].real_len;
1073        }
1074
1075        u64_stats_update_begin(&r_vec->tx_sync);
1076        r_vec->tx_bytes += done_bytes;
1077        r_vec->tx_pkts += done_pkts;
1078        u64_stats_update_end(&r_vec->tx_sync);
1079
1080        WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
1081                  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
1082                  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
1083
1084        return done_all;
1085}
1086
1087/**
1088 * nfp_net_tx_ring_reset() - Free any untransmitted buffers and reset pointers
1089 * @dp:         NFP Net data path struct
1090 * @tx_ring:    TX ring structure
1091 *
1092 * Assumes that the device is stopped, must be idempotent.
1093 */
1094static void
1095nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
1096{
1097        const struct skb_frag_struct *frag;
1098        struct netdev_queue *nd_q;
1099
1100        while (!tx_ring->is_xdp && tx_ring->rd_p != tx_ring->wr_p) {
1101                struct nfp_net_tx_buf *tx_buf;
1102                struct sk_buff *skb;
1103                int idx, nr_frags;
1104
1105                idx = D_IDX(tx_ring, tx_ring->rd_p);
1106                tx_buf = &tx_ring->txbufs[idx];
1107
1108                skb = tx_ring->txbufs[idx].skb;
1109                nr_frags = skb_shinfo(skb)->nr_frags;
1110
1111                if (tx_buf->fidx == -1) {
1112                        /* unmap head */
1113                        dma_unmap_single(dp->dev, tx_buf->dma_addr,
1114                                         skb_headlen(skb), DMA_TO_DEVICE);
1115                } else {
1116                        /* unmap fragment */
1117                        frag = &skb_shinfo(skb)->frags[tx_buf->fidx];
1118                        dma_unmap_page(dp->dev, tx_buf->dma_addr,
1119                                       skb_frag_size(frag), DMA_TO_DEVICE);
1120                }
1121
1122                /* check for last gather fragment */
1123                if (tx_buf->fidx == nr_frags - 1)
1124                        dev_kfree_skb_any(skb);
1125
1126                tx_buf->dma_addr = 0;
1127                tx_buf->skb = NULL;
1128                tx_buf->fidx = -2;
1129
1130                tx_ring->qcp_rd_p++;
1131                tx_ring->rd_p++;
1132        }
1133
1134        memset(tx_ring->txds, 0, tx_ring->size);
1135        tx_ring->wr_p = 0;
1136        tx_ring->rd_p = 0;
1137        tx_ring->qcp_rd_p = 0;
1138        tx_ring->wr_ptr_add = 0;
1139
1140        if (tx_ring->is_xdp || !dp->netdev)
1141                return;
1142
1143        nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
1144        netdev_tx_reset_queue(nd_q);
1145}
1146
1147static void nfp_net_tx_timeout(struct net_device *netdev)
1148{
1149        struct nfp_net *nn = netdev_priv(netdev);
1150        int i;
1151
1152        for (i = 0; i < nn->dp.netdev->real_num_tx_queues; i++) {
1153                if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i)))
1154                        continue;
1155                nn_warn(nn, "TX timeout on ring: %d\n", i);
1156        }
1157        nn_warn(nn, "TX watchdog timeout\n");
1158}
1159
1160/* Receive processing
1161 */
1162static unsigned int
1163nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp)
1164{
1165        unsigned int fl_bufsz;
1166
1167        fl_bufsz = NFP_NET_RX_BUF_HEADROOM;
1168        fl_bufsz += dp->rx_dma_off;
1169        if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
1170                fl_bufsz += NFP_NET_MAX_PREPEND;
1171        else
1172                fl_bufsz += dp->rx_offset;
1173        fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + dp->mtu;
1174
1175        fl_bufsz = SKB_DATA_ALIGN(fl_bufsz);
1176        fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1177
1178        return fl_bufsz;
1179}
1180
1181static void
1182nfp_net_free_frag(void *frag, bool xdp)
1183{
1184        if (!xdp)
1185                skb_free_frag(frag);
1186        else
1187                __free_page(virt_to_page(frag));
1188}
1189
1190/**
1191 * nfp_net_rx_alloc_one() - Allocate and map page frag for RX
1192 * @dp:         NFP Net data path struct
1193 * @dma_addr:   Pointer to storage for DMA address (output param)
1194 *
1195 * This function will allcate a new page frag, map it for DMA.
1196 *
1197 * Return: allocated page frag or NULL on failure.
1198 */
1199static void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
1200{
1201        void *frag;
1202
1203        if (!dp->xdp_prog) {
1204                frag = netdev_alloc_frag(dp->fl_bufsz);
1205        } else {
1206                struct page *page;
1207
1208                page = alloc_page(GFP_KERNEL);
1209                frag = page ? page_address(page) : NULL;
1210        }
1211        if (!frag) {
1212                nn_dp_warn(dp, "Failed to alloc receive page frag\n");
1213                return NULL;
1214        }
1215
1216        *dma_addr = nfp_net_dma_map_rx(dp, frag);
1217        if (dma_mapping_error(dp->dev, *dma_addr)) {
1218                nfp_net_free_frag(frag, dp->xdp_prog);
1219                nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
1220                return NULL;
1221        }
1222
1223        return frag;
1224}
1225
1226static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
1227{
1228        void *frag;
1229
1230        if (!dp->xdp_prog) {
1231                frag = napi_alloc_frag(dp->fl_bufsz);
1232                if (unlikely(!frag))
1233                        return NULL;
1234        } else {
1235                struct page *page;
1236
1237                page = alloc_page(GFP_ATOMIC);
1238                frag = page ? page_address(page) : NULL;
1239        }
1240        if (!frag) {
1241                nn_dp_warn(dp, "Failed to alloc receive page frag\n");
1242                return NULL;
1243        }
1244
1245        *dma_addr = nfp_net_dma_map_rx(dp, frag);
1246        if (dma_mapping_error(dp->dev, *dma_addr)) {
1247                nfp_net_free_frag(frag, dp->xdp_prog);
1248                nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
1249                return NULL;
1250        }
1251
1252        return frag;
1253}
1254
1255/**
1256 * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings
1257 * @dp:         NFP Net data path struct
1258 * @rx_ring:    RX ring structure
1259 * @frag:       page fragment buffer
1260 * @dma_addr:   DMA address of skb mapping
1261 */
1262static void nfp_net_rx_give_one(const struct nfp_net_dp *dp,
1263                                struct nfp_net_rx_ring *rx_ring,
1264                                void *frag, dma_addr_t dma_addr)
1265{
1266        unsigned int wr_idx;
1267
1268        wr_idx = D_IDX(rx_ring, rx_ring->wr_p);
1269
1270        nfp_net_dma_sync_dev_rx(dp, dma_addr);
1271
1272        /* Stash SKB and DMA address away */
1273        rx_ring->rxbufs[wr_idx].frag = frag;
1274        rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;
1275
1276        /* Fill freelist descriptor */
1277        rx_ring->rxds[wr_idx].fld.reserved = 0;
1278        rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
1279        nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
1280                              dma_addr + dp->rx_dma_off);
1281
1282        rx_ring->wr_p++;
1283        if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
1284                /* Update write pointer of the freelist queue. Make
1285                 * sure all writes are flushed before telling the hardware.
1286                 */
1287                wmb();
1288                nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
1289        }
1290}
1291
1292/**
1293 * nfp_net_rx_ring_reset() - Reflect in SW state of freelist after disable
1294 * @rx_ring:    RX ring structure
1295 *
1296 * Assumes that the device is stopped, must be idempotent.
1297 */
1298static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
1299{
1300        unsigned int wr_idx, last_idx;
1301
1302        /* wr_p == rd_p means ring was never fed FL bufs.  RX rings are always
1303         * kept at cnt - 1 FL bufs.
1304         */
1305        if (rx_ring->wr_p == 0 && rx_ring->rd_p == 0)
1306                return;
1307
1308        /* Move the empty entry to the end of the list */
1309        wr_idx = D_IDX(rx_ring, rx_ring->wr_p);
1310        last_idx = rx_ring->cnt - 1;
1311        rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr;
1312        rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag;
1313        rx_ring->rxbufs[last_idx].dma_addr = 0;
1314        rx_ring->rxbufs[last_idx].frag = NULL;
1315
1316        memset(rx_ring->rxds, 0, rx_ring->size);
1317        rx_ring->wr_p = 0;
1318        rx_ring->rd_p = 0;
1319}
1320
1321/**
1322 * nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring
1323 * @dp:         NFP Net data path struct
1324 * @rx_ring:    RX ring to remove buffers from
1325 *
1326 * Assumes that the device is stopped and buffers are in [0, ring->cnt - 1)
1327 * entries.  After device is disabled nfp_net_rx_ring_reset() must be called
1328 * to restore required ring geometry.
1329 */
1330static void
1331nfp_net_rx_ring_bufs_free(struct nfp_net_dp *dp,
1332                          struct nfp_net_rx_ring *rx_ring)
1333{
1334        unsigned int i;
1335
1336        for (i = 0; i < rx_ring->cnt - 1; i++) {
1337                /* NULL skb can only happen when initial filling of the ring
1338                 * fails to allocate enough buffers and calls here to free
1339                 * already allocated ones.
1340                 */
1341                if (!rx_ring->rxbufs[i].frag)
1342                        continue;
1343
1344                nfp_net_dma_unmap_rx(dp, rx_ring->rxbufs[i].dma_addr);
1345                nfp_net_free_frag(rx_ring->rxbufs[i].frag, dp->xdp_prog);
1346                rx_ring->rxbufs[i].dma_addr = 0;
1347                rx_ring->rxbufs[i].frag = NULL;
1348        }
1349}
1350
1351/**
1352 * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW)
1353 * @dp:         NFP Net data path struct
1354 * @rx_ring:    RX ring to remove buffers from
1355 */
1356static int
1357nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp,
1358                           struct nfp_net_rx_ring *rx_ring)
1359{
1360        struct nfp_net_rx_buf *rxbufs;
1361        unsigned int i;
1362
1363        rxbufs = rx_ring->rxbufs;
1364
1365        for (i = 0; i < rx_ring->cnt - 1; i++) {
1366                rxbufs[i].frag = nfp_net_rx_alloc_one(dp, &rxbufs[i].dma_addr);
1367                if (!rxbufs[i].frag) {
1368                        nfp_net_rx_ring_bufs_free(dp, rx_ring);
1369                        return -ENOMEM;
1370                }
1371        }
1372
1373        return 0;
1374}
1375
1376/**
1377 * nfp_net_rx_ring_fill_freelist() - Give buffers from the ring to FW
1378 * @dp:      NFP Net data path struct
1379 * @rx_ring: RX ring to fill
1380 */
1381static void
1382nfp_net_rx_ring_fill_freelist(struct nfp_net_dp *dp,
1383                              struct nfp_net_rx_ring *rx_ring)
1384{
1385        unsigned int i;
1386
1387        for (i = 0; i < rx_ring->cnt - 1; i++)
1388                nfp_net_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
1389                                    rx_ring->rxbufs[i].dma_addr);
1390}
1391
1392/**
1393 * nfp_net_rx_csum_has_errors() - group check if rxd has any csum errors
1394 * @flags: RX descriptor flags field in CPU byte order
1395 */
1396static int nfp_net_rx_csum_has_errors(u16 flags)
1397{
1398        u16 csum_all_checked, csum_all_ok;
1399
1400        csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL;
1401        csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK;
1402
1403        return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT);
1404}
1405
1406/**
1407 * nfp_net_rx_csum() - set SKB checksum field based on RX descriptor flags
1408 * @dp:  NFP Net data path struct
1409 * @r_vec: per-ring structure
1410 * @rxd: Pointer to RX descriptor
1411 * @meta: Parsed metadata prepend
1412 * @skb: Pointer to SKB
1413 */
1414static void nfp_net_rx_csum(struct nfp_net_dp *dp,
1415                            struct nfp_net_r_vector *r_vec,
1416                            struct nfp_net_rx_desc *rxd,
1417                            struct nfp_meta_parsed *meta, struct sk_buff *skb)
1418{
1419        skb_checksum_none_assert(skb);
1420
1421        if (!(dp->netdev->features & NETIF_F_RXCSUM))
1422                return;
1423
1424        if (meta->csum_type) {
1425                skb->ip_summed = meta->csum_type;
1426                skb->csum = meta->csum;
1427                u64_stats_update_begin(&r_vec->rx_sync);
1428                r_vec->hw_csum_rx_complete++;
1429                u64_stats_update_end(&r_vec->rx_sync);
1430                return;
1431        }
1432
1433        if (nfp_net_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
1434                u64_stats_update_begin(&r_vec->rx_sync);
1435                r_vec->hw_csum_rx_error++;
1436                u64_stats_update_end(&r_vec->rx_sync);
1437                return;
1438        }
1439
1440        /* Assume that the firmware will never report inner CSUM_OK unless outer
1441         * L4 headers were successfully parsed. FW will always report zero UDP
1442         * checksum as CSUM_OK.
1443         */
1444        if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK ||
1445            rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) {
1446                __skb_incr_checksum_unnecessary(skb);
1447                u64_stats_update_begin(&r_vec->rx_sync);
1448                r_vec->hw_csum_rx_ok++;
1449                u64_stats_update_end(&r_vec->rx_sync);
1450        }
1451
1452        if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK ||
1453            rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) {
1454                __skb_incr_checksum_unnecessary(skb);
1455                u64_stats_update_begin(&r_vec->rx_sync);
1456                r_vec->hw_csum_rx_inner_ok++;
1457                u64_stats_update_end(&r_vec->rx_sync);
1458        }
1459}
1460
1461static void
1462nfp_net_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
1463                 unsigned int type, __be32 *hash)
1464{
1465        if (!(netdev->features & NETIF_F_RXHASH))
1466                return;
1467
1468        switch (type) {
1469        case NFP_NET_RSS_IPV4:
1470        case NFP_NET_RSS_IPV6:
1471        case NFP_NET_RSS_IPV6_EX:
1472                meta->hash_type = PKT_HASH_TYPE_L3;
1473                break;
1474        default:
1475                meta->hash_type = PKT_HASH_TYPE_L4;
1476                break;
1477        }
1478
1479        meta->hash = get_unaligned_be32(hash);
1480}
1481
1482static void
1483nfp_net_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta,
1484                      void *data, struct nfp_net_rx_desc *rxd)
1485{
1486        struct nfp_net_rx_hash *rx_hash = data;
1487
1488        if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
1489                return;
1490
1491        nfp_net_set_hash(netdev, meta, get_unaligned_be32(&rx_hash->hash_type),
1492                         &rx_hash->hash);
1493}
1494
1495static void *
1496nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
1497                   void *data, int meta_len)
1498{
1499        u32 meta_info;
1500
1501        meta_info = get_unaligned_be32(data);
1502        data += 4;
1503
1504        while (meta_info) {
1505                switch (meta_info & NFP_NET_META_FIELD_MASK) {
1506                case NFP_NET_META_HASH:
1507                        meta_info >>= NFP_NET_META_FIELD_SIZE;
1508                        nfp_net_set_hash(netdev, meta,
1509                                         meta_info & NFP_NET_META_FIELD_MASK,
1510                                         (__be32 *)data);
1511                        data += 4;
1512                        break;
1513                case NFP_NET_META_MARK:
1514                        meta->mark = get_unaligned_be32(data);
1515                        data += 4;
1516                        break;
1517                case NFP_NET_META_PORTID:
1518                        meta->portid = get_unaligned_be32(data);
1519                        data += 4;
1520                        break;
1521                case NFP_NET_META_CSUM:
1522                        meta->csum_type = CHECKSUM_COMPLETE;
1523                        meta->csum =
1524                                (__force __wsum)__get_unaligned_cpu32(data);
1525                        data += 4;
1526                        break;
1527                default:
1528                        return NULL;
1529                }
1530
1531                meta_info >>= NFP_NET_META_FIELD_SIZE;
1532        }
1533
1534        return data;
1535}
1536
1537static void
1538nfp_net_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
1539                struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
1540                struct sk_buff *skb)
1541{
1542        u64_stats_update_begin(&r_vec->rx_sync);
1543        r_vec->rx_drops++;
1544        /* If we have both skb and rxbuf the replacement buffer allocation
1545         * must have failed, count this as an alloc failure.
1546         */
1547        if (skb && rxbuf)
1548                r_vec->rx_replace_buf_alloc_fail++;
1549        u64_stats_update_end(&r_vec->rx_sync);
1550
1551        /* skb is build based on the frag, free_skb() would free the frag
1552         * so to be able to reuse it we need an extra ref.
1553         */
1554        if (skb && rxbuf && skb->head == rxbuf->frag)
1555                page_ref_inc(virt_to_head_page(rxbuf->frag));
1556        if (rxbuf)
1557                nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
1558        if (skb)
1559                dev_kfree_skb_any(skb);
1560}
1561
#if 0 /* Not in RHEL7 */
/* Queue a received buffer for transmission on an XDP TX ring.
 *
 * NOTE: this whole function is compiled out (#if 0) in this tree.
 *
 * If nfp_net_tx_full() reports the ring full, nfp_net_xdp_complete() is run
 * unless *completed is already set (it is set afterwards so the completion
 * pass happens only once per caller-owned flag) and the check is repeated.
 * If the ring is still full the RX buffer is handed to nfp_net_rx_drop()
 * and false is returned.
 *
 * Otherwise the buffer currently stored in the TX slot is given to the RX
 * ring, the RX buffer takes its place, and a single end-of-packet descriptor
 * covering pkt_len bytes at rxbuf->dma_addr + dma_off is written.  The ring
 * write pointer and wr_ptr_add are advanced by one; the hardware queue is
 * not notified here (presumably the caller flushes wr_ptr_add -- TODO
 * confirm).  Returns true.
 */
static bool
nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
                   struct nfp_net_tx_ring *tx_ring,
                   struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
                   unsigned int pkt_len, bool *completed)
{
        struct nfp_net_tx_buf *txbuf;
        struct nfp_net_tx_desc *txd;
        int wr_idx;

        if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
                /* Try to reclaim completed descriptors, at most once */
                if (!*completed) {
                        nfp_net_xdp_complete(tx_ring);
                        *completed = true;
                }

                if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
                        /* Still no room: recycle the RX buffer and give up */
                        nfp_net_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
                                        NULL);
                        return false;
                }
        }

        wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

        /* Stash the soft descriptor of the head then initialize it */
        txbuf = &tx_ring->txbufs[wr_idx];

        /* The buffer previously held by this TX slot goes to the RX ring */
        nfp_net_rx_give_one(dp, rx_ring, txbuf->frag, txbuf->dma_addr);

        txbuf->frag = rxbuf->frag;
        txbuf->dma_addr = rxbuf->dma_addr;
        txbuf->fidx = -1;
        txbuf->pkt_cnt = 1;
        txbuf->real_len = pkt_len;

        /* Sync only the packet bytes back to the device before TX */
        dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
                                   pkt_len, DMA_BIDIRECTIONAL);

        /* Build TX descriptor */
        txd = &tx_ring->txds[wr_idx];
        txd->offset_eop = PCIE_DESC_TX_EOP;
        txd->dma_len = cpu_to_le16(pkt_len);
        nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + dma_off);
        txd->data_len = cpu_to_le16(pkt_len);

        txd->flags = 0;
        txd->mss = 0;
        txd->lso_hdrlen = 0;

        tx_ring->wr_p++;
        tx_ring->wr_ptr_add++;
        return true;
}
#endif
1618
1619/**
1620 * nfp_net_rx() - receive up to @budget packets on @rx_ring
1621 * @rx_ring:   RX ring to receive from
1622 * @budget:    NAPI budget
1623 *
1624 * Note, this function is separated out from the napi poll function to
1625 * more cleanly separate packet receive code from other bookkeeping
1626 * functions performed in the napi poll function.
1627 *
1628 * Return: Number of packets received.
1629 */
1630static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
1631{
1632        struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
1633        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
1634        struct nfp_net_tx_ring *tx_ring;
1635        struct bpf_prog *xdp_prog;
1636        bool xdp_tx_cmpl = false;
1637        unsigned int true_bufsz;
1638        struct sk_buff *skb;
1639        int pkts_polled = 0;
1640        struct xdp_buff xdp;
1641        int idx;
1642
1643        rcu_read_lock();
1644        xdp_prog = READ_ONCE(dp->xdp_prog);
1645        true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
1646        xdp.rxq = &rx_ring->xdp_rxq;
1647        tx_ring = r_vec->xdp_ring;
1648
1649        while (pkts_polled < budget) {
1650                unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
1651                struct nfp_net_rx_buf *rxbuf;
1652                struct nfp_net_rx_desc *rxd;
1653                struct nfp_meta_parsed meta;
1654                bool redir_egress = false;
1655                struct net_device *netdev;
1656                dma_addr_t new_dma_addr;
1657#if 0 /* Not in RHEL7 */
1658                u32 meta_len_xdp = 0;
1659#endif
1660                void *new_frag;
1661
1662                idx = D_IDX(rx_ring, rx_ring->rd_p);
1663
1664                rxd = &rx_ring->rxds[idx];
1665                if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
1666                        break;
1667
1668                /* Memory barrier to ensure that we won't do other reads
1669                 * before the DD bit.
1670                 */
1671                dma_rmb();
1672
1673                memset(&meta, 0, sizeof(meta));
1674
1675                rx_ring->rd_p++;
1676                pkts_polled++;
1677
1678                rxbuf = &rx_ring->rxbufs[idx];
1679                /*         < meta_len >
1680                 *  <-- [rx_offset] -->
1681                 *  ---------------------------------------------------------
1682                 * | [XX] |  metadata  |             packet           | XXXX |
1683                 *  ---------------------------------------------------------
1684                 *         <---------------- data_len --------------->
1685                 *
1686                 * The rx_offset is fixed for all packets, the meta_len can vary
1687                 * on a packet by packet basis. If rx_offset is set to zero
1688                 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
1689                 * buffer and is immediately followed by the packet (no [XX]).
1690                 */
1691                meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
1692                data_len = le16_to_cpu(rxd->rxd.data_len);
1693                pkt_len = data_len - meta_len;
1694
1695                pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
1696                if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
1697                        pkt_off += meta_len;
1698                else
1699                        pkt_off += dp->rx_offset;
1700                meta_off = pkt_off - meta_len;
1701
1702                /* Stats update */
1703                u64_stats_update_begin(&r_vec->rx_sync);
1704                r_vec->rx_pkts++;
1705                r_vec->rx_bytes += pkt_len;
1706                u64_stats_update_end(&r_vec->rx_sync);
1707
1708                if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
1709                             (dp->rx_offset && meta_len > dp->rx_offset))) {
1710                        nn_dp_warn(dp, "oversized RX packet metadata %u\n",
1711                                   meta_len);
1712                        nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
1713                        continue;
1714                }
1715
1716                nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
1717                                        data_len);
1718
1719                if (!dp->chained_metadata_format) {
1720                        nfp_net_set_hash_desc(dp->netdev, &meta,
1721                                              rxbuf->frag + meta_off, rxd);
1722                } else if (meta_len) {
1723                        void *end;
1724
1725                        end = nfp_net_parse_meta(dp->netdev, &meta,
1726                                                 rxbuf->frag + meta_off,
1727                                                 meta_len);
1728                        if (unlikely(end != rxbuf->frag + pkt_off)) {
1729                                nn_dp_warn(dp, "invalid RX packet metadata\n");
1730                                nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
1731                                                NULL);
1732                                continue;
1733                        }
1734                }
1735
1736#if 0 /* Not in RHEL7 */
1737                if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF &&
1738                                  dp->bpf_offload_xdp) && !meta.portid) {
1739                        void *orig_data = rxbuf->frag + pkt_off;
1740                        unsigned int dma_off;
1741                        int act;
1742
1743                        xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
1744                        xdp.data = orig_data;
1745                        xdp.data_meta = orig_data;
1746                        xdp.data_end = orig_data + pkt_len;
1747
1748                        act = bpf_prog_run_xdp(xdp_prog, &xdp);
1749
1750                        pkt_len -= xdp.data - orig_data;
1751                        pkt_off += xdp.data - orig_data;
1752
1753                        switch (act) {
1754                        case XDP_PASS:
1755                                meta_len_xdp = xdp.data - xdp.data_meta;
1756                                break;
1757                        case XDP_TX:
1758                                dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
1759                                if (unlikely(!nfp_net_tx_xdp_buf(dp, rx_ring,
1760                                                                 tx_ring, rxbuf,
1761                                                                 dma_off,
1762                                                                 pkt_len,
1763                                                                 &xdp_tx_cmpl)))
1764                                        trace_xdp_exception(dp->netdev,
1765                                                            xdp_prog, act);
1766                                continue;
1767                        default:
1768                                bpf_warn_invalid_xdp_action(act);
1769                                /* fall through */
1770                        case XDP_ABORTED:
1771                                trace_xdp_exception(dp->netdev, xdp_prog, act);
1772                                /* fall through */
1773                        case XDP_DROP:
1774                                nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
1775                                                    rxbuf->dma_addr);
1776                                continue;
1777                        }
1778                }
1779#endif
1780
1781                if (likely(!meta.portid)) {
1782                        netdev = dp->netdev;
1783                } else if (meta.portid == NFP_META_PORT_ID_CTRL) {
1784                        struct nfp_net *nn = netdev_priv(dp->netdev);
1785
1786                        nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
1787                                            pkt_len);
1788                        nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
1789                                            rxbuf->dma_addr);
1790                        continue;
1791                } else {
1792                        struct nfp_net *nn;
1793
1794                        nn = netdev_priv(dp->netdev);
1795                        netdev = nfp_app_dev_get(nn->app, meta.portid,
1796                                                 &redir_egress);
1797                        if (unlikely(!netdev)) {
1798                                nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
1799                                                NULL);
1800                                continue;
1801                        }
1802
1803                        if (nfp_netdev_is_nfp_repr(netdev))
1804                                nfp_repr_inc_rx_stats(netdev, pkt_len);
1805                }
1806
1807                skb = build_skb(rxbuf->frag, true_bufsz);
1808                if (unlikely(!skb)) {
1809                        nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
1810                        continue;
1811                }
1812                new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr);
1813                if (unlikely(!new_frag)) {
1814                        nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
1815                        continue;
1816                }
1817
1818                nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
1819
1820                nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
1821
1822                skb_reserve(skb, pkt_off);
1823                skb_put(skb, pkt_len);
1824
1825                skb->mark = meta.mark;
1826                skb_set_hash(skb, meta.hash, meta.hash_type);
1827
1828                skb_record_rx_queue(skb, rx_ring->idx);
1829                skb->protocol = eth_type_trans(skb, netdev);
1830
1831                nfp_net_rx_csum(dp, r_vec, rxd, &meta, skb);
1832
1833                if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
1834                        __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
1835                                               le16_to_cpu(rxd->rxd.vlan));
1836#if 0 /* Not in RHEL7 */
1837                if (meta_len_xdp)
1838                        skb_metadata_set(skb, meta_len_xdp);
1839#endif
1840
1841                if (likely(!redir_egress)) {
1842                        napi_gro_receive(&rx_ring->r_vec->napi, skb);
1843                } else {
1844                        skb->dev = netdev;
1845                        skb_reset_network_header(skb);
1846                        __skb_push(skb, ETH_HLEN);
1847                        dev_queue_xmit(skb);
1848                }
1849        }
1850
1851        if (xdp_prog) {
1852                if (tx_ring->wr_ptr_add)
1853                        nfp_net_tx_xmit_more_flush(tx_ring);
1854                else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) &&
1855                         !xdp_tx_cmpl)
1856                        if (!nfp_net_xdp_complete(tx_ring))
1857                                pkts_polled = budget;
1858        }
1859        rcu_read_unlock();
1860
1861        return pkts_polled;
1862}
1863
1864/**
1865 * nfp_net_poll() - napi poll function
1866 * @napi:    NAPI structure
1867 * @budget:  NAPI budget
1868 *
1869 * Return: number of packets polled.
1870 */
1871static int nfp_net_poll(struct napi_struct *napi, int budget)
1872{
1873        struct nfp_net_r_vector *r_vec =
1874                container_of(napi, struct nfp_net_r_vector, napi);
1875        unsigned int pkts_polled = 0;
1876
1877        if (r_vec->tx_ring)
1878                nfp_net_tx_complete(r_vec->tx_ring, budget);
1879        if (r_vec->rx_ring)
1880                pkts_polled = nfp_net_rx(r_vec->rx_ring, budget);
1881
1882        if (pkts_polled < budget)
1883                if (napi_complete_done(napi, pkts_polled))
1884                        nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
1885
1886        return pkts_polled;
1887}
1888
1889/* Control device data path
1890 */
1891
1892static bool
1893nfp_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
1894                struct sk_buff *skb, bool old)
1895{
1896        unsigned int real_len = skb->len, meta_len = 0;
1897        struct nfp_net_tx_ring *tx_ring;
1898        struct nfp_net_tx_buf *txbuf;
1899        struct nfp_net_tx_desc *txd;
1900        struct nfp_net_dp *dp;
1901        dma_addr_t dma_addr;
1902        int wr_idx;
1903
1904        dp = &r_vec->nfp_net->dp;
1905        tx_ring = r_vec->tx_ring;
1906
1907        if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) {
1908                nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n");
1909                goto err_free;
1910        }
1911
1912        if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
1913                u64_stats_update_begin(&r_vec->tx_sync);
1914                r_vec->tx_busy++;
1915                u64_stats_update_end(&r_vec->tx_sync);
1916                if (!old)
1917                        __skb_queue_tail(&r_vec->queue, skb);
1918                else
1919                        __skb_queue_head(&r_vec->queue, skb);
1920                return true;
1921        }
1922
1923        if (nfp_app_ctrl_has_meta(nn->app)) {
1924                if (unlikely(skb_headroom(skb) < 8)) {
1925                        nn_dp_warn(dp, "CTRL TX on skb without headroom\n");
1926                        goto err_free;
1927                }
1928                meta_len = 8;
1929                put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4));
1930                put_unaligned_be32(NFP_NET_META_PORTID, skb_push(skb, 4));
1931        }
1932
1933        /* Start with the head skbuf */
1934        dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
1935                                  DMA_TO_DEVICE);
1936        if (dma_mapping_error(dp->dev, dma_addr))
1937                goto err_dma_warn;
1938
1939        wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
1940
1941        /* Stash the soft descriptor of the head then initialize it */
1942        txbuf = &tx_ring->txbufs[wr_idx];
1943        txbuf->skb = skb;
1944        txbuf->dma_addr = dma_addr;
1945        txbuf->fidx = -1;
1946        txbuf->pkt_cnt = 1;
1947        txbuf->real_len = real_len;
1948
1949        /* Build TX descriptor */
1950        txd = &tx_ring->txds[wr_idx];
1951        txd->offset_eop = meta_len | PCIE_DESC_TX_EOP;
1952        txd->dma_len = cpu_to_le16(skb_headlen(skb));
1953        nfp_desc_set_dma_addr(txd, dma_addr);
1954        txd->data_len = cpu_to_le16(skb->len);
1955
1956        txd->flags = 0;
1957        txd->mss = 0;
1958        txd->lso_hdrlen = 0;
1959
1960        tx_ring->wr_p++;
1961        tx_ring->wr_ptr_add++;
1962        nfp_net_tx_xmit_more_flush(tx_ring);
1963
1964        return false;
1965
1966err_dma_warn:
1967        nn_dp_warn(dp, "Failed to DMA map TX CTRL buffer\n");
1968err_free:
1969        u64_stats_update_begin(&r_vec->tx_sync);
1970        r_vec->tx_errors++;
1971        u64_stats_update_end(&r_vec->tx_sync);
1972        dev_kfree_skb_any(skb);
1973        return false;
1974}
1975
1976bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb)
1977{
1978        struct nfp_net_r_vector *r_vec = &nn->r_vecs[0];
1979
1980        return nfp_ctrl_tx_one(nn, r_vec, skb, false);
1981}
1982
1983bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb)
1984{
1985        struct nfp_net_r_vector *r_vec = &nn->r_vecs[0];
1986        bool ret;
1987
1988        spin_lock_bh(&r_vec->lock);
1989        ret = nfp_ctrl_tx_one(nn, r_vec, skb, false);
1990        spin_unlock_bh(&r_vec->lock);
1991
1992        return ret;
1993}
1994
1995static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec)
1996{
1997        struct sk_buff *skb;
1998
1999        while ((skb = __skb_dequeue(&r_vec->queue)))
2000                if (nfp_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true))
2001                        return;
2002}
2003
2004static bool
2005nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len)
2006{
2007        u32 meta_type, meta_tag;
2008
2009        if (!nfp_app_ctrl_has_meta(nn->app))
2010                return !meta_len;
2011
2012        if (meta_len != 8)
2013                return false;
2014
2015        meta_type = get_unaligned_be32(data);
2016        meta_tag = get_unaligned_be32(data + 4);
2017
2018        return (meta_type == NFP_NET_META_PORTID &&
2019                meta_tag == NFP_META_PORT_ID_CTRL);
2020}
2021
2022static bool
2023nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp,
2024                struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring)
2025{
2026        unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
2027        struct nfp_net_rx_buf *rxbuf;
2028        struct nfp_net_rx_desc *rxd;
2029        dma_addr_t new_dma_addr;
2030        struct sk_buff *skb;
2031        void *new_frag;
2032        int idx;
2033
2034        idx = D_IDX(rx_ring, rx_ring->rd_p);
2035
2036        rxd = &rx_ring->rxds[idx];
2037        if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
2038                return false;
2039
2040        /* Memory barrier to ensure that we won't do other reads
2041         * before the DD bit.
2042         */
2043        dma_rmb();
2044
2045        rx_ring->rd_p++;
2046
2047        rxbuf = &rx_ring->rxbufs[idx];
2048        meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
2049        data_len = le16_to_cpu(rxd->rxd.data_len);
2050        pkt_len = data_len - meta_len;
2051
2052        pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
2053        if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
2054                pkt_off += meta_len;
2055        else
2056                pkt_off += dp->rx_offset;
2057        meta_off = pkt_off - meta_len;
2058
2059        /* Stats update */
2060        u64_stats_update_begin(&r_vec->rx_sync);
2061        r_vec->rx_pkts++;
2062        r_vec->rx_bytes += pkt_len;
2063        u64_stats_update_end(&r_vec->rx_sync);
2064
2065        nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len);
2066
2067        if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) {
2068                nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n",
2069                           meta_len);
2070                nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
2071                return true;
2072        }
2073
2074        skb = build_skb(rxbuf->frag, dp->fl_bufsz);
2075        if (unlikely(!skb)) {
2076                nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
2077                return true;
2078        }
2079        new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr);
2080        if (unlikely(!new_frag)) {
2081                nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
2082                return true;
2083        }
2084
2085        nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
2086
2087        nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
2088
2089        skb_reserve(skb, pkt_off);
2090        skb_put(skb, pkt_len);
2091
2092        nfp_app_ctrl_rx(nn->app, skb);
2093
2094        return true;
2095}
2096
2097static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
2098{
2099        struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
2100        struct nfp_net *nn = r_vec->nfp_net;
2101        struct nfp_net_dp *dp = &nn->dp;
2102        unsigned int budget = 512;
2103
2104        while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--)
2105                continue;
2106
2107        return budget;
2108}
2109
2110static void nfp_ctrl_poll(unsigned long arg)
2111{
2112        struct nfp_net_r_vector *r_vec = (void *)arg;
2113
2114        spin_lock(&r_vec->lock);
2115        nfp_net_tx_complete(r_vec->tx_ring, 0);
2116        __nfp_ctrl_tx_queued(r_vec);
2117        spin_unlock(&r_vec->lock);
2118
2119        if (nfp_ctrl_rx(r_vec)) {
2120                nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
2121        } else {
2122                tasklet_schedule(&r_vec->tasklet);
2123                nn_dp_warn(&r_vec->nfp_net->dp,
2124                           "control message budget exceeded!\n");
2125        }
2126}
2127
2128/* Setup and Configuration
2129 */
2130
2131/**
2132 * nfp_net_vecs_init() - Assign IRQs and setup rvecs.
2133 * @nn:         NFP Network structure
2134 */
2135static void nfp_net_vecs_init(struct nfp_net *nn)
2136{
2137        struct nfp_net_r_vector *r_vec;
2138        int r;
2139
2140        nn->lsc_handler = nfp_net_irq_lsc;
2141        nn->exn_handler = nfp_net_irq_exn;
2142
2143        for (r = 0; r < nn->max_r_vecs; r++) {
2144                struct msix_entry *entry;
2145
2146                entry = &nn->irq_entries[NFP_NET_NON_Q_VECTORS + r];
2147
2148                r_vec = &nn->r_vecs[r];
2149                r_vec->nfp_net = nn;
2150                r_vec->irq_entry = entry->entry;
2151                r_vec->irq_vector = entry->vector;
2152
2153                if (nn->dp.netdev) {
2154                        r_vec->handler = nfp_net_irq_rxtx;
2155                } else {
2156                        r_vec->handler = nfp_ctrl_irq_rxtx;
2157
2158                        __skb_queue_head_init(&r_vec->queue);
2159                        spin_lock_init(&r_vec->lock);
2160                        tasklet_init(&r_vec->tasklet, nfp_ctrl_poll,
2161                                     (unsigned long)r_vec);
2162                        tasklet_disable(&r_vec->tasklet);
2163                }
2164
2165                cpumask_set_cpu(r, &r_vec->affinity_mask);
2166        }
2167}
2168
2169/**
2170 * nfp_net_tx_ring_free() - Free resources allocated to a TX ring
2171 * @tx_ring:   TX ring to free
2172 */
2173static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
2174{
2175        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
2176        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
2177
2178        kvfree(tx_ring->txbufs);
2179
2180        if (tx_ring->txds)
2181                dma_free_coherent(dp->dev, tx_ring->size,
2182                                  tx_ring->txds, tx_ring->dma);
2183
2184        tx_ring->cnt = 0;
2185        tx_ring->txbufs = NULL;
2186        tx_ring->txds = NULL;
2187        tx_ring->dma = 0;
2188        tx_ring->size = 0;
2189}
2190
2191/**
2192 * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring
2193 * @dp:        NFP Net data path struct
2194 * @tx_ring:   TX Ring structure to allocate
2195 *
2196 * Return: 0 on success, negative errno otherwise.
2197 */
2198static int
2199nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
2200{
2201        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
2202
2203        tx_ring->cnt = dp->txd_cnt;
2204
2205        tx_ring->size = array_size(tx_ring->cnt, sizeof(*tx_ring->txds));
2206        tx_ring->txds = dma_zalloc_coherent(dp->dev, tx_ring->size,
2207                                            &tx_ring->dma,
2208                                            GFP_KERNEL | __GFP_NOWARN);
2209        if (!tx_ring->txds) {
2210                netdev_warn(dp->netdev, "failed to allocate TX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n",
2211                            tx_ring->cnt);
2212                goto err_alloc;
2213        }
2214
2215        tx_ring->txbufs = kvcalloc(tx_ring->cnt, sizeof(*tx_ring->txbufs),
2216                                   GFP_KERNEL);
2217        if (!tx_ring->txbufs)
2218                goto err_alloc;
2219
2220        if (!tx_ring->is_xdp && dp->netdev)
2221                netif_set_xps_queue(dp->netdev, &r_vec->affinity_mask,
2222                                    tx_ring->idx);
2223
2224        return 0;
2225
2226err_alloc:
2227        nfp_net_tx_ring_free(tx_ring);
2228        return -ENOMEM;
2229}
2230
2231static void
2232nfp_net_tx_ring_bufs_free(struct nfp_net_dp *dp,
2233                          struct nfp_net_tx_ring *tx_ring)
2234{
2235        unsigned int i;
2236
2237        if (!tx_ring->is_xdp)
2238                return;
2239
2240        for (i = 0; i < tx_ring->cnt; i++) {
2241                if (!tx_ring->txbufs[i].frag)
2242                        return;
2243
2244                nfp_net_dma_unmap_rx(dp, tx_ring->txbufs[i].dma_addr);
2245                __free_page(virt_to_page(tx_ring->txbufs[i].frag));
2246        }
2247}
2248
2249static int
2250nfp_net_tx_ring_bufs_alloc(struct nfp_net_dp *dp,
2251                           struct nfp_net_tx_ring *tx_ring)
2252{
2253        struct nfp_net_tx_buf *txbufs = tx_ring->txbufs;
2254        unsigned int i;
2255
2256        if (!tx_ring->is_xdp)
2257                return 0;
2258
2259        for (i = 0; i < tx_ring->cnt; i++) {
2260                txbufs[i].frag = nfp_net_rx_alloc_one(dp, &txbufs[i].dma_addr);
2261                if (!txbufs[i].frag) {
2262                        nfp_net_tx_ring_bufs_free(dp, tx_ring);
2263                        return -ENOMEM;
2264                }
2265        }
2266
2267        return 0;
2268}
2269
2270static int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
2271{
2272        unsigned int r;
2273
2274        dp->tx_rings = kcalloc(dp->num_tx_rings, sizeof(*dp->tx_rings),
2275                               GFP_KERNEL);
2276        if (!dp->tx_rings)
2277                return -ENOMEM;
2278
2279        for (r = 0; r < dp->num_tx_rings; r++) {
2280                int bias = 0;
2281
2282                if (r >= dp->num_stack_tx_rings)
2283                        bias = dp->num_stack_tx_rings;
2284
2285                nfp_net_tx_ring_init(&dp->tx_rings[r], &nn->r_vecs[r - bias],
2286                                     r, bias);
2287
2288                if (nfp_net_tx_ring_alloc(dp, &dp->tx_rings[r]))
2289                        goto err_free_prev;
2290
2291                if (nfp_net_tx_ring_bufs_alloc(dp, &dp->tx_rings[r]))
2292                        goto err_free_ring;
2293        }
2294
2295        return 0;
2296
2297err_free_prev:
2298        while (r--) {
2299                nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]);
2300err_free_ring:
2301                nfp_net_tx_ring_free(&dp->tx_rings[r]);
2302        }
2303        kfree(dp->tx_rings);
2304        return -ENOMEM;
2305}
2306
2307static void nfp_net_tx_rings_free(struct nfp_net_dp *dp)
2308{
2309        unsigned int r;
2310
2311        for (r = 0; r < dp->num_tx_rings; r++) {
2312                nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]);
2313                nfp_net_tx_ring_free(&dp->tx_rings[r]);
2314        }
2315
2316        kfree(dp->tx_rings);
2317}
2318
2319/**
2320 * nfp_net_rx_ring_free() - Free resources allocated to a RX ring
2321 * @rx_ring:  RX ring to free
2322 */
2323static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
2324{
2325        struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
2326        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
2327
2328        if (dp->netdev)
2329                xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
2330        kvfree(rx_ring->rxbufs);
2331
2332        if (rx_ring->rxds)
2333                dma_free_coherent(dp->dev, rx_ring->size,
2334                                  rx_ring->rxds, rx_ring->dma);
2335
2336        rx_ring->cnt = 0;
2337        rx_ring->rxbufs = NULL;
2338        rx_ring->rxds = NULL;
2339        rx_ring->dma = 0;
2340        rx_ring->size = 0;
2341}
2342
2343/**
2344 * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring
2345 * @dp:       NFP Net data path struct
2346 * @rx_ring:  RX ring to allocate
2347 *
2348 * Return: 0 on success, negative errno otherwise.
2349 */
2350static int
2351nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
2352{
2353        int err;
2354
2355        if (dp->netdev) {
2356                err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev,
2357                                       rx_ring->idx);
2358                if (err < 0)
2359                        return err;
2360        }
2361
2362        rx_ring->cnt = dp->rxd_cnt;
2363        rx_ring->size = array_size(rx_ring->cnt, sizeof(*rx_ring->rxds));
2364        rx_ring->rxds = dma_zalloc_coherent(dp->dev, rx_ring->size,
2365                                            &rx_ring->dma,
2366                                            GFP_KERNEL | __GFP_NOWARN);
2367        if (!rx_ring->rxds) {
2368                netdev_warn(dp->netdev, "failed to allocate RX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n",
2369                            rx_ring->cnt);
2370                goto err_alloc;
2371        }
2372
2373        rx_ring->rxbufs = kvcalloc(rx_ring->cnt, sizeof(*rx_ring->rxbufs),
2374                                   GFP_KERNEL);
2375        if (!rx_ring->rxbufs)
2376                goto err_alloc;
2377
2378        return 0;
2379
2380err_alloc:
2381        nfp_net_rx_ring_free(rx_ring);
2382        return -ENOMEM;
2383}
2384
2385static int nfp_net_rx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
2386{
2387        unsigned int r;
2388
2389        dp->rx_rings = kcalloc(dp->num_rx_rings, sizeof(*dp->rx_rings),
2390                               GFP_KERNEL);
2391        if (!dp->rx_rings)
2392                return -ENOMEM;
2393
2394        for (r = 0; r < dp->num_rx_rings; r++) {
2395                nfp_net_rx_ring_init(&dp->rx_rings[r], &nn->r_vecs[r], r);
2396
2397                if (nfp_net_rx_ring_alloc(dp, &dp->rx_rings[r]))
2398                        goto err_free_prev;
2399
2400                if (nfp_net_rx_ring_bufs_alloc(dp, &dp->rx_rings[r]))
2401                        goto err_free_ring;
2402        }
2403
2404        return 0;
2405
2406err_free_prev:
2407        while (r--) {
2408                nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]);
2409err_free_ring:
2410                nfp_net_rx_ring_free(&dp->rx_rings[r]);
2411        }
2412        kfree(dp->rx_rings);
2413        return -ENOMEM;
2414}
2415
2416static void nfp_net_rx_rings_free(struct nfp_net_dp *dp)
2417{
2418        unsigned int r;
2419
2420        for (r = 0; r < dp->num_rx_rings; r++) {
2421                nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]);
2422                nfp_net_rx_ring_free(&dp->rx_rings[r]);
2423        }
2424
2425        kfree(dp->rx_rings);
2426}
2427
2428static void
2429nfp_net_vector_assign_rings(struct nfp_net_dp *dp,
2430                            struct nfp_net_r_vector *r_vec, int idx)
2431{
2432        r_vec->rx_ring = idx < dp->num_rx_rings ? &dp->rx_rings[idx] : NULL;
2433        r_vec->tx_ring =
2434                idx < dp->num_stack_tx_rings ? &dp->tx_rings[idx] : NULL;
2435
2436        r_vec->xdp_ring = idx < dp->num_tx_rings - dp->num_stack_tx_rings ?
2437                &dp->tx_rings[dp->num_stack_tx_rings + idx] : NULL;
2438}
2439
2440static int
2441nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
2442                       int idx)
2443{
2444        int err;
2445
2446        /* Setup NAPI */
2447        if (nn->dp.netdev)
2448                netif_napi_add(nn->dp.netdev, &r_vec->napi,
2449                               nfp_net_poll, NAPI_POLL_WEIGHT);
2450        else
2451                tasklet_enable(&r_vec->tasklet);
2452
2453        snprintf(r_vec->name, sizeof(r_vec->name),
2454                 "%s-rxtx-%d", nfp_net_name(nn), idx);
2455        err = request_irq(r_vec->irq_vector, r_vec->handler, 0, r_vec->name,
2456                          r_vec);
2457        if (err) {
2458                if (nn->dp.netdev)
2459                        netif_napi_del(&r_vec->napi);
2460                else
2461                        tasklet_disable(&r_vec->tasklet);
2462
2463                nn_err(nn, "Error requesting IRQ %d\n", r_vec->irq_vector);
2464                return err;
2465        }
2466        disable_irq(r_vec->irq_vector);
2467
2468        irq_set_affinity_hint(r_vec->irq_vector, &r_vec->affinity_mask);
2469
2470        nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", idx, r_vec->irq_vector,
2471               r_vec->irq_entry);
2472
2473        return 0;
2474}
2475
2476static void
2477nfp_net_cleanup_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
2478{
2479        irq_set_affinity_hint(r_vec->irq_vector, NULL);
2480        if (nn->dp.netdev)
2481                netif_napi_del(&r_vec->napi);
2482        else
2483                tasklet_disable(&r_vec->tasklet);
2484
2485        free_irq(r_vec->irq_vector, r_vec);
2486}
2487
2488/**
2489 * nfp_net_rss_write_itbl() - Write RSS indirection table to device
2490 * @nn:      NFP Net device to reconfigure
2491 */
2492void nfp_net_rss_write_itbl(struct nfp_net *nn)
2493{
2494        int i;
2495
2496        for (i = 0; i < NFP_NET_CFG_RSS_ITBL_SZ; i += 4)
2497                nn_writel(nn, NFP_NET_CFG_RSS_ITBL + i,
2498                          get_unaligned_le32(nn->rss_itbl + i));
2499}
2500
2501/**
2502 * nfp_net_rss_write_key() - Write RSS hash key to device
2503 * @nn:      NFP Net device to reconfigure
2504 */
2505void nfp_net_rss_write_key(struct nfp_net *nn)
2506{
2507        int i;
2508
2509        for (i = 0; i < nfp_net_rss_key_sz(nn); i += 4)
2510                nn_writel(nn, NFP_NET_CFG_RSS_KEY + i,
2511                          get_unaligned_le32(nn->rss_key + i));
2512}
2513
2514/**
2515 * nfp_net_coalesce_write_cfg() - Write irq coalescence configuration to HW
2516 * @nn:      NFP Net device to reconfigure
2517 */
2518void nfp_net_coalesce_write_cfg(struct nfp_net *nn)
2519{
2520        u8 i;
2521        u32 factor;
2522        u32 value;
2523
2524        /* Compute factor used to convert coalesce '_usecs' parameters to
2525         * ME timestamp ticks.  There are 16 ME clock cycles for each timestamp
2526         * count.
2527         */
2528        factor = nn->tlv_caps.me_freq_mhz / 16;
2529
2530        /* copy RX interrupt coalesce parameters */
2531        value = (nn->rx_coalesce_max_frames << 16) |
2532                (factor * nn->rx_coalesce_usecs);
2533        for (i = 0; i < nn->dp.num_rx_rings; i++)
2534                nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value);
2535
2536        /* copy TX interrupt coalesce parameters */
2537        value = (nn->tx_coalesce_max_frames << 16) |
2538                (factor * nn->tx_coalesce_usecs);
2539        for (i = 0; i < nn->dp.num_tx_rings; i++)
2540                nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value);
2541}
2542
2543/**
2544 * nfp_net_write_mac_addr() - Write mac address to the device control BAR
2545 * @nn:      NFP Net device to reconfigure
2546 * @addr:    MAC address to write
2547 *
2548 * Writes the MAC address from the netdev to the device control BAR.  Does not
2549 * perform the required reconfig.  We do a bit of byte swapping dance because
2550 * firmware is LE.
2551 */
2552static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *addr)
2553{
2554        nn_writel(nn, NFP_NET_CFG_MACADDR + 0, get_unaligned_be32(addr));
2555        nn_writew(nn, NFP_NET_CFG_MACADDR + 6, get_unaligned_be16(addr + 4));
2556}
2557
/* Zero the address, size and vector configuration of RX and TX ring @idx
 * in the control BAR.
 */
static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
{
	/* RX ring */
	nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), 0);
	nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), 0);
	nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), 0);

	/* TX ring */
	nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), 0);
	nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), 0);
	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 0);
}
2568
2569/**
2570 * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP
2571 * @nn:      NFP Net device to reconfigure
2572 *
2573 * Warning: must be fully idempotent.
2574 */
2575static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
2576{
2577        u32 new_ctrl, update;
2578        unsigned int r;
2579        int err;
2580
2581        new_ctrl = nn->dp.ctrl;
2582        new_ctrl &= ~NFP_NET_CFG_CTRL_ENABLE;
2583        update = NFP_NET_CFG_UPDATE_GEN;
2584        update |= NFP_NET_CFG_UPDATE_MSIX;
2585        update |= NFP_NET_CFG_UPDATE_RING;
2586
2587        if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
2588                new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG;
2589
2590        nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
2591        nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
2592
2593        nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
2594        err = nfp_net_reconfig(nn, update);
2595        if (err)
2596                nn_err(nn, "Could not disable device: %d\n", err);
2597
2598        for (r = 0; r < nn->dp.num_rx_rings; r++)
2599                nfp_net_rx_ring_reset(&nn->dp.rx_rings[r]);
2600        for (r = 0; r < nn->dp.num_tx_rings; r++)
2601                nfp_net_tx_ring_reset(&nn->dp, &nn->dp.tx_rings[r]);
2602        for (r = 0; r < nn->dp.num_r_vecs; r++)
2603                nfp_net_vec_clear_ring_data(nn, r);
2604
2605        nn->dp.ctrl = new_ctrl;
2606}
2607
/* Program the control BAR entry of RX ring @idx from @rx_ring: DMA address,
 * log2 of the ring's entry count, and the IRQ entry of its vector.
 */
static void
nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn,
                             struct nfp_net_rx_ring *rx_ring, unsigned int idx)
{
        /* Write the DMA address, size and MSI-X info to the device */
        nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), rx_ring->dma);
        /* The size field takes log2 of the ring's entry count */
        nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(rx_ring->cnt));
        nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), rx_ring->r_vec->irq_entry);
}
2617
/* Program the control BAR entry of TX ring @idx from @tx_ring: DMA address,
 * log2 of the ring's entry count, and the IRQ entry of its vector.
 */
static void
nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn,
                             struct nfp_net_tx_ring *tx_ring, unsigned int idx)
{
        nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma);
        /* The size field takes log2 of the ring's entry count */
        nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt));
        nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_entry);
}
2626
2627/**
2628 * nfp_net_set_config_and_enable() - Write control BAR and enable NFP
2629 * @nn:      NFP Net device to reconfigure
2630 */
2631static int nfp_net_set_config_and_enable(struct nfp_net *nn)
2632{
2633        u32 bufsz, new_ctrl, update = 0;
2634        unsigned int r;
2635        int err;
2636
2637        new_ctrl = nn->dp.ctrl;
2638
2639        if (nn->dp.ctrl & NFP_NET_CFG_CTRL_RSS_ANY) {
2640                nfp_net_rss_write_key(nn);
2641                nfp_net_rss_write_itbl(nn);
2642                nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg);
2643                update |= NFP_NET_CFG_UPDATE_RSS;
2644        }
2645
2646        if (nn->dp.ctrl & NFP_NET_CFG_CTRL_IRQMOD) {
2647                nfp_net_coalesce_write_cfg(nn);
2648                update |= NFP_NET_CFG_UPDATE_IRQMOD;
2649        }
2650
2651        for (r = 0; r < nn->dp.num_tx_rings; r++)
2652                nfp_net_tx_ring_hw_cfg_write(nn, &nn->dp.tx_rings[r], r);
2653        for (r = 0; r < nn->dp.num_rx_rings; r++)
2654                nfp_net_rx_ring_hw_cfg_write(nn, &nn->dp.rx_rings[r], r);
2655
2656        nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->dp.num_tx_rings == 64 ?
2657                  0xffffffffffffffffULL : ((u64)1 << nn->dp.num_tx_rings) - 1);
2658
2659        nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->dp.num_rx_rings == 64 ?
2660                  0xffffffffffffffffULL : ((u64)1 << nn->dp.num_rx_rings) - 1);
2661
2662        if (nn->dp.netdev)
2663                nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr);
2664
2665        nn_writel(nn, NFP_NET_CFG_MTU, nn->dp.mtu);
2666
2667        bufsz = nn->dp.fl_bufsz - nn->dp.rx_dma_off - NFP_NET_RX_BUF_NON_DATA;
2668        nn_writel(nn, NFP_NET_CFG_FLBUFSZ, bufsz);
2669
2670        /* Enable device */
2671        new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
2672        update |= NFP_NET_CFG_UPDATE_GEN;
2673        update |= NFP_NET_CFG_UPDATE_MSIX;
2674        update |= NFP_NET_CFG_UPDATE_RING;
2675        if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
2676                new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
2677
2678        nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
2679        err = nfp_net_reconfig(nn, update);
2680        if (err) {
2681                nfp_net_clear_config_and_disable(nn);
2682                return err;
2683        }
2684
2685        nn->dp.ctrl = new_ctrl;
2686
2687        for (r = 0; r < nn->dp.num_rx_rings; r++)
2688                nfp_net_rx_ring_fill_freelist(&nn->dp, &nn->dp.rx_rings[r]);
2689
2690        /* Since reconfiguration requests while NFP is down are ignored we
2691         * have to wipe the entire VXLAN configuration and reinitialize it.
2692         */
2693        if (nn->dp.ctrl & NFP_NET_CFG_CTRL_VXLAN) {
2694                memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports));
2695                memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt));
2696                udp_tunnel_get_rx_info(nn->dp.netdev);
2697        }
2698
2699        return 0;
2700}
2701
2702/**
2703 * nfp_net_close_stack() - Quiesce the stack (part of close)
2704 * @nn:      NFP Net device to reconfigure
2705 */
2706static void nfp_net_close_stack(struct nfp_net *nn)
2707{
2708        unsigned int r;
2709
2710        disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
2711        netif_carrier_off(nn->dp.netdev);
2712        nn->link_up = false;
2713
2714        for (r = 0; r < nn->dp.num_r_vecs; r++) {
2715                disable_irq(nn->r_vecs[r].irq_vector);
2716                napi_disable(&nn->r_vecs[r].napi);
2717        }
2718
2719        netif_tx_disable(nn->dp.netdev);
2720}
2721
2722/**
2723 * nfp_net_close_free_all() - Free all runtime resources
2724 * @nn:      NFP Net device to reconfigure
2725 */
2726static void nfp_net_close_free_all(struct nfp_net *nn)
2727{
2728        unsigned int r;
2729
2730        nfp_net_tx_rings_free(&nn->dp);
2731        nfp_net_rx_rings_free(&nn->dp);
2732
2733        for (r = 0; r < nn->dp.num_r_vecs; r++)
2734                nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
2735
2736        nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
2737        nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
2738}
2739
2740/**
2741 * nfp_net_netdev_close() - Called when the device is downed
2742 * @netdev:      netdev structure
2743 */
2744static int nfp_net_netdev_close(struct net_device *netdev)
2745{
2746        struct nfp_net *nn = netdev_priv(netdev);
2747
2748        /* Step 1: Disable RX and TX rings from the Linux kernel perspective
2749         */
2750        nfp_net_close_stack(nn);
2751
2752        /* Step 2: Tell NFP
2753         */
2754        nfp_net_clear_config_and_disable(nn);
2755        nfp_port_configure(netdev, false);
2756
2757        /* Step 3: Free resources
2758         */
2759        nfp_net_close_free_all(nn);
2760
2761        nn_dbg(nn, "%s down", netdev->name);
2762        return 0;
2763}
2764
2765void nfp_ctrl_close(struct nfp_net *nn)
2766{
2767        int r;
2768
2769        rtnl_lock();
2770
2771        for (r = 0; r < nn->dp.num_r_vecs; r++) {
2772                disable_irq(nn->r_vecs[r].irq_vector);
2773                tasklet_disable(&nn->r_vecs[r].tasklet);
2774        }
2775
2776        nfp_net_clear_config_and_disable(nn);
2777
2778        nfp_net_close_free_all(nn);
2779
2780        rtnl_unlock();
2781}
2782
2783/**
2784 * nfp_net_open_stack() - Start the device from stack's perspective
2785 * @nn:      NFP Net device to reconfigure
2786 */
2787static void nfp_net_open_stack(struct nfp_net *nn)
2788{
2789        unsigned int r;
2790
2791        for (r = 0; r < nn->dp.num_r_vecs; r++) {
2792                napi_enable(&nn->r_vecs[r].napi);
2793                enable_irq(nn->r_vecs[r].irq_vector);
2794        }
2795
2796        netif_tx_wake_all_queues(nn->dp.netdev);
2797
2798        enable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
2799        nfp_net_read_link_status(nn);
2800}
2801
2802static int nfp_net_open_alloc_all(struct nfp_net *nn)
2803{
2804        int err, r;
2805
2806        err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_EXN, "%s-exn",
2807                                      nn->exn_name, sizeof(nn->exn_name),
2808                                      NFP_NET_IRQ_EXN_IDX, nn->exn_handler);
2809        if (err)
2810                return err;
2811        err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_LSC, "%s-lsc",
2812                                      nn->lsc_name, sizeof(nn->lsc_name),
2813                                      NFP_NET_IRQ_LSC_IDX, nn->lsc_handler);
2814        if (err)
2815                goto err_free_exn;
2816        disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
2817
2818        for (r = 0; r < nn->dp.num_r_vecs; r++) {
2819                err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
2820                if (err)
2821                        goto err_cleanup_vec_p;
2822        }
2823
2824        err = nfp_net_rx_rings_prepare(nn, &nn->dp);
2825        if (err)
2826                goto err_cleanup_vec;
2827
2828        err = nfp_net_tx_rings_prepare(nn, &nn->dp);
2829        if (err)
2830                goto err_free_rx_rings;
2831
2832        for (r = 0; r < nn->max_r_vecs; r++)
2833                nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
2834
2835        return 0;
2836
2837err_free_rx_rings:
2838        nfp_net_rx_rings_free(&nn->dp);
2839err_cleanup_vec:
2840        r = nn->dp.num_r_vecs;
2841err_cleanup_vec_p:
2842        while (r--)
2843                nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
2844        nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
2845err_free_exn:
2846        nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
2847        return err;
2848}
2849
2850static int nfp_net_netdev_open(struct net_device *netdev)
2851{
2852        struct nfp_net *nn = netdev_priv(netdev);
2853        int err;
2854
2855        /* Step 1: Allocate resources for rings and the like
2856         * - Request interrupts
2857         * - Allocate RX and TX ring resources
2858         * - Setup initial RSS table
2859         */
2860        err = nfp_net_open_alloc_all(nn);
2861        if (err)
2862                return err;
2863
2864        err = netif_set_real_num_tx_queues(netdev, nn->dp.num_stack_tx_rings);
2865        if (err)
2866                goto err_free_all;
2867
2868        err = netif_set_real_num_rx_queues(netdev, nn->dp.num_rx_rings);
2869        if (err)
2870                goto err_free_all;
2871
2872        /* Step 2: Configure the NFP
2873         * - Ifup the physical interface if it exists
2874         * - Enable rings from 0 to tx_rings/rx_rings - 1.
2875         * - Write MAC address (in case it changed)
2876         * - Set the MTU
2877         * - Set the Freelist buffer size
2878         * - Enable the FW
2879         */
2880        err = nfp_port_configure(netdev, true);
2881        if (err)
2882                goto err_free_all;
2883
2884        err = nfp_net_set_config_and_enable(nn);
2885        if (err)
2886                goto err_port_disable;
2887
2888        /* Step 3: Enable for kernel
2889         * - put some freelist descriptors on each RX ring
2890         * - enable NAPI on each ring
2891         * - enable all TX queues
2892         * - set link state
2893         */
2894        nfp_net_open_stack(nn);
2895
2896        return 0;
2897
2898err_port_disable:
2899        nfp_port_configure(netdev, false);
2900err_free_all:
2901        nfp_net_close_free_all(nn);
2902        return err;
2903}
2904
2905int nfp_ctrl_open(struct nfp_net *nn)
2906{
2907        int err, r;
2908
2909        /* ring dumping depends on vNICs being opened/closed under rtnl */
2910        rtnl_lock();
2911
2912        err = nfp_net_open_alloc_all(nn);
2913        if (err)
2914                goto err_unlock;
2915
2916        err = nfp_net_set_config_and_enable(nn);
2917        if (err)
2918                goto err_free_all;
2919
2920        for (r = 0; r < nn->dp.num_r_vecs; r++)
2921                enable_irq(nn->r_vecs[r].irq_vector);
2922
2923        rtnl_unlock();
2924
2925        return 0;
2926
2927err_free_all:
2928        nfp_net_close_free_all(nn);
2929err_unlock:
2930        rtnl_unlock();
2931        return err;
2932}
2933
2934static void nfp_net_set_rx_mode(struct net_device *netdev)
2935{
2936        struct nfp_net *nn = netdev_priv(netdev);
2937        u32 new_ctrl;
2938
2939        new_ctrl = nn->dp.ctrl;
2940
2941        if (!netdev_mc_empty(netdev) || netdev->flags & IFF_ALLMULTI)
2942                new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_L2MC;
2943        else
2944                new_ctrl &= ~NFP_NET_CFG_CTRL_L2MC;
2945
2946        if (netdev->flags & IFF_PROMISC) {
2947                if (nn->cap & NFP_NET_CFG_CTRL_PROMISC)
2948                        new_ctrl |= NFP_NET_CFG_CTRL_PROMISC;
2949                else
2950                        nn_warn(nn, "FW does not support promiscuous mode\n");
2951        } else {
2952                new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC;
2953        }
2954
2955        if (new_ctrl == nn->dp.ctrl)
2956                return;
2957
2958        nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
2959        nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN);
2960
2961        nn->dp.ctrl = new_ctrl;
2962}
2963
2964static void nfp_net_rss_init_itbl(struct nfp_net *nn)
2965{
2966        int i;
2967
2968        for (i = 0; i < sizeof(nn->rss_itbl); i++)
2969                nn->rss_itbl[i] =
2970                        ethtool_rxfh_indir_default(i, nn->dp.num_rx_rings);
2971}
2972
2973static void nfp_net_dp_swap(struct nfp_net *nn, struct nfp_net_dp *dp)
2974{
2975        struct nfp_net_dp new_dp = *dp;
2976
2977        *dp = nn->dp;
2978        nn->dp = new_dp;
2979
2980        nn->dp.netdev->mtu = new_dp.mtu;
2981
2982        if (!netif_is_rxfh_configured(nn->dp.netdev))
2983                nfp_net_rss_init_itbl(nn);
2984}
2985
2986static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp)
2987{
2988        unsigned int r;
2989        int err;
2990
2991        nfp_net_dp_swap(nn, dp);
2992
2993        for (r = 0; r < nn->max_r_vecs; r++)
2994                nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
2995
2996        err = netif_set_real_num_rx_queues(nn->dp.netdev, nn->dp.num_rx_rings);
2997        if (err)
2998                return err;
2999
3000        if (nn->dp.netdev->real_num_tx_queues != nn->dp.num_stack_tx_rings) {
3001                err = netif_set_real_num_tx_queues(nn->dp.netdev,
3002                                                   nn->dp.num_stack_tx_rings);
3003                if (err)
3004                        return err;
3005        }
3006
3007        return nfp_net_set_config_and_enable(nn);
3008}
3009
3010struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn)
3011{
3012        struct nfp_net_dp *new;
3013
3014        new = kmalloc(sizeof(*new), GFP_KERNEL);
3015        if (!new)
3016                return NULL;
3017
3018        *new = nn->dp;
3019
3020        /* Clear things which need to be recomputed */
3021        new->fl_bufsz = 0;
3022        new->tx_rings = NULL;
3023        new->rx_rings = NULL;
3024        new->num_r_vecs = 0;
3025        new->num_stack_tx_rings = 0;
3026
3027        return new;
3028}
3029
3030static int nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp)
3031{
3032        /* XDP-enabled tests */
3033        if (!dp->xdp_prog)
3034                return 0;
3035        if (dp->fl_bufsz > PAGE_SIZE) {
3036                nn_warn(nn, "MTU too large w/ XDP enabled\n");
3037                return -EINVAL;
3038        }
3039        if (dp->num_tx_rings > nn->max_tx_rings) {
3040                nn_warn(nn, "Insufficient number of TX rings w/ XDP enabled\n");
3041                return -EINVAL;
3042        }
3043
3044        return 0;
3045}
3046
3047int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *dp)
3048{
3049        int r, err;
3050
3051        dp->fl_bufsz = nfp_net_calc_fl_bufsz(dp);
3052
3053        dp->num_stack_tx_rings = dp->num_tx_rings;
3054        if (dp->xdp_prog)
3055                dp->num_stack_tx_rings -= dp->num_rx_rings;
3056
3057        dp->num_r_vecs = max(dp->num_rx_rings, dp->num_stack_tx_rings);
3058
3059        err = nfp_net_check_config(nn, dp);
3060        if (err)
3061                goto exit_free_dp;
3062
3063        if (!netif_running(dp->netdev)) {
3064                nfp_net_dp_swap(nn, dp);
3065                err = 0;
3066                goto exit_free_dp;
3067        }
3068
3069        /* Prepare new rings */
3070        for (r = nn->dp.num_r_vecs; r < dp->num_r_vecs; r++) {
3071                err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
3072                if (err) {
3073                        dp->num_r_vecs = r;
3074                        goto err_cleanup_vecs;
3075                }
3076        }
3077
3078        err = nfp_net_rx_rings_prepare(nn, dp);
3079        if (err)
3080                goto err_cleanup_vecs;
3081
3082        err = nfp_net_tx_rings_prepare(nn, dp);
3083        if (err)
3084                goto err_free_rx;
3085
3086        /* Stop device, swap in new rings, try to start the firmware */
3087        nfp_net_close_stack(nn);
3088        nfp_net_clear_config_and_disable(nn);
3089
3090        err = nfp_net_dp_swap_enable(nn, dp);
3091        if (err) {
3092                int err2;
3093
3094                nfp_net_clear_config_and_disable(nn);
3095
3096                /* Try with old configuration and old rings */
3097                err2 = nfp_net_dp_swap_enable(nn, dp);
3098                if (err2)
3099                        nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n",
3100                               err, err2);
3101        }
3102        for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--)
3103                nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
3104
3105        nfp_net_rx_rings_free(dp);
3106        nfp_net_tx_rings_free(dp);
3107
3108        nfp_net_open_stack(nn);
3109exit_free_dp:
3110        kfree(dp);
3111
3112        return err;
3113
3114err_free_rx:
3115        nfp_net_rx_rings_free(dp);
3116err_cleanup_vecs:
3117        for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--)
3118                nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
3119        kfree(dp);
3120        return err;
3121}
3122
3123static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
3124{
3125        struct nfp_net *nn = netdev_priv(netdev);
3126        struct nfp_net_dp *dp;
3127        int err;
3128
3129        err = nfp_app_check_mtu(nn->app, netdev, new_mtu);
3130        if (err)
3131                return err;
3132
3133        dp = nfp_net_clone_dp(nn);
3134        if (!dp)
3135                return -ENOMEM;
3136
3137        dp->mtu = new_mtu;
3138
3139        return nfp_net_ring_reconfig(nn, dp);
3140}
3141
3142static int
3143nfp_net_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
3144{
3145        struct nfp_net *nn = netdev_priv(netdev);
3146
3147        /* Priority tagged packets with vlan id 0 are processed by the
3148         * NFP as untagged packets
3149         */
3150        if (!vid)
3151                return 0;
3152
3153        nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
3154        nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
3155                  ETH_P_8021Q);
3156
3157        return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD);
3158}
3159
3160static int
3161nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
3162{
3163        struct nfp_net *nn = netdev_priv(netdev);
3164
3165        /* Priority tagged packets with vlan id 0 are processed by the
3166         * NFP as untagged packets
3167         */
3168        if (!vid)
3169                return 0;
3170
3171        nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
3172        nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
3173                  ETH_P_8021Q);
3174
3175        return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL);
3176}
3177
3178static void nfp_net_stat64(struct net_device *netdev,
3179                           struct rtnl_link_stats64 *stats)
3180{
3181        struct nfp_net *nn = netdev_priv(netdev);
3182        int r;
3183
3184        /* Collect software stats */
3185        for (r = 0; r < nn->max_r_vecs; r++) {
3186                struct nfp_net_r_vector *r_vec = &nn->r_vecs[r];
3187                u64 data[3];
3188                unsigned int start;
3189
3190                do {
3191                        start = u64_stats_fetch_begin(&r_vec->rx_sync);
3192                        data[0] = r_vec->rx_pkts;
3193                        data[1] = r_vec->rx_bytes;
3194                        data[2] = r_vec->rx_drops;
3195                } while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
3196                stats->rx_packets += data[0];
3197                stats->rx_bytes += data[1];
3198                stats->rx_dropped += data[2];
3199
3200                do {
3201                        start = u64_stats_fetch_begin(&r_vec->tx_sync);
3202                        data[0] = r_vec->tx_pkts;
3203                        data[1] = r_vec->tx_bytes;
3204                        data[2] = r_vec->tx_errors;
3205                } while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
3206                stats->tx_packets += data[0];
3207                stats->tx_bytes += data[1];
3208                stats->tx_errors += data[2];
3209        }
3210
3211        /* Add in device stats */
3212        stats->multicast += nn_readq(nn, NFP_NET_CFG_STATS_RX_MC_FRAMES);
3213        stats->rx_dropped += nn_readq(nn, NFP_NET_CFG_STATS_RX_DISCARDS);
3214        stats->rx_errors += nn_readq(nn, NFP_NET_CFG_STATS_RX_ERRORS);
3215
3216        stats->tx_dropped += nn_readq(nn, NFP_NET_CFG_STATS_TX_DISCARDS);
3217        stats->tx_errors += nn_readq(nn, NFP_NET_CFG_STATS_TX_ERRORS);
3218}
3219
3220static int nfp_net_set_features(struct net_device *netdev,
3221                                netdev_features_t features)
3222{
3223        netdev_features_t changed = netdev->features ^ features;
3224        struct nfp_net *nn = netdev_priv(netdev);
3225        u32 new_ctrl;
3226        int err;
3227
3228        /* Assume this is not called with features we have not advertised */
3229
3230        new_ctrl = nn->dp.ctrl;
3231
3232        if (changed & NETIF_F_RXCSUM) {
3233                if (features & NETIF_F_RXCSUM)
3234                        new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY;
3235                else
3236                        new_ctrl &= ~NFP_NET_CFG_CTRL_RXCSUM_ANY;
3237        }
3238
3239        if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
3240                if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))
3241                        new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
3242                else
3243                        new_ctrl &= ~NFP_NET_CFG_CTRL_TXCSUM;
3244        }
3245
3246        if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) {
3247                if (features & (NETIF_F_TSO | NETIF_F_TSO6))
3248                        new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_LSO2 ?:
3249                                              NFP_NET_CFG_CTRL_LSO;
3250                else
3251                        new_ctrl &= ~NFP_NET_CFG_CTRL_LSO_ANY;
3252        }
3253
3254        if (changed & NETIF_F_HW_VLAN_CTAG_RX) {
3255                if (features & NETIF_F_HW_VLAN_CTAG_RX)
3256                        new_ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
3257                else
3258                        new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN;
3259        }
3260
3261        if (changed & NETIF_F_HW_VLAN_CTAG_TX) {
3262                if (features & NETIF_F_HW_VLAN_CTAG_TX)
3263                        new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
3264                else
3265                        new_ctrl &= ~NFP_NET_CFG_CTRL_TXVLAN;
3266        }
3267
3268        if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
3269                if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
3270                        new_ctrl |= NFP_NET_CFG_CTRL_CTAG_FILTER;
3271                else
3272                        new_ctrl &= ~NFP_NET_CFG_CTRL_CTAG_FILTER;
3273        }
3274
3275        if (changed & NETIF_F_SG) {
3276                if (features & NETIF_F_SG)
3277                        new_ctrl |= NFP_NET_CFG_CTRL_GATHER;
3278                else
3279                        new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
3280        }
3281
3282        err = nfp_port_set_features(netdev, features);
3283        if (err)
3284                return err;
3285
3286        nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
3287               netdev->features, features, changed);
3288
3289        if (new_ctrl == nn->dp.ctrl)
3290                return 0;
3291
3292        nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->dp.ctrl, new_ctrl);
3293        nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
3294        err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
3295        if (err)
3296                return err;
3297
3298        nn->dp.ctrl = new_ctrl;
3299
3300        return 0;
3301}
3302
3303static netdev_features_t
3304nfp_net_features_check(struct sk_buff *skb, struct net_device *dev,
3305                       netdev_features_t features)
3306{
3307        u8 l4_hdr;
3308
3309        /* We can't do TSO over double tagged packets (802.1AD) */
3310        features &= vlan_features_check(skb, features);
3311
3312        if (!skb->encapsulation)
3313                return features;
3314
3315        /* Ensure that inner L4 header offset fits into TX descriptor field */
3316        if (skb_is_gso(skb)) {
3317                u32 hdrlen;
3318
3319                hdrlen = skb_inner_transport_header(skb) - skb->data +
3320                        inner_tcp_hdrlen(skb);
3321
3322                /* Assume worst case scenario of having longest possible
3323                 * metadata prepend - 8B
3324                 */
3325                if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ - 8))
3326                        features &= ~NETIF_F_GSO_MASK;
3327        }
3328
3329        /* VXLAN/GRE check */
3330        switch (vlan_get_protocol(skb)) {
3331        case htons(ETH_P_IP):
3332                l4_hdr = ip_hdr(skb)->protocol;
3333                break;
3334        case htons(ETH_P_IPV6):
3335                l4_hdr = ipv6_hdr(skb)->nexthdr;
3336                break;
3337        default:
3338                return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
3339        }
3340
3341        if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
3342            skb->inner_protocol != htons(ETH_P_TEB) ||
3343            (l4_hdr != IPPROTO_UDP && l4_hdr != IPPROTO_GRE) ||
3344            (l4_hdr == IPPROTO_UDP &&
3345             (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
3346              sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
3347                return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
3348
3349        return features;
3350}
3351
3352static int
3353nfp_net_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
3354{
3355        struct nfp_net *nn = netdev_priv(netdev);
3356        int n;
3357
3358        if (nn->port)
3359                return nfp_port_get_phys_port_name(netdev, name, len);
3360
3361        if (nn->dp.is_vf || nn->vnic_no_name)
3362                return -EOPNOTSUPP;
3363
3364        n = snprintf(name, len, "n%d", nn->id);
3365        if (n >= len)
3366                return -EINVAL;
3367
3368        return 0;
3369}
3370
3371/**
3372 * nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW
3373 * @nn:   NFP Net device to reconfigure
3374 * @idx:  Index into the port table where new port should be written
3375 * @port: UDP port to configure (pass zero to remove VXLAN port)
3376 */
3377static void nfp_net_set_vxlan_port(struct nfp_net *nn, int idx, __be16 port)
3378{
3379        int i;
3380
3381        nn->vxlan_ports[idx] = port;
3382
3383        if (!(nn->dp.ctrl & NFP_NET_CFG_CTRL_VXLAN))
3384                return;
3385
3386        BUILD_BUG_ON(NFP_NET_N_VXLAN_PORTS & 1);
3387        for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i += 2)
3388                nn_writel(nn, NFP_NET_CFG_VXLAN_PORT + i * sizeof(port),
3389                          be16_to_cpu(nn->vxlan_ports[i + 1]) << 16 |
3390                          be16_to_cpu(nn->vxlan_ports[i]));
3391
3392        nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_VXLAN);
3393}
3394
3395/**
3396 * nfp_net_find_vxlan_idx() - find table entry of the port or a free one
3397 * @nn:   NFP Network structure
3398 * @port: UDP port to look for
3399 *
3400 * Return: if the port is already in the table -- it's position;
3401 *         if the port is not in the table -- free position to use;
3402 *         if the table is full -- -ENOSPC.
3403 */
3404static int nfp_net_find_vxlan_idx(struct nfp_net *nn, __be16 port)
3405{
3406        int i, free_idx = -ENOSPC;
3407
3408        for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i++) {
3409                if (nn->vxlan_ports[i] == port)
3410                        return i;
3411                if (!nn->vxlan_usecnt[i])
3412                        free_idx = i;
3413        }
3414
3415        return free_idx;
3416}
3417
3418static void nfp_net_add_vxlan_port(struct net_device *netdev,
3419                                   struct udp_tunnel_info *ti)
3420{
3421        struct nfp_net *nn = netdev_priv(netdev);
3422        int idx;
3423
3424        if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
3425                return;
3426
3427        idx = nfp_net_find_vxlan_idx(nn, ti->port);
3428        if (idx == -ENOSPC)
3429                return;
3430
3431        if (!nn->vxlan_usecnt[idx]++)
3432                nfp_net_set_vxlan_port(nn, idx, ti->port);
3433}
3434
3435static void nfp_net_del_vxlan_port(struct net_device *netdev,
3436                                   struct udp_tunnel_info *ti)
3437{
3438        struct nfp_net *nn = netdev_priv(netdev);
3439        int idx;
3440
3441        if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
3442                return;
3443
3444        idx = nfp_net_find_vxlan_idx(nn, ti->port);
3445        if (idx == -ENOSPC || !nn->vxlan_usecnt[idx])
3446                return;
3447
3448        if (!--nn->vxlan_usecnt[idx])
3449                nfp_net_set_vxlan_port(nn, idx, 0);
3450}
3451
/* NOTE: everything down to the matching #endif is compiled out.  The only
 * caller of nfp_net_xdp_setup() is the likewise compiled-out branch of
 * nfp_net_xdp().
 * NOTE(review): the code below passes an extack argument to
 * nfp_net_ring_reconfig(), whereas the definition in this file takes only
 * (nn, dp) - this will not build as-is if re-enabled.
 */
#if 0 /* Not in RHEL7 */
/* Install @prog as the driver-path XDP program.  Replacing one program by
 * another (or NULL by NULL) is a plain pointer update; attaching or
 * detaching goes through a full ring reconfiguration.
 */
static int
nfp_net_xdp_setup_drv(struct nfp_net *nn, struct bpf_prog *prog,
                      struct netlink_ext_ack *extack)
{
        struct nfp_net_dp *dp;

        /* No change in "program present" state: no reconfig required */
        if (!prog == !nn->dp.xdp_prog) {
                WRITE_ONCE(nn->dp.xdp_prog, prog);
                return 0;
        }

        dp = nfp_net_clone_dp(nn);
        if (!dp)
                return -ENOMEM;

        dp->xdp_prog = prog;
        /* Add (attach) or remove (detach) one TX ring per RX ring */
        dp->num_tx_rings += prog ? nn->dp.num_rx_rings : -nn->dp.num_rx_rings;
        dp->rx_dma_dir = prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
        dp->rx_dma_off = prog ? XDP_PACKET_HEADROOM - nn->dp.rx_offset : 0;

        /* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */
        return nfp_net_ring_reconfig(nn, dp, extack);
}

/* Set up @prog for the driver path and/or offload according to @flags and
 * record it in @nn, releasing the previously recorded program.
 * Returns -EBUSY when a program is installed and the mode flags differ.
 */
static int
nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog, u32 flags,
                  struct netlink_ext_ack *extack)
{
        struct bpf_prog *drv_prog, *offload_prog;
        int err;

        if (nn->xdp_prog && (flags ^ nn->xdp_flags) & XDP_FLAGS_MODES)
                return -EBUSY;

        /* Load both when no flags set to allow easy activation of driver path
         * when program is replaced by one which can't be offloaded.
         */
        drv_prog     = flags & XDP_FLAGS_HW_MODE  ? NULL : prog;
        offload_prog = flags & XDP_FLAGS_DRV_MODE ? NULL : prog;

        err = nfp_net_xdp_setup_drv(nn, drv_prog, extack);
        if (err)
                return err;

        /* An offload failure is fatal only when HW mode was requested */
        err = nfp_app_xdp_offload(nn->app, nn, offload_prog, extack);
        if (err && flags & XDP_FLAGS_HW_MODE)
                return err;

        if (nn->xdp_prog)
                bpf_prog_put(nn->xdp_prog);
        nn->xdp_prog = prog;
        nn->xdp_flags = flags;

        return 0;
}
#endif
3509
/* XDP command entry point.  With the command handling below compiled out,
 * every command is rejected with -EINVAL.
 */
static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp)
{
#if 0 /* Not in RHEL7 */
        struct nfp_net *nn = netdev_priv(netdev);

        switch (xdp->command) {
        case XDP_SETUP_PROG:
        case XDP_SETUP_PROG_HW:
                return nfp_net_xdp_setup(nn, xdp->prog, xdp->flags,
                                         xdp->extack);
        case XDP_QUERY_PROG:
                /* Report id/flags of the recorded program, or 0 if none */
                xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0;
                xdp->prog_flags = nn->xdp_prog ? nn->xdp_flags : 0;
                return 0;
        default:
                /* All other commands are handed to the app */
                return nfp_app_bpf(nn->app, nn, xdp);
        }
#else
        return -EINVAL;
#endif
}
3531
3532static int nfp_net_set_mac_address(struct net_device *netdev, void *addr)
3533{
3534        struct nfp_net *nn = netdev_priv(netdev);
3535        struct sockaddr *saddr = addr;
3536        int err;
3537
3538        err = eth_prepare_mac_addr_change(netdev, addr);
3539        if (err)
3540                return err;
3541
3542        nfp_net_write_mac_addr(nn, saddr->sa_data);
3543
3544        err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MACADDR);
3545        if (err)
3546                return err;
3547
3548        eth_commit_mac_addr_change(netdev, addr);
3549
3550        return 0;
3551}
3552
3553const struct net_device_ops nfp_net_netdev_ops = {
3554        .ndo_size               = sizeof(struct net_device_ops),
3555        .ndo_init               = nfp_app_ndo_init,
3556        .ndo_uninit             = nfp_app_ndo_uninit,
3557        .ndo_open               = nfp_net_netdev_open,
3558        .ndo_stop               = nfp_net_netdev_close,
3559        .ndo_start_xmit         = nfp_net_tx,
3560        .ndo_get_stats64        = nfp_net_stat64,
3561        .ndo_vlan_rx_add_vid    = nfp_net_vlan_rx_add_vid,
3562        .ndo_vlan_rx_kill_vid   = nfp_net_vlan_rx_kill_vid,
3563        .extended.ndo_setup_tc_rh               = nfp_port_setup_tc,
3564        .ndo_set_vf_mac         = nfp_app_set_vf_mac,
3565        .extended.ndo_set_vf_vlan        = nfp_app_set_vf_vlan,
3566        .ndo_set_vf_spoofchk    = nfp_app_set_vf_spoofchk,
3567        .ndo_get_vf_config      = nfp_app_get_vf_config,
3568        .ndo_set_vf_link_state  = nfp_app_set_vf_link_state,
3569        .ndo_tx_timeout         = nfp_net_tx_timeout,
3570        .ndo_set_rx_mode        = nfp_net_set_rx_mode,
3571        .extended.ndo_change_mtu        = nfp_net_change_mtu,
3572        .ndo_set_mac_address    = nfp_net_set_mac_address,
3573        .ndo_set_features       = nfp_net_set_features,
3574        .ndo_features_check     = nfp_net_features_check,
3575        .extended.ndo_get_phys_port_name        = nfp_net_get_phys_port_name,
3576        .extended.ndo_udp_tunnel_add    = nfp_net_add_vxlan_port,
3577        .extended.ndo_udp_tunnel_del    = nfp_net_del_vxlan_port,
3578        .extended.ndo_xdp               = nfp_net_xdp,
3579};
3580
3581/**
3582 * nfp_net_info() - Print general info about the NIC
3583 * @nn:      NFP Net device to reconfigure
3584 */
3585void nfp_net_info(struct nfp_net *nn)
3586{
3587        nn_info(nn, "Netronome NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n",
3588                nn->dp.is_vf ? "VF " : "",
3589                nn->dp.num_tx_rings, nn->max_tx_rings,
3590                nn->dp.num_rx_rings, nn->max_rx_rings);
3591        nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n",
3592                nn->fw_ver.resv, nn->fw_ver.class,
3593                nn->fw_ver.major, nn->fw_ver.minor,
3594                nn->max_mtu);
3595        nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3596                nn->cap,
3597                nn->cap & NFP_NET_CFG_CTRL_PROMISC  ? "PROMISC "  : "",
3598                nn->cap & NFP_NET_CFG_CTRL_L2BC     ? "L2BCFILT " : "",
3599                nn->cap & NFP_NET_CFG_CTRL_L2MC     ? "L2MCFILT " : "",
3600                nn->cap & NFP_NET_CFG_CTRL_RXCSUM   ? "RXCSUM "   : "",
3601                nn->cap & NFP_NET_CFG_CTRL_TXCSUM   ? "TXCSUM "   : "",
3602                nn->cap & NFP_NET_CFG_CTRL_RXVLAN   ? "RXVLAN "   : "",
3603                nn->cap & NFP_NET_CFG_CTRL_TXVLAN   ? "TXVLAN "   : "",
3604                nn->cap & NFP_NET_CFG_CTRL_SCATTER  ? "SCATTER "  : "",
3605                nn->cap & NFP_NET_CFG_CTRL_GATHER   ? "GATHER "   : "",
3606                nn->cap & NFP_NET_CFG_CTRL_LSO      ? "TSO1 "     : "",
3607                nn->cap & NFP_NET_CFG_CTRL_LSO2     ? "TSO2 "     : "",
3608                nn->cap & NFP_NET_CFG_CTRL_RSS      ? "RSS1 "     : "",
3609                nn->cap & NFP_NET_CFG_CTRL_RSS2     ? "RSS2 "     : "",
3610                nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER ? "CTAG_FILTER " : "",
3611                nn->cap & NFP_NET_CFG_CTRL_L2SWITCH ? "L2SWITCH " : "",
3612                nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
3613                nn->cap & NFP_NET_CFG_CTRL_IRQMOD   ? "IRQMOD "   : "",
3614                nn->cap & NFP_NET_CFG_CTRL_VXLAN    ? "VXLAN "    : "",
3615                nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "    : "",
3616                nn->cap & NFP_NET_CFG_CTRL_CSUM_COMPLETE ?
3617                                                      "RXCSUM_COMPLETE " : "",
3618                nn->cap & NFP_NET_CFG_CTRL_LIVE_ADDR ? "LIVE_ADDR " : "",
3619                nfp_app_extra_cap(nn->app, nn));
3620}
3621
3622/**
3623 * nfp_net_alloc() - Allocate netdev and related structure
3624 * @pdev:         PCI device
3625 * @ctrl_bar:     PCI IOMEM with vNIC config memory
3626 * @needs_netdev: Whether to allocate a netdev for this vNIC
3627 * @max_tx_rings: Maximum number of TX rings supported by device
3628 * @max_rx_rings: Maximum number of RX rings supported by device
3629 *
3630 * This function allocates a netdev device and fills in the initial
3631 * part of the @struct nfp_net structure.  In case of control device
3632 * nfp_net structure is allocated without the netdev.
3633 *
3634 * Return: NFP Net device structure, or ERR_PTR on error.
3635 */
3636struct nfp_net *
3637nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev,
3638              unsigned int max_tx_rings, unsigned int max_rx_rings)
3639{
3640        struct nfp_net *nn;
3641        int err;
3642
3643        if (needs_netdev) {
3644                struct net_device *netdev;
3645
3646                netdev = alloc_etherdev_mqs(sizeof(struct nfp_net),
3647                                            max_tx_rings, max_rx_rings);
3648                if (!netdev)
3649                        return ERR_PTR(-ENOMEM);
3650
3651                SET_NETDEV_DEV(netdev, &pdev->dev);
3652                nn = netdev_priv(netdev);
3653                nn->dp.netdev = netdev;
3654        } else {
3655                nn = vzalloc(sizeof(*nn));
3656                if (!nn)
3657                        return ERR_PTR(-ENOMEM);
3658        }
3659
3660        nn->dp.dev = &pdev->dev;
3661        nn->dp.ctrl_bar = ctrl_bar;
3662        nn->pdev = pdev;
3663
3664        nn->max_tx_rings = max_tx_rings;
3665        nn->max_rx_rings = max_rx_rings;
3666
3667        nn->dp.num_tx_rings = min_t(unsigned int,
3668                                    max_tx_rings, num_online_cpus());
3669        nn->dp.num_rx_rings = min_t(unsigned int, max_rx_rings,
3670                                 netif_get_num_default_rss_queues());
3671
3672        nn->dp.num_r_vecs = max(nn->dp.num_tx_rings, nn->dp.num_rx_rings);
3673        nn->dp.num_r_vecs = min_t(unsigned int,
3674                                  nn->dp.num_r_vecs, num_online_cpus());
3675
3676        nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
3677        nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
3678
3679        spin_lock_init(&nn->reconfig_lock);
3680        spin_lock_init(&nn->link_status_lock);
3681
3682        timer_setup(&nn->reconfig_timer, nfp_net_reconfig_timer, 0);
3683
3684        err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar,
3685                                     &nn->tlv_caps);
3686        if (err)
3687                goto err_free_nn;
3688
3689        return nn;
3690
3691err_free_nn:
3692        if (nn->dp.netdev)
3693                free_netdev(nn->dp.netdev);
3694        else
3695                vfree(nn);
3696        return ERR_PTR(err);
3697}
3698
3699/**
3700 * nfp_net_free() - Undo what @nfp_net_alloc() did
3701 * @nn:      NFP Net device to reconfigure
3702 */
3703void nfp_net_free(struct nfp_net *nn)
3704{
3705        WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted);
3706        if (nn->dp.netdev)
3707                free_netdev(nn->dp.netdev);
3708        else
3709                vfree(nn);
3710}
3711
3712/**
3713 * nfp_net_rss_key_sz() - Get current size of the RSS key
3714 * @nn:         NFP Net device instance
3715 *
3716 * Return: size of the RSS key for currently selected hash function.
3717 */
3718unsigned int nfp_net_rss_key_sz(struct nfp_net *nn)
3719{
3720        switch (nn->rss_hfunc) {
3721        case ETH_RSS_HASH_TOP:
3722                return NFP_NET_CFG_RSS_KEY_SZ;
3723        case ETH_RSS_HASH_XOR:
3724                return 0;
3725        case ETH_RSS_HASH_CRC32:
3726                return 4;
3727        }
3728
3729        nn_warn(nn, "Unknown hash function: %u\n", nn->rss_hfunc);
3730        return 0;
3731}
3732
3733/**
3734 * nfp_net_rss_init() - Set the initial RSS parameters
3735 * @nn:      NFP Net device to reconfigure
3736 */
3737static void nfp_net_rss_init(struct nfp_net *nn)
3738{
3739        unsigned long func_bit, rss_cap_hfunc;
3740        u32 reg;
3741
3742        /* Read the RSS function capability and select first supported func */
3743        reg = nn_readl(nn, NFP_NET_CFG_RSS_CAP);
3744        rss_cap_hfunc = FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC, reg);
3745        if (!rss_cap_hfunc)
3746                rss_cap_hfunc = FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC,
3747                                          NFP_NET_CFG_RSS_TOEPLITZ);
3748
3749        func_bit = find_first_bit(&rss_cap_hfunc, NFP_NET_CFG_RSS_HFUNCS);
3750        if (func_bit == NFP_NET_CFG_RSS_HFUNCS) {
3751                dev_warn(nn->dp.dev,
3752                         "Bad RSS config, defaulting to Toeplitz hash\n");
3753                func_bit = ETH_RSS_HASH_TOP_BIT;
3754        }
3755        nn->rss_hfunc = 1 << func_bit;
3756
3757        netdev_rss_key_fill(nn->rss_key, nfp_net_rss_key_sz(nn));
3758
3759        nfp_net_rss_init_itbl(nn);
3760
3761        /* Enable IPv4/IPv6 TCP by default */
3762        nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP |
3763                      NFP_NET_CFG_RSS_IPV6_TCP |
3764                      FIELD_PREP(NFP_NET_CFG_RSS_HFUNC, nn->rss_hfunc) |
3765                      NFP_NET_CFG_RSS_MASK;
3766}
3767
3768/**
3769 * nfp_net_irqmod_init() - Set the initial IRQ moderation parameters
3770 * @nn:      NFP Net device to reconfigure
3771 */
3772static void nfp_net_irqmod_init(struct nfp_net *nn)
3773{
3774        nn->rx_coalesce_usecs      = 50;
3775        nn->rx_coalesce_max_frames = 64;
3776        nn->tx_coalesce_usecs      = 50;
3777        nn->tx_coalesce_max_frames = 64;
3778}
3779
3780static void nfp_net_netdev_init(struct nfp_net *nn)
3781{
3782        struct net_device *netdev = nn->dp.netdev;
3783
3784        nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr);
3785
3786        netdev->mtu = nn->dp.mtu;
3787
3788        /* Advertise/enable offloads based on capabilities
3789         *
3790         * Note: netdev->features show the currently enabled features
3791         * and netdev->hw_features advertises which features are
3792         * supported.  By default we enable most features.
3793         */
3794        if (nn->cap & NFP_NET_CFG_CTRL_LIVE_ADDR)
3795                netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
3796
3797        netdev->hw_features = NETIF_F_HIGHDMA;
3798        if (nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY) {
3799                netdev->hw_features |= NETIF_F_RXCSUM;
3800                nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY;
3801        }
3802        if (nn->cap & NFP_NET_CFG_CTRL_TXCSUM) {
3803                netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
3804                nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
3805        }
3806        if (nn->cap & NFP_NET_CFG_CTRL_GATHER) {
3807                netdev->hw_features |= NETIF_F_SG;
3808                nn->dp.ctrl |= NFP_NET_CFG_CTRL_GATHER;
3809        }
3810        if ((nn->cap & NFP_NET_CFG_CTRL_LSO && nn->fw_ver.major > 2) ||
3811            nn->cap & NFP_NET_CFG_CTRL_LSO2) {
3812                netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
3813                nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_LSO2 ?:
3814                                         NFP_NET_CFG_CTRL_LSO;
3815        }
3816        if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY)
3817                netdev->hw_features |= NETIF_F_RXHASH;
3818        if (nn->cap & NFP_NET_CFG_CTRL_VXLAN) {
3819                if (nn->cap & NFP_NET_CFG_CTRL_LSO)
3820                        netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
3821                nn->dp.ctrl |= NFP_NET_CFG_CTRL_VXLAN;
3822        }
3823        if (nn->cap & NFP_NET_CFG_CTRL_NVGRE) {
3824                if (nn->cap & NFP_NET_CFG_CTRL_LSO)
3825                        netdev->hw_features |= NETIF_F_GSO_GRE;
3826                nn->dp.ctrl |= NFP_NET_CFG_CTRL_NVGRE;
3827        }
3828        if (nn->cap & (NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE))
3829                netdev->hw_enc_features = netdev->hw_features;
3830
3831        netdev->vlan_features = netdev->hw_features;
3832
3833        if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN) {
3834                netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
3835                nn->dp.ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
3836        }
3837        if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN) {
3838                if (nn->cap & NFP_NET_CFG_CTRL_LSO2) {
3839                        nn_warn(nn, "Device advertises both TSO2 and TXVLAN. Refusing to enable TXVLAN.\n");
3840                } else {
3841                        netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
3842                        nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
3843                }
3844        }
3845        if (nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER) {
3846                netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
3847                nn->dp.ctrl |= NFP_NET_CFG_CTRL_CTAG_FILTER;
3848        }
3849
3850        netdev->features = netdev->hw_features;
3851
3852        if (nfp_app_has_tc(nn->app) && nn->port)
3853                netdev->hw_features |= NETIF_F_HW_TC;
3854
3855        /* Advertise but disable TSO by default. */
3856        netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
3857        nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_LSO_ANY;
3858
3859        /* Finalise the netdev setup */
3860        netdev->netdev_ops = &nfp_net_netdev_ops;
3861        netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
3862
3863        SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops);
3864
3865        /* MTU range: 68 - hw-specific max */
3866        netdev->extended->min_mtu = ETH_MIN_MTU;
3867        netdev->extended->max_mtu = nn->max_mtu;
3868
3869        netdev->gso_max_segs = NFP_NET_LSO_MAX_SEGS;
3870
3871        netif_carrier_off(netdev);
3872
3873        nfp_net_set_ethtool_ops(netdev);
3874}
3875
3876static int nfp_net_read_caps(struct nfp_net *nn)
3877{
3878        /* Get some of the read-only fields from the BAR */
3879        nn->cap = nn_readl(nn, NFP_NET_CFG_CAP);
3880        nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU);
3881
3882        /* ABI 4.x and ctrl vNIC always use chained metadata, in other cases
3883         * we allow use of non-chained metadata if RSS(v1) is the only
3884         * advertised capability requiring metadata.
3885         */
3886        nn->dp.chained_metadata_format = nn->fw_ver.major == 4 ||
3887                                         !nn->dp.netdev ||
3888                                         !(nn->cap & NFP_NET_CFG_CTRL_RSS) ||
3889                                         nn->cap & NFP_NET_CFG_CTRL_CHAIN_META;
3890        /* RSS(v1) uses non-chained metadata format, except in ABI 4.x where
3891         * it has the same meaning as RSSv2.
3892         */
3893        if (nn->dp.chained_metadata_format && nn->fw_ver.major != 4)
3894                nn->cap &= ~NFP_NET_CFG_CTRL_RSS;
3895
3896        /* Determine RX packet/metadata boundary offset */
3897        if (nn->fw_ver.major >= 2) {
3898                u32 reg;
3899
3900                reg = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
3901                if (reg > NFP_NET_MAX_PREPEND) {
3902                        nn_err(nn, "Invalid rx offset: %d\n", reg);
3903                        return -EINVAL;
3904                }
3905                nn->dp.rx_offset = reg;
3906        } else {
3907                nn->dp.rx_offset = NFP_NET_RX_OFFSET;
3908        }
3909
3910        /* For control vNICs mask out the capabilities app doesn't want. */
3911        if (!nn->dp.netdev)
3912                nn->cap &= nn->app->type->ctrl_cap_mask;
3913
3914        return 0;
3915}
3916
3917/**
3918 * nfp_net_init() - Initialise/finalise the nfp_net structure
3919 * @nn:         NFP Net device structure
3920 *
3921 * Return: 0 on success or negative errno on error.
3922 */
3923int nfp_net_init(struct nfp_net *nn)
3924{
3925        int err;
3926
3927        nn->dp.rx_dma_dir = DMA_FROM_DEVICE;
3928
3929        err = nfp_net_read_caps(nn);
3930        if (err)
3931                return err;
3932
3933        /* Set default MTU and Freelist buffer size */
3934        if (!nfp_net_is_data_vnic(nn) && nn->app->ctrl_mtu) {
3935                if (nn->app->ctrl_mtu <= nn->max_mtu) {
3936                        nn->dp.mtu = nn->app->ctrl_mtu;
3937                } else {
3938                        if (nn->app->ctrl_mtu != NFP_APP_CTRL_MTU_MAX)
3939                                nn_warn(nn, "app requested MTU above max supported %u > %u\n",
3940                                        nn->app->ctrl_mtu, nn->max_mtu);
3941                        nn->dp.mtu = nn->max_mtu;
3942                }
3943        } else if (nn->max_mtu < NFP_NET_DEFAULT_MTU) {
3944                nn->dp.mtu = nn->max_mtu;
3945        } else {
3946                nn->dp.mtu = NFP_NET_DEFAULT_MTU;
3947        }
3948        nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp);
3949
3950        if (nfp_app_ctrl_uses_data_vnics(nn->app))
3951                nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_CMSG_DATA;
3952
3953        if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) {
3954                nfp_net_rss_init(nn);
3955                nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RSS2 ?:
3956                                         NFP_NET_CFG_CTRL_RSS;
3957        }
3958
3959        /* Allow L2 Broadcast and Multicast through by default, if supported */
3960        if (nn->cap & NFP_NET_CFG_CTRL_L2BC)
3961                nn->dp.ctrl |= NFP_NET_CFG_CTRL_L2BC;
3962
3963        /* Allow IRQ moderation, if supported */
3964        if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
3965                nfp_net_irqmod_init(nn);
3966                nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
3967        }
3968
3969        if (nn->dp.netdev)
3970                nfp_net_netdev_init(nn);
3971
3972        /* Stash the re-configuration queue away.  First odd queue in TX Bar */
3973        nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
3974
3975        /* Make sure the FW knows the netdev is supposed to be disabled here */
3976        nn_writel(nn, NFP_NET_CFG_CTRL, 0);
3977        nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
3978        nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
3979        err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RING |
3980                                   NFP_NET_CFG_UPDATE_GEN);
3981        if (err)
3982                return err;
3983
3984        nfp_net_vecs_init(nn);
3985
3986        if (!nn->dp.netdev)
3987                return 0;
3988        return register_netdev(nn->dp.netdev);
3989}
3990
3991/**
3992 * nfp_net_clean() - Undo what nfp_net_init() did.
3993 * @nn:         NFP Net device structure
3994 */
3995void nfp_net_clean(struct nfp_net *nn)
3996{
3997        if (!nn->dp.netdev)
3998                return;
3999
4000        unregister_netdev(nn->dp.netdev);
4001        nfp_net_reconfig_wait_posted(nn);
4002}
4003