linux/drivers/net/ethernet/sfc/efx_channels.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/****************************************************************************
   3 * Driver for Solarflare network controllers and boards
   4 * Copyright 2018 Solarflare Communications Inc.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License version 2 as published
   8 * by the Free Software Foundation, incorporated herein by reference.
   9 */
  10
  11#include "net_driver.h"
  12#include <linux/module.h>
  13#include "efx_channels.h"
  14#include "efx.h"
  15#include "efx_common.h"
  16#include "tx_common.h"
  17#include "rx_common.h"
  18#include "nic.h"
  19#include "sriov.h"
  20#include "workarounds.h"
  21
  22/* This is the first interrupt mode to try out of:
  23 * 0 => MSI-X
  24 * 1 => MSI
  25 * 2 => legacy
  26 */
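     /* Note: efx_init_channels() clamps whatever is requested here so that the
      * mode used is never less capable than the NIC type's min_interrupt_mode,
      * and efx_probe_interrupts() falls back from MSI-X to MSI or legacy
      * (where the NIC type allows it) if MSI-X cannot be enabled.
      */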
  27unsigned int efx_interrupt_mode = EFX_INT_MODE_MSIX;
  28
  29/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
  30 * i.e. the number of CPUs among which we may distribute simultaneous
  31 * interrupt handling.
  32 *
  33 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
  34 * The default (0) means to assign an interrupt to each core.
  35 */
  36unsigned int rss_cpus;
  37
  38static unsigned int irq_adapt_low_thresh = 8000;
  39module_param(irq_adapt_low_thresh, uint, 0644);
  40MODULE_PARM_DESC(irq_adapt_low_thresh,
  41                 "Threshold score for reducing IRQ moderation");
  42
  43static unsigned int irq_adapt_high_thresh = 16000;
  44module_param(irq_adapt_high_thresh, uint, 0644);
  45MODULE_PARM_DESC(irq_adapt_high_thresh,
  46                 "Threshold score for increasing IRQ moderation");
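     /* Both thresholds are runtime-tunable (note the 0644 permissions), e.g.
      * through /sys/module/sfc/parameters/irq_adapt_low_thresh, assuming the
      * driver is loaded as the usual sfc module.
      */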
  47
  48/* This is the weight assigned to each of the (per-channel) virtual
  49 * NAPI devices.
  50 */
  51static int napi_weight = 64;
  52
  53/***************
  54 * Housekeeping
  55 ***************/
  56
  57int efx_channel_dummy_op_int(struct efx_channel *channel)
  58{
  59        return 0;
  60}
  61
  62void efx_channel_dummy_op_void(struct efx_channel *channel)
  63{
  64}
  65
  66static const struct efx_channel_type efx_default_channel_type = {
  67        .pre_probe              = efx_channel_dummy_op_int,
  68        .post_remove            = efx_channel_dummy_op_void,
  69        .get_name               = efx_get_channel_name,
  70        .copy                   = efx_copy_channel,
  71        .want_txqs              = efx_default_channel_want_txqs,
  72        .keep_eventq            = false,
  73        .want_pio               = true,
  74};
  75
  76/*************
  77 * INTERRUPTS
  78 *************/
  79
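     /* Count how many RSS channels we want: rss_cpus if set, otherwise one per
      * physical core (hyperthread siblings share a channel), capped at
      * EFX_MAX_RX_QUEUES and, when SR-IOV is wanted, at efx_vf_size() so that
      * the RSS table remains usable by VFs.
      */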
  80static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
  81{
  82        cpumask_var_t thread_mask;
  83        unsigned int count;
  84        int cpu;
  85
  86        if (rss_cpus) {
  87                count = rss_cpus;
  88        } else {
  89                if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
  90                        netif_warn(efx, probe, efx->net_dev,
  91                                   "RSS disabled due to allocation failure\n");
  92                        return 1;
  93                }
  94
  95                count = 0;
  96                for_each_online_cpu(cpu) {
  97                        if (!cpumask_test_cpu(cpu, thread_mask)) {
  98                                ++count;
  99                                cpumask_or(thread_mask, thread_mask,
 100                                           topology_sibling_cpumask(cpu));
 101                        }
 102                }
 103
 104                free_cpumask_var(thread_mask);
 105        }
 106
 107        if (count > EFX_MAX_RX_QUEUES) {
 108                netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
 109                               "Reducing number of rx queues from %u to %u.\n",
 110                               count, EFX_MAX_RX_QUEUES);
 111                count = EFX_MAX_RX_QUEUES;
 112        }
 113
 114        /* If RSS is requested for the PF *and* VFs then we can't write RSS
 115         * table entries that are inaccessible to VFs
 116         */
 117#ifdef CONFIG_SFC_SRIOV
 118        if (efx->type->sriov_wanted) {
 119                if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
 120                    count > efx_vf_size(efx)) {
 121                        netif_warn(efx, probe, efx->net_dev,
 122                                   "Reducing number of RSS channels from %u to %u for "
 123                                   "VF support. Increase vf-msix-limit to use more "
 124                                   "channels on the PF.\n",
 125                                   count, efx_vf_size(efx));
 126                        count = efx_vf_size(efx);
 127                }
 128        }
 129#endif
 130
 131        return count;
 132}
 133
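     /* Work out how many channels, XDP TX queues and XDP event queues we can
      * have under MSI-X, given the available vectors and VIs, and record the
      * results in struct efx_nic.  Returns the total channel count or a
      * negative error code.
      */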
 134static int efx_allocate_msix_channels(struct efx_nic *efx,
 135                                      unsigned int max_channels,
 136                                      unsigned int extra_channels,
 137                                      unsigned int parallelism)
 138{
 139        unsigned int n_channels = parallelism;
 140        int vec_count;
 141        int tx_per_ev;
 142        int n_xdp_tx;
 143        int n_xdp_ev;
 144
 145        if (efx_separate_tx_channels)
 146                n_channels *= 2;
 147        n_channels += extra_channels;
 148
  149        /* To allow XDP transmit to happen from arbitrary NAPI contexts,
  150         * we allocate a TX queue per CPU.  Event queues are shared across
  151         * multiple TX queues, on the assumption that both the TX and event
  152         * queues are at their maximum size.
  153         */
 154        tx_per_ev = EFX_MAX_EVQ_SIZE / EFX_TXQ_MAX_ENT(efx);
 155        tx_per_ev = min(tx_per_ev, EFX_MAX_TXQ_PER_CHANNEL);
 156        n_xdp_tx = num_possible_cpus();
 157        n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, tx_per_ev);
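             /* Illustrative example (queue sizes here are hypothetical): with
              * 8192-entry event queues and 1024-entry TX queues, tx_per_ev
              * would be 8 (before the EFX_MAX_TXQ_PER_CHANNEL cap), so 16
              * possible CPUs would need DIV_ROUND_UP(16, 8) = 2 XDP event
              * queues.
              */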
 158
 159        vec_count = pci_msix_vec_count(efx->pci_dev);
 160        if (vec_count < 0)
 161                return vec_count;
 162
 163        max_channels = min_t(unsigned int, vec_count, max_channels);
 164
 165        /* Check resources.
 166         * We need a channel per event queue, plus a VI per tx queue.
 167         * This may be more pessimistic than it needs to be.
 168         */
 169        if (n_channels + n_xdp_ev > max_channels) {
 170                netif_err(efx, drv, efx->net_dev,
 171                          "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
 172                          n_xdp_ev, n_channels, max_channels);
 173                netif_err(efx, drv, efx->net_dev,
 174                          "XDP_TX and XDP_REDIRECT will not work on this interface");
 175                efx->n_xdp_channels = 0;
 176                efx->xdp_tx_per_channel = 0;
 177                efx->xdp_tx_queue_count = 0;
 178        } else if (n_channels + n_xdp_tx > efx->max_vis) {
 179                netif_err(efx, drv, efx->net_dev,
 180                          "Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n",
 181                          n_xdp_tx, n_channels, efx->max_vis);
 182                netif_err(efx, drv, efx->net_dev,
 183                          "XDP_TX and XDP_REDIRECT will not work on this interface");
 184                efx->n_xdp_channels = 0;
 185                efx->xdp_tx_per_channel = 0;
 186                efx->xdp_tx_queue_count = 0;
 187        } else {
 188                efx->n_xdp_channels = n_xdp_ev;
 189                efx->xdp_tx_per_channel = tx_per_ev;
 190                efx->xdp_tx_queue_count = n_xdp_tx;
 191                n_channels += n_xdp_ev;
 192                netif_dbg(efx, drv, efx->net_dev,
 193                          "Allocating %d TX and %d event queues for XDP\n",
 194                          n_xdp_tx, n_xdp_ev);
 195        }
 196
 197        if (vec_count < n_channels) {
 198                netif_err(efx, drv, efx->net_dev,
 199                          "WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
 200                          vec_count, n_channels);
 201                netif_err(efx, drv, efx->net_dev,
 202                          "WARNING: Performance may be reduced.\n");
 203                n_channels = vec_count;
 204        }
 205
 206        n_channels = min(n_channels, max_channels);
 207
 208        efx->n_channels = n_channels;
 209
 210        /* Ignore XDP tx channels when creating rx channels. */
 211        n_channels -= efx->n_xdp_channels;
 212
 213        if (efx_separate_tx_channels) {
 214                efx->n_tx_channels =
 215                        min(max(n_channels / 2, 1U),
 216                            efx->max_tx_channels);
 217                efx->tx_channel_offset =
 218                        n_channels - efx->n_tx_channels;
 219                efx->n_rx_channels =
 220                        max(n_channels -
 221                            efx->n_tx_channels, 1U);
 222        } else {
 223                efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
 224                efx->tx_channel_offset = 0;
 225                efx->n_rx_channels = n_channels;
 226        }
 227
 228        efx->n_rx_channels = min(efx->n_rx_channels, parallelism);
 229        efx->n_tx_channels = min(efx->n_tx_channels, parallelism);
 230
 231        efx->xdp_channel_offset = n_channels;
 232
 233        netif_dbg(efx, drv, efx->net_dev,
 234                  "Allocating %u RX channels\n",
 235                  efx->n_rx_channels);
 236
 237        return efx->n_channels;
 238}
 239
 240/* Probe the number and type of interrupts we are able to obtain, and
 241 * the resulting numbers of channels and RX queues.
 242 */
 243int efx_probe_interrupts(struct efx_nic *efx)
 244{
 245        unsigned int extra_channels = 0;
 246        unsigned int rss_spread;
 247        unsigned int i, j;
 248        int rc;
 249
 250        for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
 251                if (efx->extra_channel_type[i])
 252                        ++extra_channels;
 253
 254        if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
 255                unsigned int parallelism = efx_wanted_parallelism(efx);
 256                struct msix_entry xentries[EFX_MAX_CHANNELS];
 257                unsigned int n_channels;
 258
 259                rc = efx_allocate_msix_channels(efx, efx->max_channels,
 260                                                extra_channels, parallelism);
 261                if (rc >= 0) {
 262                        n_channels = rc;
 263                        for (i = 0; i < n_channels; i++)
 264                                xentries[i].entry = i;
 265                        rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
 266                                                   n_channels);
 267                }
 268                if (rc < 0) {
 269                        /* Fall back to single channel MSI */
 270                        netif_err(efx, drv, efx->net_dev,
 271                                  "could not enable MSI-X\n");
 272                        if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
 273                                efx->interrupt_mode = EFX_INT_MODE_MSI;
 274                        else
 275                                return rc;
 276                } else if (rc < n_channels) {
 277                        netif_err(efx, drv, efx->net_dev,
 278                                  "WARNING: Insufficient MSI-X vectors"
 279                                  " available (%d < %u).\n", rc, n_channels);
 280                        netif_err(efx, drv, efx->net_dev,
 281                                  "WARNING: Performance may be reduced.\n");
 282                        n_channels = rc;
 283                }
 284
 285                if (rc > 0) {
 286                        for (i = 0; i < efx->n_channels; i++)
 287                                efx_get_channel(efx, i)->irq =
 288                                        xentries[i].vector;
 289                }
 290        }
 291
 292        /* Try single interrupt MSI */
 293        if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
 294                efx->n_channels = 1;
 295                efx->n_rx_channels = 1;
 296                efx->n_tx_channels = 1;
 297                efx->n_xdp_channels = 0;
 298                efx->xdp_channel_offset = efx->n_channels;
 299                rc = pci_enable_msi(efx->pci_dev);
 300                if (rc == 0) {
 301                        efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
 302                } else {
 303                        netif_err(efx, drv, efx->net_dev,
 304                                  "could not enable MSI\n");
 305                        if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
 306                                efx->interrupt_mode = EFX_INT_MODE_LEGACY;
 307                        else
 308                                return rc;
 309                }
 310        }
 311
 312        /* Assume legacy interrupts */
 313        if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
 314                efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
 315                efx->n_rx_channels = 1;
 316                efx->n_tx_channels = 1;
 317                efx->n_xdp_channels = 0;
 318                efx->xdp_channel_offset = efx->n_channels;
 319                efx->legacy_irq = efx->pci_dev->irq;
 320        }
 321
 322        /* Assign extra channels if possible, before XDP channels */
 323        efx->n_extra_tx_channels = 0;
 324        j = efx->xdp_channel_offset;
 325        for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
 326                if (!efx->extra_channel_type[i])
 327                        continue;
 328                if (j <= efx->tx_channel_offset + efx->n_tx_channels) {
 329                        efx->extra_channel_type[i]->handle_no_channel(efx);
 330                } else {
 331                        --j;
 332                        efx_get_channel(efx, j)->type =
 333                                efx->extra_channel_type[i];
 334                        if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
 335                                efx->n_extra_tx_channels++;
 336                }
 337        }
 338
 339        rss_spread = efx->n_rx_channels;
 340        /* RSS might be usable on VFs even if it is disabled on the PF */
 341#ifdef CONFIG_SFC_SRIOV
 342        if (efx->type->sriov_wanted) {
 343                efx->rss_spread = ((rss_spread > 1 ||
 344                                    !efx->type->sriov_wanted(efx)) ?
 345                                   rss_spread : efx_vf_size(efx));
 346                return 0;
 347        }
 348#endif
 349        efx->rss_spread = rss_spread;
 350
 351        return 0;
 352}
 353
 354#if defined(CONFIG_SMP)
 355void efx_set_interrupt_affinity(struct efx_nic *efx)
 356{
 357        struct efx_channel *channel;
 358        unsigned int cpu;
 359
 360        efx_for_each_channel(channel, efx) {
 361                cpu = cpumask_local_spread(channel->channel,
 362                                           pcibus_to_node(efx->pci_dev->bus));
 363                irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
 364        }
 365}
 366
 367void efx_clear_interrupt_affinity(struct efx_nic *efx)
 368{
 369        struct efx_channel *channel;
 370
 371        efx_for_each_channel(channel, efx)
 372                irq_set_affinity_hint(channel->irq, NULL);
 373}
 374#else
 375void
 376efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
 377{
 378}
 379
 380void
 381efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
 382{
 383}
 384#endif /* CONFIG_SMP */
 385
 386void efx_remove_interrupts(struct efx_nic *efx)
 387{
 388        struct efx_channel *channel;
 389
 390        /* Remove MSI/MSI-X interrupts */
 391        efx_for_each_channel(channel, efx)
 392                channel->irq = 0;
 393        pci_disable_msi(efx->pci_dev);
 394        pci_disable_msix(efx->pci_dev);
 395
 396        /* Remove legacy interrupt */
 397        efx->legacy_irq = 0;
 398}
 399
 400/***************
 401 * EVENT QUEUES
 402 ***************/
 403
 404/* Create event queue
 405 * Event queue memory allocations are done only once.  If the channel
 406 * is reset, the memory buffer will be reused; this guards against
 407 * errors during channel reset and also simplifies interrupt handling.
 408 */
 409int efx_probe_eventq(struct efx_channel *channel)
 410{
 411        struct efx_nic *efx = channel->efx;
 412        unsigned long entries;
 413
 414        netif_dbg(efx, probe, efx->net_dev,
 415                  "chan %d create event queue\n", channel->channel);
 416
 417        /* Build an event queue with room for one event per tx and rx buffer,
 418         * plus some extra for link state events and MCDI completions.
 419         */
 420        entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
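             /* For example, 1024 RX + 1024 TX descriptors + 128 = 2176, which
              * rounds up to 4096 entries (the sizes here are illustrative).
              */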
 421        EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
 422        channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;
 423
 424        return efx_nic_probe_eventq(channel);
 425}
 426
 427/* Prepare channel's event queue */
 428int efx_init_eventq(struct efx_channel *channel)
 429{
 430        struct efx_nic *efx = channel->efx;
 431        int rc;
 432
 433        EFX_WARN_ON_PARANOID(channel->eventq_init);
 434
 435        netif_dbg(efx, drv, efx->net_dev,
 436                  "chan %d init event queue\n", channel->channel);
 437
 438        rc = efx_nic_init_eventq(channel);
 439        if (rc == 0) {
 440                efx->type->push_irq_moderation(channel);
 441                channel->eventq_read_ptr = 0;
 442                channel->eventq_init = true;
 443        }
 444        return rc;
 445}
 446
 447/* Enable event queue processing and NAPI */
 448void efx_start_eventq(struct efx_channel *channel)
 449{
 450        netif_dbg(channel->efx, ifup, channel->efx->net_dev,
 451                  "chan %d start event queue\n", channel->channel);
 452
 453        /* Make sure the NAPI handler sees the enabled flag set */
 454        channel->enabled = true;
 455        smp_wmb();
 456
 457        napi_enable(&channel->napi_str);
 458        efx_nic_eventq_read_ack(channel);
 459}
 460
 461/* Disable event queue processing and NAPI */
 462void efx_stop_eventq(struct efx_channel *channel)
 463{
 464        if (!channel->enabled)
 465                return;
 466
 467        napi_disable(&channel->napi_str);
 468        channel->enabled = false;
 469}
 470
 471void efx_fini_eventq(struct efx_channel *channel)
 472{
 473        if (!channel->eventq_init)
 474                return;
 475
 476        netif_dbg(channel->efx, drv, channel->efx->net_dev,
 477                  "chan %d fini event queue\n", channel->channel);
 478
 479        efx_nic_fini_eventq(channel);
 480        channel->eventq_init = false;
 481}
 482
 483void efx_remove_eventq(struct efx_channel *channel)
 484{
 485        netif_dbg(channel->efx, drv, channel->efx->net_dev,
 486                  "chan %d remove event queue\n", channel->channel);
 487
 488        efx_nic_remove_eventq(channel);
 489}
 490
 491/**************************************************************************
 492 *
 493 * Channel handling
 494 *
 495 *************************************************************************/
 496
 497#ifdef CONFIG_RFS_ACCEL
 498static void efx_filter_rfs_expire(struct work_struct *data)
 499{
 500        struct delayed_work *dwork = to_delayed_work(data);
 501        struct efx_channel *channel;
 502        unsigned int time, quota;
 503
 504        channel = container_of(dwork, struct efx_channel, filter_work);
 505        time = jiffies - channel->rfs_last_expiry;
 506        quota = channel->rfs_filter_count * time / (30 * HZ);
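             /* Example: 100 filters and 15 seconds since the last scan give a
              * quota of 100 * (15 * HZ) / (30 * HZ) = 50, so up to 50 filters
              * may be checked for expiry on this pass.
              */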
 507        if (quota >= 20 && __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, quota)))
 508                channel->rfs_last_expiry += time;
 509        /* Ensure we do more work eventually even if NAPI poll is not happening */
 510        schedule_delayed_work(dwork, 30 * HZ);
 511}
 512#endif
 513
 514/* Allocate and initialise a channel structure. */
 515static struct efx_channel *efx_alloc_channel(struct efx_nic *efx, int i)
 516{
 517        struct efx_rx_queue *rx_queue;
 518        struct efx_tx_queue *tx_queue;
 519        struct efx_channel *channel;
 520        int j;
 521
 522        channel = kzalloc(sizeof(*channel), GFP_KERNEL);
 523        if (!channel)
 524                return NULL;
 525
 526        channel->efx = efx;
 527        channel->channel = i;
 528        channel->type = &efx_default_channel_type;
 529
 530        for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
 531                tx_queue = &channel->tx_queue[j];
 532                tx_queue->efx = efx;
 533                tx_queue->queue = -1;
 534                tx_queue->label = j;
 535                tx_queue->channel = channel;
 536        }
 537
 538#ifdef CONFIG_RFS_ACCEL
 539        INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
 540#endif
 541
 542        rx_queue = &channel->rx_queue;
 543        rx_queue->efx = efx;
 544        timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
 545
 546        return channel;
 547}
 548
 549int efx_init_channels(struct efx_nic *efx)
 550{
 551        unsigned int i;
 552
 553        for (i = 0; i < EFX_MAX_CHANNELS; i++) {
 554                efx->channel[i] = efx_alloc_channel(efx, i);
 555                if (!efx->channel[i])
 556                        return -ENOMEM;
 557                efx->msi_context[i].efx = efx;
 558                efx->msi_context[i].index = i;
 559        }
 560
 561        /* Higher numbered interrupt modes are less capable! */
 562        efx->interrupt_mode = min(efx->type->min_interrupt_mode,
 563                                  efx_interrupt_mode);
 564
 565        efx->max_channels = EFX_MAX_CHANNELS;
 566        efx->max_tx_channels = EFX_MAX_CHANNELS;
 567
 568        return 0;
 569}
 570
 571void efx_fini_channels(struct efx_nic *efx)
 572{
 573        unsigned int i;
 574
 575        for (i = 0; i < EFX_MAX_CHANNELS; i++)
 576                if (efx->channel[i]) {
 577                        kfree(efx->channel[i]);
 578                        efx->channel[i] = NULL;
 579                }
 580}
 581
 582/* Allocate and initialise a channel structure, copying parameters
 583 * (but not resources) from an old channel structure.
 584 */
 585struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
 586{
 587        struct efx_rx_queue *rx_queue;
 588        struct efx_tx_queue *tx_queue;
 589        struct efx_channel *channel;
 590        int j;
 591
 592        channel = kmalloc(sizeof(*channel), GFP_KERNEL);
 593        if (!channel)
 594                return NULL;
 595
 596        *channel = *old_channel;
 597
 598        channel->napi_dev = NULL;
 599        INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
 600        channel->napi_str.napi_id = 0;
 601        channel->napi_str.state = 0;
 602        memset(&channel->eventq, 0, sizeof(channel->eventq));
 603
 604        for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
 605                tx_queue = &channel->tx_queue[j];
 606                if (tx_queue->channel)
 607                        tx_queue->channel = channel;
 608                tx_queue->buffer = NULL;
 609                tx_queue->cb_page = NULL;
 610                memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
 611        }
 612
 613        rx_queue = &channel->rx_queue;
 614        rx_queue->buffer = NULL;
 615        memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
 616        timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
 617#ifdef CONFIG_RFS_ACCEL
 618        INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
 619#endif
 620
 621        return channel;
 622}
 623
 624static int efx_probe_channel(struct efx_channel *channel)
 625{
 626        struct efx_tx_queue *tx_queue;
 627        struct efx_rx_queue *rx_queue;
 628        int rc;
 629
 630        netif_dbg(channel->efx, probe, channel->efx->net_dev,
 631                  "creating channel %d\n", channel->channel);
 632
 633        rc = channel->type->pre_probe(channel);
 634        if (rc)
 635                goto fail;
 636
 637        rc = efx_probe_eventq(channel);
 638        if (rc)
 639                goto fail;
 640
 641        efx_for_each_channel_tx_queue(tx_queue, channel) {
 642                rc = efx_probe_tx_queue(tx_queue);
 643                if (rc)
 644                        goto fail;
 645        }
 646
 647        efx_for_each_channel_rx_queue(rx_queue, channel) {
 648                rc = efx_probe_rx_queue(rx_queue);
 649                if (rc)
 650                        goto fail;
 651        }
 652
 653        channel->rx_list = NULL;
 654
 655        return 0;
 656
 657fail:
 658        efx_remove_channel(channel);
 659        return rc;
 660}
 661
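     /* Build a channel's name (stored in efx->msi_context by
      * efx_set_channel_names()): the interface name plus, when separate TX/RX
      * channels or XDP channels exist, a -rx/-tx/-xdp suffix and an index
      * within that group.
      */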
 662void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
 663{
 664        struct efx_nic *efx = channel->efx;
 665        const char *type;
 666        int number;
 667
 668        number = channel->channel;
 669
 670        if (number >= efx->xdp_channel_offset &&
 671            !WARN_ON_ONCE(!efx->n_xdp_channels)) {
 672                type = "-xdp";
 673                number -= efx->xdp_channel_offset;
 674        } else if (efx->tx_channel_offset == 0) {
 675                type = "";
 676        } else if (number < efx->tx_channel_offset) {
 677                type = "-rx";
 678        } else {
 679                type = "-tx";
 680                number -= efx->tx_channel_offset;
 681        }
 682        snprintf(buf, len, "%s%s-%d", efx->name, type, number);
 683}
 684
 685void efx_set_channel_names(struct efx_nic *efx)
 686{
 687        struct efx_channel *channel;
 688
 689        efx_for_each_channel(channel, efx)
 690                channel->type->get_name(channel,
 691                                        efx->msi_context[channel->channel].name,
 692                                        sizeof(efx->msi_context[0].name));
 693}
 694
 695int efx_probe_channels(struct efx_nic *efx)
 696{
 697        struct efx_channel *channel;
 698        int rc;
 699
 700        /* Restart special buffer allocation */
 701        efx->next_buffer_table = 0;
 702
 703        /* Probe channels in reverse, so that any 'extra' channels
 704         * use the start of the buffer table. This allows the traffic
 705         * channels to be resized without moving them or wasting the
 706         * entries before them.
 707         */
 708        efx_for_each_channel_rev(channel, efx) {
 709                rc = efx_probe_channel(channel);
 710                if (rc) {
 711                        netif_err(efx, probe, efx->net_dev,
 712                                  "failed to create channel %d\n",
 713                                  channel->channel);
 714                        goto fail;
 715                }
 716        }
 717        efx_set_channel_names(efx);
 718
 719        return 0;
 720
 721fail:
 722        efx_remove_channels(efx);
 723        return rc;
 724}
 725
 726void efx_remove_channel(struct efx_channel *channel)
 727{
 728        struct efx_tx_queue *tx_queue;
 729        struct efx_rx_queue *rx_queue;
 730
 731        netif_dbg(channel->efx, drv, channel->efx->net_dev,
 732                  "destroy chan %d\n", channel->channel);
 733
 734        efx_for_each_channel_rx_queue(rx_queue, channel)
 735                efx_remove_rx_queue(rx_queue);
 736        efx_for_each_channel_tx_queue(tx_queue, channel)
 737                efx_remove_tx_queue(tx_queue);
 738        efx_remove_eventq(channel);
 739        channel->type->post_remove(channel);
 740}
 741
 742void efx_remove_channels(struct efx_nic *efx)
 743{
 744        struct efx_channel *channel;
 745
 746        efx_for_each_channel(channel, efx)
 747                efx_remove_channel(channel);
 748
 749        kfree(efx->xdp_tx_queues);
 750}
 751
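     /* Resize the RX and TX rings: clone each channel that has a ->copy()
      * method with the new ring sizes, swap the clones in and re-probe them,
      * rolling back to the old channels if anything fails.  Channels without
      * ->copy() are kept as-is, along with their buffer table entries.
      */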
 752int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
 753{
 754        struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
 755        unsigned int i, next_buffer_table = 0;
 756        u32 old_rxq_entries, old_txq_entries;
 757        int rc, rc2;
 758
 759        rc = efx_check_disabled(efx);
 760        if (rc)
 761                return rc;
 762
 763        /* Not all channels should be reallocated. We must avoid
 764         * reallocating their buffer table entries.
 765         */
 766        efx_for_each_channel(channel, efx) {
 767                struct efx_rx_queue *rx_queue;
 768                struct efx_tx_queue *tx_queue;
 769
 770                if (channel->type->copy)
 771                        continue;
 772                next_buffer_table = max(next_buffer_table,
 773                                        channel->eventq.index +
 774                                        channel->eventq.entries);
 775                efx_for_each_channel_rx_queue(rx_queue, channel)
 776                        next_buffer_table = max(next_buffer_table,
 777                                                rx_queue->rxd.index +
 778                                                rx_queue->rxd.entries);
 779                efx_for_each_channel_tx_queue(tx_queue, channel)
 780                        next_buffer_table = max(next_buffer_table,
 781                                                tx_queue->txd.index +
 782                                                tx_queue->txd.entries);
 783        }
 784
 785        efx_device_detach_sync(efx);
 786        efx_stop_all(efx);
 787        efx_soft_disable_interrupts(efx);
 788
 789        /* Clone channels (where possible) */
 790        memset(other_channel, 0, sizeof(other_channel));
 791        for (i = 0; i < efx->n_channels; i++) {
 792                channel = efx->channel[i];
 793                if (channel->type->copy)
 794                        channel = channel->type->copy(channel);
 795                if (!channel) {
 796                        rc = -ENOMEM;
 797                        goto out;
 798                }
 799                other_channel[i] = channel;
 800        }
 801
 802        /* Swap entry counts and channel pointers */
 803        old_rxq_entries = efx->rxq_entries;
 804        old_txq_entries = efx->txq_entries;
 805        efx->rxq_entries = rxq_entries;
 806        efx->txq_entries = txq_entries;
 807        for (i = 0; i < efx->n_channels; i++) {
 808                channel = efx->channel[i];
 809                efx->channel[i] = other_channel[i];
 810                other_channel[i] = channel;
 811        }
 812
 813        /* Restart buffer table allocation */
 814        efx->next_buffer_table = next_buffer_table;
 815
 816        for (i = 0; i < efx->n_channels; i++) {
 817                channel = efx->channel[i];
 818                if (!channel->type->copy)
 819                        continue;
 820                rc = efx_probe_channel(channel);
 821                if (rc)
 822                        goto rollback;
 823                efx_init_napi_channel(efx->channel[i]);
 824        }
 825
 826out:
 827        /* Destroy unused channel structures */
 828        for (i = 0; i < efx->n_channels; i++) {
 829                channel = other_channel[i];
 830                if (channel && channel->type->copy) {
 831                        efx_fini_napi_channel(channel);
 832                        efx_remove_channel(channel);
 833                        kfree(channel);
 834                }
 835        }
 836
 837        rc2 = efx_soft_enable_interrupts(efx);
 838        if (rc2) {
 839                rc = rc ? rc : rc2;
 840                netif_err(efx, drv, efx->net_dev,
 841                          "unable to restart interrupts on channel reallocation\n");
 842                efx_schedule_reset(efx, RESET_TYPE_DISABLE);
 843        } else {
 844                efx_start_all(efx);
 845                efx_device_attach_if_not_resetting(efx);
 846        }
 847        return rc;
 848
 849rollback:
 850        /* Swap back */
 851        efx->rxq_entries = old_rxq_entries;
 852        efx->txq_entries = old_txq_entries;
 853        for (i = 0; i < efx->n_channels; i++) {
 854                channel = efx->channel[i];
 855                efx->channel[i] = other_channel[i];
 856                other_channel[i] = channel;
 857        }
 858        goto out;
 859}
 860
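     /* Set the TX channel offset, give each RX channel its core index, number
      * the hardware TX queues (building the XDP TX queue lookup table as we
      * go), and tell the network stack how many real RX and TX queues we have.
      */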
 861int efx_set_channels(struct efx_nic *efx)
 862{
 863        struct efx_tx_queue *tx_queue;
 864        struct efx_channel *channel;
 865        unsigned int next_queue = 0;
 866        int xdp_queue_number;
 867        int rc;
 868
 869        efx->tx_channel_offset =
 870                efx_separate_tx_channels ?
 871                efx->n_channels - efx->n_tx_channels : 0;
 872
 873        if (efx->xdp_tx_queue_count) {
 874                EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);
 875
 876                /* Allocate array for XDP TX queue lookup. */
 877                efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
 878                                             sizeof(*efx->xdp_tx_queues),
 879                                             GFP_KERNEL);
 880                if (!efx->xdp_tx_queues)
 881                        return -ENOMEM;
 882        }
 883
 884        /* We need to mark which channels really have RX and TX
 885         * queues, and adjust the TX queue numbers if we have separate
 886         * RX-only and TX-only channels.
 887         */
 888        xdp_queue_number = 0;
 889        efx_for_each_channel(channel, efx) {
 890                if (channel->channel < efx->n_rx_channels)
 891                        channel->rx_queue.core_index = channel->channel;
 892                else
 893                        channel->rx_queue.core_index = -1;
 894
 895                if (channel->channel >= efx->tx_channel_offset) {
 896                        if (efx_channel_is_xdp_tx(channel)) {
 897                                efx_for_each_channel_tx_queue(tx_queue, channel) {
 898                                        tx_queue->queue = next_queue++;
 899
  900                                        /* We may have a few left-over XDP TX queues
  901                                         * owing to xdp_tx_queue_count not being
  902                                         * evenly divisible by EFX_MAX_TXQ_PER_CHANNEL.
  903                                         * We still allocate and probe those TXQs,
  904                                         * but never use them.
  905                                         */
 906                                        if (xdp_queue_number < efx->xdp_tx_queue_count) {
 907                                                netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n",
 908                                                          channel->channel, tx_queue->label,
 909                                                          xdp_queue_number, tx_queue->queue);
 910                                                efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
 911                                                xdp_queue_number++;
 912                                        }
 913                                }
 914                        } else {
 915                                efx_for_each_channel_tx_queue(tx_queue, channel) {
 916                                        tx_queue->queue = next_queue++;
 917                                        netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is HW %u\n",
 918                                                  channel->channel, tx_queue->label,
 919                                                  tx_queue->queue);
 920                                }
 921                        }
 922                }
 923        }
 924        WARN_ON(xdp_queue_number != efx->xdp_tx_queue_count);
 925
 926        rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
 927        if (rc)
 928                return rc;
 929        return netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
 930}
 931
 932bool efx_default_channel_want_txqs(struct efx_channel *channel)
 933{
 934        return channel->channel - channel->efx->tx_channel_offset <
 935                channel->efx->n_tx_channels;
 936}
 937
 938/*************
 939 * START/STOP
 940 *************/
 941
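     /* Enable event processing on every channel: initialise each event queue
      * that is not kept across resets, start NAPI/event processing, and switch
      * MCDI to event completion mode.  On failure, tear down the channels
      * enabled so far.
      */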
 942int efx_soft_enable_interrupts(struct efx_nic *efx)
 943{
 944        struct efx_channel *channel, *end_channel;
 945        int rc;
 946
 947        BUG_ON(efx->state == STATE_DISABLED);
 948
 949        efx->irq_soft_enabled = true;
 950        smp_wmb();
 951
 952        efx_for_each_channel(channel, efx) {
 953                if (!channel->type->keep_eventq) {
 954                        rc = efx_init_eventq(channel);
 955                        if (rc)
 956                                goto fail;
 957                }
 958                efx_start_eventq(channel);
 959        }
 960
 961        efx_mcdi_mode_event(efx);
 962
 963        return 0;
 964fail:
 965        end_channel = channel;
 966        efx_for_each_channel(channel, efx) {
 967                if (channel == end_channel)
 968                        break;
 969                efx_stop_eventq(channel);
 970                if (!channel->type->keep_eventq)
 971                        efx_fini_eventq(channel);
 972        }
 973
 974        return rc;
 975}
 976
 977void efx_soft_disable_interrupts(struct efx_nic *efx)
 978{
 979        struct efx_channel *channel;
 980
 981        if (efx->state == STATE_DISABLED)
 982                return;
 983
 984        efx_mcdi_mode_poll(efx);
 985
 986        efx->irq_soft_enabled = false;
 987        smp_wmb();
 988
 989        if (efx->legacy_irq)
 990                synchronize_irq(efx->legacy_irq);
 991
 992        efx_for_each_channel(channel, efx) {
 993                if (channel->irq)
 994                        synchronize_irq(channel->irq);
 995
 996                efx_stop_eventq(channel);
 997                if (!channel->type->keep_eventq)
 998                        efx_fini_eventq(channel);
 999        }
1000
1001        /* Flush the asynchronous MCDI request queue */
1002        efx_mcdi_flush_async(efx);
1003}
1004
1005int efx_enable_interrupts(struct efx_nic *efx)
1006{
1007        struct efx_channel *channel, *end_channel;
1008        int rc;
1009
1010        /* TODO: Is this really a bug? */
1011        BUG_ON(efx->state == STATE_DISABLED);
1012
1013        if (efx->eeh_disabled_legacy_irq) {
1014                enable_irq(efx->legacy_irq);
1015                efx->eeh_disabled_legacy_irq = false;
1016        }
1017
1018        efx->type->irq_enable_master(efx);
1019
1020        efx_for_each_channel(channel, efx) {
1021                if (channel->type->keep_eventq) {
1022                        rc = efx_init_eventq(channel);
1023                        if (rc)
1024                                goto fail;
1025                }
1026        }
1027
1028        rc = efx_soft_enable_interrupts(efx);
1029        if (rc)
1030                goto fail;
1031
1032        return 0;
1033
1034fail:
1035        end_channel = channel;
1036        efx_for_each_channel(channel, efx) {
1037                if (channel == end_channel)
1038                        break;
1039                if (channel->type->keep_eventq)
1040                        efx_fini_eventq(channel);
1041        }
1042
1043        efx->type->irq_disable_non_ev(efx);
1044
1045        return rc;
1046}
1047
1048void efx_disable_interrupts(struct efx_nic *efx)
1049{
1050        struct efx_channel *channel;
1051
1052        efx_soft_disable_interrupts(efx);
1053
1054        efx_for_each_channel(channel, efx) {
1055                if (channel->type->keep_eventq)
1056                        efx_fini_eventq(channel);
1057        }
1058
1059        efx->type->irq_disable_non_ev(efx);
1060}
1061
1062void efx_start_channels(struct efx_nic *efx)
1063{
1064        struct efx_tx_queue *tx_queue;
1065        struct efx_rx_queue *rx_queue;
1066        struct efx_channel *channel;
1067
1068        efx_for_each_channel(channel, efx) {
1069                efx_for_each_channel_tx_queue(tx_queue, channel) {
1070                        efx_init_tx_queue(tx_queue);
1071                        atomic_inc(&efx->active_queues);
1072                }
1073
1074                efx_for_each_channel_rx_queue(rx_queue, channel) {
1075                        efx_init_rx_queue(rx_queue);
1076                        atomic_inc(&efx->active_queues);
1077                        efx_stop_eventq(channel);
1078                        efx_fast_push_rx_descriptors(rx_queue, false);
1079                        efx_start_eventq(channel);
1080                }
1081
1082                WARN_ON(channel->rx_pkt_n_frags);
1083        }
1084}
1085
1086void efx_stop_channels(struct efx_nic *efx)
1087{
1088        struct efx_tx_queue *tx_queue;
1089        struct efx_rx_queue *rx_queue;
1090        struct efx_channel *channel;
1091        int rc = 0;
1092
1093        /* Stop RX refill */
1094        efx_for_each_channel(channel, efx) {
1095                efx_for_each_channel_rx_queue(rx_queue, channel)
1096                        rx_queue->refill_enabled = false;
1097        }
1098
1099        efx_for_each_channel(channel, efx) {
1100                /* RX packet processing is pipelined, so wait for the
1101                 * NAPI handler to complete.  At least event queue 0
1102                 * might be kept active by non-data events, so don't
1103                 * use napi_synchronize() but actually disable NAPI
1104                 * temporarily.
1105                 */
1106                if (efx_channel_has_rx_queue(channel)) {
1107                        efx_stop_eventq(channel);
1108                        efx_start_eventq(channel);
1109                }
1110        }
1111
1112        if (efx->type->fini_dmaq)
1113                rc = efx->type->fini_dmaq(efx);
1114
1115        if (rc) {
1116                netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
1117        } else {
1118                netif_dbg(efx, drv, efx->net_dev,
1119                          "successfully flushed all queues\n");
1120        }
1121
1122        efx_for_each_channel(channel, efx) {
1123                efx_for_each_channel_rx_queue(rx_queue, channel)
1124                        efx_fini_rx_queue(rx_queue);
1125                efx_for_each_channel_tx_queue(tx_queue, channel)
1126                        efx_fini_tx_queue(tx_queue);
1127        }
1128}
1129
1130/**************************************************************************
1131 *
1132 * NAPI interface
1133 *
1134 *************************************************************************/
1135
1136/* Process channel's event queue
1137 *
1138 * This function is responsible for processing the event queue of a
1139 * single channel.  The caller must guarantee that this function will
1140 * never be concurrently called more than once on the same channel,
1141 * though different channels may be being processed concurrently.
1142 */
1143static int efx_process_channel(struct efx_channel *channel, int budget)
1144{
1145        struct efx_tx_queue *tx_queue;
1146        struct list_head rx_list;
1147        int spent;
1148
1149        if (unlikely(!channel->enabled))
1150                return 0;
1151
1152        /* Prepare the batch receive list */
1153        EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
1154        INIT_LIST_HEAD(&rx_list);
1155        channel->rx_list = &rx_list;
1156
1157        efx_for_each_channel_tx_queue(tx_queue, channel) {
1158                tx_queue->pkts_compl = 0;
1159                tx_queue->bytes_compl = 0;
1160        }
1161
1162        spent = efx_nic_process_eventq(channel, budget);
1163        if (spent && efx_channel_has_rx_queue(channel)) {
1164                struct efx_rx_queue *rx_queue =
1165                        efx_channel_get_rx_queue(channel);
1166
1167                efx_rx_flush_packet(channel);
1168                efx_fast_push_rx_descriptors(rx_queue, true);
1169        }
1170
1171        /* Update BQL */
1172        efx_for_each_channel_tx_queue(tx_queue, channel) {
1173                if (tx_queue->bytes_compl) {
1174                        netdev_tx_completed_queue(tx_queue->core_txq,
1175                                                  tx_queue->pkts_compl,
1176                                                  tx_queue->bytes_compl);
1177                }
1178        }
1179
1180        /* Receive any packets we queued up */
1181        netif_receive_skb_list(channel->rx_list);
1182        channel->rx_list = NULL;
1183
1184        return spent;
1185}
1186
1187static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
1188{
1189        int step = efx->irq_mod_step_us;
1190
1191        if (channel->irq_mod_score < irq_adapt_low_thresh) {
1192                if (channel->irq_moderation_us > step) {
1193                        channel->irq_moderation_us -= step;
1194                        efx->type->push_irq_moderation(channel);
1195                }
1196        } else if (channel->irq_mod_score > irq_adapt_high_thresh) {
1197                if (channel->irq_moderation_us <
1198                    efx->irq_rx_moderation_us) {
1199                        channel->irq_moderation_us += step;
1200                        efx->type->push_irq_moderation(channel);
1201                }
1202        }
1203
1204        channel->irq_count = 0;
1205        channel->irq_mod_score = 0;
1206}
1207
1208/* NAPI poll handler
1209 *
1210 * NAPI guarantees serialisation of polls of the same device, which
1211 * provides the guarantee required by efx_process_channel().
1212 */
1213static int efx_poll(struct napi_struct *napi, int budget)
1214{
1215        struct efx_channel *channel =
1216                container_of(napi, struct efx_channel, napi_str);
1217        struct efx_nic *efx = channel->efx;
1218#ifdef CONFIG_RFS_ACCEL
1219        unsigned int time;
1220#endif
1221        int spent;
1222
1223        netif_vdbg(efx, intr, efx->net_dev,
1224                   "channel %d NAPI poll executing on CPU %d\n",
1225                   channel->channel, raw_smp_processor_id());
1226
1227        spent = efx_process_channel(channel, budget);
1228
1229        xdp_do_flush_map();
1230
1231        if (spent < budget) {
1232                if (efx_channel_has_rx_queue(channel) &&
1233                    efx->irq_rx_adaptive &&
1234                    unlikely(++channel->irq_count == 1000)) {
1235                        efx_update_irq_mod(efx, channel);
1236                }
1237
1238#ifdef CONFIG_RFS_ACCEL
1239                /* Perhaps expire some ARFS filters */
1240                time = jiffies - channel->rfs_last_expiry;
1241                /* Would our quota be >= 20? */
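                     /* 600 * HZ == 20 * (30 * HZ), i.e. the same "quota >= 20"
                      * threshold that efx_filter_rfs_expire() applies.
                      */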
1242                if (channel->rfs_filter_count * time >= 600 * HZ)
1243                        mod_delayed_work(system_wq, &channel->filter_work, 0);
1244#endif
1245
1246                /* There is no race here; although napi_disable() will
1247                 * only wait for napi_complete(), this isn't a problem
1248                 * since efx_nic_eventq_read_ack() will have no effect if
1249                 * interrupts have already been disabled.
1250                 */
1251                if (napi_complete_done(napi, spent))
1252                        efx_nic_eventq_read_ack(channel);
1253        }
1254
1255        return spent;
1256}
1257
1258void efx_init_napi_channel(struct efx_channel *channel)
1259{
1260        struct efx_nic *efx = channel->efx;
1261
1262        channel->napi_dev = efx->net_dev;
1263        netif_napi_add(channel->napi_dev, &channel->napi_str,
1264                       efx_poll, napi_weight);
1265}
1266
1267void efx_init_napi(struct efx_nic *efx)
1268{
1269        struct efx_channel *channel;
1270
1271        efx_for_each_channel(channel, efx)
1272                efx_init_napi_channel(channel);
1273}
1274
1275void efx_fini_napi_channel(struct efx_channel *channel)
1276{
1277        if (channel->napi_dev)
1278                netif_napi_del(&channel->napi_str);
1279
1280        channel->napi_dev = NULL;
1281}
1282
1283void efx_fini_napi(struct efx_nic *efx)
1284{
1285        struct efx_channel *channel;
1286
1287        efx_for_each_channel(channel, efx)
1288                efx_fini_napi_channel(channel);
1289}
1290