linux/drivers/net/ethernet/sfc/efx_channels.c
// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2018 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include "net_driver.h"
#include <linux/module.h>
#include "efx_channels.h"
#include "efx.h"
#include "efx_common.h"
#include "tx_common.h"
#include "rx_common.h"
#include "nic.h"
#include "sriov.h"

/* This is the first interrupt mode to try out of:
 * 0 => MSI-X
 * 1 => MSI
 * 2 => legacy
 */
unsigned int efx_interrupt_mode = EFX_INT_MODE_MSIX;

/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
 * i.e. the number of CPUs among which we may distribute simultaneous
 * interrupt handling.
 *
 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
 * The default (0) means to assign an interrupt to each core.
 */
unsigned int rss_cpus;

static unsigned int irq_adapt_low_thresh = 8000;
module_param(irq_adapt_low_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_low_thresh,
                 "Threshold score for reducing IRQ moderation");

static unsigned int irq_adapt_high_thresh = 16000;
module_param(irq_adapt_high_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_high_thresh,
                 "Threshold score for increasing IRQ moderation");

/* This is the weight assigned to each of the (per-channel) virtual
 * NAPI devices.
 */
static int napi_weight = 64;

/***************
 * Housekeeping
 ***************/

int efx_channel_dummy_op_int(struct efx_channel *channel)
{
        return 0;
}

void efx_channel_dummy_op_void(struct efx_channel *channel)
{
}

static const struct efx_channel_type efx_default_channel_type = {
        .pre_probe              = efx_channel_dummy_op_int,
        .post_remove            = efx_channel_dummy_op_void,
        .get_name               = efx_get_channel_name,
        .copy                   = efx_copy_channel,
        .want_txqs              = efx_default_channel_want_txqs,
        .keep_eventq            = false,
        .want_pio               = true,
};

/*************
 * INTERRUPTS
 *************/

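/* Count the number of RX channels we would like: one per physical core
 * (hyperthread siblings share a channel) unless overridden by rss_cpus,
 * capped at EFX_MAX_RX_QUEUES and, with SR-IOV, at the VF RSS table size.
 */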
static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
{
        cpumask_var_t thread_mask;
        unsigned int count;
        int cpu;

        if (rss_cpus) {
                count = rss_cpus;
        } else {
                if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
                        netif_warn(efx, probe, efx->net_dev,
                                   "RSS disabled due to allocation failure\n");
                        return 1;
                }

                count = 0;
                for_each_online_cpu(cpu) {
                        if (!cpumask_test_cpu(cpu, thread_mask)) {
                                ++count;
                                cpumask_or(thread_mask, thread_mask,
                                           topology_sibling_cpumask(cpu));
                        }
                }

                free_cpumask_var(thread_mask);
        }

        if (count > EFX_MAX_RX_QUEUES) {
                netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
                               "Reducing number of rx queues from %u to %u.\n",
                               count, EFX_MAX_RX_QUEUES);
                count = EFX_MAX_RX_QUEUES;
        }

        /* If RSS is requested for the PF *and* VFs then we can't write RSS
         * table entries that are inaccessible to VFs
         */
#ifdef CONFIG_SFC_SRIOV
        if (efx->type->sriov_wanted) {
                if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
                    count > efx_vf_size(efx)) {
                        netif_warn(efx, probe, efx->net_dev,
                                   "Reducing number of RSS channels from %u to %u for "
                                   "VF support. Increase vf-msix-limit to use more "
                                   "channels on the PF.\n",
                                   count, efx_vf_size(efx));
                        count = efx_vf_size(efx);
                }
        }
#endif

        return count;
}

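/* Work out the channel layout for MSI-X: traffic channels (optionally with
 * separate TX channels), extra channels, and event/TX queues reserved for
 * XDP transmit.  Returns the total number of channels to request, or a
 * negative error code.
 */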
static int efx_allocate_msix_channels(struct efx_nic *efx,
                                      unsigned int max_channels,
                                      unsigned int extra_channels,
                                      unsigned int parallelism)
{
        unsigned int n_channels = parallelism;
        int vec_count;
        int n_xdp_tx;
        int n_xdp_ev;

        if (efx_separate_tx_channels)
                n_channels *= 2;
        n_channels += extra_channels;

        /* To allow XDP transmit to happen from arbitrary NAPI contexts
         * we allocate a TX queue per CPU. We share event queues across
         * multiple tx queues, assuming tx and ev queues are both
         * maximum size.
         */

        n_xdp_tx = num_possible_cpus();
        n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_MAX_TXQ_PER_CHANNEL);

        vec_count = pci_msix_vec_count(efx->pci_dev);
        if (vec_count < 0)
                return vec_count;

        max_channels = min_t(unsigned int, vec_count, max_channels);

        /* Check resources.
         * We need a channel per event queue, plus a VI per tx queue.
         * This may be more pessimistic than it needs to be.
         */
        if (n_channels + n_xdp_ev > max_channels) {
                netif_err(efx, drv, efx->net_dev,
                          "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
                          n_xdp_ev, n_channels, max_channels);
                efx->n_xdp_channels = 0;
                efx->xdp_tx_per_channel = 0;
                efx->xdp_tx_queue_count = 0;
        } else if (n_channels + n_xdp_tx > efx->max_vis) {
                netif_err(efx, drv, efx->net_dev,
                          "Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n",
                          n_xdp_tx, n_channels, efx->max_vis);
                efx->n_xdp_channels = 0;
                efx->xdp_tx_per_channel = 0;
                efx->xdp_tx_queue_count = 0;
        } else {
                efx->n_xdp_channels = n_xdp_ev;
                efx->xdp_tx_per_channel = EFX_MAX_TXQ_PER_CHANNEL;
                efx->xdp_tx_queue_count = n_xdp_tx;
                n_channels += n_xdp_ev;
                netif_dbg(efx, drv, efx->net_dev,
                          "Allocating %d TX and %d event queues for XDP\n",
                          n_xdp_tx, n_xdp_ev);
        }

        if (vec_count < n_channels) {
                netif_err(efx, drv, efx->net_dev,
                          "WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
                          vec_count, n_channels);
                netif_err(efx, drv, efx->net_dev,
                          "WARNING: Performance may be reduced.\n");
                n_channels = vec_count;
        }

        n_channels = min(n_channels, max_channels);

        efx->n_channels = n_channels;

        /* Ignore XDP tx channels when creating rx channels. */
        n_channels -= efx->n_xdp_channels;

        if (efx_separate_tx_channels) {
                efx->n_tx_channels =
                        min(max(n_channels / 2, 1U),
                            efx->max_tx_channels);
                efx->tx_channel_offset =
                        n_channels - efx->n_tx_channels;
                efx->n_rx_channels =
                        max(n_channels -
                            efx->n_tx_channels, 1U);
        } else {
                efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
                efx->tx_channel_offset = 0;
                efx->n_rx_channels = n_channels;
        }

        efx->n_rx_channels = min(efx->n_rx_channels, parallelism);
        efx->n_tx_channels = min(efx->n_tx_channels, parallelism);

        efx->xdp_channel_offset = n_channels;

        netif_dbg(efx, drv, efx->net_dev,
                  "Allocating %u RX channels\n",
                  efx->n_rx_channels);

        return efx->n_channels;
}

/* Probe the number and type of interrupts we are able to obtain, and
 * the resulting numbers of channels and RX queues.
 */
int efx_probe_interrupts(struct efx_nic *efx)
{
        unsigned int extra_channels = 0;
        unsigned int rss_spread;
        unsigned int i, j;
        int rc;

        for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
                if (efx->extra_channel_type[i])
                        ++extra_channels;

        if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
                unsigned int parallelism = efx_wanted_parallelism(efx);
                struct msix_entry xentries[EFX_MAX_CHANNELS];
                unsigned int n_channels;

                rc = efx_allocate_msix_channels(efx, efx->max_channels,
                                                extra_channels, parallelism);
                if (rc >= 0) {
                        n_channels = rc;
                        for (i = 0; i < n_channels; i++)
                                xentries[i].entry = i;
                        rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
                                                   n_channels);
                }
                if (rc < 0) {
                        /* Fall back to single channel MSI */
                        netif_err(efx, drv, efx->net_dev,
                                  "could not enable MSI-X\n");
                        if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
                                efx->interrupt_mode = EFX_INT_MODE_MSI;
                        else
                                return rc;
                } else if (rc < n_channels) {
                        netif_err(efx, drv, efx->net_dev,
                                  "WARNING: Insufficient MSI-X vectors"
                                  " available (%d < %u).\n", rc, n_channels);
                        netif_err(efx, drv, efx->net_dev,
                                  "WARNING: Performance may be reduced.\n");
                        n_channels = rc;
                }

                if (rc > 0) {
                        for (i = 0; i < efx->n_channels; i++)
                                efx_get_channel(efx, i)->irq =
                                        xentries[i].vector;
                }
        }

        /* Try single interrupt MSI */
        if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
                efx->n_channels = 1;
                efx->n_rx_channels = 1;
                efx->n_tx_channels = 1;
                efx->n_xdp_channels = 0;
                efx->xdp_channel_offset = efx->n_channels;
                rc = pci_enable_msi(efx->pci_dev);
                if (rc == 0) {
                        efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
                } else {
                        netif_err(efx, drv, efx->net_dev,
                                  "could not enable MSI\n");
                        if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
                                efx->interrupt_mode = EFX_INT_MODE_LEGACY;
                        else
                                return rc;
                }
        }

        /* Assume legacy interrupts */
        if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
                efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
                efx->n_rx_channels = 1;
                efx->n_tx_channels = 1;
                efx->n_xdp_channels = 0;
                efx->xdp_channel_offset = efx->n_channels;
                efx->legacy_irq = efx->pci_dev->irq;
        }

        /* Assign extra channels if possible, before XDP channels */
        efx->n_extra_tx_channels = 0;
        j = efx->xdp_channel_offset;
        for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
                if (!efx->extra_channel_type[i])
                        continue;
                if (j <= efx->tx_channel_offset + efx->n_tx_channels) {
                        efx->extra_channel_type[i]->handle_no_channel(efx);
                } else {
                        --j;
                        efx_get_channel(efx, j)->type =
                                efx->extra_channel_type[i];
                        if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
                                efx->n_extra_tx_channels++;
                }
        }

        rss_spread = efx->n_rx_channels;
        /* RSS might be usable on VFs even if it is disabled on the PF */
#ifdef CONFIG_SFC_SRIOV
        if (efx->type->sriov_wanted) {
                efx->rss_spread = ((rss_spread > 1 ||
                                    !efx->type->sriov_wanted(efx)) ?
                                   rss_spread : efx_vf_size(efx));
                return 0;
        }
#endif
        efx->rss_spread = rss_spread;

        return 0;
}

#if defined(CONFIG_SMP)
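/* Spread channel interrupts over the CPUs local to the NIC's NUMA node,
 * one CPU hint per channel.
 */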
void efx_set_interrupt_affinity(struct efx_nic *efx)
{
        struct efx_channel *channel;
        unsigned int cpu;

        efx_for_each_channel(channel, efx) {
                cpu = cpumask_local_spread(channel->channel,
                                           pcibus_to_node(efx->pci_dev->bus));
                irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
        }
}

void efx_clear_interrupt_affinity(struct efx_nic *efx)
{
        struct efx_channel *channel;

        efx_for_each_channel(channel, efx)
                irq_set_affinity_hint(channel->irq, NULL);
}
#else
void
efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
{
}

void
efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
{
}
#endif /* CONFIG_SMP */

void efx_remove_interrupts(struct efx_nic *efx)
{
        struct efx_channel *channel;

        /* Remove MSI/MSI-X interrupts */
        efx_for_each_channel(channel, efx)
                channel->irq = 0;
        pci_disable_msi(efx->pci_dev);
        pci_disable_msix(efx->pci_dev);

        /* Remove legacy interrupt */
        efx->legacy_irq = 0;
}

/***************
 * EVENT QUEUES
 ***************/

/* Create event queue
 * Event queue memory allocations are done only once.  If the channel
 * is reset, the memory buffer will be reused; this guards against
 * errors during channel reset and also simplifies interrupt handling.
 */
int efx_probe_eventq(struct efx_channel *channel)
{
        struct efx_nic *efx = channel->efx;
        unsigned long entries;

        netif_dbg(efx, probe, efx->net_dev,
                  "chan %d create event queue\n", channel->channel);

        /* Build an event queue with room for one event per tx and rx buffer,
         * plus some extra for link state events and MCDI completions.
         */
        entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
        EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
        channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;

        return efx_nic_probe_eventq(channel);
}

/* Prepare channel's event queue */
int efx_init_eventq(struct efx_channel *channel)
{
        struct efx_nic *efx = channel->efx;
        int rc;

        EFX_WARN_ON_PARANOID(channel->eventq_init);

        netif_dbg(efx, drv, efx->net_dev,
                  "chan %d init event queue\n", channel->channel);

        rc = efx_nic_init_eventq(channel);
        if (rc == 0) {
                efx->type->push_irq_moderation(channel);
                channel->eventq_read_ptr = 0;
                channel->eventq_init = true;
        }
        return rc;
}

/* Enable event queue processing and NAPI */
void efx_start_eventq(struct efx_channel *channel)
{
        netif_dbg(channel->efx, ifup, channel->efx->net_dev,
                  "chan %d start event queue\n", channel->channel);

        /* Make sure the NAPI handler sees the enabled flag set */
        channel->enabled = true;
        smp_wmb();

        napi_enable(&channel->napi_str);
        efx_nic_eventq_read_ack(channel);
}

/* Disable event queue processing and NAPI */
void efx_stop_eventq(struct efx_channel *channel)
{
        if (!channel->enabled)
                return;

        napi_disable(&channel->napi_str);
        channel->enabled = false;
}

void efx_fini_eventq(struct efx_channel *channel)
{
        if (!channel->eventq_init)
                return;

        netif_dbg(channel->efx, drv, channel->efx->net_dev,
                  "chan %d fini event queue\n", channel->channel);

        efx_nic_fini_eventq(channel);
        channel->eventq_init = false;
}

void efx_remove_eventq(struct efx_channel *channel)
{
        netif_dbg(channel->efx, drv, channel->efx->net_dev,
                  "chan %d remove event queue\n", channel->channel);

        efx_nic_remove_eventq(channel);
}

/**************************************************************************
 *
 * Channel handling
 *
 *************************************************************************/

#ifdef CONFIG_RFS_ACCEL
static void efx_filter_rfs_expire(struct work_struct *data)
{
        struct delayed_work *dwork = to_delayed_work(data);
        struct efx_channel *channel;
        unsigned int time, quota;

        channel = container_of(dwork, struct efx_channel, filter_work);
        time = jiffies - channel->rfs_last_expiry;
        quota = channel->rfs_filter_count * time / (30 * HZ);
        if (quota >= 20 && __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, quota)))
                channel->rfs_last_expiry += time;
        /* Ensure we do more work eventually even if NAPI poll is not happening */
        schedule_delayed_work(dwork, 30 * HZ);
}
#endif

/* Allocate and initialise a channel structure. */
static struct efx_channel *efx_alloc_channel(struct efx_nic *efx, int i)
{
        struct efx_rx_queue *rx_queue;
        struct efx_tx_queue *tx_queue;
        struct efx_channel *channel;
        int j;

        channel = kzalloc(sizeof(*channel), GFP_KERNEL);
        if (!channel)
                return NULL;

        channel->efx = efx;
        channel->channel = i;
        channel->type = &efx_default_channel_type;

        for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
                tx_queue = &channel->tx_queue[j];
                tx_queue->efx = efx;
                tx_queue->queue = -1;
                tx_queue->label = j;
                tx_queue->channel = channel;
        }

#ifdef CONFIG_RFS_ACCEL
        INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
#endif

        rx_queue = &channel->rx_queue;
        rx_queue->efx = efx;
        timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);

        return channel;
}

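/* Allocate every channel structure up front and choose the initial
 * interrupt mode, limited to what the NIC type supports.
 */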
int efx_init_channels(struct efx_nic *efx)
{
        unsigned int i;

        for (i = 0; i < EFX_MAX_CHANNELS; i++) {
                efx->channel[i] = efx_alloc_channel(efx, i);
                if (!efx->channel[i])
                        return -ENOMEM;
                efx->msi_context[i].efx = efx;
                efx->msi_context[i].index = i;
        }

        /* Higher numbered interrupt modes are less capable! */
        efx->interrupt_mode = min(efx->type->min_interrupt_mode,
                                  efx_interrupt_mode);

        efx->max_channels = EFX_MAX_CHANNELS;
        efx->max_tx_channels = EFX_MAX_CHANNELS;

        return 0;
}

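/* Free the channel structures allocated by efx_init_channels() */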
void efx_fini_channels(struct efx_nic *efx)
{
        unsigned int i;

        for (i = 0; i < EFX_MAX_CHANNELS; i++)
                if (efx->channel[i]) {
                        kfree(efx->channel[i]);
                        efx->channel[i] = NULL;
                }
}

/* Allocate and initialise a channel structure, copying parameters
 * (but not resources) from an old channel structure.
 */
struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
{
        struct efx_rx_queue *rx_queue;
        struct efx_tx_queue *tx_queue;
        struct efx_channel *channel;
        int j;

        channel = kmalloc(sizeof(*channel), GFP_KERNEL);
        if (!channel)
                return NULL;

        *channel = *old_channel;

        channel->napi_dev = NULL;
        INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
        channel->napi_str.napi_id = 0;
        channel->napi_str.state = 0;
        memset(&channel->eventq, 0, sizeof(channel->eventq));

        for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
                tx_queue = &channel->tx_queue[j];
                if (tx_queue->channel)
                        tx_queue->channel = channel;
                tx_queue->buffer = NULL;
                tx_queue->cb_page = NULL;
                memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
        }

        rx_queue = &channel->rx_queue;
        rx_queue->buffer = NULL;
        memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
        timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
#ifdef CONFIG_RFS_ACCEL
        INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
#endif

        return channel;
}

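/* Allocate a channel's resources: its event queue plus any TX and RX
 * queues it owns.  On failure everything already probed is removed.
 */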
static int efx_probe_channel(struct efx_channel *channel)
{
        struct efx_tx_queue *tx_queue;
        struct efx_rx_queue *rx_queue;
        int rc;

        netif_dbg(channel->efx, probe, channel->efx->net_dev,
                  "creating channel %d\n", channel->channel);

        rc = channel->type->pre_probe(channel);
        if (rc)
                goto fail;

        rc = efx_probe_eventq(channel);
        if (rc)
                goto fail;

        efx_for_each_channel_tx_queue(tx_queue, channel) {
                rc = efx_probe_tx_queue(tx_queue);
                if (rc)
                        goto fail;
        }

        efx_for_each_channel_rx_queue(rx_queue, channel) {
                rc = efx_probe_rx_queue(rx_queue);
                if (rc)
                        goto fail;
        }

        channel->rx_list = NULL;

        return 0;

fail:
        efx_remove_channel(channel);
        return rc;
}

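/* Compose the IRQ name for a channel: "<ifname>[-rx|-tx|-xdp]-<index>",
 * with the index relative to the first channel of that type.
 */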
void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
{
        struct efx_nic *efx = channel->efx;
        const char *type;
        int number;

        number = channel->channel;

        if (number >= efx->xdp_channel_offset &&
            !WARN_ON_ONCE(!efx->n_xdp_channels)) {
                type = "-xdp";
                number -= efx->xdp_channel_offset;
        } else if (efx->tx_channel_offset == 0) {
                type = "";
        } else if (number < efx->tx_channel_offset) {
                type = "-rx";
        } else {
                type = "-tx";
                number -= efx->tx_channel_offset;
        }
        snprintf(buf, len, "%s%s-%d", efx->name, type, number);
}

void efx_set_channel_names(struct efx_nic *efx)
{
        struct efx_channel *channel;

        efx_for_each_channel(channel, efx)
                channel->type->get_name(channel,
                                        efx->msi_context[channel->channel].name,
                                        sizeof(efx->msi_context[0].name));
}

int efx_probe_channels(struct efx_nic *efx)
{
        struct efx_channel *channel;
        int rc;

        /* Restart special buffer allocation */
        efx->next_buffer_table = 0;

        /* Probe channels in reverse, so that any 'extra' channels
         * use the start of the buffer table. This allows the traffic
         * channels to be resized without moving them or wasting the
         * entries before them.
         */
        efx_for_each_channel_rev(channel, efx) {
                rc = efx_probe_channel(channel);
                if (rc) {
                        netif_err(efx, probe, efx->net_dev,
                                  "failed to create channel %d\n",
                                  channel->channel);
                        goto fail;
                }
        }
        efx_set_channel_names(efx);

        return 0;

fail:
        efx_remove_channels(efx);
        return rc;
}

void efx_remove_channel(struct efx_channel *channel)
{
        struct efx_tx_queue *tx_queue;
        struct efx_rx_queue *rx_queue;

        netif_dbg(channel->efx, drv, channel->efx->net_dev,
                  "destroy chan %d\n", channel->channel);

        efx_for_each_channel_rx_queue(rx_queue, channel)
                efx_remove_rx_queue(rx_queue);
        efx_for_each_channel_tx_queue(tx_queue, channel)
                efx_remove_tx_queue(tx_queue);
        efx_remove_eventq(channel);
        channel->type->post_remove(channel);
}

void efx_remove_channels(struct efx_nic *efx)
{
        struct efx_channel *channel;

        efx_for_each_channel(channel, efx)
                efx_remove_channel(channel);

        kfree(efx->xdp_tx_queues);
}

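/* Resize the RX and TX rings.  Copyable channels are cloned with the new
 * ring sizes and swapped in; on probe failure the old channels and ring
 * sizes are restored.
 */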
int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
{
        struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
        unsigned int i, next_buffer_table = 0;
        u32 old_rxq_entries, old_txq_entries;
        int rc, rc2;

        rc = efx_check_disabled(efx);
        if (rc)
                return rc;

        /* Not all channels should be reallocated. We must avoid
         * reallocating their buffer table entries.
         */
        efx_for_each_channel(channel, efx) {
                struct efx_rx_queue *rx_queue;
                struct efx_tx_queue *tx_queue;

                if (channel->type->copy)
                        continue;
                next_buffer_table = max(next_buffer_table,
                                        channel->eventq.index +
                                        channel->eventq.entries);
                efx_for_each_channel_rx_queue(rx_queue, channel)
                        next_buffer_table = max(next_buffer_table,
                                                rx_queue->rxd.index +
                                                rx_queue->rxd.entries);
                efx_for_each_channel_tx_queue(tx_queue, channel)
                        next_buffer_table = max(next_buffer_table,
                                                tx_queue->txd.index +
                                                tx_queue->txd.entries);
        }

        efx_device_detach_sync(efx);
        efx_stop_all(efx);
        efx_soft_disable_interrupts(efx);

        /* Clone channels (where possible) */
        memset(other_channel, 0, sizeof(other_channel));
        for (i = 0; i < efx->n_channels; i++) {
                channel = efx->channel[i];
                if (channel->type->copy)
                        channel = channel->type->copy(channel);
                if (!channel) {
                        rc = -ENOMEM;
                        goto out;
                }
                other_channel[i] = channel;
        }

        /* Swap entry counts and channel pointers */
        old_rxq_entries = efx->rxq_entries;
        old_txq_entries = efx->txq_entries;
        efx->rxq_entries = rxq_entries;
        efx->txq_entries = txq_entries;
        for (i = 0; i < efx->n_channels; i++) {
                channel = efx->channel[i];
                efx->channel[i] = other_channel[i];
                other_channel[i] = channel;
        }

        /* Restart buffer table allocation */
        efx->next_buffer_table = next_buffer_table;

        for (i = 0; i < efx->n_channels; i++) {
                channel = efx->channel[i];
                if (!channel->type->copy)
                        continue;
                rc = efx_probe_channel(channel);
                if (rc)
                        goto rollback;
                efx_init_napi_channel(efx->channel[i]);
        }

out:
        /* Destroy unused channel structures */
        for (i = 0; i < efx->n_channels; i++) {
                channel = other_channel[i];
                if (channel && channel->type->copy) {
                        efx_fini_napi_channel(channel);
                        efx_remove_channel(channel);
                        kfree(channel);
                }
        }

        rc2 = efx_soft_enable_interrupts(efx);
        if (rc2) {
                rc = rc ? rc : rc2;
                netif_err(efx, drv, efx->net_dev,
                          "unable to restart interrupts on channel reallocation\n");
                efx_schedule_reset(efx, RESET_TYPE_DISABLE);
        } else {
                efx_start_all(efx);
                efx_device_attach_if_not_resetting(efx);
        }
        return rc;

rollback:
        /* Swap back */
        efx->rxq_entries = old_rxq_entries;
        efx->txq_entries = old_txq_entries;
        for (i = 0; i < efx->n_channels; i++) {
                channel = efx->channel[i];
                efx->channel[i] = other_channel[i];
                other_channel[i] = channel;
        }
        goto out;
}

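/* Assign hardware TX queue numbers, build the XDP TX queue lookup table
 * and tell the stack how many RX and TX queues are really in use.
 */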
int efx_set_channels(struct efx_nic *efx)
{
        struct efx_tx_queue *tx_queue;
        struct efx_channel *channel;
        unsigned int next_queue = 0;
        int xdp_queue_number;
        int rc;

        efx->tx_channel_offset =
                efx_separate_tx_channels ?
                efx->n_channels - efx->n_tx_channels : 0;

        if (efx->xdp_tx_queue_count) {
                EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);

                /* Allocate array for XDP TX queue lookup. */
                efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
                                             sizeof(*efx->xdp_tx_queues),
                                             GFP_KERNEL);
                if (!efx->xdp_tx_queues)
                        return -ENOMEM;
        }

        /* We need to mark which channels really have RX and TX
         * queues, and adjust the TX queue numbers if we have separate
         * RX-only and TX-only channels.
         */
        xdp_queue_number = 0;
        efx_for_each_channel(channel, efx) {
                if (channel->channel < efx->n_rx_channels)
                        channel->rx_queue.core_index = channel->channel;
                else
                        channel->rx_queue.core_index = -1;

                if (channel->channel >= efx->tx_channel_offset) {
                        if (efx_channel_is_xdp_tx(channel)) {
                                efx_for_each_channel_tx_queue(tx_queue, channel) {
                                        tx_queue->queue = next_queue++;
                                        netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n",
                                                  channel->channel, tx_queue->label,
                                                  xdp_queue_number, tx_queue->queue);
                                        /* We may have a few left-over XDP TX
                                         * queues owing to xdp_tx_queue_count
                                         * not dividing evenly by EFX_MAX_TXQ_PER_CHANNEL.
                                         * We still allocate and probe those
                                         * TXQs, but never use them.
                                         */
                                        if (xdp_queue_number < efx->xdp_tx_queue_count)
                                                efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
                                        xdp_queue_number++;
                                }
                        } else {
                                efx_for_each_channel_tx_queue(tx_queue, channel) {
                                        tx_queue->queue = next_queue++;
                                        netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is HW %u\n",
                                                  channel->channel, tx_queue->label,
                                                  tx_queue->queue);
                                }
                        }
                }
        }

        rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
        if (rc)
                return rc;
        return netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
}

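/* A default channel carries TX queues iff it lies within the TX channel
 * range starting at tx_channel_offset.
 */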
bool efx_default_channel_want_txqs(struct efx_channel *channel)
{
        return channel->channel - channel->efx->tx_channel_offset <
                channel->efx->n_tx_channels;
}

/*************
 * START/STOP
 *************/

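/* (Re)initialise and start event queues and NAPI on every channel, then
 * switch MCDI completions back to event mode.  On failure, channels that
 * were already started are stopped again.
 */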
int efx_soft_enable_interrupts(struct efx_nic *efx)
{
        struct efx_channel *channel, *end_channel;
        int rc;

        BUG_ON(efx->state == STATE_DISABLED);

        efx->irq_soft_enabled = true;
        smp_wmb();

        efx_for_each_channel(channel, efx) {
                if (!channel->type->keep_eventq) {
                        rc = efx_init_eventq(channel);
                        if (rc)
                                goto fail;
                }
                efx_start_eventq(channel);
        }

        efx_mcdi_mode_event(efx);

        return 0;
fail:
        end_channel = channel;
        efx_for_each_channel(channel, efx) {
                if (channel == end_channel)
                        break;
                efx_stop_eventq(channel);
                if (!channel->type->keep_eventq)
                        efx_fini_eventq(channel);
        }

        return rc;
}

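/* Quiesce interrupts and stop event queue processing and NAPI on every
 * channel, switching MCDI back to polled mode first.
 */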
void efx_soft_disable_interrupts(struct efx_nic *efx)
{
        struct efx_channel *channel;

        if (efx->state == STATE_DISABLED)
                return;

        efx_mcdi_mode_poll(efx);

        efx->irq_soft_enabled = false;
        smp_wmb();

        if (efx->legacy_irq)
                synchronize_irq(efx->legacy_irq);

        efx_for_each_channel(channel, efx) {
                if (channel->irq)
                        synchronize_irq(channel->irq);

                efx_stop_eventq(channel);
                if (!channel->type->keep_eventq)
                        efx_fini_eventq(channel);
        }

        /* Flush the asynchronous MCDI request queue */
        efx_mcdi_flush_async(efx);
}

int efx_enable_interrupts(struct efx_nic *efx)
{
        struct efx_channel *channel, *end_channel;
        int rc;

        /* TODO: Is this really a bug? */
        BUG_ON(efx->state == STATE_DISABLED);

        if (efx->eeh_disabled_legacy_irq) {
                enable_irq(efx->legacy_irq);
                efx->eeh_disabled_legacy_irq = false;
        }

        efx->type->irq_enable_master(efx);

        efx_for_each_channel(channel, efx) {
                if (channel->type->keep_eventq) {
                        rc = efx_init_eventq(channel);
                        if (rc)
                                goto fail;
                }
        }

        rc = efx_soft_enable_interrupts(efx);
        if (rc)
                goto fail;

        return 0;

fail:
        end_channel = channel;
        efx_for_each_channel(channel, efx) {
                if (channel == end_channel)
                        break;
                if (channel->type->keep_eventq)
                        efx_fini_eventq(channel);
        }

        efx->type->irq_disable_non_ev(efx);

        return rc;
}

void efx_disable_interrupts(struct efx_nic *efx)
{
        struct efx_channel *channel;

        efx_soft_disable_interrupts(efx);

        efx_for_each_channel(channel, efx) {
                if (channel->type->keep_eventq)
                        efx_fini_eventq(channel);
        }

        efx->type->irq_disable_non_ev(efx);
}

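/* Initialise all TX and RX queues and push the initial RX descriptors.
 * Each event queue is stopped around the descriptor push so the fill
 * does not race with the NAPI handler.
 */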
void efx_start_channels(struct efx_nic *efx)
{
        struct efx_tx_queue *tx_queue;
        struct efx_rx_queue *rx_queue;
        struct efx_channel *channel;

        efx_for_each_channel(channel, efx) {
                efx_for_each_channel_tx_queue(tx_queue, channel) {
                        efx_init_tx_queue(tx_queue);
                        atomic_inc(&efx->active_queues);
                }

                efx_for_each_channel_rx_queue(rx_queue, channel) {
                        efx_init_rx_queue(rx_queue);
                        atomic_inc(&efx->active_queues);
                        efx_stop_eventq(channel);
                        efx_fast_push_rx_descriptors(rx_queue, false);
                        efx_start_eventq(channel);
                }

                WARN_ON(channel->rx_pkt_n_frags);
        }
}

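/* Stop RX refill, let in-flight NAPI processing drain, flush the DMA
 * queues and then tear down all TX and RX rings.
 */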
void efx_stop_channels(struct efx_nic *efx)
{
        struct efx_tx_queue *tx_queue;
        struct efx_rx_queue *rx_queue;
        struct efx_channel *channel;
        int rc = 0;

        /* Stop RX refill */
        efx_for_each_channel(channel, efx) {
                efx_for_each_channel_rx_queue(rx_queue, channel)
                        rx_queue->refill_enabled = false;
        }

        efx_for_each_channel(channel, efx) {
                /* RX packet processing is pipelined, so wait for the
                 * NAPI handler to complete.  At least event queue 0
                 * might be kept active by non-data events, so don't
                 * use napi_synchronize() but actually disable NAPI
                 * temporarily.
                 */
                if (efx_channel_has_rx_queue(channel)) {
                        efx_stop_eventq(channel);
                        efx_start_eventq(channel);
                }
        }

        if (efx->type->fini_dmaq)
                rc = efx->type->fini_dmaq(efx);

        if (rc) {
                netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
        } else {
                netif_dbg(efx, drv, efx->net_dev,
                          "successfully flushed all queues\n");
        }

        efx_for_each_channel(channel, efx) {
                efx_for_each_channel_rx_queue(rx_queue, channel)
                        efx_fini_rx_queue(rx_queue);
                efx_for_each_channel_tx_queue(tx_queue, channel)
                        efx_fini_tx_queue(tx_queue);
        }
}

/**************************************************************************
 *
 * NAPI interface
 *
 *************************************************************************/

/* Process channel's event queue
 *
 * This function is responsible for processing the event queue of a
 * single channel.  The caller must guarantee that this function will
 * never be concurrently called more than once on the same channel,
 * though different channels may be being processed concurrently.
 */
static int efx_process_channel(struct efx_channel *channel, int budget)
{
        struct efx_tx_queue *tx_queue;
        struct list_head rx_list;
        int spent;

        if (unlikely(!channel->enabled))
                return 0;

        /* Prepare the batch receive list */
        EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
        INIT_LIST_HEAD(&rx_list);
        channel->rx_list = &rx_list;

        efx_for_each_channel_tx_queue(tx_queue, channel) {
                tx_queue->pkts_compl = 0;
                tx_queue->bytes_compl = 0;
        }

        spent = efx_nic_process_eventq(channel, budget);
        if (spent && efx_channel_has_rx_queue(channel)) {
                struct efx_rx_queue *rx_queue =
                        efx_channel_get_rx_queue(channel);

                efx_rx_flush_packet(channel);
                efx_fast_push_rx_descriptors(rx_queue, true);
        }

        /* Update BQL */
        efx_for_each_channel_tx_queue(tx_queue, channel) {
                if (tx_queue->bytes_compl) {
                        netdev_tx_completed_queue(tx_queue->core_txq,
                                                  tx_queue->pkts_compl,
                                                  tx_queue->bytes_compl);
                }
        }

        /* Receive any packets we queued up */
        netif_receive_skb_list(channel->rx_list);
        channel->rx_list = NULL;

        return spent;
}

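/* Adaptive IRQ moderation: step the channel's moderation down when the
 * event score is below irq_adapt_low_thresh and up (bounded by
 * irq_rx_moderation_us) when it is above irq_adapt_high_thresh.
 */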
static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
{
        int step = efx->irq_mod_step_us;

        if (channel->irq_mod_score < irq_adapt_low_thresh) {
                if (channel->irq_moderation_us > step) {
                        channel->irq_moderation_us -= step;
                        efx->type->push_irq_moderation(channel);
                }
        } else if (channel->irq_mod_score > irq_adapt_high_thresh) {
                if (channel->irq_moderation_us <
                    efx->irq_rx_moderation_us) {
                        channel->irq_moderation_us += step;
                        efx->type->push_irq_moderation(channel);
                }
        }

        channel->irq_count = 0;
        channel->irq_mod_score = 0;
}

/* NAPI poll handler
 *
 * NAPI guarantees serialisation of polls of the same device, which
 * provides the guarantee required by efx_process_channel().
 */
static int efx_poll(struct napi_struct *napi, int budget)
{
        struct efx_channel *channel =
                container_of(napi, struct efx_channel, napi_str);
        struct efx_nic *efx = channel->efx;
#ifdef CONFIG_RFS_ACCEL
        unsigned int time;
#endif
        int spent;

        netif_vdbg(efx, intr, efx->net_dev,
                   "channel %d NAPI poll executing on CPU %d\n",
                   channel->channel, raw_smp_processor_id());

        spent = efx_process_channel(channel, budget);

        xdp_do_flush_map();

        if (spent < budget) {
                if (efx_channel_has_rx_queue(channel) &&
                    efx->irq_rx_adaptive &&
                    unlikely(++channel->irq_count == 1000)) {
                        efx_update_irq_mod(efx, channel);
                }

#ifdef CONFIG_RFS_ACCEL
                /* Perhaps expire some ARFS filters */
                time = jiffies - channel->rfs_last_expiry;
                /* Would our quota be >= 20? */
                if (channel->rfs_filter_count * time >= 600 * HZ)
                        mod_delayed_work(system_wq, &channel->filter_work, 0);
#endif

                /* There is no race here; although napi_disable() will
                 * only wait for napi_complete(), this isn't a problem
                 * since efx_nic_eventq_read_ack() will have no effect if
                 * interrupts have already been disabled.
                 */
                if (napi_complete_done(napi, spent))
                        efx_nic_eventq_read_ack(channel);
        }

        return spent;
}

void efx_init_napi_channel(struct efx_channel *channel)
{
        struct efx_nic *efx = channel->efx;

        channel->napi_dev = efx->net_dev;
        netif_napi_add(channel->napi_dev, &channel->napi_str,
                       efx_poll, napi_weight);
}

void efx_init_napi(struct efx_nic *efx)
{
        struct efx_channel *channel;

        efx_for_each_channel(channel, efx)
                efx_init_napi_channel(channel);
}

void efx_fini_napi_channel(struct efx_channel *channel)
{
        if (channel->napi_dev)
                netif_napi_del(&channel->napi_str);

        channel->napi_dev = NULL;
}

void efx_fini_napi(struct efx_nic *efx)
{
        struct efx_channel *channel;

        efx_for_each_channel(channel, efx)
                efx_fini_napi_channel(channel);
}